/*
* Copyright (C) 2009 by TunedIT
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package rsctc2010;
import java.io.IOException;
import java.util.zip.ZipEntry;
import java.util.zip.ZipException;
import java.util.zip.ZipInputStream;
import org.debellor.base.evaluator.TrainAndTest;
import org.debellor.core.Cell;
import org.debellor.core.exception.cell.CellException;
import org.debellor.weka.ArffReader;
import org.tunedit.core.EvaluationProcedure;
import org.tunedit.core.ResourceLoader;
import org.tunedit.core.ResourceName;
import org.tunedit.core.StandardLoader;
import org.tunedit.core.exception.AlgorithmErrorException;
import org.tunedit.core.exception.EvaluationSetupException;
import org.tunedit.core.exception.TunedTesterException;
/**
 * Evaluation procedure that scores an algorithm on every entry of a ZIP-packed
 * dataset resource and reports the mean score.
 *
 * <p>Each ZIP entry is read through an {@link ArffReader} (with "decisionAttr" set
 * to "last") and evaluated by a {@link TrainAndTest} cell configured with
 * "trainPercent" = 50, "repetitions" = 10 and a {@code BalancedAccuracy} score.
 *
 * @author Marcin Wojnarski
 */
public class EvalCode extends EvaluationProcedure {

    /**
     * Evaluates {@code algorithm} on each entry of the ZIP file named by {@code dataset}.
     *
     * @param algorithm resource name of the cell to be loaded and evaluated
     * @param dataset   resource name of the ZIP file holding the datasets; must be a file resource
     * @param loader    loader used both to load the algorithm and to open the dataset stream
     * @return a one-element array holding the arithmetic mean, over all ZIP entries,
     *         of the first result value reported by the evaluator
     * @throws EvaluationSetupException if {@code dataset} is not a file resource, is not a
     *         correct ZIP file, or contains no entries
     * @throws AlgorithmErrorException  if a {@link CellException} is raised during evaluation
     * @throws TunedTesterException     if an I/O error occurs while reading the dataset
     * @see org.tunedit.core.EvaluationProcedure#run(org.tunedit.core.ResourceName, org.tunedit.core.ResourceName, org.tunedit.core.ResourceLoader)
     */
    @Override
    public Double[] run(ResourceName algorithm, ResourceName dataset, ResourceLoader loader)
        throws TunedTesterException, EvaluationSetupException, AlgorithmErrorException
    {
        if(!dataset.isFile())
            throw new EvaluationSetupException("Incorrect resource name: " + dataset + ". Expected file resource");

        StandardLoader standardLoader = new StandardLoader(loader);
        Cell learner = standardLoader.loadCell(algorithm);

        try {
            double resultSum = 0.0;
            int resultCount = 0;

            for(int i = 1; true; i++) {
                // large amounts of data may have been loaded in previous run,
                // so it's a good time to clean memory before the next dataset
                System.gc();

                // the archive is re-opened on every iteration and positioned on the i'th entry
                ZipInputStream dataZip = new ZipInputStream(loader.open(dataset));
                try {
                    ZipEntry entry = null;
                    for(int j = 0; j < i; j++)
                        entry = dataZip.getNextEntry();

                    // no i'th entry: all datasets have been processed
                    if(entry == null) break;
                    System.out.println("Will evaluate on dataset: " + entry.getName());

                    // create ArffReader cell
                    ArffReader reader = new ArffReader();
                    reader.setInputStream(dataZip);
                    reader.set("decisionAttr", "last");

                    // create evaluator cell
                    TrainAndTest tt = new TrainAndTest(learner);
                    tt.setRandomSeed(321 + i + dataset.toString().hashCode()); // hashCode differentiates between preliminary and final datasets
                    tt.set("trainPercent", 50);
                    tt.set("repetitions", 10);
                    tt.setScore(new BalancedAccuracy());
                    tt.setSource(reader);

                    // run evaluation
                    tt.learn();
                    resultSum += tt.result()[0];
                    resultCount ++;
                }
                finally {
                    // release the stream on every exit path: normal completion,
                    // the end-of-archive break above, and any exception
                    dataZip.close();
                }
            }

            // without this guard an archive with no entries would yield 0.0 / 0 == NaN
            if(resultCount == 0)
                throw new EvaluationSetupException("Dataset file contains no ZIP entries: " + dataset);

            double result = resultSum / resultCount;
            return new Double[] { result };
        }
        catch(CellException e) {
            throw new AlgorithmErrorException(e);
        }
        catch(ZipException e) {
            // NOTE(review): the cause is dropped here; attach it if EvaluationSetupException
            // offers a (String, Throwable) constructor - not visible from this file
            throw new EvaluationSetupException("Dataset file is not a correct ZIP file");
        }
        catch(IOException e) {
            throw new TunedTesterException(e);
        }
    }
}