Repository /RSCTC/2010/Eval_RSCTC2010.jar:rsctc2010.EvalDecisions


Back

No file description

Source code

/*
 *  Copyright (C) 2009 by TunedIT
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package rsctc2010;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;
import java.util.TreeSet;
import java.util.zip.ZipEntry;
import java.util.zip.ZipException;
import java.util.zip.ZipInputStream;

import org.debellor.base.evaluator.score.Score;
import org.debellor.core.data.SymbolicFeature;
import org.debellor.core.exception.data.DataException;
import org.debellor.core.util.Permute;
import org.tunedit.core.EvaluationProcedure;
import org.tunedit.core.ResourceLoader;
import org.tunedit.core.ResourceName;
import org.tunedit.core.exception.AlgorithmErrorException;
import org.tunedit.core.exception.EvaluationSetupException;
import org.tunedit.core.exception.TunedTesterException;

/**
 * Evaluation procedure that takes two ZIP files with entry files that contain decisions.
 * One of the ZIP files contains predicted decisions, and the other one - ground truth decisions.
 * Iterates through entry files, computes balanced accuracy of predictions for each of them
 * and returns the average over all entry files of the ground-truth ZIP.
 * All entries in the ground-truth ZIP must be matched by corresponding entries
 * in the predictions ZIP having the same names.
 * We assume that the file sizes are small and we can load all their contents into memory.
 * 
 * @author Marcin Wojnarski
 *
 */
public class EvalDecisions extends EvaluationProcedure {

	/** Seed of the random number generator that splits samples into the preliminary and final part.
	 * All non-determinism in the evaluation procedure comes from a generator
	 * that is created anew from this seed for every evaluation.
	 * So, this "non-determinism" will be the same in every run of the procedure
	 * (also when the same object is used for several runs),
	 * if only the dataset with target decisions is the same. */
	private static final long MASK_SEED = 961;
	
	
	/**
	 * Loads both ZIP files into memory and scores the predictions against the targets.
	 * If the name of the targets resource ends with "final.zip", the final subset of samples
	 * is used for scoring; otherwise the complementary (preliminary) subset is used.
	 * Both streams are always closed before this method returns or throws.
	 * 
	 * @param predictionsName - resource name of the file with predictions supplied by an algorithm
	 * @param targetsName - resource name of the file with true decisions
	 * @param loader - used to open both resources
	 * @return one-element array with balanced accuracy averaged over all files of the targets ZIP
	 * @throws AlgorithmErrorException if the predictions resource is not a file, is not a correct ZIP,
	 *         lacks some entry of the targets ZIP or has too few lines in some entry
	 * @throws EvaluationSetupException if the targets resource is not a file, is not a correct ZIP
	 *         or contains no entries
	 * @throws TunedTesterException if reading of any of the files fails or decisions cannot be scored
	 * 
	 * @see org.tunedit.core.EvaluationProcedure#run(org.tunedit.core.ResourceName, org.tunedit.core.ResourceName, org.tunedit.core.ResourceLoader)
	 */
	public Double[] run(ResourceName predictionsName, ResourceName targetsName, ResourceLoader loader)
			throws TunedTesterException, EvaluationSetupException, AlgorithmErrorException
	{
		if(!predictionsName.isFile())
			throw new AlgorithmErrorException("Incorrect resource name: " + predictionsName + ". Expected file resource");
		if(!targetsName.isFile())
			throw new EvaluationSetupException("Incorrect resource name: " + targetsName + ". Expected file resource");

		ZipInputStream targetsZip = null;
		ZipInputStream predictionsZip = null;
		try {
			targetsZip = new ZipInputStream(loader.open(targetsName));
			predictionsZip = new ZipInputStream(loader.open(predictionsName));

			Map<String, ArrayList<String>> targets, predictions;
			try { 
				targets = loadContents(targetsZip); 
			}
			catch(ZipException e) { throw new EvaluationSetupException("File with target decisions is not a correct ZIP file"); }
			
			// ZipInputStream reports no entries (instead of throwing) when the stream
			// does not start with a ZIP entry. Without this check the average below
			// would be computed over zero files, giving NaN.
			if(targets.isEmpty())
				throw new EvaluationSetupException("File with target decisions contains no entries");
			
			try {
				predictions = loadContents(predictionsZip);
			}
			catch(ZipException e) { throw new AlgorithmErrorException("File with predicted decisions is not a correct ZIP file"); }

			boolean isFinal = targetsName.toString().endsWith("final.zip");
			double res = compare(targets, predictions, isFinal);
			
			return new Double[] { res };
		}
		catch(IOException e) {
			throw new TunedTesterException(e);
		}
		finally {
			// release the streams on every path, also when an exception is being thrown
			closeQuietly(predictionsZip);
			closeQuietly(targetsZip);
		}
	}

	/**
	 * Computes balanced accuracy of predictions separately for every file of targets
	 * and returns the arithmetic mean of these values.
	 * Files are processed in alphabetical order of their names, so that the sequence
	 * of random masks is always assigned to the files in the same way.
	 * 
	 * @param targets - true decisions: entry name mapped to lines of the entry; must be non-empty
	 * @param predictions - predicted decisions, in the same format as targets
	 * @param isFinal - which subset of samples of every file should be scored
	 * @return average of per-file scores
	 * @throws AlgorithmErrorException if some file is missing in predictions or has too few lines
	 * @throws TunedTesterException if a pair of decisions is rejected by the score
	 */
	private double compare(
			Map<String, ArrayList<String>> targets,
			Map<String, ArrayList<String>> predictions, 
			boolean isFinal) throws AlgorithmErrorException, TunedTesterException 
	{
		// fresh generator for every evaluation: masks must not depend on earlier runs
		Random random = new Random(MASK_SEED);
		
		double sumResult = 0.0;
		for(String file : new TreeSet<String>(targets.keySet())) {	// TreeSet sorts keys alphabetically
			ArrayList<String> t = targets.get(file);
			ArrayList<String> p = predictions.get(file);
			if(p == null) throw new AlgorithmErrorException(
					"ZIP file with predicted decisions doesn't contain a file: " + file);
			// surplus lines in predictions are tolerated and ignored
			if(p.size() < t.size()) throw new AlgorithmErrorException(
					"File " + file + " in the ZIP with predicted decisions contains too few lines: " 
					+ p.size() + " instead of " + t.size());
			
			Score score = new BalancedAccuracy();
			boolean[] mask = getMask(t.size(), isFinal, random);
			try {
				for(int i = 0; i < t.size(); i++)
					if(mask[i])
						score.add(new SymbolicFeature(t.get(i)), new SymbolicFeature(p.get(i)));
			}
			catch (DataException e) {
				// a score built from only a part of the samples would be silently wrong
				throw new TunedTesterException(e);
			}
			
			sumResult += score.result();
		}
		return sumResult / targets.size();
	}

	/**
	 * Draws the subset of samples that should be scored.
	 * Half of the indices (len / 2, rounded down) is drawn with the given generator
	 * and forms the final subset; all the remaining indices form the preliminary subset.
	 * The selected indices are printed to standard output.
	 * 
	 * @param len - number of samples
	 * @param isFinal - true to select the final subset, false to select the preliminary one
	 * @param random - generator used for drawing the indices
	 * @return array of length len, with true at positions of samples to be scored
	 */
	private boolean[] getMask(int len, boolean isFinal, Random random) 
	{
		// Indices of objects that will be used in final testing (isFinal = true)
		// The rest will be used in preliminary testing
		int[] indices = Permute.indices(len, len / 2, random);

		boolean[] mask = new boolean[len];
		Arrays.fill(mask, !isFinal);
		for(int ind : indices)
			mask[ind] = isFinal;

		System.out.print("Will evaluate on samples number:");
		for(int i = 0; i < len; i++)
			if(mask[i]) System.out.print(" " + i);
		System.out.println();

		return mask;
	}

	/**
	 * Reads all entries of a ZIP stream into memory.
	 * The name of every loaded entry is printed to standard output.
	 * 
	 * @param zip - ZIP stream positioned before its first entry; it is not closed by this method
	 * @return entry name mapped to the list of trimmed lines of that entry
	 * @throws IOException if the stream cannot be read
	 */
	private static Map<String, ArrayList<String>> loadContents(ZipInputStream zip) throws IOException 
	{
		Map<String, ArrayList<String>> map = new HashMap<String, ArrayList<String>>();
		ZipEntry entry;
		while((entry = zip.getNextEntry()) != null) {
			System.out.println("Will load contents of: " + entry.getName());
			map.put(entry.getName(), loadLines(zip));
		}
		return map;
	}

	/**
	 * Reads all lines that remain in the stream.
	 * 
	 * @param fileStream - stream to read from; it is not closed by this method
	 * @return lines of the stream, with leading and trailing whitespaces removed
	 * @throws IOException if the stream cannot be read
	 */
	private static ArrayList<String> loadLines(InputStream fileStream) throws IOException 
	{
		// The reader is deliberately left open: closing it would close the underlying
		// ZIP stream and make the subsequent entries inaccessible.
		BufferedReader reader = new BufferedReader(new InputStreamReader(fileStream));
		ArrayList<String> lines = new ArrayList<String>();
		String line;
		while((line = reader.readLine()) != null)
			lines.add(line.trim());		// leading and trailing whitespaces are omitted
		return lines;
	}

	/**
	 * Closes the stream if it was opened, ignoring a failure of the close operation.
	 * 
	 * @param stream - stream to close, may be null
	 */
	private static void closeQuietly(InputStream stream) 
	{
		if(stream == null) return;
		try {
			stream.close();
		}
		catch(IOException ignored) {
			// The stream was only read from, so a failed close loses no data
			// and must not hide the result or an exception that is already on its way.
		}
	}

}

Copyright © 2008-2011 by TunedIT
Design by luksite