Mentions légales du service

Skip to content
Snippets Groups Projects
ExtGroupEval.java 15.68 KiB
/*
 * $Id$
 *
 * Copyright (C) 2003 The University of Manchester
 * Copyright (C) 2003 The University of Karlsruhe
 * Copyright (C) 2003-2005, 2007-2012 INRIA
 * Copyright (C) 2004, Université de Montréal
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA.
 */

/* This program evaluates the results of several ontology aligners in a row.
   It uses the generalisations of precision and recall described in
   [Ehrig & Euzenat 2005].
*/
package fr.inrialpes.exmo.align.cli;

import org.semanticweb.owl.align.Alignment;
import org.semanticweb.owl.align.Evaluator;

import fr.inrialpes.exmo.align.impl.ObjectAlignment;
import fr.inrialpes.exmo.align.impl.URIAlignment;
import fr.inrialpes.exmo.align.impl.eval.ExtPREvaluator;
import fr.inrialpes.exmo.align.parser.AlignmentParser;

import fr.inrialpes.exmo.ontowrap.OntologyFactory;
import fr.inrialpes.exmo.ontowrap.OntowrapException;

import java.io.File;
import java.io.PrintStream;
import java.io.FileOutputStream;
import java.lang.Integer;
import java.util.Hashtable;
import java.util.Vector;
import java.util.Enumeration;
import java.util.Arrays;
import java.util.Formatter;
import java.util.Properties;

import org.xml.sax.SAXException;

import gnu.getopt.LongOpt;
import gnu.getopt.Getopt;

/** A basic class for synthesizing the results of a set of alignments provided
    by different algorithms. The output is a table showing various generalisations
    of precision and recall for each test and for each algorithm.
    Average is also computed as Harmonic means.
    
    <pre>
    java -cp procalign.jar fr.inrialpes.exmo.align.cli.ExtGroupEval [options]
    </pre>

    where the options are:
    <pre>
    -o filename --output=filename
    -f format = sepr (symmetric/effort-based/precision-oriented/recall-oriented) --format=sepr
    -d debug --debug=level
    -r filename --reference=filename
    -s algo/measure
    -l list of compared algorithms
    -t output --type=output: xml/tex/html/ascii
   </pre>

   The input is taken in the current directory in a set of subdirectories
   (one per test which will be rendered by a line) each directory contains
   a number of alignment files (one per algorithm, which will be rendered
   as a column).

    If output is requested (<CODE>-o</CODE> flags), then output will be
    written to <CODE>output</CODE> if present, stdout by default.

<pre>
$Id$
</pre>

@author Sean K. Bechhofer
@author Jérôme Euzenat
    */

public class ExtGroupEval {

    /** Parameters passed to each evaluator; created by run() and carrying the "debug" level. */
    Properties params = null;
    /** Name of the output file (-o); when null, print() writes to System.out. */
    String filename = null;
    /** File name of the reference alignment looked up in every test directory (-r). */
    String reference = "refalign.rdf";
    /** Sequence of measure letters to report (-f): s, e, p and/or r. */
    String format = "s";
    /** Number of measures reported per algorithm; recomputed by print() as format.length(). */
    int fsize = 2;
    /** Requested output type (-t); stored by run() but not consulted by the code in this class. */
    String type = "html";
    /** Not read or written by any method of this class. */
    boolean embedded = false;
    /** Value of the -s/--sup option; stored by run() but not consulted by the code in this class. */
    String dominant = "s";
    /** Names of the compared algorithms (-l); each one names a "name.rdf" file in a test directory. */
    Vector<String> listAlgo = null;
    /** Debug level (-d); 0 means silent. */
    int debug = 0;
    /** Background color used by print() for every other row; null disables coloring. */
    String color = null;
    /** Directory containing the test subdirectories (-w); null means the current directory. */
    String ontoDir = null;

    /**
     * Command-line entry point: builds an evaluator and runs it on the
     * given arguments, dumping the stack trace of any exception raised.
     *
     * @param args the command-line arguments
     */
    public static void main(String[] args) {
	try {
	    ExtGroupEval groupEval = new ExtGroupEval();
	    groupEval.run( args );
	} catch (Exception ex) {
	    ex.printStackTrace();
	}
    }

    /**
     * Parses the command line, records the options in the fields of this
     * object, then evaluates every test directory and prints the report.
     *
     * @param args the command-line arguments
     * @throws Exception if an option value cannot be interpreted
     *         (e.g. a non-numeric debug level)
     */
    public void run(String[] args) throws Exception {
	String listFile = "";
	LongOpt[] longopts = new LongOpt[] {
	    new LongOpt("help", LongOpt.NO_ARGUMENT, null, 'h'),
	    new LongOpt("output", LongOpt.REQUIRED_ARGUMENT, null, 'o'),
	    new LongOpt("format", LongOpt.REQUIRED_ARGUMENT, null, 'f'),
	    new LongOpt("type", LongOpt.REQUIRED_ARGUMENT, null, 't'),
	    new LongOpt("debug", LongOpt.OPTIONAL_ARGUMENT, null, 'd'),
	    new LongOpt("sup", LongOpt.REQUIRED_ARGUMENT, null, 's'),
	    new LongOpt("list", LongOpt.REQUIRED_ARGUMENT, null, 'l'),
	    new LongOpt("color", LongOpt.OPTIONAL_ARGUMENT, null, 'c'),
	    new LongOpt("reference", LongOpt.REQUIRED_ARGUMENT, null, 'r'),
	    new LongOpt("directory", LongOpt.REQUIRED_ARGUMENT, null, 'w')
	};

	// "s:" was missing from the short option string although 's' is
	// handled below, so "-s" was rejected while "--sup" worked.
	// "a:" has no handler but is kept so that command lines which
	// passed it keep being accepted.
	Getopt g = new Getopt("", args, "ho:a:d::l:f:t:s:r:w:c::", longopts);
	int c;
	String arg;

	while ((c = g.getopt()) != -1) {
	    switch (c) {
	    case 'h' :
		usage();
		return;
	    case 'o' :
		/* Write output here */
		filename = g.getOptarg();
		break;
	    case 'r' :
		/* File name for the reference alignment */
		reference = g.getOptarg();
		break;
	    case 'f' :
		/* Sequence of results to print */
		format = g.getOptarg();
		break;
	    case 't' :
		/* Type of output (tex/html/xml/ascii) */
		type = g.getOptarg();
		break;
	    case 's' :
		/* Print per type or per algo */
		dominant = g.getOptarg();
		break;
	    case 'c' :
		/* Print colored lines: use the given color, lightblue by default */
		arg = g.getOptarg();
		if ( arg != null && arg.trim().length() > 0 ) color = arg.trim();
		else color = "lightblue";
		break;
	    case 'l' :
		/* List of filename */
		listFile = g.getOptarg();
		break;
	    case 'd' :
		/* Debug level  */
		arg = g.getOptarg();
		if ( arg != null ) debug = Integer.parseInt(arg.trim());
		else debug = 4;
		break;
	    case 'w' :
		/* Use the given ontology directory (null if no argument was given) */
		ontoDir = g.getOptarg();
		break;
	    default :
		/* Unknown or unhandled option: ignored */
		break;
	    }
	}

	listAlgo = new Vector<String>();
	for ( String s : listFile.split(",") ) {
	    String name = s.trim();
	    // "".split(",") yields one empty string: it is not an algorithm name
	    if ( name.length() > 0 ) listAlgo.add( name );
	}

	params = new Properties();
	if (debug > 0) params.setProperty( "debug", Integer.toString( debug-1 ) );

	print( iterateDirectories() );
    }

    /**
     * Evaluates the alignments found in every subdirectory of the test
     * directory (<code>ontoDir</code>, or the current working directory
     * when it is null), visiting the subdirectories in sorted order.
     *
     * @return one record per subdirectory for which iterateAlignments()
     *         returned a result; empty (never null) when the test
     *         directory cannot be listed
     */
    public Vector<Vector> iterateDirectories (){
	Vector<Vector> result = new Vector<Vector>();
	File [] subdir = null;
	try {
	    File root = new File( (ontoDir == null) ? System.getProperty("user.dir") : ontoDir );
	    // listFiles() does not throw for a missing path or a plain file:
	    // it returns null, which used to end in a NullPointerException
	    subdir = root.listFiles();
	    if ( subdir == null ) System.err.println("Cannot stat dir "+root);
	} catch (Exception e) {
	    System.err.println("Cannot stat dir "+ e.getMessage());
	}
	if ( subdir == null ) {
	    usage();
	    return result;
	}
	Arrays.sort(subdir);
	for ( File entry : subdir ) {
	    if ( !entry.isDirectory() ) continue;
	    if ( debug > 0 ) System.err.println("\nEntering directory "+entry);
	    // eval the alignments in a subdirectory and store the result
	    Vector vect = iterateAlignments( entry );
	    if ( vect != null ) result.add( vect );
	}
	return result;
    }

    /**
     * Evaluates, for one test directory, the alignment produced by each
     * algorithm of <code>listAlgo</code> (file <code>name.rdf</code>)
     * against the reference alignment of that directory.
     *
     * @param dir the test directory
     * @return a record whose first element is the directory name, followed
     *         by one evaluator per algorithm (null where eval() returned
     *         null); or null when no algorithm could be evaluated
     */
    public Vector<Object> iterateAlignments ( File dir ) {
	// File.toURI() already ends with "/" when it can tell that dir is a
	// directory: only add the separator when it is missing, so that the
	// file URIs do not contain "//"
	String prefix = dir.toURI().toString();
	if ( !prefix.endsWith("/") ) prefix = prefix+"/";
	Vector<Object> result = new Vector<Object>();
	result.add( dir.getName() );
	boolean ok = false;
	int i = 0;
	// for all alignments there,
	for ( String m : listAlgo ) {
	    i++;
	    if ( debug > 1) System.err.println("  Considering result "+i);
	    // evaluate and store the evaluator at position i of the record
	    Evaluator evaluator = eval( prefix+reference, prefix+m+".rdf");
	    if ( evaluator != null ) ok = true;
	    result.add( evaluator );
	}
	// Unload the ontologies.
	try {
	    OntologyFactory.clear();
	} catch ( OntowrapException owex ) { // only report
	    owex.printStackTrace();
	}

	return ok ? result : null;
    }

    /**
     * Parses two alignments and evaluates the second against the first
     * with an ExtPREvaluator.
     *
     * @param alignName1 location of the reference alignment
     * @param alignName2 location of the alignment to evaluate
     * @return the evaluator once its evaluation has completed, or null if
     *         parsing or evaluating failed (the failure is reported on
     *         System.err)
     */
    public Evaluator eval( String alignName1, String alignName2 ) {
	int nextdebug = ( debug < 2 ) ? 0 : debug - 2;
	try {
	    // params is only created by run(): allow direct calls as well
	    if ( params == null ) params = new Properties();
	    // Load alignments
	    AlignmentParser aparser = new AlignmentParser( nextdebug );
	    Alignment align1 = aparser.parse( alignName1 );
	    if ( debug > 1 ) System.err.println(" Alignment structure1 parsed");
	    aparser.initAlignment( null );
	    Alignment align2 = aparser.parse( alignName2 );
	    if ( debug > 1 ) System.err.println(" Alignment structure2 parsed");
	    // Create evaluator object
	    Evaluator evaluator = new ExtPREvaluator(ObjectAlignment.toObjectAlignment( (URIAlignment)align1 ), 
						     ObjectAlignment.toObjectAlignment( (URIAlignment)align2 ) );
	    // Compare
	    params.setProperty( "debug", Integer.toString( nextdebug ) );
	    evaluator.eval( params ) ;
	    // Only hand out the evaluator once the comparison succeeded, so
	    // that a failed evaluation is never reported as real figures
	    return evaluator;
	} catch (Exception ex) {
	    if ( debug > 1 ) {
		ex.printStackTrace();
	    } else {
		System.err.println("ExtGroupEval: "+ex);
		System.err.println(alignName1+ " - "+alignName2 );
	    }
	    return null;
	}
    }

    /**
     * This does not only print the results but compute the average as well
     */
    /**
     * Writes the results as an HTML table, to the file <code>filename</code>
     * or to System.out when it is null, and computes the aggregated
     * "H-mean" row while doing so.
     *
     * Each algorithm gets two columns (precision-like and recall-like
     * value) per letter of <code>format</code>: s (symmetric), e (effort),
     * p (precision-oriented), r (recall-oriented). The last row divides the
     * summed similarity of each algorithm by the total number of found,
     * respectively expected, correspondences.
     *
     * Any exception raised while printing is reported through its stack
     * trace and not propagated.
     *
     * @param result one record per test, as returned by iterateDirectories():
     *        the test name followed by one ExtPREvaluator (or null) per
     *        algorithm of <code>listAlgo</code>
     */
    public void print( Vector<Vector> result ) {
	int expected = 0; // expected so far
	PrintStream writer = null;
	// Only a stream opened here is closed; System.out is merely flushed
	boolean ownsWriter = ( filename != null );

	fsize = format.length();
	try {
	    if ( ownsWriter ) {
		writer = new PrintStream(new FileOutputStream( filename ));
	    } else {
		writer = System.out;
	    }
	    Formatter formatter = new Formatter(writer);
	    int nbAlgo = listAlgo.size();
	    int span = 2*fsize; // columns used by one algorithm

	    // Print the header
	    writer.println("<html><head></head><body>");
	    writer.println("<table border='2' frame='sides' rules='groups'>");
	    writer.println("<colgroup align='center' />");
	    for ( int a = 0; a < nbAlgo; a++ ) {
		writer.println("<colgroup align='center' span='"+span+"' />");
	    }
	    writer.println("<thead valign='top'><tr><th>algo</th>");
	    for ( String m : listAlgo ) {
		writer.println("<th colspan='"+span+"'>"+m+"</th>");
	    }
	    writer.println("</tr></thead><tbody><tr><td>test</td>");
	    for ( int a = 0; a < nbAlgo; a++ ) {
		for ( int i = 0; i < fsize; i++ ) {
		    String label = extMeasureLabel( format.charAt(i) );
		    if ( label != null ) {
			writer.println("<td colspan='2'><center>"+label+"</center></td>");
		    }
		}
	    }
	    writer.println("</tr></tbody><tbody>");

	    // Per-algorithm accumulators (Java zero-initialises them)
	    int[] foundVect = new int[ nbAlgo ]; // found so far
	    double[] symVect = new double[ nbAlgo ]; // symmetric similarity
	    double[] effVect = new double[ nbAlgo ]; // effort-based similarity
	    double[] precOrVect = new double[ nbAlgo ]; // precision-oriented similarity
	    double[] recOrVect = new double[ nbAlgo ]; // recall-oriented similarity

	    // One row per test directory
	    boolean colored = false;
	    for ( Vector test : result ) {
		int nexpected = -1;
		if ( colored && color != null ){
		    colored = false;
		    writer.println("<tr bgcolor=\""+color+"\">");
		} else {
		    colored = true;
		    writer.println("<tr>");
		}
		// First element of the record is the directory name
		writer.println("<td>"+(String)test.get(0)+"</td>");
		// The following elements are the evaluators, one per algorithm
		for ( int k = 0; k < test.size()-1; k++ ) {
		    ExtPREvaluator eval = (ExtPREvaluator)test.get( k+1 );
		    if ( eval == null ) {
			// keep the table aligned: two cells per reported measure
			for ( int i = 0; i < fsize; i++ ) {
			    writer.println("<td>n/a</td><td>n/a</td>");
			}
			continue;
		    }
		    // the expected count is the same for all algorithms of a
		    // test: take it once, from the first available evaluator
		    if ( nexpected == -1 ){
			nexpected = eval.getExpected();
			expected += nexpected;
		    }
		    // If foundVect is -1 then results are invalid
		    if ( foundVect[k] != -1 ) foundVect[k] += eval.getFound();
		    for ( int i = 0; i < fsize; i++ ) {
			switch ( format.charAt(i) ) {
			case 's' :
			    printValuePair( writer, formatter, eval.getSymPrecision(), eval.getSymRecall() );
			    symVect[k] += eval.getSymSimilarity();
			    break;
			case 'e' :
			    printValuePair( writer, formatter, eval.getEffPrecision(), eval.getEffRecall() );
			    effVect[k] += eval.getEffSimilarity();
			    break;
			case 'p' :
			    printValuePair( writer, formatter, eval.getPrecisionOrientedPrecision(), eval.getPrecisionOrientedRecall() );
			    precOrVect[k] += eval.getPrecisionOrientedSimilarity();
			    break;
			case 'r' :
			    printValuePair( writer, formatter, eval.getRecallOrientedPrecision(), eval.getRecallOrientedRecall() );
			    recOrVect[k] += eval.getRecallOrientedSimilarity();
			    break;
			default :
			    // unknown measure letter: single empty cell, as before
			    writer.println("<td></td>");
			    break;
			}
		    }
		}
		writer.println("</tr>");
	    }

	    // Aggregated row
	    writer.print("<tr bgcolor=\"yellow\"><td>H-mean</td>");
	    for ( int k = 0; k < nbAlgo; k++ ) {
		if ( foundVect[k] == -1 ) {
		    writer.println("<td colspan='"+span+"'><center>Error</center></td>");
		    continue;
		}
		for ( int i = 0; i < fsize; i++ ) {
		    switch ( format.charAt(i) ) {
		    case 's' :
			printValuePair( writer, formatter, symVect[k]/foundVect[k], symVect[k]/expected );
			break;
		    case 'e' :
			printValuePair( writer, formatter, effVect[k]/foundVect[k], effVect[k]/expected );
			break;
		    case 'p' :
			printValuePair( writer, formatter, precOrVect[k]/foundVect[k], precOrVect[k]/expected );
			break;
		    case 'r' :
			printValuePair( writer, formatter, recOrVect[k]/foundVect[k], recOrVect[k]/expected );
			break;
		    default :
			writer.println("<td></td>");
			break;
		    }
		}
	    }
	    writer.println("</tr>");
	    writer.println("</tbody></table>");
	    writer.println("<p><small>n/a: result alignment not provided or not readable<br />");
	    writer.println("NaN: division per zero, likely due to empty alignent.</small></p>");
	    writer.println("</body></html>");
	} catch (Exception ex) {
	    ex.printStackTrace();
	} finally {
	    // writer is still null when the output file could not be opened
	    if ( writer != null ) {
		writer.flush();
		if ( ownsWriter ) writer.close();
	    }
	}
    }

    /** Returns the column title of a measure letter, or null for an unknown letter. */
    private static String extMeasureLabel( char measure ) {
	switch ( measure ) {
	case 's' : return "Symmetric";
	case 'e' : return "Effort";
	case 'p' : return "Prec. orient.";
	case 'r' : return "Rec. orient.";
	default : return null;
	}
    }

    /**
     * Writes two adjacent table cells holding the two values with two decimals.
     * Both the markup and the numbers go to the same stream, so that the
     * table stays well-formed when the output is a file.
     */
    private static void printValuePair( PrintStream writer, Formatter formatter, double first, double second ) {
	writer.print("<td>");
	formatter.format("%1.2f", first);
	writer.print("</td><td>");
	formatter.format("%1.2f", second);
	writer.println("</td>");
    }

    /**
     * Prints the command-line help on System.out, followed by the
     * implementation title and version of the enclosing package on
     * System.err.
     */
    public void usage() {
	System.out.println("usage: ExtGroupEval [options]");
	System.out.println("options are:");
	System.out.println("\t--format=sepr -f sepr\tSpecifies the extended measures used (symetric/effort-based/precision-oriented/recall-oriented)");
	System.out.println("\t--reference=filename -r filename\tSpecifies the name of the reference alignment file (default: refalign.rdf)");
	System.out.println("\t--output=filename -o filename\tSpecifies a file to which the output will go");
	// Apparently not implemented
	//System.out.println("\t--dominant=algo -s algo\tSpecifies if dominant columns are algorithms or measure");
	System.out.println("\t--type=html|xml|tex|ascii -t html|xml|tex|ascii\tSpecifies the output format");
	System.out.println("\t--list=algo1,...,algon -l algo1,...,algon\tSequence of the filenames to consider");
	System.out.println("\t--color=color -c color\tSpecifies if the output must color even lines of the output");
	// This option is parsed by run() but was missing from the help
	System.out.println("\t--directory=dir -w dir\tSpecifies the directory containing the test directories (default: current directory)");
	System.out.println("\t--debug[=n] -d [n]\t\tReport debug info at level n");
	System.out.println("\t--help -h\t\t\tPrint this message");
	// getPackage() may return null when no package information is available
	Package pkg = ExtGroupEval.class.getPackage();
	String title = ( pkg == null ) ? null : pkg.getImplementationTitle();
	String version = ( pkg == null ) ? null : pkg.getImplementationVersion();
	System.err.print("\n"+title+" "+version);
	System.err.println(" ($Id$)\n");
    }
}