diff --git a/html/relnotes.html b/html/relnotes.html index 95ec2c6036ea3f83dad00d3b8b7adaa9a9567d63..f1b7112627ab7b847e5dbbd6e31c00159c19a966 100644 --- a/html/relnotes.html +++ b/html/relnotes.html @@ -67,8 +67,9 @@ The development of 4 versions continue. <li>Integration of skoslite (home made SKOS API) (ontowrap)</li> <li>Integration of SKOS API (ontowrap)</li> <li>Extended ontowrap so that all methods can raise exceptions (ontowrap)</li> -<li><tt>DistanceAligment</tt> can now deal indiferently with similarity or distances (impl)</li> -<li>Implement fully relaxed precision and recall (impl)</li> +<li><tt>DistanceAlignment</tt> can now deal indifferently with similarity or distances (impl)</li> +<li>Implemented fully relaxed precision and recall (impl)</li> +<li>Implemented FMeasure/threshold plotting <tt>ThresholdGraphEvaluator</tt> (eval)</li> <li>Added Wu-Palmer and gloss overlap similarity over WordNet to <tt>JWNLAlignment</tt> (impl)</li> <li>Added <tt>InstanceBasedMatrixMeasure</tt> for instance based alignments (impl)</li> <li>Added a <tt>getResult()</tt> method to <tt>Evaluator</tt> (api)</li> @@ -118,8 +119,7 @@ The development of 4 versions continue. REST are fully aligned. 
This is now <a href="rest.html#version4">documented</a> (serv)</li> <li>Added a <tt>listevaluators</tt> primitive to the service interface (serv)</li> -<li>Added computation of MAP (Mean Average Precision) - in <tt>PRGraphEvaluator</tt> (impl).</li> +<li>Added computation of MAP (Mean Average Precision) in <tt>PRGraphEvaluator</tt> (impl).</li> <li>Added pretty-printing of names in server (server)</li> <li>Added provenance tracking in server (server)</li> <li>Added initialisation through initial alignment diff --git a/src/fr/inrialpes/exmo/align/impl/eval/AveragePRGraphEvaluator.java b/src/fr/inrialpes/exmo/align/impl/eval/AveragePRGraphEvaluator.java index 9409b9c0484cd29fb6c88de803b5be0559113a22..2fce49742929c96d82c41c0d2247a240677e9499 100644 --- a/src/fr/inrialpes/exmo/align/impl/eval/AveragePRGraphEvaluator.java +++ b/src/fr/inrialpes/exmo/align/impl/eval/AveragePRGraphEvaluator.java @@ -39,6 +39,8 @@ import java.net.URI; /** * Compute the precision recall graph on 11 points * The first alignment is thus the expected one. + * This is, for legacy reasons, the first implementation of this function. + * It does not follow the new standard GraphEvaluator protocol. * * @author Jerome Euzenat * @version $Id: AveragePRGraphEvaluator.java 1196 2010-01-10 19:58:52Z euzenat $ @@ -126,7 +128,7 @@ public class AveragePRGraphEvaluator extends GraphEvaluator { Vector<Pair> inflexion = new Vector<Pair>(); // Create a sorted structure in which putting the cells - initCellSet(); + initCellSet( true ); if ( align2 == null ) return; //no increase of precisions for ( Cell c : align2 ) { if ( invalid && c.getStrength() != 1. && c.getStrength() != 0. 
) invalid = false; @@ -228,5 +230,8 @@ public class AveragePRGraphEvaluator extends GraphEvaluator { public double getGlobalResult(){ return map; } + + public String xlabel() { return "recall"; } + public String ylabel() { return "precision"; }; } diff --git a/src/fr/inrialpes/exmo/align/impl/eval/GraphEvaluator.java b/src/fr/inrialpes/exmo/align/impl/eval/GraphEvaluator.java index 9fe2a6a5a91ed90bf28c5c827f53e2a3cc54cb28..e96157ca459102b551949758c4cf7d7dd826802c 100644 --- a/src/fr/inrialpes/exmo/align/impl/eval/GraphEvaluator.java +++ b/src/fr/inrialpes/exmo/align/impl/eval/GraphEvaluator.java @@ -93,18 +93,21 @@ public abstract class GraphEvaluator { * However, it will not work if these are not of the same type. **/ public GraphEvaluator() { - initCellSet(); + initCellSet( true ); } - protected void initCellSet () { + public GraphEvaluator( boolean ascending ) { + initCellSet( ascending ); + } + + protected void initCellSet ( boolean ascending ) { // Create a sorted structure in which putting the cells // TreeSet could be replaced by something else - cellSet = new TreeSet<EvalCell>( + if ( ascending ) { + cellSet = new TreeSet<EvalCell>( new Comparator<EvalCell>() { public int compare( EvalCell o1, EvalCell o2 ) throws ClassCastException { - //try { - //System.err.println(((Cell)o1).getObject1()+" -- "+((Cell)o1).getObject2()+" // "+o2.getObject1()+" -- "+o2.getObject2()); if ( o1.cell instanceof Cell && o2.cell instanceof Cell ) { if ( o1.cell.getStrength() > o2.cell.getStrength() ){ return -1; @@ -114,25 +117,31 @@ public abstract class GraphEvaluator { } else if ( o1.correct ) { return -1; } - /*else if ( (o1.cell.getObject1AsURI(align1).getFragment() == null) - || (o2.cell.getObject1AsURI(align2).getFragment() == null) ) { - return -1; - } else if ( o1.cell.getObject1AsURI(align1).getFragment().compareTo(o2.cell.getObject1AsURI(align2).getFragment()) > 0) { + else { return 1; } + } else { throw new ClassCastException(); } + } + } + ); + } else { + cellSet = 
new TreeSet<EvalCell>( + new Comparator<EvalCell>() { + public int compare( EvalCell o1, EvalCell o2 ) + throws ClassCastException { + if ( o1.cell instanceof Cell && o2.cell instanceof Cell ) { + if ( o1.cell.getStrength() < o2.cell.getStrength() ){ return -1; - } else if ( o1.cell.getObject1AsURI(align1).getFragment().compareTo(o2.cell.getObject1AsURI(align2).getFragment()) < 0 ) { + } else if ( o1.cell.getStrength() > o2.cell.getStrength() ){ return 1; - } else if ( (o1.cell.getObject2AsURI(align1).getFragment() == null) - || (o2.cell.getObject2AsURI(align2).getFragment() == null) ) { - return -1; - } else if ( o1.cell.getObject2AsURI(align1).getFragment().compareTo(o2.cell.getObject2AsURI(align2).getFragment()) > 0) { + //The comparator must always tell that things are different! + } else if ( o1.correct ) { return -1; - // We assume that they have different names - } */ else { return 1; } + } + else { return 1; } } else { throw new ClassCastException(); } - //} catch ( AlignmentException e) { e.printStackTrace(); return 0;} } } ); + } } /* @@ -200,6 +209,9 @@ public abstract class GraphEvaluator { } } + public abstract String xlabel(); + public abstract String ylabel(); + } class EvalCell { diff --git a/src/fr/inrialpes/exmo/align/impl/eval/PRGraphEvaluator.java b/src/fr/inrialpes/exmo/align/impl/eval/PRGraphEvaluator.java index 90a6a4299de9b23cf56ce062d4b3586ea35c8e27..ce023eb33c520ebbfc73d212d3c2cda609524312 100644 --- a/src/fr/inrialpes/exmo/align/impl/eval/PRGraphEvaluator.java +++ b/src/fr/inrialpes/exmo/align/impl/eval/PRGraphEvaluator.java @@ -45,13 +45,6 @@ import java.net.URI; * @author Jerome Euzenat * @version $Id$ * - * The computation is remotely inspired from the sample programme of - * Raymond J. 
Mooney - * available under GPL from http://www.cs.utexas.edu/users/mooney/ir-course/ - * - * Mooney also provides the averaging of these graphs over several queries: - * unfortunatelly, the resulting graph is not anymore a Precision/Recall graph - * * This works perfectly correctly. I mention below the point which are * mentionned as design points in a forecoming Exmotto entry: * [R=0%] What should be P when R is 0% (obviously 100%) @@ -108,7 +101,6 @@ public class PRGraphEvaluator extends GraphEvaluator { * From an ordered vector of cells with their correctness status */ public Vector<Pair> evalOpenEnded() { - double[] precisions = new double[STEP+1]; // Determine what the increment is // Get the increment int nbcorrect = 0; @@ -124,7 +116,7 @@ public class PRGraphEvaluator extends GraphEvaluator { nbcorrect++; double precision = (double)nbcorrect / (double)nbfound; sumprecisions += precision; // For MAP - if ( nbcorrect == next ) { // increrement achieved + if ( nbcorrect == next ) { // increment achieved //record value double recall = (double)nbcorrect / (double)nbexpected; // Here the recall could be computed more directly @@ -218,79 +210,6 @@ public class PRGraphEvaluator extends GraphEvaluator { return points; } - /** - * Compute precision and recall graphs. 
- public double eval( Properties params ) throws AlignmentException { - return eval( params, (Object)null ); - } - public double eval( Properties params, Object cache ) throws AlignmentException { - // Local variables - int nbexpected = align1.nbCells(); - int nbfound = 0; - int nbcorrect = 0; - double sumprecisions = 0.; // For MAP - - // unchecked - if( params.getProperty("step") != null ){ - STEP = Integer.parseInt( params.getProperty("step") ); - } - points = new double[ STEP+1 ]; - - // Create a sorted structure in which putting the cells - // TreeSet could be replaced by something else - SortedSet<Cell> cellSet = orderAlignment(); - - // Collect the points that change recall - // (the other provide lower precision from the same recall and are not considered) - points.add( new Pair( 0., 1. ) ); // [R=0%] - for( Cell c2 : cellSet ){ - nbfound++; - if ( correctCell( c2, align2, align1 ) > 0. ) { - nbcorrect++; - double recall = (double)nbcorrect / (double)nbexpected; - double precision = (double)nbcorrect / (double)nbfound; - sumprecisions += precision; // For MAP - // Create a new pair to put in the list - // It records real precision and recall at that point - points.add( new Pair( recall, precision ) ); - c2 = null; // out of the loop. - } - } - - // Now if we want to have a regular curve we must penalize those system - // that do not reach 100% recall. - // for that purpose, and for each other bound we add a point with the worse - // precision which is the required recall level divided with the maximum - // cardinality possible (i.e., the multiplication of the ontology sizes). - // JE[R=100%]: that's a fine idea! Unfortunately SIZEOFO1 and SIZEOFO2 are undefined values - //points.add( new Pair( 1., (double)nbexpected/(double)(SIZEOFO1*SIZEOFA2) ) ); - points.add( new Pair( 1.0, 0. ) ); // useless because - - // [Interp.] 
Interpolate curve points at each n-recall level - // This is inspired form Ray Mooney's program - // It works backward in the vector, - // (in the same spirit as before, the maximum value so far -best- is retained) - int j = points.size()-1; // index in recall-ordered vector of points - int i = STEP; // index of the current recall interval - double level = (double)i/STEP; // max level of that interval - double best = 0.; // best value found for that interval - while( j >= 0 ){ - Pair precrec = points.get(j); - while ( precrec.getX() < level ){ - points[i] = best; - i--; - level = (double)i/STEP; - }; - if ( precrec.getY() > best ) best = precrec.getY(); - j--; - } - points[0] = best; // It should be 1. that's why it is now added in points. [R=0%] - - map = sumprecisions / nbexpected; // For MAP - return map; - } - */ - /** * This output the result */ @@ -309,5 +228,8 @@ public class PRGraphEvaluator extends GraphEvaluator { public double getGlobalResult(){ return map; } + + public String xlabel() { return "recall"; } + public String ylabel() { return "precision"; }; } diff --git a/src/fr/inrialpes/exmo/align/impl/eval/ROCCurveEvaluator.java b/src/fr/inrialpes/exmo/align/impl/eval/ROCCurveEvaluator.java index c5f01f60637c2a4d77dbf16b154bf1257ab4637b..cd322181963158a97b176a7c300d4de5a98fa5f6 100644 --- a/src/fr/inrialpes/exmo/align/impl/eval/ROCCurveEvaluator.java +++ b/src/fr/inrialpes/exmo/align/impl/eval/ROCCurveEvaluator.java @@ -185,5 +185,7 @@ public class ROCCurveEvaluator extends GraphEvaluator { public double getAUC(){ return auc; } + public String xlabel() { return "noise"; } + public String ylabel() { return "recall"; }; } diff --git a/src/fr/inrialpes/exmo/align/impl/eval/ThresholdGraphEvaluator.java b/src/fr/inrialpes/exmo/align/impl/eval/ThresholdGraphEvaluator.java new file mode 100644 index 0000000000000000000000000000000000000000..2cde197978cdc089ce5785a2fbfc4f1e86105953 --- /dev/null +++ 
b/src/fr/inrialpes/exmo/align/impl/eval/ThresholdGraphEvaluator.java @@ -0,0 +1,158 @@ +/* + * $Id: PRGraphEvaluator.java 1433 2010-04-13 07:07:58Z euzenat $ + * + * Copyright (C) INRIA, 2004-2005, 2007-2010 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +package fr.inrialpes.exmo.align.impl.eval; + +import org.semanticweb.owl.align.Alignment; +import org.semanticweb.owl.align.AlignmentException; +import org.semanticweb.owl.align.Cell; + +import fr.inrialpes.exmo.align.impl.Namespace; +import fr.inrialpes.exmo.align.parser.SyntaxElement; + +import java.util.Enumeration; +import java.util.Properties; +import java.util.Iterator; +import java.util.TreeSet; +import java.util.Set; +import java.util.SortedSet; +import java.util.Comparator; +import java.util.Vector; +import java.io.PrintWriter; +import java.net.URI; + +/** + * Compute the F-measure/precision/recall at various thresholds + * + * @author Jerome Euzenat + * @version $Id$ + * + */ + +public class ThresholdGraphEvaluator extends GraphEvaluator { + + private int STEP = 50; + + private double opt = 0.; // Optimum recorded value + + public ThresholdGraphEvaluator() { + super(); + } + + /** + * Compute threshold graph + */ + public Vector<Pair> eval() { // throws AlignmentException + return eval( (Properties)null 
); + } + + /** + * Returns a list of Measure at threshold points (Pairs) + * From an ordered vector of cells with their correctness status + * + * The basic strategy would be: + * Take the alignment/(Compute P/R/Apply threshold)+ + * But it is better to: take the cells in reverse order + * Compute the measures on the fly + * + */ + public Vector<Pair> eval( Properties params ) { // throws AlignmentException + points = new Vector<Pair>(STEP+1); // 2010evaluate the size + opt = 0.; + int nbcorrect = 0; + int nbfound = 0; + double precision = 1.; + double recall = 0.; + double fmeasure = 0.; + double prevt = cellSet.first().cell().getStrength(); // threshold for previous fellow + double prevm = fmeasure; + points.add( new Pair( 1., prevm ) ); // [T=100%] + /* + // This is the continuous version + for ( EvalCell c : cellSet ) { + nbfound++; + if ( c.correct() ) nbcorrect++; + if ( c.cell().getStrength() != prevt ) { // may record a new point + fmeasure = 2*(double)nbcorrect/(double)(nbfound+nbexpected); // alternate formula + if ( fmeasure != prevm ) { + points.add( new Pair( prevt, prevm ) ); + points.add( new Pair( c.cell().getStrength(), fmeasure ) ); + prevm = fmeasure; + if ( fmeasure > opt ) opt = fmeasure; // for real optimal + } + prevt = c.cell().getStrength(); + } + } + fmeasure = 2*(double)nbcorrect/(double)(nbfound+nbexpected); + */ + // This is the version with increment + // Determine what the increment is + double increment = 1./(double)STEP; + //System.err.println(" INCREMENT SET "+increment ); + double next = 1.; + next -= increment; + for ( EvalCell c : cellSet ) { + fmeasure = 2*(double)nbcorrect/(double)(nbfound+nbexpected); // alternate formula + if ( fmeasure > opt && c.cell().getStrength() < prevt ) { + opt = fmeasure; // for real optimal + } else { // but only when all correspondences with same strength have been processed + prevt = c.cell().getStrength(); + } + while ( next >= 0.001 && c.cell().getStrength() <= next ) { // increment achieved + 
points.add( new Pair( next, fmeasure ) ); + next -= increment; + } + nbfound++; + if ( c.correct() ) { + nbcorrect++; + } + } + fmeasure = 2*(double)nbcorrect/(double)(nbfound+nbexpected); + if ( fmeasure > opt ) opt = fmeasure; // for real optimal + // In the end, it should exhaust all the thresholds + while ( next >= 0.001 ) { // The bound is necessary for avoiding tikz problem + points.add( new Pair( next, fmeasure ) ); // same measure + next -= increment; + } + points.add( new Pair( 0., fmeasure ) ); // [T=0%] + return points; + } + + /** + * This output the result + */ + public void write(PrintWriter writer) throws java.io.IOException { + writer.println("<?xml version='1.0' encoding='utf-8' standalone='yes'?>"); + writer.println("<"+SyntaxElement.RDF.print()+" xmlns:"+Namespace.RDF.shortCut+"='"+Namespace.RDF.prefix+"'>"); + writer.println(" <output "+SyntaxElement.RDF_ABOUT.print()+"=''>"); + writeXMLMap( writer ); + writer.print(" <OPTIMUM>"+opt+"</OPTIMUM>\n"); + writer.print(" </output>\n</"+SyntaxElement.RDF.print()+">\n"); + } + + public double getGlobalResult(){ // can only be the full measure + return opt; + } + + public String xlabel() { return "threshold"; } + public String ylabel() { return "fmeasure"; } +} + diff --git a/src/fr/inrialpes/exmo/align/util/GenPlot.java b/src/fr/inrialpes/exmo/align/util/GenPlot.java index dc400dc09dde35478bf0c93242635bc022b5095f..63566750e62ac013a7e34b43a95d0112641d2b7b 100644 --- a/src/fr/inrialpes/exmo/align/util/GenPlot.java +++ b/src/fr/inrialpes/exmo/align/util/GenPlot.java @@ -103,8 +103,8 @@ public class GenPlot { String outFile = null; Constructor evalConstructor = null; Constructor graphConstructor = null; - String ylabel = "precision"; - String xlabel = "recall"; + String xlabel; + String ylabel; String type = "tsv"; int debug = 0; int size = 0; // the set of algo to compare @@ -151,10 +151,6 @@ public class GenPlot { case 'g' : /* Name of the graph display to use */ graphCN = g.getOptarg(); - if ( 
graphCN.equals("fr.inrialpes.exmo.align.impl.eval.ROCCurveEvaluator") ) { - xlabel = "noise"; - ylabel = "recall"; - } break; case 't' : /* Type of output (tex/tsv(/html/xml/ascii)) */ @@ -209,7 +205,7 @@ public class GenPlot { // . -> Vector<EvalCell> listEvaluators = iterateDirectories(); - // + // Find the largest value int max = 0; for( GraphEvaluator e : listEvaluators ) { int n = e.nbCells(); @@ -217,6 +213,9 @@ public class GenPlot { } params.setProperty( "scale", Integer.toString( max ) ); + xlabel = listEvaluators.get(0).xlabel(); + ylabel = listEvaluators.get(0).ylabel(); + // Vector<EvalCell> -> Vector<Pair> // Convert the set of alignments into the list of required point pairs // We must convert the