diff --git a/src/fr/inrialpes/exmo/align/impl/eval/ROCCurveEvaluator.java b/src/fr/inrialpes/exmo/align/impl/eval/ROCCurveEvaluator.java new file mode 100644 index 0000000000000000000000000000000000000000..ee3a085518aa8dbad126f84b9098e0e6b9ec26f1 --- /dev/null +++ b/src/fr/inrialpes/exmo/align/impl/eval/ROCCurveEvaluator.java @@ -0,0 +1,141 @@ +/* + * $Id$ + * + * Copyright (C) INRIA, 2004-2005, 2007-2010 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +package fr.inrialpes.exmo.align.impl.eval; + +import org.semanticweb.owl.align.Alignment; +import org.semanticweb.owl.align.AlignmentException; +import org.semanticweb.owl.align.Cell; + +import java.util.Enumeration; +import java.util.Properties; +import java.util.Iterator; +import java.util.TreeSet; +import java.util.Set; +import java.util.SortedSet; +import java.util.Comparator; +import java.util.Vector; +import java.io.PrintWriter; +import java.net.URI; + +/** + * Compute ROCCurves + * The first alignment is thus the expected one + * + * @author Jerome Euzenat + * @version $Id$ + * + * ROCCurves traverse the ranked list of correspondences + * Hence, like PRGraphs, it is only relevant to this case. + * X-axis is the number of incorrect correspondences + * Y-axis is the number of correct correspondences + * It is expected that the curve grows fast first and then much slower + * This indicates the accuracy of the matcher. + * + * The "Surface Under Curve" (AUC) is returned as a result. + * AUC is in fact the percentage of surface under the curve (given by N the size of the reference alignment and P the size of all pairs of ontologies, N*P is the full size), Area/N*P. + * It is ususally interpreted as: + * 0.9 - 1.0 excellent + * 0.8 - 0.9 good + * 0.7 - 0.8 fair + * 0.6 - 0.7 poor + * 0.0 - 0.6 bad + * + * The problem with these measures is that they assume that the provided alignment + * is a complete alignment: it contains all pairs of entities hence auc are comparable + * because they depend on the same sizes... This is not the case when alignment are + * incomplete. Hence, this should be normalised. + * + * There are two ways of doing this: + * - simply Area/N*P, but this would advantage + * - considering the current subpart Area/N*| + * => both would advantage matchers with high precision + * penalising them Area/N*P + * - interpolating the curve: + * NOT EASY, TO BE IMPLEMENTED + */ + +public class ROCCurveEvaluator extends GraphEvaluator { + + private double auc = 0.0; + + public ROCCurveEvaluator( Alignment align1, Alignment align2 ) { + super(align1, align2); + } + + /** + * Compute ROCCurve + */ + public double eval( Properties params ) throws AlignmentException { + return eval( params, (Object)null ); + } + public double eval( Properties params, Object cache ) throws AlignmentException { + // Local variables + int nbfound = 0; + int area = 0; + int x = 0; + int y = 0; + + int scale = align2.nbCells(); + + points = new Vector<Pair>(); + + // Create a sorted structure in which putting the cells + // TreeSet could be replaced by something else + SortedSet<Cell> cellSet = orderAlignment(); + + // Collect the points in the curve + points.add( new Pair( 0., 0. ) ); // [Origin] + for( Cell c2 : cellSet ) { + nbfound++; + if ( correctCell( c2, align2, align1 ) > 0. ) { + y++; + } else { + x++; area += y; + } + points.add( new Pair( x, y ) ); + } + auc = (double)area / (double)nbfound; + + return auc; + } + + /** + * This output the result + */ + public void write(PrintWriter writer) throws java.io.IOException { + writer.println("<?xml version='1.0' encoding='utf-8' standalone='yes'?>"); + writer.println("<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>"); + writer.println(" <output rdf:about=''>"); + writeXMLMap( writer ); + writer.print(" <AUC>"+auc+"</AUC>\n"); + writer.print(" </output>\n</rdf:RDF>\n"); + } + + public void writePlot(PrintWriter writer) { + writeFullPlot( writer ); + } + + public double getAUC(){ + return auc; + } +} +