Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 7720bbbf authored by Jérôme Euzenat's avatar Jérôme Euzenat
Browse files

- reengineered evaluators with new multiple alignments

parent 381fb626
No related branches found
No related tags found
No related merge requests found
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
* You should have received a copy of the GNU Lesser General Public License * You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software * along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*/ */
package fr.inrialpes.exmo.align.impl.eval; package fr.inrialpes.exmo.align.impl.eval;
...@@ -34,6 +35,13 @@ import org.semanticweb.owl.model.OWLException; ...@@ -34,6 +35,13 @@ import org.semanticweb.owl.model.OWLException;
import java.lang.Math; import java.lang.Math;
import java.util.Enumeration; import java.util.Enumeration;
import java.util.Iterator;
import java.util.TreeSet;
import java.util.HashSet;
import java.util.Set;
import java.util.SortedSet;
import java.util.Comparator;
import java.util.Vector;
import java.io.PrintWriter; import java.io.PrintWriter;
import java.io.IOException; import java.io.IOException;
...@@ -48,123 +56,202 @@ import org.xml.sax.SAXException; ...@@ -48,123 +56,202 @@ import org.xml.sax.SAXException;
* *
* @author Jerome Euzenat * @author Jerome Euzenat
* @version $Id$ * @version $Id$
*
* The computation is remotely inspired from the sample programme of
* Raymond J. Mooney
* available under GPL from http://www.cs.utexas.edu/users/mooney/ir-course/
*
* Mooney also provides the averaging of these graphs over several queries:
* unfortunatelly, the resulting graph is not anymore a Precision/Recall graph
*/ */
public class PRGraphEvaluator extends BasicEvaluator { public class PRGraphEvaluator extends BasicEvaluator {
private int STEP = 10;
// The eleven values of precision and recall // The eleven values of precision and recall
private double precision[]; private double[] precisions = null;
private double recall[];
private Vector points;
/** Creation **/ /** Creation **/
public PRGraphEvaluator(Alignment align1, Alignment align2) { public PRGraphEvaluator(Alignment align1, Alignment align2) {
super(align1, align2); super(align1, align2);
precision = new double[11]; points = new Vector();
recall = new double[11];
} }
/** /**
* * Compute precision and recall graphs.
* The formulas of P and R are standard: * The algorithm is as follows:
* given a reference alignment A * 1) Order the pairs of the found alignment.
* given an obtained alignment B * 2) For
* which are sets of cells (linking one entity of ontology O to another of ontolohy O').
*
* P = |A inter B| / |B|
* R = |A inter B| / |A|
* F = 2PR/(P+R)
* with inter = set intersection and |.| cardinal.
*
* They now depend not on all the results but on the results with
* confidence above each unit.
* |A| never varies
* |B| varies each time (and can be decremented when we decrement the
* set of alignments in A inter B.
*
* In the implementation |B|=nbfound, |A|=nbexpected and |A inter B|=nbcorrect.
*/ */
public double eval(Parameters params) throws AlignmentException { public double eval(Parameters params) throws AlignmentException {
// Local variables
int nbexpected = align1.nbCells(); int nbexpected = align1.nbCells();
int nbfound[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; int nbfound = 0;
int nbcorrect[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; int nbcorrect = 0;
// Record the number of found slice by slice
for (Enumeration e = align2.getElements(); e.hasMoreElements();) {
Cell c2 = (Cell) e.nextElement();
int j = (int)( c2.getStrength() / .1 );
System.err.println(">>>> " + c2.getObject1() + " : " + c2.getObject1() + " : " + c2.getStrength()+" ("+j);
//increment the found corresponding; // unchecked
(nbfound[j])++; if( params.getParameter("step") != null ){
} STEP = ((Integer)params.getParameter("step")).intValue();
// Record the number of correct slice by slice
for (Enumeration e = align1.getElements(); e.hasMoreElements();) {
Cell c1 = (Cell) e.nextElement();
try {
Cell c2 = (Cell) align2.getAlignCell1((OWLEntity) c1.getObject1());
if (c2 != null) {
URI uri1 = ((OWLEntity) c1.getObject2()).getURI();
URI uri2 = ((OWLEntity) c2.getObject2()).getURI();
// if (c1.getobject2 == c2.getobject2)
if (uri1.toString().equals(uri2.toString())) {
int j = (int)( c2.getStrength() / .1 );
//increment the correct corresponding;
(nbcorrect[j])++;
}
}
} catch (Exception exc) {
// Bad URI should not happen there
}
} }
precisions = new double[ STEP+1 ];
// Compute precision record for each slice //TreeSet could be replaced by something else
// What is the definition if: //The comparator must always tell that things are different!
// nbfound is 0 (p, r are 0) /*SortedSet cellSet = new TreeSet(
// nbexpected is 0 [=> nbcorrect is 0] (r=NaN, p=0[if nbfound>0, NaN otherwise]) new Comparator() {
// precision+recall is 0 [= nbcorrect is 0] public int compare( Object o1, Object o2 )
// precision is 0 [= nbcorrect is 0] throws ClassCastException{
for ( int i = 10; i >= 0; i-- ){ if ( o1 instanceof Cell
System.err.println(">>>> " + nbcorrect[i] + " : " + nbfound[i] + " : " + nbexpected); && o2 instanceof Cell ) {
precision[i] = (double) nbcorrect[i] / (double) nbfound[i]; if ( ((Cell)o1).getStrength() > ((Cell)o2).getStrength() ){
recall[i] = (double) nbcorrect[i] / (double) nbexpected; return -1;
if ( i > 0 ) { } else { return 1; }
nbcorrect[i-1] = nbcorrect[i-1] + nbcorrect[i]; } else {
nbfound[i-1] = nbfound[i-1] + nbfound[i]; throw new ClassCastException();
} }}});*/
} SortedSet cellSet = new TreeSet(
new Comparator() {
public int compare( Object o1, Object o2 )
throws ClassCastException{
try {
//System.err.println(((Cell)o1).getObject1()+" -- "+((Cell)o1).getObject2()+" // "+((Cell)o2).getObject1()+" -- "+((Cell)o2).getObject2());
if ( o1 instanceof Cell
&& o2 instanceof Cell ) {
if ( ((Cell)o1).getStrength() > ((Cell)o2).getStrength() ){
return -1;
} else if ( ((Cell)o1).getStrength() < ((Cell)o2).getStrength() ){
return 1;
} else if ( (((OWLEntity)((Cell)o1).getObject1()).getURI().getFragment() == null)
|| (((OWLEntity)((Cell)o2).getObject1()).getURI().getFragment() == null) ) {
return -1;
} else if ( ((OWLEntity)((Cell)o1).getObject1()).getURI().getFragment().compareTo(((OWLEntity)((Cell)o2).getObject1()).getURI().getFragment()) > 0) {
return -1;
} else if ( ((OWLEntity)((Cell)o1).getObject1()).getURI().getFragment().compareTo(((OWLEntity)((Cell)o2).getObject1()).getURI().getFragment()) < 0 ) {
return 1;
} else if ( (((OWLEntity)((Cell)o1).getObject2()).getURI().getFragment() == null)
|| (((OWLEntity)((Cell)o2).getObject2()).getURI().getFragment() == null) ) {
return -1;
} else if ( ((OWLEntity)((Cell)o1).getObject2()).getURI().getFragment().compareTo(((OWLEntity)((Cell)o2).getObject2()).getURI().getFragment()) > 0) {
return -1;
// On va supposer qu'ils n'ont pas le meme nom
} else { return 1; }
} else {
throw new ClassCastException();
}
} catch ( OWLException e) {
e.printStackTrace(); return 0;}
}
}
);
return (result); // Set the found cells in the sorted structure
} for (Enumeration e = align2.getElements(); e.hasMoreElements();) {
cellSet.add( e.nextElement() );
}
// Collect the points that change recall
// (the other provide lower precision from the same recall
// and are not considered)
for( Iterator it = cellSet.iterator(); it.hasNext(); ){
nbfound++;
Cell c2 = (Cell)it.next();
Set s1 = (Set)align1.getAlignCells1((OWLEntity)c2.getObject1());
if( s1 != null ){
for( Iterator it1 = s1.iterator(); it1.hasNext() && c2 != null; ){
Cell c1 = (Cell)it1.next();
try {
URI uri1 = ((OWLEntity)c1.getObject2()).getURI();
URI uri2 = ((OWLEntity)c2.getObject2()).getURI();
// if (c1.getobject2 == c2.getobject2)
if (uri1.toString().equals(uri2.toString())) {
nbcorrect++;
double recall = (double)nbcorrect / (double)nbexpected;
double precision = (double)nbcorrect / (double)nbfound;
// Create a new pair to put in the list
points.add( new Pair( recall, precision ) );
c2 = null; // out of the loop.
}
} catch (Exception exc) { exc.printStackTrace(); }
}
}
}
// Interpolate curve points at each n-recall level
// This is inspired form Ray Mooney's program
// It works backward in the vector,
// (in the same spirit as before, the maximum value so far is retained)
int j = points.size()-1; // index in recall-ordered vector of points
int i = STEP; // index of the current recall interval
double level = (double)i/STEP; // max level of that interval
double best = 0.; // best value found for that interval
while( j >= 0 ){
Pair precrec = (Pair)points.get(j);
while ( precrec.getX() < level ){
precisions[i] = best;
i--;
level = (double)i/STEP;
};
if ( precrec.getY() > best ) best = precrec.getY();
j--;
}
precisions[0] = best;
return 0.0; // useless
}
/** /**
* This now output the Lockheed format. However, the lookheed format * This output the result
* was intended to compare two merged ontologies instead of two alignment.
* So it refered to the:
* - input ontology A
* - input ontology B
* - alignement algorithm (used for obtaining what ????).
* While we compare two alignments (so the source and the reference to these
* algorithms should be within the alignment structure.
*/ */
public void write(PrintWriter writer) throws java.io.IOException { public void write(PrintWriter writer) throws java.io.IOException {
writer.println("<?xml version='1.0' encoding='utf-8' standalone='yes'?>"); writer.println("<?xml version='1.0' encoding='utf-8' standalone='yes'?>");
writer.println("<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'\n xmlns:map='http://www.atl.external.lmco.com/projects/ontology/ResultsOntology.n3#'>"); writer.println("<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>");
writer.println(" <map:output rdf:about=''>"); writer.println(" <output rdf:about=''>");
// Missing items: for( int i=0; i <= STEP; i++ ){
// writer.println(" <map:algorithm rdf:resource=\"\">"); writer.print(" <step>\n <recall>");
// writer.println(" <map:intutA rdf:resource=\"\">"); writer.print((double)i/STEP);
// writer.println(" <map:inputB rdf:resource=\"\">"); writer.print("</recall>\n <precision>");
for( int i=0; i <= 10; i++ ){ writer.print(precisions[i]);
writer.print(" <map:step>\n <map:precision>"); writer.print("</precision>\n </step>\n");
writer.print(precision[i]);
writer.print("</map:precision>\n <map:recall>");
writer.print(recall[i]);
writer.print("</map:recall>\n </map:step>\n");
} }
writer.print(" </map:output>\n</rdf:RDF>\n"); writer.print(" </output>\n</rdf:RDF>\n");
writePlot( writer );
} }
public double getPrecision(int i) { return precision[i]; } /**
public double getRecall(int i) { return recall[i]; } * This output the result
*/
public void writeFullPlot(PrintWriter writer) throws java.io.IOException {
for( int j = 0; j < points.size(); j++ ){
Pair precrec = (Pair)points.get(j);
writer.println( precrec.getX()+" "+precrec.getY() );
}
}
/* Write out the final interpolated recall/precision graph data.
* One line for each recall/precision point in the form: 'R-value P-value'.
* This is the format needed for GNUPLOT.
*/
public void writePlot(PrintWriter writer) throws java.io.IOException {
for(int i = 0; i < STEP+1; i++){
writer.println( (double)i/10 + "\t" + precisions[i]);
}
}
public double getPrecision( int i ){
return precisions[i];
}
} }
class Pair {
private double x;
private double y;
public Pair( double x, double y ){
this.x = x;
this.y = y;
}
public double getX(){ return x; }
public double getY(){ return y; }
}
...@@ -34,6 +34,9 @@ import org.semanticweb.owl.model.OWLException; ...@@ -34,6 +34,9 @@ import org.semanticweb.owl.model.OWLException;
import java.lang.Math; import java.lang.Math;
import java.util.Enumeration; import java.util.Enumeration;
import java.util.Iterator;
import java.util.HashSet;
import java.util.Set;
import java.io.PrintWriter; import java.io.PrintWriter;
import java.io.IOException; import java.io.IOException;
...@@ -94,6 +97,8 @@ public class PRecEvaluator extends BasicEvaluator { ...@@ -94,6 +97,8 @@ public class PRecEvaluator extends BasicEvaluator {
precision = 0.; precision = 0.;
recall = 0.; recall = 0.;
// mult.
/*
for (Enumeration e = align1.getElements(); e.hasMoreElements();) { for (Enumeration e = align1.getElements(); e.hasMoreElements();) {
Cell c1 = (Cell) e.nextElement(); Cell c1 = (Cell) e.nextElement();
try { try {
...@@ -109,6 +114,29 @@ public class PRecEvaluator extends BasicEvaluator { ...@@ -109,6 +114,29 @@ public class PRecEvaluator extends BasicEvaluator {
} catch (Exception exc) { } catch (Exception exc) {
} }
} }
*/
//if ( align1 instanceof MultipleAlignment )
// System.err.print("align1["+nbexpected+"] OK");
//if ( align2 instanceof MultipleAlignment )
// System.err.println(" align2["+nbfound+"] OK");
for ( Enumeration e = align1.getElements(); e.hasMoreElements();) {
Cell c1 = (Cell)e.nextElement();
Set s2 = (Set)align2.getAlignCells1((OWLEntity)c1.getObject1());
if( s2 != null ){
for( Iterator it2 = s2.iterator(); it2.hasNext() && c1 != null; ){
Cell c2 = (Cell)it2.next();
try {
URI uri1 = ((OWLEntity)c1.getObject2()).getURI();
URI uri2 = ((OWLEntity)c2.getObject2()).getURI();
// if (c1.getobject2 == c2.getobject2)
if (uri1.toString().equals(uri2.toString())) {
nbcorrect++;
c1 = null; // out of the loop.
}
} catch (Exception exc) { exc.printStackTrace(); }
}
}
}
// What is the definition if: // What is the definition if:
// nbfound is 0 (p, r are 0) // nbfound is 0 (p, r are 0)
...@@ -126,23 +154,19 @@ public class PRecEvaluator extends BasicEvaluator { ...@@ -126,23 +154,19 @@ public class PRecEvaluator extends BasicEvaluator {
} }
/** /**
* This now output the Lockheed format. However, the lookheed format * This now output the results in Lockheed format.
* was intended to compare two merged ontologies instead of two alignment.
* So it refered to the:
* - input ontology A
* - input ontology B
* - alignement algorithm (used for obtaining what ????).
* While we compare two alignments (so the source and the reference to these
* algorithms should be within the alignment structure.
*/ */
public void write(PrintWriter writer) throws java.io.IOException { public void write(PrintWriter writer) throws java.io.IOException {
writer.println("<?xml version='1.0' encoding='utf-8' standalone='yes'?>"); writer.println("<?xml version='1.0' encoding='utf-8' standalone='yes'?>");
writer.println("<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'\n xmlns:map='http://www.atl.external.lmco.com/projects/ontology/ResultsOntology.n3#'>"); writer.println("<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'\n xmlns:map='http://www.atl.external.lmco.com/projects/ontology/ResultsOntology.n3#'>");
writer.println(" <map:output rdf:about=''>"); writer.println(" <map:output rdf:about=''>");
// Missing items: //if ( ) {
// writer.println(" <map:algorithm rdf:resource=\"\">"); // writer.println(" <map:algorithm rdf:resource=\"http://co4.inrialpes.fr/align/algo/"+align1.get+"\">");
// writer.println(" <map:intutA rdf:resource=\"\">"); //}
// writer.println(" <map:inputB rdf:resource=\"\">"); try {
writer.println(" <map:intutA rdf:resource=\""+((OWLOntology)(align1.getOntology1())).getURI()+"\">");
writer.println(" <map:inputB rdf:resource=\""+((OWLOntology)(align1.getOntology2())).getURI()+"\">");
} catch (OWLException e) { e.printStackTrace(); };
// Other missing items (easy to get) // Other missing items (easy to get)
// writer.println(" <map:falseNegative>"); // writer.println(" <map:falseNegative>");
// writer.println(" <map:falsePositive>"); // writer.println(" <map:falsePositive>");
...@@ -160,21 +184,6 @@ public class PRecEvaluator extends BasicEvaluator { ...@@ -160,21 +184,6 @@ public class PRecEvaluator extends BasicEvaluator {
writer.print(result); writer.print(result);
writer.print("</result>\n </map:output>\n</rdf:RDF>\n"); writer.print("</result>\n </map:output>\n</rdf:RDF>\n");
} }
/* public void write( PrintStream writer ) throws java.io.IOException {
writer.print("<rdf:RDF>\n <Evaluation class=\"PRecEvaluator\">\n <precision>");
writer.print(precision);
writer.print("</precision>\n <recall>");
writer.print(recall);
writer.print("</recall>\n <fallout>");
writer.print(fallout);
writer.print("</fallout>\n <fmeasure>");
writer.print(fmeasure);
writer.print("</fmeasure>\n <overall>");
writer.print(overall);
writer.print("</overall>\n <result>");
writer.print(result);
writer.print("</result>\n </Evaluation>\n</rdf:RDF>\n");
}*/
public double getPrecision() { return precision; } public double getPrecision() { return precision; }
public double getRecall() { return recall; } public double getRecall() { return recall; }
......
...@@ -37,6 +37,8 @@ import org.semanticweb.owl.model.OWLProperty; ...@@ -37,6 +37,8 @@ import org.semanticweb.owl.model.OWLProperty;
import java.lang.Math; import java.lang.Math;
import java.lang.ClassNotFoundException; import java.lang.ClassNotFoundException;
import java.util.Enumeration; import java.util.Enumeration;
import java.util.Iterator;
import java.util.Set;
import java.io.PrintWriter; import java.io.PrintWriter;
import java.io.IOException; import java.io.IOException;
...@@ -82,39 +84,54 @@ public class SymMeanEvaluator extends BasicEvaluator ...@@ -82,39 +84,54 @@ public class SymMeanEvaluator extends BasicEvaluator
for (Enumeration e = align1.getElements() ; e.hasMoreElements() ;) { for (Enumeration e = align1.getElements() ; e.hasMoreElements() ;) {
Cell c1 = (Cell)e.nextElement(); Cell c1 = (Cell)e.nextElement();
if ( c1.getObject1() instanceof OWLClass ) Set s2 = (Set)align2.getAlignCells1((OWLEntity)c1.getObject1());
nbClassCell++; if( s2 != null ){
else if ( c1.getObject1() instanceof OWLProperty ) // should be put to the length...of the set
nbPropCell++; if ( c1.getObject1() instanceof OWLClass )
else nbIndCell++; nbClassCell = nbClassCell + s2.size() ;
Cell c2 = (Cell)align2.getAlignCell1((OWLEntity)c1.getObject1()); else if ( c1.getObject1() instanceof OWLProperty )
if ( c2 != null ){ nbPropCell = nbPropCell + s2.size();
if ( c1.getObject2() == c2.getObject2() ) { else nbIndCell = nbIndCell + s2.size();
if ( c1.getObject2() instanceof OWLClass ) { for( Iterator it2 = s2.iterator(); it2.hasNext() && c1 != null; ){
classScore = classScore + 1 - Math.abs(c2.getStrength() - c1.getStrength()); Cell c2 = (Cell)it2.next();
} else if ( c1.getObject2() instanceof OWLProperty ) { //try {
propScore = propScore + 1 - Math.abs(c2.getStrength() - c1.getStrength()); //URI uri1 = ((OWLEntity)c1.getObject2()).getURI();
} else { //URI uri2 = ((OWLEntity)c2.getObject2()).getURI();
indScore = indScore + 1 - Math.abs(c2.getStrength() - c1.getStrength());}}} // if (c1.getobject2 == c2.getobject2)
} //if (uri1.toString().equals(uri2.toString())) {
if ( c1.getObject2() == c2.getObject2() ) {
if ( c1.getObject2() instanceof OWLClass ) {
classScore = classScore + 1 - Math.abs(c2.getStrength() - c1.getStrength());
} else if ( c1.getObject2() instanceof OWLProperty ) {
propScore = propScore + 1 - Math.abs(c2.getStrength() - c1.getStrength());
} else {
indScore = indScore + 1 - Math.abs(c2.getStrength() - c1.getStrength());}}}}}
for (Enumeration e = align2.getElements() ; e.hasMoreElements() ;) { for (Enumeration e = align2.getElements() ; e.hasMoreElements() ;) {
Cell c2 = (Cell)e.nextElement(); Cell c2 = (Cell)e.nextElement();
if ( c2.getObject1() instanceof OWLClass ) Set s1 = (Set)align1.getAlignCells2((OWLEntity)c2.getObject1());
nbClassCell++; if( s1 != null ){
else if ( c2.getObject1() instanceof OWLProperty ) // should be put to the length...of the set
nbPropCell++; if ( c2.getObject1() instanceof OWLClass )
else nbIndCell++; nbClassCell = nbClassCell + s1.size() ;
Cell c1 = (Cell)align1.getAlignCell1((OWLEntity)c2.getObject1()); else if ( c2.getObject1() instanceof OWLProperty )
if ( c1 != null ){ nbPropCell = nbPropCell + s1.size();
if ( c2.getObject2() == c1.getObject2() ) { else nbIndCell = nbIndCell + s1.size();
if ( c2.getObject2() instanceof OWLClass ) { for( Iterator it1 = s1.iterator(); it1.hasNext() && c2 != null; ){
classScore = classScore + 1 - Math.abs(c1.getStrength() - c2.getStrength()); Cell c1 = (Cell)it1.next();
} else if ( c2.getObject2() instanceof OWLProperty ) { //try {
propScore = propScore + 1 - Math.abs(c1.getStrength() - c2.getStrength()); //URI uri2 = ((OWLEntity)c2.getObject1()).getURI();
} else { //URI uri1 = ((OWLEntity)c1.getObject1()).getURI();
indScore = indScore + 1 - Math.abs(c1.getStrength() - c2.getStrength());}}} // if (c2.getobject1 == c1.getobject1)
} //if (uri2.toString().equals(uri1.toString())) {
if ( c2.getObject1() == c1.getObject1() ) {
if ( c2.getObject1() instanceof OWLClass ) {
classScore = classScore + 1 - Math.abs(c1.getStrength() - c2.getStrength());
} else if ( c2.getObject1() instanceof OWLProperty ) {
propScore = propScore + 1 - Math.abs(c1.getStrength() - c2.getStrength());
} else {
indScore = indScore + 1 - Math.abs(c1.getStrength() - c2.getStrength());}}}}}
// Beware, this must come first // Beware, this must come first
result = (classScore+propScore+indScore) / (nbClassCell+nbPropCell+nbIndCell); result = (classScore+propScore+indScore) / (nbClassCell+nbPropCell+nbIndCell);
classScore = classScore / nbClassCell; classScore = classScore / nbClassCell;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment