Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 5d49db87 authored by Jérôme Euzenat's avatar Jérôme Euzenat
Browse files

- Pre-4.0 tutorial upgrade

parent 446ff40c
No related branches found
No related tags found
No related merge requests found
/* /*
* $Id$ * $Id$
* *
* Copyright (C) INRIA, 2006-2009, 2010 * Copyright (C) INRIA, 2006-2010
* *
* Modifications to the initial code base are copyright of their * Modifications to the initial code base are copyright of their
* respective authors, or their employers as appropriate. Authorship * respective authors, or their employers as appropriate. Authorship
...@@ -48,11 +48,16 @@ import java.io.BufferedWriter; ...@@ -48,11 +48,16 @@ import java.io.BufferedWriter;
import java.io.OutputStreamWriter; import java.io.OutputStreamWriter;
import java.io.File; import java.io.File;
import java.net.URI; import java.net.URI;
import java.util.Properties;
/** /**
* MyApp * MyApp
* *
* Takes two files as arguments and align them. * Takes two files as arguments and align them.
* Match them with different matching methods
* Merge the results
* Selects the threshold that provides the best F-measure
* Return the alignment trimmed at that threshold as OWL Axioms
*/ */
public class MyApp { public class MyApp {
...@@ -61,6 +66,7 @@ public class MyApp { ...@@ -61,6 +66,7 @@ public class MyApp {
URI onto1 = null; URI onto1 = null;
URI onto2 = null; URI onto2 = null;
Properties params = new BasicParameters(); Properties params = new BasicParameters();
int question = 1;
try { try {
// Loading ontologies // Loading ontologies
...@@ -72,6 +78,7 @@ public class MyApp { ...@@ -72,6 +78,7 @@ public class MyApp {
return ; return ;
} }
// Run two different alignment methods (e.g., ngram distance and smoa) // Run two different alignment methods (e.g., ngram distance and smoa)
AlignmentProcess a1 = new StringDistAlignment(); AlignmentProcess a1 = new StringDistAlignment();
params.setProperty("stringFunction","smoaDistance"); params.setProperty("stringFunction","smoaDistance");
...@@ -83,41 +90,59 @@ public class MyApp { ...@@ -83,41 +90,59 @@ public class MyApp {
params.setProperty("stringFunction","ngramDistance"); params.setProperty("stringFunction","ngramDistance");
a2.align( (Alignment)null, params ); a2.align( (Alignment)null, params );
// Merge the two results. if ( question == 2 ) {
((BasicAlignment)a1).ingest(a2); // Clone a1
System.err.println( a1.nbCells() );
// Threshold at various thresholds BasicAlignment a3 = (BasicAlignment)(a1.clone());
// Evaluate them against the references System.err.println( a3.nbCells() );
// and choose the one with the best F-Measure
AlignmentParser aparser = new AlignmentParser(0); // Merge the two results.
// Changed by Angel for Windows a3.ingest( a2 );
//Alignment reference = aparser.parse( "file://"+(new File ( "refalign.rdf" ) . getAbsolutePath()) ); System.err.println( a3.nbCells() );
Alignment reference = aparser.parse( (new File ( "refalign.rdf" ) ) . toURL() . toString());
Evaluator evaluator = new PRecEvaluator( reference, a1 );
double best = 0.; // Invert the alignement
Alignment result = null; Alignment a4 = a3.inverse();
Properties p = new BasicParameters(); System.err.println( a4.nbCells() );
for ( int i = 0; i <= 10 ; i += 2 ){
a1.cut( ((double)i)/10 ); // Trim above .5
// JE: I do not understand why I must create a new one! a4.cut( .5 );
evaluator = new PRecEvaluator( reference, a1 ); System.err.println( a4.nbCells() );
evaluator.eval( p ); } else {
System.err.println("Threshold "+(((double)i)/10)+" : "+((PRecEvaluator)evaluator).getFmeasure()+" over "+a1.nbCells()+" cells"); // Merge the two results.
if ( ((PRecEvaluator)evaluator).getFmeasure() > best ) { ((BasicAlignment)a1).ingest(a2);
result = (BasicAlignment)((BasicAlignment)a1).clone();
best = ((PRecEvaluator)evaluator).getFmeasure(); // Trim at various thresholds
// Evaluate them against the references
// and choose the one with the best F-Measure
AlignmentParser aparser = new AlignmentParser(0);
// Changed by Angel for Windows
//Alignment reference = aparser.parse( "file://"+(new File ( "../refalign.rdf" ) . getAbsolutePath()) );
Alignment reference = aparser.parse( new File( "../refalign.rdf" ).toURI() );
double best = 0.;
Alignment result = null;
Properties p = new BasicParameters();
for ( int i = 0; i <= 10 ; i += 2 ){
a1.cut( ((double)i)/10 );
// This operation must be repeated because the modifications in a1
// are not taken into account otherwise
Evaluator evaluator = new PRecEvaluator( reference, a1 );
evaluator.eval( p );
System.err.println("Threshold "+(((double)i)/10)+" : "+((PRecEvaluator)evaluator).getFmeasure()+" over "+a1.nbCells()+" cells");
if ( ((PRecEvaluator)evaluator).getFmeasure() > best ) {
result = (BasicAlignment)((BasicAlignment)a1).clone();
best = ((PRecEvaluator)evaluator).getFmeasure();
}
} }
// Displays it as OWL Rules
PrintWriter writer = new PrintWriter (
new BufferedWriter(
new OutputStreamWriter( System.out, "UTF-8" )), true);
AlignmentVisitor renderer = new OWLAxiomsRendererVisitor(writer);
result.render(renderer);
writer.flush();
writer.close();
} }
// Displays it as OWL Rules
PrintWriter writer = new PrintWriter (
new BufferedWriter(
new OutputStreamWriter( System.out, "UTF-8" )), true);
AlignmentVisitor renderer = new OWLAxiomsRendererVisitor(writer);
a1.render(renderer);
writer.flush();
writer.close();
} catch (Exception e) { e.printStackTrace(); }; } catch (Exception e) { e.printStackTrace(); };
} }
} }
...@@ -44,6 +44,7 @@ import java.io.BufferedWriter; ...@@ -44,6 +44,7 @@ import java.io.BufferedWriter;
import java.io.OutputStreamWriter; import java.io.OutputStreamWriter;
import java.io.File; import java.io.File;
import java.net.URI; import java.net.URI;
import java.util.Properties;
/** /**
* The Skeleton of code for embedding the alignment API * The Skeleton of code for embedding the alignment API
......
...@@ -56,11 +56,11 @@ div.logic { ...@@ -56,11 +56,11 @@ div.logic {
<p>A skeleton of program using the Alignment <abbr>API</abbr> is <a href="Skeleton.java">Skeleton.java</a>. It can be compiled by invoking:</p> <p>A skeleton of program using the Alignment <abbr>API</abbr> is <a href="Skeleton.java">Skeleton.java</a>. It can be compiled by invoking:</p>
<div class="fragment"> <div class="fragment">
$ javac -classpath ../../lib/align.jar:../../lib/procalign.jar -d results Skeleton.java $ javac -classpath ../../../lib/align.jar:../../../lib/procalign.jar -d results Skeleton.java
</div> </div>
<p>and run by:</p> <p>and run by:</p>
<div class="fragment"> <div class="fragment">
$ java -cp ../../lib/Procalign.jar:results Skeleton file://$CWD/myOnto.owl file://$CWD/edu.mit.visus.bibtex.owl $ java -cp ../../../lib/Procalign.jar:results Skeleton file://$CWD/myOnto.owl file://$CWD/edu.mit.visus.bibtex.owl
</div> </div>
<p>Now considering the <abbr>API</abbr> (that can be consulted through its <p>Now considering the <abbr>API</abbr> (that can be consulted through its
...@@ -116,6 +116,7 @@ parameters corresponding to "smoaDistance" and "ngramDistance". ...@@ -116,6 +116,7 @@ parameters corresponding to "smoaDistance" and "ngramDistance".
a1.align( (Alignment)null, params ); a1.align( (Alignment)null, params );
AlignmentProcess a2 = new StringDistAlignment(); AlignmentProcess a2 = new StringDistAlignment();
a2.init ( onto1, onto2 ); a2.init ( onto1, onto2 );
params = new BasicParameters();
params.setProperty("stringFunction","ngramDistance"); params.setProperty("stringFunction","ngramDistance");
a2.align( (Alignment)null, params ); a2.align( (Alignment)null, params );
</pre> </pre>
...@@ -159,15 +160,15 @@ corresondences in the resulting alignment. ...@@ -159,15 +160,15 @@ corresondences in the resulting alignment.
System.err.println( a1.nbCells() ); System.err.println( a1.nbCells() );
BasicAlignment a3 = (BasicAlignment)(a1.clone()); BasicAlignment a3 = (BasicAlignment)(a1.clone());
System.err.println( a3.nbCells() ); System.err.println( a3.nbCells() );
// Merge the two results. // Merge the two results.
a3.ingest( a2 ); a3.ingest( a2 );
System.err.println( a3.nbCells() ); System.err.println( a3.nbCells() );
// Invert the alignment // Invert the alignment
BasicAlignment a4 = a3.inverse(); Alignment a4 = a3.inverse();
System.err.println( a4.nbCells() ); System.err.println( a4.nbCells() );
// Trim above .5 // Trim above .5
a4.cut( .5 ); a4.cut( .5 );
System.err.println( a4.nbCells() ); System.err.println( a4.nbCells() );
...@@ -197,10 +198,12 @@ reference alignment, then creates an instance ...@@ -197,10 +198,12 @@ reference alignment, then creates an instance
of <tt>PRecEvaluator</tt> for computing precision and recall between of <tt>PRecEvaluator</tt> for computing precision and recall between
the alignment <tt>a1</tt> above with respect to the reference alignment. the alignment <tt>a1</tt> above with respect to the reference alignment.
<div class="fragment"> <div class="fragment">
// Load the reference alignment
AlignmentParser aparser = new AlignmentParser(0); AlignmentParser aparser = new AlignmentParser(0);
Alignment reference = aparser.parse( (new File ( "refalign.rdf" ) ).toURL().toString() ); Alignment reference = aparser.parse( new File( "../refalign.rdf" ).toURI() );
Evaluator evaluator = new PRecEvaluator( reference, a1 ); Evaluator evaluator = new PRecEvaluator( reference, a1 );
evaluator.eval( new BasicParameters() ); evaluator.eval();
</div> </div>
As previously, results are stored within the <tt>Evaluator</tt> object As previously, results are stored within the <tt>Evaluator</tt> object
and are accessed through specific accessors. and are accessed through specific accessors.
...@@ -218,17 +221,22 @@ F-measure. ...@@ -218,17 +221,22 @@ F-measure.
</div> </div>
<div class="explain" id="qu5"> <div class="explain" id="qu5">
<pre> <pre>
// Trim at various thresholds
// Evaluate them against the references
// and choose the one with the best F-Measure
double best = 0.; double best = 0.;
Alignment result = null; Alignment result = null;
Properties p = new BasicParameters(); Properties p = new BasicParameters();
for ( int i = 0; i <= 10 ; i += 2 ){ for ( int i = 0; i <= 10 ; i += 2 ){
a1.cut( ((double)i)/10 ); a1.cut( ((double)i)/10 );
evaluator = new PRecEvaluator( reference, a1 ); // This operation must be repeated because the modifications in a1
// are not taken into account otherwise
Evaluator evaluator = new PRecEvaluator( reference, a1 );
evaluator.eval( p ); evaluator.eval( p );
System.err.println("Threshold "+(((double)i)/10)+" : "+((PRecEvaluator)evaluator).getFmeasure()+" over "+a1.nbCells()+" cells"); System.err.println("Threshold "+(((double)i)/10)+" : "+((PRecEvaluator)evaluator).getFmeasure()+" over "+a1.nbCells()+" cells");
if ( ((PRecEvaluator)evaluator).getFmeasure() > best ) { if ( ((PRecEvaluator)evaluator).getFmeasure() > best ) {
result = (BasicAlignment)((BasicAlignment)a1).clone(); result = (BasicAlignment)((BasicAlignment)a1).clone();
best = ((PRecEvaluator)evaluator).getFmeasure(); best = ((PRecEvaluator)evaluator).getFmeasure();
} }
} }
</pre> </pre>
...@@ -260,8 +268,8 @@ alignment selected at the previous exercise as a set of OWL axioms. ...@@ -260,8 +268,8 @@ alignment selected at the previous exercise as a set of OWL axioms.
<pre> <pre>
// Displays it as OWL Rules // Displays it as OWL Rules
PrintWriter writer = new PrintWriter ( PrintWriter writer = new PrintWriter (
new BufferedWriter( new BufferedWriter(
new OutputStreamWriter( System.out, "UTF-8" )), true); new OutputStreamWriter( System.out, "UTF-8" )), true);
AlignmentVisitor renderer = new OWLAxiomsRendererVisitor(writer); AlignmentVisitor renderer = new OWLAxiomsRendererVisitor(writer);
result.render(renderer); result.render(renderer);
writer.flush(); writer.flush();
...@@ -292,32 +300,37 @@ alignment selected at the previous exercise as a set of OWL axioms. ...@@ -292,32 +300,37 @@ alignment selected at the previous exercise as a set of OWL axioms.
// Merge the two results. // Merge the two results.
((BasicAlignment)a1).ingest(a2); ((BasicAlignment)a1).ingest(a2);
// Threshold at various thresholds // Load the reference alignment
// Evaluate them against the references
// and choose the one with the best F-Measure
AlignmentParser aparser = new AlignmentParser(0); AlignmentParser aparser = new AlignmentParser(0);
Alignment reference = aparser.parse( (new File ( "refalign.rdf" ) ) . toURL() . toString()); // Changed by Angel for Windows
Evaluator evaluator = new PRecEvaluator( reference, a1 ); //Alignment reference = aparser.parse( "file://"+(new File ( "../refalign.rdf" ) . getAbsolutePath()) );
Alignment reference = aparser.parse( new File( "../refalign.rdf" ).toURI() );
// Trim at various thresholds
// Evaluate them against the references
// and choose the one with the best F-Measure
double best = 0.; double best = 0.;
Alignment result = null; Alignment result = null;
Properties p = new BasicParameters(); Properties p = new BasicParameters();
for ( int i = 0; i <= 10 ; i += 2 ){ for ( int i = 0; i <= 10 ; i += 2 ){
a1.cut( ((double)i)/10 ); a1.cut( ((double)i)/10 );
evaluator = new PRecEvaluator( reference, a1 ); // This operation must be repeated because the modifications in a1
// are not taken into account otherwise
Evaluator evaluator = new PRecEvaluator( reference, a1 );
evaluator.eval( p ); evaluator.eval( p );
System.err.println("Threshold "+(((double)i)/10)+" : "+((PRecEvaluator)evaluator).getFmeasure()+" over "+a1.nbCells()+" cells"); System.err.println("Threshold "+(((double)i)/10)+" : "+((PRecEvaluator)evaluator).getFmeasure()+" over "+a1.nbCells()+" cells");
if ( ((PRecEvaluator)evaluator).getFmeasure() > best ) { if ( ((PRecEvaluator)evaluator).getFmeasure() > best ) {
result = (BasicAlignment)((BasicAlignment)a1).clone(); result = (BasicAlignment)((BasicAlignment)a1).clone();
best = ((PRecEvaluator)evaluator).getFmeasure(); best = ((PRecEvaluator)evaluator).getFmeasure();
} }
} }
// Displays it as OWL Rules // Displays it as OWL Rules
PrintWriter writer = new PrintWriter ( PrintWriter writer = new PrintWriter (
new BufferedWriter( new BufferedWriter(
new OutputStreamWriter( System.out, "UTF-8" )), true); new OutputStreamWriter( System.out, "UTF-8" )), true);
AlignmentVisitor renderer = new OWLAxiomsRendererVisitor(writer); AlignmentVisitor renderer = new OWLAxiomsRendererVisitor(writer);
a1.render(renderer); result.render(renderer);
writer.flush(); writer.flush();
writer.close(); writer.close();
</pre></div> </pre></div>
...@@ -326,22 +339,24 @@ alignment selected at the previous exercise as a set of OWL axioms. ...@@ -326,22 +339,24 @@ alignment selected at the previous exercise as a set of OWL axioms.
This can be compiled and used through: This can be compiled and used through:
</p> </p>
<div class="fragment"> <div class="fragment">
$ javac -classpath ../../lib/align.jar:../../lib/procalign.jar -d results MyApp.java $ javac -classpath ../../../lib/align.jar:../../../lib/procalign.jar -d results MyApp.java
$ java -cp ../../lib/Procalign.jar:results MyApp file://$CWD/myOnto.owl file://$CWD/edu.mit.visus.bibtex.owl > results/MyApp.owl $ java -cp ../../../lib/Procalign.jar:results MyApp file://$CWD/myOnto.owl file://$CWD/edu.mit.visus.bibtex.owl > results/MyApp.owl
</div> </div>
<p>The execution provides an insight about the best threshold: <p>The execution provides an insight about the best threshold:
<pre> <pre>
Threshold 0.0 : 0.4693877551020408 over 148 cells Threshold 0.0 : 0.4999999999999999 over 140 cells
Threshold 0.2 : 0.5227272727272727 over 128 cells Threshold 0.2 : 0.5529411764705882 over 122 cells
Threshold 0.4 : 0.5476190476190476 over 120 cells Threshold 0.4 : 0.5802469135802468 over 114 cells
Threshold 0.6 : 0.6478873239436619 over 94 cells Threshold 0.6 : 0.6861313868613137 over 89 cells
Threshold 0.8 : 0.75 over 72 cells Threshold 0.8 : 0.7692307692307693 over 69 cells
Threshold 1.0 : 0.5151515151515151 over 18 cells Threshold 1.0 : 0.5230769230769231 over 17 cells
</pre> </pre>
<p> <p>
<p>A full working solution is <a href="MyApp.java">MyApp.java</a>.</p> <p>A full working solution is <a href="MyApp.java">MyApp.java</a>.</p>
<div class="logic"><p><b>Advanced question:</b> Can you tell why the stored alignment does not seem to contain 69 cells? (Hint: try to render the alignments in RDF and see what happens)</p></div>
<div class="logic"><p><b>More work:</b> You can add a switch like the <tt>-i</tt> switch of <tt>Procalign</tt> so that the main class of the application can be passed on the command line.</p></div> <div class="logic"><p><b>More work:</b> You can add a switch like the <tt>-i</tt> switch of <tt>Procalign</tt> so that the main class of the application can be passed on the command line.</p></div>
<div class="logic"><p><b>Advanced:</b> What about writing an editor for the alignment <abbr>API</abbr>?</p></div> <div class="logic"><p><b>Advanced:</b> What about writing an editor for the alignment <abbr>API</abbr>?</p></div>
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment