diff --git a/html/tutorial/tutorial3/MyApp.java b/html/tutorial/tutorial3/MyApp.java index 176960a7703425721d4544e100dc3fc7b652a97b..995099e619973fc11938ef14d42b7d2cb748e0b7 100644 --- a/html/tutorial/tutorial3/MyApp.java +++ b/html/tutorial/tutorial3/MyApp.java @@ -1,7 +1,7 @@ /* * $Id$ * - * Copyright (C) INRIA, 2006-2009, 2010 + * Copyright (C) INRIA, 2006-2010 * * Modifications to the initial code base are copyright of their * respective authors, or their employers as appropriate. Authorship @@ -48,11 +48,16 @@ import java.io.BufferedWriter; import java.io.OutputStreamWriter; import java.io.File; import java.net.URI; +import java.util.Properties; /** * MyApp * * Takes two files as arguments and align them. + * Match them with different matching methods + * Merge the results + * Selects the threshold that provides the best F-measure + * Returns the alignment trimmed at that threshold as OWL Axioms */ public class MyApp { @@ -61,6 +66,7 @@ public class MyApp { URI onto1 = null; URI onto2 = null; Properties params = new BasicParameters(); + int question = 1; try { // Loading ontologies @@ -72,6 +78,7 @@ public class MyApp { return ; } + // Run two different alignment methods (e.g., ngram distance and smoa) AlignmentProcess a1 = new StringDistAlignment(); params.setProperty("stringFunction","smoaDistance"); @@ -83,41 +90,59 @@ public class MyApp { params.setProperty("stringFunction","ngramDistance"); a2.align( (Alignment)null, params ); - // Merge the two results. - ((BasicAlignment)a1).ingest(a2); - - // Threshold at various thresholds - // Evaluate them against the references - // and choose the one with the best F-Measure - AlignmentParser aparser = new AlignmentParser(0); - // Changed by Angel for Windows - //Alignment reference = aparser.parse( "file://"+(new File ( "refalign.rdf" ) . getAbsolutePath()) ); - Alignment reference = aparser.parse( (new File ( "refalign.rdf" ) ) . toURL() . 
toString()); - Evaluator evaluator = new PRecEvaluator( reference, a1 ); + if ( question == 2 ) { + // Clone a1 + System.err.println( a1.nbCells() ); + BasicAlignment a3 = (BasicAlignment)(a1.clone()); + System.err.println( a3.nbCells() ); + + // Merge the two results. + a3.ingest( a2 ); + System.err.println( a3.nbCells() ); - double best = 0.; - Alignment result = null; - Properties p = new BasicParameters(); - for ( int i = 0; i <= 10 ; i += 2 ){ - a1.cut( ((double)i)/10 ); - // JE: I do not understand why I must create a new one! - evaluator = new PRecEvaluator( reference, a1 ); - evaluator.eval( p ); - System.err.println("Threshold "+(((double)i)/10)+" : "+((PRecEvaluator)evaluator).getFmeasure()+" over "+a1.nbCells()+" cells"); - if ( ((PRecEvaluator)evaluator).getFmeasure() > best ) { - result = (BasicAlignment)((BasicAlignment)a1).clone(); - best = ((PRecEvaluator)evaluator).getFmeasure(); + // Invert the alignment + Alignment a4 = a3.inverse(); + System.err.println( a4.nbCells() ); + + // Trim above .5 + a4.cut( .5 ); + System.err.println( a4.nbCells() ); + } else { + // Merge the two results. + ((BasicAlignment)a1).ingest(a2); + + // Trim at various thresholds + // Evaluate them against the references + // and choose the one with the best F-Measure + AlignmentParser aparser = new AlignmentParser(0); + // Changed by Angel for Windows + //Alignment reference = aparser.parse( "file://"+(new File ( "../refalign.rdf" ) . 
getAbsolutePath()) ); + Alignment reference = aparser.parse( new File( "../refalign.rdf" ).toURI() ); + + double best = 0.; + Alignment result = null; + Properties p = new BasicParameters(); + for ( int i = 0; i <= 10 ; i += 2 ){ + a1.cut( ((double)i)/10 ); + // This operation must be repeated because the modifications in a1 + // are not taken into account otherwise + Evaluator evaluator = new PRecEvaluator( reference, a1 ); + evaluator.eval( p ); + System.err.println("Threshold "+(((double)i)/10)+" : "+((PRecEvaluator)evaluator).getFmeasure()+" over "+a1.nbCells()+" cells"); + if ( ((PRecEvaluator)evaluator).getFmeasure() > best ) { + result = (BasicAlignment)((BasicAlignment)a1).clone(); + best = ((PRecEvaluator)evaluator).getFmeasure(); + } } + // Displays it as OWL Rules + PrintWriter writer = new PrintWriter ( + new BufferedWriter( + new OutputStreamWriter( System.out, "UTF-8" )), true); + AlignmentVisitor renderer = new OWLAxiomsRendererVisitor(writer); + result.render(renderer); + writer.flush(); + writer.close(); } - // Displays it as OWL Rules - PrintWriter writer = new PrintWriter ( - new BufferedWriter( - new OutputStreamWriter( System.out, "UTF-8" )), true); - AlignmentVisitor renderer = new OWLAxiomsRendererVisitor(writer); - a1.render(renderer); - writer.flush(); - writer.close(); - } catch (Exception e) { e.printStackTrace(); }; } } diff --git a/html/tutorial/tutorial3/Skeleton.java b/html/tutorial/tutorial3/Skeleton.java index cc47774b1c8556efcaed36add0531ff0a1a5ce12..5744cb6e6923ce36cb12aaf6c1c2118d752e5280 100644 --- a/html/tutorial/tutorial3/Skeleton.java +++ b/html/tutorial/tutorial3/Skeleton.java @@ -44,6 +44,7 @@ import java.io.BufferedWriter; import java.io.OutputStreamWriter; import java.io.File; import java.net.URI; +import java.util.Properties; /** * The Skeleton of code for embeding the alignment API diff --git a/html/tutorial/tutorial3/embed.html b/html/tutorial/tutorial3/embed.html index 
3cc5b146abd521f95068ecbb1b8224124188727a..163eb65ad74d4642f95ef5fc622d39caf2cd324d 100644 --- a/html/tutorial/tutorial3/embed.html +++ b/html/tutorial/tutorial3/embed.html @@ -56,11 +56,11 @@ div.logic { <p>A skeleton of program using the Alignment <abbr>API</abbr> is <a href="Skeleton.java">Skeleton.java</a>. It can be compiled by invoking:</p> <div class="fragment"> -$ javac -classpath ../../lib/align.jar:../../lib/procalign.jar -d results Skeleton.java +$ javac -classpath ../../../lib/align.jar:../../../lib/procalign.jar -d results Skeleton.java </div> <p>and run by:</p> <div class="fragment"> -$ java -cp ../../lib/Procalign.jar:results Skeleton file://$CWD/myOnto.owl file://$CWD/edu.mit.visus.bibtex.owl +$ java -cp ../../../lib/Procalign.jar:results Skeleton file://$CWD/myOnto.owl file://$CWD/edu.mit.visus.bibtex.owl </div> <p>Now considering the <abbr>API</abbr> (that can be consulted through its @@ -116,6 +116,7 @@ parameters corresponding to "smoaDistance" and "ngramDistance". a1.align( (Alignment)null, params ); AlignmentProcess a2 = new StringDistAlignment(); a2.init ( onto1, onto2 ); + params = new BasicParameters(); params.setProperty("stringFunction","ngramDistance"); a2.align( (Alignment)null, params ); </pre> @@ -159,15 +160,15 @@ corresondences in the resulting alignment. System.err.println( a1.nbCells() ); BasicAlignment a3 = (BasicAlignment)(a1.clone()); System.err.println( a3.nbCells() ); - + // Merge the two results. a3.ingest( a2 ); System.err.println( a3.nbCells() ); // Invert the alignement - BasicAlignment a4 = a3.inverse(); + Alignment a4 = a3.inverse(); System.err.println( a4.nbCells() ); - + // Trim above .5 a4.cut( .5 ); System.err.println( a4.nbCells() ); @@ -197,10 +198,12 @@ reference alignment, then creates an instance of <tt>PRecEvaluator</tt> for computing precision and recall between the alignment <tt>a1</tt> above with respects to the reference alignment. 
<div class="fragment"> + // Load the reference alignment AlignmentParser aparser = new AlignmentParser(0); - Alignment reference = aparser.parse( (new File ( "refalign.rdf" ) ).toURL().toString() ); + Alignment reference = aparser.parse( new File( "../refalign.rdf" ).toURI() ); + Evaluator evaluator = new PRecEvaluator( reference, a1 ); - evaluator.eval( new BasicParameters() ); + evaluator.eval(); </div> As previously, results are stored within the <tt>Evaluator</tt> object and are accessed through specific accessors. @@ -218,17 +221,22 @@ F-measure. </div> <div class="explain" id="qu5"> <pre> + // Trim at various thresholds + // Evaluate them against the references + // and choose the one with the best F-Measure double best = 0.; Alignment result = null; Properties p = new BasicParameters(); for ( int i = 0; i <= 10 ; i += 2 ){ a1.cut( ((double)i)/10 ); - evaluator = new PRecEvaluator( reference, a1 ); + // This operation must be repeated because the modifications in a1 + // are not taken into account otherwise + Evaluator evaluator = new PRecEvaluator( reference, a1 ); evaluator.eval( p ); System.err.println("Threshold "+(((double)i)/10)+" : "+((PRecEvaluator)evaluator).getFmeasure()+" over "+a1.nbCells()+" cells"); if ( ((PRecEvaluator)evaluator).getFmeasure() > best ) { - result = (BasicAlignment)((BasicAlignment)a1).clone(); - best = ((PRecEvaluator)evaluator).getFmeasure(); + result = (BasicAlignment)((BasicAlignment)a1).clone(); + best = ((PRecEvaluator)evaluator).getFmeasure(); } } </pre> @@ -260,8 +268,8 @@ alignment selected at the previous exercise as a set of OWL axioms. 
<pre> // Displays it as OWL Rules PrintWriter writer = new PrintWriter ( - new BufferedWriter( - new OutputStreamWriter( System.out, "UTF-8" )), true); + new BufferedWriter( + new OutputStreamWriter( System.out, "UTF-8" )), true); AlignmentVisitor renderer = new OWLAxiomsRendererVisitor(writer); result.render(renderer); writer.flush(); @@ -292,32 +300,37 @@ alignment selected at the previous exercise as a set of OWL axioms. // Merge the two results. ((BasicAlignment)a1).ingest(a2); - // Threshold at various thresholds - // Evaluate them against the references - // and choose the one with the best F-Measure + // Load the reference alignment AlignmentParser aparser = new AlignmentParser(0); - Alignment reference = aparser.parse( (new File ( "refalign.rdf" ) ) . toURL() . toString()); - Evaluator evaluator = new PRecEvaluator( reference, a1 ); + // Changed by Angel for Windows + //Alignment reference = aparser.parse( "file://"+(new File ( "../refalign.rdf" ) . getAbsolutePath()) ); + Alignment reference = aparser.parse( new File( "../refalign.rdf" ).toURI() ); + // Trim at various thresholds + // Evaluate them against the references + // and choose the one with the best F-Measure double best = 0.; Alignment result = null; Properties p = new BasicParameters(); for ( int i = 0; i <= 10 ; i += 2 ){ a1.cut( ((double)i)/10 ); - evaluator = new PRecEvaluator( reference, a1 ); + // This operation must be repeated because the modifications in a1 + // are not taken into account otherwise + Evaluator evaluator = new PRecEvaluator( reference, a1 ); evaluator.eval( p ); System.err.println("Threshold "+(((double)i)/10)+" : "+((PRecEvaluator)evaluator).getFmeasure()+" over "+a1.nbCells()+" cells"); if ( ((PRecEvaluator)evaluator).getFmeasure() > best ) { - result = (BasicAlignment)((BasicAlignment)a1).clone(); - best = ((PRecEvaluator)evaluator).getFmeasure(); + result = (BasicAlignment)((BasicAlignment)a1).clone(); + best = ((PRecEvaluator)evaluator).getFmeasure(); } } + // 
Displays it as OWL Rules PrintWriter writer = new PrintWriter ( - new BufferedWriter( - new OutputStreamWriter( System.out, "UTF-8" )), true); + new BufferedWriter( + new OutputStreamWriter( System.out, "UTF-8" )), true); AlignmentVisitor renderer = new OWLAxiomsRendererVisitor(writer); - a1.render(renderer); + result.render(renderer); writer.flush(); writer.close(); </pre></div> @@ -326,22 +339,24 @@ alignment selected at the previous exercise as a set of OWL axioms. This can be compiled and used through: </p> <div class="fragment"> -$ javac -classpath ../../lib/align.jar:../../lib/procalign.jar -d results MyApp.java -$ java -cp ../../lib/Procalign.jar:results MyApp file://$CWD/myOnto.owl file://$CWD/edu.mit.visus.bibtex.owl > results/MyApp.owl +$ javac -classpath ../../../lib/align.jar:../../../lib/procalign.jar -d results MyApp.java +$ java -cp ../../../lib/Procalign.jar:results MyApp file://$CWD/myOnto.owl file://$CWD/edu.mit.visus.bibtex.owl > results/MyApp.owl </div> <p>The execution provides an insight about the best threshold: <pre> -Threshold 0.0 : 0.4693877551020408 over 148 cells -Threshold 0.2 : 0.5227272727272727 over 128 cells -Threshold 0.4 : 0.5476190476190476 over 120 cells -Threshold 0.6 : 0.6478873239436619 over 94 cells -Threshold 0.8 : 0.75 over 72 cells -Threshold 1.0 : 0.5151515151515151 over 18 cells +Threshold 0.0 : 0.4999999999999999 over 140 cells +Threshold 0.2 : 0.5529411764705882 over 122 cells +Threshold 0.4 : 0.5802469135802468 over 114 cells +Threshold 0.6 : 0.6861313868613137 over 89 cells +Threshold 0.8 : 0.7692307692307693 over 69 cells +Threshold 1.0 : 0.5230769230769231 over 17 cells </pre> <p> <p>A full working solution is <a href="MyApp.java">MyApp.java</a>.</p> +<div class="logic"><p><b>Advanced question:</b> Can you tell why the stored alignment does not seem to contain 69 cells? 
(Hint: try to render the alignments in RDF and see what happens)</p></div> + <div class="logic"><p><b>More work:</b> You can add a switch like the <tt>-i</tt> switch of <tt>Procalign</tt> so that the main class of the application can be passed at commant-line.</p></div> <div class="logic"><p><b>Advanced:</b> What about writing an editor for the alignment <abbr>API</abbr>?</p></div>