diff --git a/src/main/java/fr/inrialpes/exmo/linkkey/LinkKeyRenderer.java b/src/main/java/fr/inrialpes/exmo/linkkey/LinkKeyRenderer.java index 379c53c0f47b16c5fdd0e817a23fe7b889594061..4662769db4a9b4f4f8e386d70bee08d4e7214be3 100644 --- a/src/main/java/fr/inrialpes/exmo/linkkey/LinkKeyRenderer.java +++ b/src/main/java/fr/inrialpes/exmo/linkkey/LinkKeyRenderer.java @@ -18,8 +18,6 @@ */ package fr.inrialpes.exmo.linkkey; -import fr.inrialpes.exmo.align.impl.edoal.*; -import fr.inrialpes.exmo.align.parser.SyntaxElement; import org.apache.jena.graph.Node; import org.apache.jena.graph.NodeFactory; import org.apache.jena.reasoner.TriplePattern; @@ -50,9 +48,19 @@ import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap; import java.io.PrintWriter; import java.net.URI; import java.text.NumberFormat; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; import java.util.Comparator; - +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; import org.semanticweb.owl.align.AlignmentException; /** @@ -140,7 +148,9 @@ public class LinkKeyRenderer { if (!(c.getClasses1().isEmpty() && c.getClasses2().isEmpty())) { out.print("|"+toDotClassExpression(c.getClasses1(),true)+","+toDotClassExpression(c.getClasses2(),true)); } - /*+ "|"+toStringSpecificLinks(c)*/ + if (c.getTop()!=c) { + out.print("|" + toDotLinks(c, true)); + } out.println("}\" shape=Mrecord fontname=Courier];"); idx.put(c, i); i++; @@ -285,44 +295,11 @@ public class LinkKeyRenderer { // end String representation } - // inverse if needed - public PathExpression toPathExpressionInverse(String property, Set<String> dataProperties) { - if (property.endsWith(LinkkeyDiscoveryAlgorithm.INVERSE_SUFFIX)) { - return new RelationConstruction(SyntaxElement.Constructor.INVERSE, - Collections.singletonList(new RelationId(URI.create(property.substring(0,property.length()-2))))); - } - if (dataProperties.contains(property)) { - return new PropertyId(URI.create(property)); - } - return new RelationId(URI.create(property)); - } - - public PathExpression toPathExpressionCompose(String property, Set<String> dataProperties) { - int e = property.indexOf('°'); - if (e==-1) { - return toPathExpressionInverse(property,dataProperties); - } - else { - ArrayList l = new ArrayList<>(); - int s=0; - while (s<property.length()) { - String prop = property.substring(s,e); - l.add(toPathExpressionInverse(prop,dataProperties)); - s=e+1; - e=property.indexOf('°',s); - if (e==-1) e=property.length(); - } - if (l.get(l.size()-1) instanceof RelationExpression) { - return new RelationConstruction(SyntaxElement.Constructor.COMP,l); - } - return new PropertyConstruction(SyntaxElement.Constructor.COMP,l); - - } - } + - public Linkkey toLinkkey(CandidateLinkkey c, Set<String> dataProperties) { - if (dataProperties == null) { - dataProperties = Collections.emptySet(); + public Linkkey toLinkkey(CandidateLinkkey c, Set<String> objectProperties) { + if (objectProperties == null) { + objectProperties = Collections.emptySet(); } Linkkey lk = new Linkkey(); @@ -333,16 +310,13 @@ public class LinkKeyRenderer { String p1 = decodeP1(p); String p2 = decodeP2(p); - /*if (dataProperties.contains(p1) && dataProperties.contains(p2)) { - pe1 = new PropertyId(URI.create(p1)); - pe2 = new PropertyId(URI.create(p2)); - - } else { + if (objectProperties.contains(p1) && objectProperties.contains(p2)) { pe1 = new RelationId(URI.create(p1)); pe2 = new RelationId(URI.create(p2)); - }*/ - pe1 = toPathExpressionCompose(p1,dataProperties); - pe2 = toPathExpressionCompose(p2,dataProperties); + } else { + pe1 = new PropertyId(URI.create(p1)); + pe2 = new PropertyId(URI.create(p2)); + } lk.addBinding(new LinkkeyIntersects(pe1, pe2)); } @@ -353,15 +327,13 @@ public class LinkKeyRenderer { String p1 = decodeP1(p); String p2 = decodeP2(p); - /* if (dataProperties.contains(p1) && dataProperties.contains(p2)) { - pe1 = new PropertyId(URI.create(p1)); - pe2 = new PropertyId(URI.create(p2)); - } else { + if (objectProperties.contains(p1) && objectProperties.contains(p2)) { pe1 = new RelationId(URI.create(p1)); pe2 = new RelationId(URI.create(p2)); - }*/ - pe1 = toPathExpressionCompose(p1,dataProperties); - pe2 = toPathExpressionCompose(p2,dataProperties); + } else { + pe1 = new PropertyId(URI.create(p1)); + pe2 = new PropertyId(URI.create(p2)); + } lk.addBinding(new LinkkeyEquals(pe1, pe2)); } diff --git a/src/main/java/fr/inrialpes/exmo/linkkey/LinkkeyDiscoveryAlgorithm.java b/src/main/java/fr/inrialpes/exmo/linkkey/LinkkeyDiscoveryAlgorithm.java index 04b333047637c2ddccd65fb776a90a7cfd2cda61..09c0e4adb1c77d324221bfb169bf2140b506b5d3 100644 --- a/src/main/java/fr/inrialpes/exmo/linkkey/LinkkeyDiscoveryAlgorithm.java +++ b/src/main/java/fr/inrialpes/exmo/linkkey/LinkkeyDiscoveryAlgorithm.java @@ -50,7 +50,7 @@ import org.apache.jena.vocabulary.OWL; public class LinkkeyDiscoveryAlgorithm { // -1 superscript - public final static String INVERSE_SUFFIX="\u207B\u00B9"; + public static String INVERSE_SUFFIX="\u207B\u00B9"; /* * Sets that contains the instances to be domain of linkeys @@ -69,12 +69,12 @@ public class LinkkeyDiscoveryAlgorithm { private final StringToId pIdxDS2 = new StringToId(true); /** - * String index that assigns an int identifier to each value + * String index that assigns an int identifier to each URI (or blank node) */ public final StringToId uriIdx = new StringToId(); /** - * String index that assigns an int identifier to each URI (or blank node) + * String index that assigns an int identifier to each value */ private final StringToId valueIdx = new StringToId(); @@ -522,6 +522,7 @@ public class LinkkeyDiscoveryAlgorithm { else if (s.size()>=subjects.size()*discThreshold) { propToRem.add(p); it2.remove(); + //System.out.println("Not disc property removed : "+pIdx.getString(p)); } } } @@ -623,6 +624,9 @@ public class LinkkeyDiscoveryAlgorithm { */ public void finishIndexDatasets() throws IOException { + + + // Store URI and datacache on the disk uriIdx.storeToDisk(); valueIdx.storeToDisk(); diff --git a/src/main/java/fr/inrialpes/exmo/linkkey/LinkkeyExtraction.java b/src/main/java/fr/inrialpes/exmo/linkkey/LinkkeyExtraction.java index e183e02dba0759b94ae64ec5f71b6a990158e2f6..c13896f60d7eea63c68d09ddef2015e510e7efc5 100644 --- a/src/main/java/fr/inrialpes/exmo/linkkey/LinkkeyExtraction.java +++ b/src/main/java/fr/inrialpes/exmo/linkkey/LinkkeyExtraction.java @@ -21,6 +21,7 @@ package fr.inrialpes.exmo.linkkey; import fr.inrialpes.exmo.align.impl.edoal.ClassConstruction; import fr.inrialpes.exmo.align.impl.edoal.ClassExpression; import fr.inrialpes.exmo.linkkey.eval.EvalMeasures; +import it.unimi.dsi.fastutil.Hash; import it.unimi.dsi.fastutil.ints.IntOpenHashSet; import org.apache.jena.atlas.iterator.Iter; import org.apache.jena.graph.Triple; @@ -419,7 +420,7 @@ public class LinkkeyExtraction { //out.print("# links" + "\t" + "# c1" + "\t" + "# c2" + "\t" + "deltaMeasure" + "\t" + "deltaMeasure1" + "\t"+ "deltaMeasure2" + "\t"+ "lift" + "\t" + "hmeandc" + "\t" + "disc." + "\t" + "cov."+ "\t" + "covClass."+ "\t" +"fPrec." + "\t" + "fRec."); - + /* out.print("# links" + "\t" + "# c1" + "\t" + "# c2" + //"\t" + "deltaMeasure" + "\t" + "deltaMeasure1" + "\t"+ "deltaMeasure2" + "\t"+ "lift" + "\t" + "hmean" + "\t" + "dis" + "\t" + "partSize" + "\t" + "entropyP" + "\t" + "cov" + "\t" + "hmeanClass" + "\t" + "disClass" + "\t" + "covClass." + @@ -462,10 +463,29 @@ public class LinkkeyExtraction { } out.print(renderer.toString(r, true) + "\t" + r.getMinToBottom() + "\t" + r.getMinToTop()); out.println(); - } + }*/ + + TreeMap<Double,Set<CandidateLinkkey>> entropies= new TreeMap<>(); + for (CandidateLinkkey r : res.getCandidates()) { + List<IntSet> part = r.quotientSet(); + double entropy=0; + int sum=0; + for (IntSet s : part) { + sum+=s.size(); + } + for (IntSet s : part) { + double p = (double)s.size()/sum; + entropy += p*Math.log(p); + } + entropies.computeIfAbsent(entropy, x->new HashSet<CandidateLinkkey>()).add(r); + } - /*CandidateLinkkey[] l = res.getCandidates().toArray(new CandidateLinkkey[res.getNbCandidates()]); + entropies.forEach( (k,v) -> v.forEach( r -> + out.println(k+"\t"+f.format(r.quotientSet().size())+f.format(r.getInstances1Size())+"\t"+f.format(r.getInstances2Size())+"\t"+f.format(eval.getSupport(r))+"\t"+renderer.toString(r, true)) + )); + /*ArrayList<Double> sims = new ArrayList<>(); + CandidateLinkkey[] l = res.getCandidates().toArray(new CandidateLinkkey[res.getNbCandidates()]); for (int i=0 ; i< l.length ; i++) { Set<IntSet> l1 = new HashSet<>(l[i].quotientSet()); for (int j=i+1 ; j<l.length ; j++) { @@ -475,11 +495,22 @@ public class LinkkeyExtraction { if (l1.contains(s)) inter+=1; } double sim = ((double) inter) / (l1.size() + l2.size() - inter); - if (sim>0) { - out.println("sim-" + sim); - } + //if (sim>0) { + // out.println("sim-" + sim); + // } + sims.add(sim); } - }*/ + } + + for (double threshold : new double[] {.9,.8,.7,.6,.5,.4,.3,.2,.1}) { + int i=0; + for (double sim : sims) { + if (sim>=threshold) { + i++; + } + } + out.println(threshold+","+i); + }*/ }