diff --git a/README.md b/README.md index ddcaede555a7ceb7162220fc209fe87855adb0f1..5afb70233d12c36f9bc1b2eb52e50d90851b6086 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,13 @@ # Linkex -Linkex is a tool allowing to extract link keys candidate from two RDF datasets. +Linkex is a tool for discovering link key candidates from two RDF datasets. Link keys generalise the combination of keys and ontology alignments for data interlinking. A link key is a set of pairs of properties that uniquely identify the instances of two classes of two RDF datasets. For example, {(hasCreator, aAuteur), (hasTitle, aTitre)} for (Book, Livre), which states that, if an instance of Book have the same values for hasCreator and aAuteur as an instance of Book has for hasCreator and hasTitle, the two instances are the same. This tool can extract link keys candidates and evaluate them using discriminability and coverage. It can also evaluate them according to reference set of links given as input. -It is able to extract canidates with comoposed properties, and inverse properties. +It is able to extract candidates with composed properties and inverse properties. Linkex is free software distributed it under the terms of the Lesser GNU General Public License. 
diff --git a/src/main/java/fr/inrialpes/exmo/linkkey/AntichainDisj.java b/src/main/java/fr/inrialpes/exmo/linkkey/AntichainDisj.java index 589c3aa7b21d12573b28efbdbc04b89f25291106..16b156ac2a88e08f7ae9eb4a2dfaac8084d6dec8 100644 --- a/src/main/java/fr/inrialpes/exmo/linkkey/AntichainDisj.java +++ b/src/main/java/fr/inrialpes/exmo/linkkey/AntichainDisj.java @@ -90,7 +90,7 @@ public class AntichainDisj implements EvaluableRule { } - @Override + /*@Override public int getNbAncestors() { Iterator<CandidateLinkkey> it = candidates.iterator(); int max=it.next().getNbAncestors(); @@ -122,7 +122,7 @@ public class AntichainDisj implements EvaluableRule { Iterator<CandidateLinkkey> it = candidates.iterator(); return it.next().getBottom().getNbDescendants(); - } + }*/ public void setIsMaximal(boolean b) { isMaximal=b; diff --git a/src/main/java/fr/inrialpes/exmo/linkkey/CandidateLinkkey.java b/src/main/java/fr/inrialpes/exmo/linkkey/CandidateLinkkey.java index 034c53bbe802568426a2de3512d1367b916b75fe..76742d81da11493d4e2db79bdd83cb5e6b065a57 100644 --- a/src/main/java/fr/inrialpes/exmo/linkkey/CandidateLinkkey.java +++ b/src/main/java/fr/inrialpes/exmo/linkkey/CandidateLinkkey.java @@ -34,8 +34,8 @@ public class CandidateLinkkey implements EvaluableRule { protected final LongSet inPairs; protected final LongSet eqPairs; - // classes of the candidate link key - // the set of IntSet represents a disjunction of cunjunction of classes + // classes expressions of the candidate link key + // the set of IntSet represents a disjunction of conjunctions of classes // classes are identified by int protected Set<IntSet> classes1; protected Set<IntSet> classes2; @@ -48,7 +48,7 @@ public class CandidateLinkkey implements EvaluableRule { protected boolean writable; - // transitive closure of the dual relation - to be cleared when structural change happpen + // transitive closure of the dual relation - to be cleared when structural change happen transient protected Set<CandidateLinkkey> 
descendants; transient protected Set<CandidateLinkkey> ancestors; @@ -126,7 +126,7 @@ public class CandidateLinkkey implements EvaluableRule { descendants=null; } - @Override + /*@Override public int getNbAncestors() { //return inPairs.size(); return this.getAncestors().size(); @@ -147,8 +147,8 @@ public class CandidateLinkkey implements EvaluableRule { @Override public int getMaxNbDescendants() { //return getBottom().inPairs.size(); - return getBottom().getDescendants().size(); - } + return getTop().getDescendants().size(); + }*/ public CandidateLinkkey getTop() { if (!parents.isEmpty()) { @@ -302,7 +302,7 @@ public class CandidateLinkkey implements EvaluableRule { } /** - * Add a descendant to a candidate linkkey. + * Add a descendant to a candidate link key. * * @param c */ diff --git a/src/main/java/fr/inrialpes/exmo/linkkey/EvaluableRule.java b/src/main/java/fr/inrialpes/exmo/linkkey/EvaluableRule.java index bf7ad9df2494994c900ecb4cfc08f91b436d2b2a..893e4e7a139fb2cb52f9a7cd75c17dac4a4a3456 100644 --- a/src/main/java/fr/inrialpes/exmo/linkkey/EvaluableRule.java +++ b/src/main/java/fr/inrialpes/exmo/linkkey/EvaluableRule.java @@ -24,24 +24,65 @@ import java.util.Collection; import java.util.Set; /** - * + * Interface that a candidate link key or a disjunction of candidate + * link keys have to implement * @author Jerome David <jerome.david@univ-grenoble-alpes.fr> */ public interface EvaluableRule { - + + /** + * Number of links generated by this rule + */ public int getLinksSize(); + + /** + * Number of instances of the first dataset linked by this rule + */ public int getInstances1Size(); + + /** + * Number of instances of the second dataset linked by this rule + */ public int getInstances2Size(); + + /** + * Numbers of links per instance of the first dataset. + * @return a map that associates instance identifiers + * to the number of links for this instance + */ public Int2IntMap getCounts1(); + + /** + * Numbers of links per instance of the second dataset. 
+ * @return a map that associates instance identifiers + * to the number of links for this instance + */ public Int2IntMap getCounts2(); + + /** + * Links generated by this rule. + * @return a collection of long, where each long is the concatenation + * of the identifier of instance 1 and the identifier of instance 2. + */ public Collection<Long> getLinks(); - - public int getNbAncestors(); + + /* Are the four following methods useful ? */ + /*public int getNbAncestors(); public int getMaxNbAncestors(); + public int getNbDescendants(); - public int getMaxNbDescendants(); - + public int getMaxNbDescendants();*/ + + /** + * The class expression of dataset 1 + * @return a set (disjunction) of set (conjunction) of class identifiers (int) + */ public Set<IntSet> getClasses1(); + + /** + * The class expression of dataset 2 + * @return a set (disjunction) of set (conjunction) of class identifiers (int) + */ public Set<IntSet> getClasses2(); diff --git a/src/main/java/fr/inrialpes/exmo/linkkey/ExtractionConfig.java b/src/main/java/fr/inrialpes/exmo/linkkey/ExtractionConfig.java index 8b2a9dd458aeb125085aa92c2f8a260535367701..996acb8934423a1ea712acfdf8c99af64be6eb48 100644 --- a/src/main/java/fr/inrialpes/exmo/linkkey/ExtractionConfig.java +++ b/src/main/java/fr/inrialpes/exmo/linkkey/ExtractionConfig.java @@ -25,9 +25,15 @@ import java.util.Set; * @author Jerome David <jerome.david@univ-grenoble-alpes.fr> */ public class ExtractionConfig { - + + /** Link key candidates with no classes expressions */ public static final int NO_CLASSES=0; + /** Link key candidates with classes expressions but subsumption defined + * by the inclusion between the sets of pairs of properties only */ public static final int CLASSES=1; + /** Link key candidates with classes expressions but subsumption defined + * by the inclusion between the sets of pairs of properties + * and classes expressions */ public static final int CLASSES_FULL=2; diff --git 
a/src/main/java/fr/inrialpes/exmo/linkkey/LinkkeyDiscoveryAlgorithm.java b/src/main/java/fr/inrialpes/exmo/linkkey/LinkkeyDiscoveryAlgorithm.java index 9584193c1b41cf80f9f53f0838b3ff75a0c63f1c..81be925d900387a195180bb0c6b9fe5c124dc916 100644 --- a/src/main/java/fr/inrialpes/exmo/linkkey/LinkkeyDiscoveryAlgorithm.java +++ b/src/main/java/fr/inrialpes/exmo/linkkey/LinkkeyDiscoveryAlgorithm.java @@ -18,7 +18,6 @@ */ package fr.inrialpes.exmo.linkkey; -import fr.inrialpes.exmo.linkkey.eval.EvalMeasures; import fr.inrialpes.exmo.linkkey.utils.DescriptionsSet; import fr.inrialpes.exmo.linkkey.normalizers.AbstractNormalizer; import org.apache.jena.vocabulary.RDF; @@ -130,8 +129,8 @@ public class LinkkeyDiscoveryAlgorithm { blankNodes = config.filterBlankNodes?new IntOpenHashSet():null; this.referenceLinks = new LongOpenHashSet(); if (config.classes!=ExtractionConfig.NO_CLASSES) { - instancesTypesDS1=new Int2ObjectOpenHashMap(); - instancesTypesDS2=new Int2ObjectOpenHashMap(); + instancesTypesDS1=new Int2ObjectOpenHashMap<>(); + instancesTypesDS2=new Int2ObjectOpenHashMap<>(); } if (config.classes==ExtractionConfig.CLASSES_FULL) { order = PartialOrder.PROPERTY_PAIRS_AND_CLASS_EXP; @@ -158,12 +157,11 @@ public class LinkkeyDiscoveryAlgorithm { * (if generatesIn is true) * * - * @param sId the subject - * @param pId the predicate - * @param oId the object - * @param spoIndex - * @param opsIndex - * @param subjects + * @param sId the subject identifier + * @param pId the predicate identifier + * @param oId the object identifier + * @param spoIndex the subject to predicate to object index in which the triple (sId,pId,oId) will be added + * @param opsIndex the object to predicate to subject index in which the triple (sId,pId,oId) will be added */ private void indexTriple(int sId, int pId, int oId, TripleIndex spoIndex, TripleIndex opsIndex) { //subjects.add(sId); @@ -231,7 +229,7 @@ public class LinkkeyDiscoveryAlgorithm { - public void addURITripleDS2(String s, String p, 
String o) throws IOException { + public void addURITripleDS2(String s, String p, String o) { int sId = uriIdx.getId(s); int pId = pIdxDS2.getId(p); int oId = uriIdx.getId(o); @@ -262,7 +260,7 @@ public class LinkkeyDiscoveryAlgorithm { } } - public void addValueTripleDS2(String s, String p, Object o) throws IOException { + public void addValueTripleDS2(String s, String p, Object o) { if (o instanceof String && ((String)o).length() == 0) { return; } @@ -392,7 +390,7 @@ public class LinkkeyDiscoveryAlgorithm { }); } }); - + }); }); Iterator<Map.Entry<IntList,Int2ObjectMap<IntSet>>> it = nextPathsSO.entrySet().iterator(); @@ -403,7 +401,7 @@ public class LinkkeyDiscoveryAlgorithm { int sum=0; for (IntSet oSet : soIndex.values()) { sum+=oSet.size(); - }; + } // Check the OSFactor if (maxOSFactor==-1 || sum/soIndex.size() < maxOSFactor) { @@ -481,9 +479,6 @@ public class LinkkeyDiscoveryAlgorithm { /** * Removes from this index all data which are not about the given subjects - * - * @param set - * @param subjects */ private static <T> void intersectionWithSubjects(Map<T,Int2ObjectOpenHashMap<IntOpenHashSet>> map, IntSet subjects) { Iterator<Map.Entry<T,Int2ObjectOpenHashMap<IntOpenHashSet>>> it1 = map.entrySet().iterator(); @@ -506,9 +501,6 @@ public class LinkkeyDiscoveryAlgorithm { /** * Removes from the given index, the properties that not discriminant for at least one value - * - * @param set - * @param subjects */ private static <T> void removeNotDiscProperties(Map<T,Int2ObjectOpenHashMap<IntOpenHashSet>> map, IntSet subjects, double discThreshold) { IntSet propToRem = new IntOpenHashSet(); @@ -614,7 +606,7 @@ public class LinkkeyDiscoveryAlgorithm { }*/ private <T> void supportThreshold(Map<T,Int2ObjectOpenHashMap<IntOpenHashSet>> xpsIndex, int nbInstances, StringToId pIdx) { - Int2ObjectOpenHashMap<IntOpenHashSet> propCount = new Int2ObjectOpenHashMap(); + Int2ObjectOpenHashMap<IntOpenHashSet> propCount = new Int2ObjectOpenHashMap<>(); xpsIndex.values().forEach( 
psMap -> { for (Entry<IntOpenHashSet> ps : psMap.int2ObjectEntrySet()) { @@ -775,7 +767,7 @@ public class LinkkeyDiscoveryAlgorithm { intersectionWithSubjects(opsIndexDS2URI, subjectsDS2); intersectionWithSubjects(opsIndexDS2Value, subjectsDS2); } - else { // case only compostion without In -> ops indexes are now useless + else { // case only composition without In -> ops indexes are now useless opsIndexDS1URI=null; opsIndexDS1Value=null; opsIndexDS2URI=null; @@ -896,7 +888,7 @@ public class LinkkeyDiscoveryAlgorithm { /* * Add Intent Algorithm */ - public ExtractionResult computeLinkkeyWithFCA(Map<String,String> prefixMap) throws IOException { + public ExtractionResult computeLinkkeyWithFCA(Map<String,String> prefixMap) { // DEBUG /*Int2ObjectMap<IntSet> classes2InstancesDS1 = getClasses2Instances(instancesTypesDS1); @@ -1005,11 +997,11 @@ public class LinkkeyDiscoveryAlgorithm { } } CandidateLinkkey newCandidate = new CandidateLinkkey(eqPairs,inPairs,classes1,classes2); - /**nb++; + /*nb++; if (nb%100000==0) { System.out.println(nb); //if (nb%100000==0) System.out.println("Ancestors: "+generator.getBottom().getNbAncestors()); - }**/ + }*/ for (CandidateLinkkey np : newParents) { generator.removeParent(np); @@ -1059,7 +1051,7 @@ public class LinkkeyDiscoveryAlgorithm { a generator is a most general subsumee of the given intent This is used by addIntent algorithm */ - public final static CandidateLinkkey getMaximal(PartialOrder order, CandidateLinkkey generator, LongSet eqPairs, LongSet inPairs, Set<IntSet> classes1, Set<IntSet> classes2) { + public static CandidateLinkkey getMaximal(PartialOrder order, CandidateLinkkey generator, LongSet eqPairs, LongSet inPairs, Set<IntSet> classes1, Set<IntSet> classes2) { boolean parentIsMaximal = true; while (parentIsMaximal) { parentIsMaximal = false; @@ -1078,7 +1070,7 @@ public class LinkkeyDiscoveryAlgorithm { // old algorithm for extraction @Deprecated - public ExtractionResult computeLinkRuleCandidates(Map<String,String> 
prefixMap) throws IOException { + public ExtractionResult computeLinkRuleCandidates(Map<String,String> prefixMap) { //System.out.println("START Compute candidates"); ExtractionResult res = new ExtractionResult(pIdxDS1.getStrings(), pIdxDS2.getStrings(), uriIdx.getStrings(), config.typesDS1, config.typesDS2,subjectsDS1, subjectsDS2,referenceLinks,prefixMap); @@ -1147,10 +1139,8 @@ public class LinkkeyDiscoveryAlgorithm { private static Int2ObjectMap<IntSet> getClasses2Instances(Int2ObjectMap<IntSet> instancesTypes) { if (instancesTypes==null) return Int2ObjectMaps.emptyMap(); Int2ObjectMap<IntSet> res = new Int2ObjectOpenHashMap<>(); - - Iterator<Entry<IntSet>> it = instancesTypes.int2ObjectEntrySet().iterator(); - while (it.hasNext()) { - Entry<IntSet> pair = it.next(); + + for (Entry<IntSet> pair : instancesTypes.int2ObjectEntrySet()) { pair.getValue().forEach((int c) -> { res.computeIfAbsent(c, x -> new IntOpenHashSet()).add(pair.getIntKey()); });