diff --git a/src/main/java/fr/inrialpes/exmo/linkkey/LinkKeyRenderer.java b/src/main/java/fr/inrialpes/exmo/linkkey/LinkKeyRenderer.java index dde0ec69b7f39b704ddd3e60db10260ae6719ed6..b12ce89fd3fad0e18a529593a74d7aa74c481204 100644 --- a/src/main/java/fr/inrialpes/exmo/linkkey/LinkKeyRenderer.java +++ b/src/main/java/fr/inrialpes/exmo/linkkey/LinkKeyRenderer.java @@ -421,8 +421,8 @@ public class LinkKeyRenderer { } public String toString(CandidateLinkkey c, boolean abbrev) { - return "EQ" + toString(c.getEqPairs(), abbrev) + ", IN" + toString(c.getInPairs(), abbrev) - + ", CL[" + toString(c.getClasses1(), abbrev) + "," + toString(c.getClasses2(), abbrev) + "]"; + return toString(c.getEqPairs(), abbrev) + "\t" + toString(c.getInPairs(), abbrev) + + "\t[" + toString(c.getClasses1(), abbrev) + "," + toString(c.getClasses2(), abbrev) + "]"; } public void renderTXT(boolean abbreviateUris, PrintWriter out) { diff --git a/src/main/java/fr/inrialpes/exmo/linkkey/LinkkeyDiscoveryAlgorithm.java b/src/main/java/fr/inrialpes/exmo/linkkey/LinkkeyDiscoveryAlgorithm.java index ca70b4af2008fe15612353ea47e495c112d0b926..2bf60b018dc2f07a114d094b05f83dd5130bdebe 100644 --- a/src/main/java/fr/inrialpes/exmo/linkkey/LinkkeyDiscoveryAlgorithm.java +++ b/src/main/java/fr/inrialpes/exmo/linkkey/LinkkeyDiscoveryAlgorithm.java @@ -8,7 +8,7 @@ * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * - * Linkex is distributed in the hope that it2 will be useful, + * Linkex is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. 
@@ -18,6 +18,7 @@ */ package fr.inrialpes.exmo.linkkey; +import fr.inrialpes.exmo.linkkey.utils.DescriptionsSet; import fr.inrialpes.exmo.linkkey.normalizers.AbstractNormalizer; import org.apache.jena.vocabulary.RDF; import fr.inrialpes.exmo.linkkey.normalizers.StringNormalizer; @@ -48,7 +49,9 @@ import java.util.logging.Logger; public class LinkkeyDiscoveryAlgorithm { + // -1 superscript private static String INVERSE_SUFFIX="\u207B\u00B9"; + /* * Sets that contains the instances to be domain of linkeys */ @@ -96,12 +99,10 @@ public class LinkkeyDiscoveryAlgorithm { private TripleIndex spoIndexDS2URI = new TripleIndex(); private TripleIndex spoIndexDS2Value = new TripleIndex(); - private MapSet<Integer, Integer> spToRemoveDS2 = new MapSet(); - - // descriptions: map each pair of instances to set of pairs of properties - private Long2LongSetFileMap inCandidates; - private Long2LongSetFileMap eqCandidates; + //private MapSet<Integer, Integer> spToRemoveDS2 = new MapSet(); + private Int2ObjectMap<IntSet> spToRemoveDS2 = new Int2ObjectOpenHashMap<>(); + // stores the In and Eq pairs of each pair instances private DescriptionsSet descriptions; @@ -123,40 +124,6 @@ public class LinkkeyDiscoveryAlgorithm { private LuceneIndex luceneIdx; - /*public LinkkeyDiscoveryAlgorithm() { - this(true, true, true, 1); - } - - public LinkkeyDiscoveryAlgorithm(boolean in, boolean eq, boolean inverse, int compose) { - this(in,eq,inverse,compose,(Set)null,(Set)null,null,null,0.0,10,true,false); - } - - public LinkkeyDiscoveryAlgorithm(boolean in, boolean eq, boolean inverse, int compose, Set<String> typesDS1, Set<String> typesDS2, String p1, String p2, double supportThreshold, int maxOSFactor, boolean filterNotDisc, boolean filterBlankNodes) { - this.generatesIn = in; - this.generatesEq = eq; - this.inverse = inverse; - this.compose = compose; - this.typesDS1=typesDS1; - this.typesDS2=typesDS2; - this.supportThreshold=supportThreshold; - this.maxOSFactor=maxOSFactor; - this.referenceLinks 
= new LongOpenHashSet(); - this.filterNotDisc=filterNotDisc; - blankNodes =filterBlankNodes?new IntOpenHashSet():null; - this.typesPrefixDS1=p1; - this.typesPrefixDS2=p2; - -// try { -// luceneIdx = new LuceneIndex(); -// } catch (IOException ex) { -// Logger.getLogger(LinkkeyDiscoveryAlgorithm.class.getName()).log(Level.SEVERE, null, ex); -// } - } - - public LinkkeyDiscoveryAlgorithm(boolean in, boolean eq, boolean inverse, int compose, String typeDS1, String typeDS2, double supportThreshold, int maxOSFactor, boolean filterNotDisc, boolean filterBlankNodes) { - this(in, eq, inverse, compose, Collections.singleton(typeDS1), Collections.singleton(typeDS2), null, null, supportThreshold, maxOSFactor, filterNotDisc, filterBlankNodes); - }*/ - public LinkkeyDiscoveryAlgorithm(ExtractionConfig config) { this.config=config; blankNodes = config.filterBlankNodes?new IntOpenHashSet():null; @@ -209,7 +176,6 @@ public class LinkkeyDiscoveryAlgorithm { public void addBlankNode(String n) { if (blankNodes!=null) blankNodes.add(uriIdx.getId(n)); - //System.err.println(n); } public void addURITripleDS1(String s, String p, String o) { @@ -321,7 +287,8 @@ public class LinkkeyDiscoveryAlgorithm { if (oId > -1) { indexTriple(sId, pId, oId, spoIndexDS2Value, opsIndexDS2Value); } else { - spToRemoveDS2.add(sId, pId); + //spToRemoveDS2.add(sId, pId); + spToRemoveDS2.computeIfAbsent(sId, x->new IntOpenHashSet()).add(pId); } } @@ -673,15 +640,11 @@ public class LinkkeyDiscoveryAlgorithm { } /** - * To be called when indexation is finish to compute candidates + * To be called when indexation is finished to compute candidates * @throws IOException */ public void finishIndexDatasets() throws IOException { - //pIdxDS1.storeToDisk(); - //pIdxDS2.storeToDisk(); - - // Store URI and datacache on the disk uriIdx.storeToDisk(); valueIdx.storeToDisk(); @@ -817,12 +780,15 @@ public class LinkkeyDiscoveryAlgorithm { // removeAllBranch all subject-property pairs for which at least one object is not in 
DS1 // concern only values - for (Map.Entry<Integer, Set<Integer>> e : spToRemoveDS2.entrySet()) { - Int2ObjectOpenHashMap<IntOpenHashSet> v1Map = spoIndexDS2Value.get(e.getKey()); + + for (Int2ObjectMap.Entry<IntSet> e : spToRemoveDS2.int2ObjectEntrySet()) { + //for (Map.Entry<Integer, Set<Integer>> e : spToRemoveDS2.entrySet()) { + Int2ObjectOpenHashMap<IntOpenHashSet> v1Map = spoIndexDS2Value.get(e.getIntKey()); if (v1Map != null) { v1Map.keySet().removeAll(e.getValue()); } } + spToRemoveDS2 = null; @@ -1116,54 +1082,19 @@ public class LinkkeyDiscoveryAlgorithm { config.typesDS1, config.typesDS2,subjectsDS1, subjectsDS2,referenceLinks,prefixMap); - //Map<LongSet, Map<LongSet, CandidateLinkkey>> candidates = new HashMap<>(); - //LinkKeySet candidates = new ExtractionResult(); - // itIn works because an eqcandidate is a incandidate - // LongIterator itIn = inCandidates.keySet().iterator(); - Iterator<Long2ObjectMap.Entry<LongSet>> itIn = inCandidates.entrySetIterator(); - Iterator<Long2ObjectMap.Entry<LongSet>> itEq = null; - - Long2ObjectMap.Entry<LongSet> eqEntry = null ; - - if (eqCandidates!=null) { - itEq = eqCandidates.entrySetIterator(); - if (itEq.hasNext()) { - eqEntry=itEq.next(); - } - } - - //int nblk=0; - while (itIn.hasNext()) { - Long2ObjectMap.Entry<LongSet> inEntry = itIn.next(); - - LongSet eqPairs=LongSets.EMPTY_SET; - if (eqEntry!=null && inEntry.getLongKey() == eqEntry.getLongKey()) { - eqPairs = eqEntry.getValue(); - - if (itEq.hasNext()) { - eqEntry=itEq.next(); - } - } - - // reencode eqPairs - LongSet inPairs = inEntry.getValue(); - - CandidateLinkkey c = res.getOrAddCandidate(eqPairs, inPairs, Collections.emptySet(), Collections.emptySet()); + Iterator<Long2ObjectMap.Entry<LongSet[]>> it = descriptions.entryIterator(); + while (it.hasNext()) { + Long2ObjectMap.Entry<LongSet[]> entry = it.next(); + long link = entry.getLongKey(); - c.addSpecificLink(inEntry.getLongKey()); + Set<IntSet> classes1 = 
getClassExpression(IntPair.decodeI1(link),instancesTypesDS1); + Set<IntSet> classes2 = getClassExpression(IntPair.decodeI2(link),instancesTypesDS2); + CandidateLinkkey c = res.getOrAddCandidate(entry.getValue()[1], entry.getValue()[0], classes1, classes2); - } + c.addSpecificLink(link); - if (config.eq) { - eqCandidates.close(); - } - if (config.in) { - inCandidates.close(); - } - eqCandidates = null; - inCandidates = null; - + } return res;//finalCandidates; } @@ -1232,11 +1163,4 @@ public class LinkkeyDiscoveryAlgorithm { } return res; } - /*public Int2ObjectMap<IntSet> getClasses2InstancesDS1() { - return getClasses2Instances(instancesTypesDS1); - } - - public Int2ObjectMap<IntSet> getClasses2InstancesDS2() { - return getClasses2Instances(instancesTypesDS2); - }*/ } diff --git a/src/main/java/fr/inrialpes/exmo/linkkey/LinkkeyExtraction.java b/src/main/java/fr/inrialpes/exmo/linkkey/LinkkeyExtraction.java index e5d628ab13cdb0655266517caea1ccfc9f268bf7..a5e3e7ab9961f37ac123ab4431840ac6108d08f9 100644 --- a/src/main/java/fr/inrialpes/exmo/linkkey/LinkkeyExtraction.java +++ b/src/main/java/fr/inrialpes/exmo/linkkey/LinkkeyExtraction.java @@ -28,8 +28,6 @@ import fr.inrialpes.exmo.align.impl.edoal.EDOALAlignment; import fr.inrialpes.exmo.align.impl.edoal.EDOALCell; import fr.inrialpes.exmo.align.impl.renderer.RDFRendererVisitor; import fr.inrialpes.exmo.linkkey.eval.SupervisedEvalMeasures; -import fr.inrialpes.exmo.linkkey.utils.IntPair; -import fr.inrialpes.exmo.linkkey.utils.LinkkeyDiscComparator; import fr.inrialpes.exmo.linkkey.utils.renderer.HtmlRenderer; import fr.inrialpes.exmo.ontowrap.BasicOntology; import it.unimi.dsi.fastutil.longs.LongOpenHashSet; @@ -39,27 +37,21 @@ import java.io.ObjectOutputStream; import java.io.PrintWriter; import java.io.RandomAccessFile; import java.net.URI; -import java.nio.charset.Charset; import java.nio.file.Files; -import java.nio.file.Path; import java.nio.file.Paths; import java.nio.file.StandardOpenOption; import 
java.text.NumberFormat; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; -import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.Map; import java.util.Set; -import java.util.SortedSet; -import java.util.TreeSet; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.DefaultParser; -import org.apache.commons.cli.GnuParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Option; import org.apache.commons.cli.Options; @@ -362,15 +354,20 @@ public class LinkkeyExtraction { NumberFormat f = NumberFormat.getInstance(); f.setMaximumFractionDigits(3); - out.print("# links" + "\t" + "# c1" + "\t" + "# c2" + "\t" + "deltaMeasure" + "\t" + "deltaMeasure1" + "\t"+ "deltaMeasure2" + "\t"+ "lift" + "\t" + "hmeandc" + "\t" + "disc." + "\t" + "cov."+ "\t" + "covClass."+ "\t" +"fPrec." + "\t" + "fRec."); + //out.print("# links" + "\t" + "# c1" + "\t" + "# c2" + "\t" + "deltaMeasure" + "\t" + "deltaMeasure1" + "\t"+ "deltaMeasure2" + "\t"+ "lift" + "\t" + "hmeandc" + "\t" + "disc." + "\t" + "cov."+ "\t" + "covClass."+ "\t" +"fPrec." + "\t" + "fRec."); + + out.print("# links" + "\t" + "# c1" + "\t" + "# c2" + + "\t" + "deltaMeasure" + "\t" + "deltaMeasure1" + "\t"+ "deltaMeasure2" + "\t"+ "lift" + + "\t" + "hmeandc" + "\t" + "disc." + "\t" + "cov."+ "\t" + "covClass."+ + "\t" +"fPrec." + "\t" + "fRec."); if (!res.getReferenceLinks().isEmpty()) { out.print("\tf-meas\tprec.\trec.\test. f-meas\test. prec.\test. 
rec."); } - out.println("\t" + "candidate"+"\t"+"dist to B"+"\t"+"dist to T"); + out.println("\t" + "EQ"+"\t" + "IN"+"\t" + "CL"+"\t"+"dist to B"+"\t"+"dist to T"); for (CandidateLinkkey r : res.getCandidates()) { //+ "\t" + f.format(eval.estPrec(r)) + "\t" + f.format(eval.estRec(r)) + "\t"+ EvalMeasures.hmean(eval.estPrec(r),eval.estRec(r)) out.print(f.format(eval.getSupport(r)) + "\t" + f.format(r.getInstances1Size()) + "\t" + f.format(r.getInstances2Size()) + "\t" - + f.format(eval.deltaMeasure(r))+ "\t"+ f.format(eval.deltaMeasureD1(r))+ "\t"+ f.format(eval.deltaMeasureD2(r))+ "\t"+ f.format(eval.lift(r))+ "\t" + //+ f.format(eval.deltaMeasure(r))+ "\t"+ f.format(eval.deltaMeasureD1(r))+ "\t"+ f.format(eval.deltaMeasureD2(r))+ "\t"+ f.format(eval.lift(r))+ "\t" + f.format(eval.hmeanDiscCov(r)) + "\t" + f.format(eval.discriminability(r)) + "\t" + f.format(eval.coverage(r)) + "\t" + f.format(eval.coverageClassExp(r)) + "\t" + f.format(eval.estPrec(r)) + "\t" + f.format(eval.estRec(r)) + "\t"); diff --git a/src/main/java/fr/inrialpes/exmo/linkkey/RenderForRedesc.java b/src/main/java/fr/inrialpes/exmo/linkkey/RenderForRedesc.java index 022bb5172bfeb96641242169f3dd9d03b824d2ae..825e5181bfe684ee4388001ca5a0ab0874d0d987 100644 --- a/src/main/java/fr/inrialpes/exmo/linkkey/RenderForRedesc.java +++ b/src/main/java/fr/inrialpes/exmo/linkkey/RenderForRedesc.java @@ -18,12 +18,8 @@ */ package fr.inrialpes.exmo.linkkey; -import fr.inrialpes.exmo.align.impl.edoal.Linkkey; import fr.inrialpes.exmo.linkkey.eval.EvalMeasures; import fr.inrialpes.exmo.linkkey.utils.IntPair; -import fr.inrialpes.exmo.linkkey.utils.trie.Trie; -import it.unimi.dsi.fastutil.ints.Int2ObjectMap; -import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap; import it.unimi.dsi.fastutil.ints.IntOpenHashSet; import it.unimi.dsi.fastutil.ints.IntSet; import it.unimi.dsi.fastutil.ints.IntSets; @@ -38,26 +34,21 @@ import java.io.IOException; import java.io.PrintWriter; import java.nio.file.Files; import 
java.nio.file.Path; -import java.nio.file.Paths; import java.nio.file.StandardOpenOption; import java.text.NumberFormat; import java.util.ArrayList; -import java.util.Arrays; import java.util.BitSet; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; -import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; import org.apache.jena.graph.Triple; import org.apache.jena.riot.RDFDataMgr; import org.apache.jena.riot.RDFLanguages; -import org.apache.jena.vocabulary.OWL; import org.apache.jena.vocabulary.RDF; /** @@ -65,167 +56,166 @@ import org.apache.jena.vocabulary.RDF; * @author Jerome David <jerome.david@univ-grenoble-alpes.fr> */ public class RenderForRedesc { + private ExtractionResult result; private LinkKeyRenderer renderer; private Path outputDir; - + private Path ds1; private Path ds2; - + private Object2IntMap<String> types1; private Object2IntMap<String> types2; - - - private Map<String,BitSet> instanceTypes1; - private Map<String,BitSet> instanceTypes2; - - + + private Map<String, BitSet> instanceTypes1; + private Map<String, BitSet> instanceTypes2; + public RenderForRedesc(ExtractionResult result, LinkKeyRenderer renderer, Path outputDir, Path ds1, Path ds2) { - this.result=result; - this.renderer=renderer; - this.outputDir=outputDir; - this.ds1=ds1; - this.ds2=ds2; + this.result = result; + this.renderer = renderer; + this.outputDir = outputDir; + this.ds1 = ds1; + this.ds2 = ds2; } - - - + public void run() throws IOException { - + indexTypes(); Files.createDirectories(outputDir); - int i=0; - + int i = 0; + NumberFormat f = NumberFormat.getInstance(); f.setMaximumFractionDigits(3); EvalMeasures eval = result.getUnsupervisedEval(); - - Comparator<CandidateLinkkey> comp = (CandidateLinkkey o1, CandidateLinkkey o2) -> o1.getLinksSize()-o2.getLinksSize(); + + Comparator<CandidateLinkkey> comp = 
(CandidateLinkkey o1, CandidateLinkkey o2) -> o1.getLinksSize() - o2.getLinksSize(); ArrayList<CandidateLinkkey> l = new ArrayList<>(result.getCandidates()); Collections.sort(l, comp.reversed()); for (CandidateLinkkey c : l) { - + try { - + Path currentDir = outputDir.resolve(String.valueOf(i)); Files.createDirectory(currentDir); //create output files - PrintWriter linksOut = new PrintWriter(Files.newBufferedWriter(currentDir.resolve("links.txt"),StandardOpenOption.CREATE)); - PrintWriter view1Out = new PrintWriter(Files.newBufferedWriter(currentDir.resolve("view1.txt"),StandardOpenOption.CREATE)); - - PrintWriter view2Out = new PrintWriter(Files.newBufferedWriter(currentDir.resolve("view2.txt"),StandardOpenOption.CREATE)); - - + PrintWriter linksOut = new PrintWriter(Files.newBufferedWriter(currentDir.resolve("links.txt"), StandardOpenOption.CREATE)); + PrintWriter view1Out = new PrintWriter(Files.newBufferedWriter(currentDir.resolve("view1.txt"), StandardOpenOption.CREATE)); + + PrintWriter view2Out = new PrintWriter(Files.newBufferedWriter(currentDir.resolve("view2.txt"), StandardOpenOption.CREATE)); + // print the candidate //linksOut.println(renderer.toString(c, true)); - linksOut.println("id"+ "\t" +"# links" + "\t" + "# c1" + "\t" + "# c2" + "\t" + "disc." + "\t" + "cov." + "\t" + "candidate"); - linksOut.println(i+ "\t" +eval.getSupport(c) + "\t" + c.getInstances1Size() + "\t" + c.getInstances2Size() + "\t" + f.format(eval.discriminability(c)) + "\t" + f.format(eval.coverage(c)) + "\t" + renderer.toString(c, true)); - - + linksOut.println("id" + "\t" + "# links" + "\t" + "# c1" + "\t" + "# c2" + "\t" + "disc." + "\t" + "cov." 
+ "\t" + "candidate"); + linksOut.println(i + "\t" + eval.getSupport(c) + "\t" + c.getInstances1Size() + "\t" + c.getInstances2Size() + "\t" + f.format(eval.discriminability(c)) + "\t" + f.format(eval.coverage(c)) + "\t" + renderer.toString(c, true)); + //print the classes - renderClasses(types1,view1Out); - renderClasses(types2,view2Out); - + renderClasses(types1, view1Out); + renderClasses(types2, view2Out); + // print the eqClasses - List<IntSet[]> eqClasses = computeEqClasses(c); + List<IntSet[]> eqClasses = computeEqClasses(c); BitSet types = new BitSet(); for (IntSet[] cl : eqClasses) { - + for (int inst : cl[0]) { - // print the eq class - linksOut.print(renderer.decodeAbbrevS1((long)inst<<32)); - linksOut.print(';'); - - String iUri = renderer.decodeS1((long)inst<<32); - - //System.out.println(types1.get(iUri)) - if (instanceTypes1.containsKey(iUri)) types.or(instanceTypes1.get(iUri)); - // + // print the eq class + linksOut.print(renderer.decodeAbbrevS1((long) inst << 32)); + linksOut.print(';'); + + String iUri = renderer.decodeS1((long) inst << 32); + + //System.out.println(types1.get(iUri)) + if (instanceTypes1.containsKey(iUri)) { + types.or(instanceTypes1.get(iUri)); + } + // } //view1Out.println(types); - renderBS(types,types1.size(),view1Out); - + renderBS(types, types1.size(), view1Out); + types.clear(); - + for (int inst : cl[1]) { - linksOut.print(renderer.decodeAbbrevS2(inst)); - linksOut.print(';'); - - String iUri = renderer.decodeS2(inst); - if (instanceTypes2.containsKey(iUri)) types.or(instanceTypes2.get(iUri)); + linksOut.print(renderer.decodeAbbrevS2(inst)); + linksOut.print(';'); + + String iUri = renderer.decodeS2(inst); + if (instanceTypes2.containsKey(iUri)) { + types.or(instanceTypes2.get(iUri)); + } } //view2Out.println(types); - renderBS(types,types2.size(),view2Out); + renderBS(types, types2.size(), view2Out); types.clear(); - + linksOut.println(); } linksOut.close(); view1Out.close(); view2Out.close(); - + } catch (IOException 
ex) { Logger.getLogger(RenderForRedesc.class.getName()).log(Level.SEVERE, null, ex); } - i+=1; + i += 1; } - + } - + private static void renderBS(BitSet bs, int nb, PrintWriter out) { - for (int i=0; i<nb-1 ; i++) { - out.print(bs.get(i)?'1':'0'); + for (int i = 0; i < nb - 1; i++) { + out.print(bs.get(i) ? '1' : '0'); out.print(';'); } - if (nb>0) { - out.print(bs.get(nb-1)?'1':'0'); + if (nb > 0) { + out.print(bs.get(nb - 1) ? '1' : '0'); } out.println(); } - + private static void renderClasses(Object2IntMap<String> classes, PrintWriter out) { - if (classes.size()>0) { + if (classes.size() > 0) { String[] classesA = new String[classes.size()]; - classes.forEach((c,id)->classesA[id]=c); - + classes.forEach((c, id) -> classesA[id] = c); + out.print(classesA[0]); - for (int i=1;i<classesA.length;i++) { + for (int i = 1; i < classesA.length; i++) { out.print(';'); out.print(classesA[i]); } } out.println(); } - + private void indexTypes() { try { types1 = new Object2IntOpenHashMap<>(); - instanceTypes1=indexTypes(ds1,types1); + instanceTypes1 = indexTypes(ds1, types1); types2 = new Object2IntOpenHashMap<>(); - instanceTypes2=indexTypes(ds2,types2); + instanceTypes2 = indexTypes(ds2, types2); } catch (IOException ex) { Logger.getLogger(RenderForRedesc.class.getName()).log(Level.SEVERE, null, ex); } - + } - - private static Map<String,BitSet> indexTypes(Path ds, Object2IntMap classes) throws IOException { - int nextId=0; - Map<String,BitSet> res =new HashMap<>(); - Iterator<Triple> it = RDFDataMgr.createIteratorTriples(Files.newInputStream(ds),RDFLanguages.filenameToLang(ds.getFileName().toString()),""); + + private static Map<String, BitSet> indexTypes(Path ds, Object2IntMap classes) throws IOException { + int nextId = 0; + Map<String, BitSet> res = new HashMap<>(); + Iterator<Triple> it = RDFDataMgr.createIteratorTriples(Files.newInputStream(ds), RDFLanguages.filenameToLang(ds.getFileName().toString()), ""); while (it.hasNext()) { Triple t = it.next(); - + if 
(t.getPredicate().getURI().equals(RDF.type.getURI()) && t.getSubject().isURI()) { String s = t.getSubject().getURI(); BitSet types = res.get(s); - if (types==null) { + if (types == null) { types = new BitSet(); res.put(s, types); } String classUri = t.getObject().getURI(); - int currentId= classes.getOrDefault(classUri, -1); - if (currentId==-1) { + int currentId = classes.getOrDefault(classUri, -1); + if (currentId == -1) { classes.put(classUri, nextId++); } types.set(classes.getInt(classUri)); @@ -233,27 +223,26 @@ public class RenderForRedesc { } return res; } - - + public List<IntSet[]> computeEqClasses(CandidateLinkkey c) { List<IntSet[]> partition = new ArrayList<>(); LongList links = new LongArrayList(c.getLinks()); LongSet instances = new LongOpenHashSet(); - + while (!links.isEmpty()) { LongListIterator it = links.iterator(); - IntSet[] eqClass = {new IntOpenHashSet(),new IntOpenHashSet()}; + IntSet[] eqClass = {new IntOpenHashSet(), new IntOpenHashSet()}; long l = it.nextLong(); - int i1=IntPair.decodeI1(l); - int i2=IntPair.decodeI2(l); + int i1 = IntPair.decodeI1(l); + int i2 = IntPair.decodeI2(l); eqClass[0].add(i1); eqClass[1].add(i2); instances.add(i1); instances.add(i2); it.remove(); - boolean added=false; + boolean added = false; do { - added=false; + added = false; it = links.iterator(); while (it.hasNext()) { l = it.nextLong(); @@ -261,13 +250,12 @@ public class RenderForRedesc { eqClass[1].add(IntPair.decodeI2(l)); instances.add(IntPair.decodeI2(l)); it.remove(); - added=true; - } - else if (eqClass[1].contains(IntPair.decodeI2(l))) { + added = true; + } else if (eqClass[1].contains(IntPair.decodeI2(l))) { eqClass[0].add(IntPair.decodeI1(l)); instances.add(IntPair.decodeI1(l)); it.remove(); - added=true; + added = true; } } } while (added); @@ -275,16 +263,16 @@ public class RenderForRedesc { } for (int i : result.getInstances1()) { if (!instances.contains(i)) { - partition.add(new IntSet[]{IntSets.singleton(i),IntSets.EMPTY_SET}); + 
partition.add(new IntSet[]{IntSets.singleton(i), IntSets.EMPTY_SET}); } } for (int i : result.getInstances2()) { if (!instances.contains(i)) { - partition.add(new IntSet[]{IntSets.EMPTY_SET,IntSets.singleton(i)}); + partition.add(new IntSet[]{IntSets.EMPTY_SET, IntSets.singleton(i)}); } } return partition; - + } - + } diff --git a/src/main/java/fr/inrialpes/exmo/linkkey/DescriptionsSet.java b/src/main/java/fr/inrialpes/exmo/linkkey/utils/DescriptionsSet.java similarity index 91% rename from src/main/java/fr/inrialpes/exmo/linkkey/DescriptionsSet.java rename to src/main/java/fr/inrialpes/exmo/linkkey/utils/DescriptionsSet.java index fc5bf5ac272d315d86899525ff6af26d8788b945..4f340063b246c823b38f0bb8b06f02527723c1f4 100644 --- a/src/main/java/fr/inrialpes/exmo/linkkey/DescriptionsSet.java +++ b/src/main/java/fr/inrialpes/exmo/linkkey/utils/DescriptionsSet.java @@ -16,7 +16,7 @@ * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. 
 */ -package fr.inrialpes.exmo.linkkey; +package fr.inrialpes.exmo.linkkey.utils; import it.unimi.dsi.fastutil.longs.*; import it.unimi.dsi.fastutil.objects.ObjectIterator; @@ -30,15 +30,26 @@ import java.util.Iterator; */ public class DescriptionsSet { + /** + * Associate each pair of properties to an int identifier + */ private final Long2IntMap propertyPairsIdx; + + /** + * reverse index: the position "id" of this list + * gives the associated pair of properties + */ private final LongList propertyPairs; + /** + * used to build the maximal description + */ private final LongSet inPairs; private final LongSet eqPairs; private final Long2ObjectMap<BitSet[]> descriptions; - private FileChannel fc; + private FileChannel fcIn; public DescriptionsSet() { propertyPairsIdx = new Long2IntOpenHashMap(); @@ -49,7 +60,8 @@ public class DescriptionsSet { propertyPairs = new LongArrayList(); inPairs = new LongOpenHashSet(); eqPairs = new LongOpenHashSet(); - //fc.m + + //fcIn.map(FileChannel.MapMode.READ_WRITE, 0, 0); } diff --git a/src/main/java/fr/inrialpes/exmo/linkkey/utils/Long2LongSetFileMap.java b/src/main/java/fr/inrialpes/exmo/linkkey/utils/Long2LongSetFileMap.java deleted file mode 100644 index 88757ca89f97cb2e6820eee01a25fc52919fb377..0000000000000000000000000000000000000000 --- a/src/main/java/fr/inrialpes/exmo/linkkey/utils/Long2LongSetFileMap.java +++ /dev/null @@ -1,255 +0,0 @@ -/* - * Copyright (C) 2014-2018 Jerome David <jerome.david@univ-grenoble-alpes.fr> - * - * This file is part of Linkex. - * - * Linkex is free software: you can redistribute it and/or modify - * it under the terms of the Lesser GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Linkex is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ -package fr.inrialpes.exmo.linkkey.utils; - -import it.unimi.dsi.fastutil.ints.IntArrayList; -import it.unimi.dsi.fastutil.longs.AbstractLong2ObjectMap; -import it.unimi.dsi.fastutil.longs.Long2ObjectAVLTreeMap; -import it.unimi.dsi.fastutil.longs.Long2ObjectMap; -import it.unimi.dsi.fastutil.longs.Long2ObjectSortedMap; -import it.unimi.dsi.fastutil.longs.LongArrayList; -import it.unimi.dsi.fastutil.longs.LongList; -import it.unimi.dsi.fastutil.longs.LongOpenHashSet; -import it.unimi.dsi.fastutil.longs.LongSet; -import java.io.Externalizable; -import java.io.File; -import java.io.IOException; -import java.io.ObjectInput; -import java.io.ObjectOutput; -import java.nio.ByteBuffer; -import java.nio.MappedByteBuffer; -import java.nio.channels.FileChannel; -import java.nio.file.StandardOpenOption; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.NoSuchElementException; -import java.util.logging.Level; -import java.util.logging.Logger; - -/** - * Maps long (i.e. pair of subjects) to a set of long (set of pairs of properties) - * It uses a file to flush the map when it becomes too large. 
- * @author Jerome David <jerome.david@univ-grenoble-alpes.fr> - */ -public class Long2LongSetFileMap { - - /** - * The number of 4 bytes integers needed to flush this index - The serialization of the index consists in storing for each key: - the key (8 bytes), the number of values (4 bytes) and the value (n*8bytes) - */ - private int counter; - - /** - * the current index with keys sorted - * */ - private Long2ObjectSortedMap<LongSet> index; - - private IntArrayList lengths; - - private File data; - private FileChannel fc; - - private long bufStart; - - /** - * The maximal numer of 4-bytes integers stored in the map - */ - private final long maxIndexSize = 1000000; - - - private List<MappedByteBuffer> buffers; - - long lastPos = 0; - - public Long2LongSetFileMap() throws IOException { - - data = File.createTempFile(this.getClass().getName(), ".dat"); - data.deleteOnExit(); - fc = FileChannel.open(data.toPath(), StandardOpenOption.WRITE, StandardOpenOption.READ, StandardOpenOption.DELETE_ON_CLOSE); - - counter = 0; - bufStart = 0; - - index = new Long2ObjectAVLTreeMap<>(); - lengths = new IntArrayList(); - buffers = new ArrayList<>(); - //Runtime.getRuntime().gc(); - // use half of memory - //maxIndexSize = (Runtime.getRuntime().freeMemory()/8); - //System.out.println("maxIndexSize="+maxIndexSize); - - } - - public Long2LongSetFileMap(File data) throws IOException { - this.data = data; - fc = FileChannel.open(data.toPath(), StandardOpenOption.WRITE, StandardOpenOption.READ); - counter = 0; - bufStart = 0; - - index = new Long2ObjectAVLTreeMap<>(); - lengths = new IntArrayList(); - buffers = new ArrayList<>(); - //Runtime.getRuntime().gc(); - // use half of memory - //maxIndexSize = (Runtime.getRuntime().freeMemory()/8); - } - - public void putAdd(long key, long value) { - LongSet l = index.get(key); - if (l == null) { - index.put(key, l = new LongOpenHashSet()); - // the key 2 + length of the value set 1 - counter += 3; - } - if (l.add(value)) { - counter += 2; - } 
- if (counter > maxIndexSize) { - flush(); - } - } - - public void put(int s1, int s2, int e1, int e2) { - long key = IntPair.encode(s1, s2); - long value = IntPair.encode(e1, e2); - putAdd(key, value); - - } - - public void flush() { - - if (index.isEmpty()) return; - - try { - lengths.add(index.size()); - //pos.add(lastPos); - - MappedByteBuffer buf = fc.map(FileChannel.MapMode.READ_WRITE, fc.size(), counter * Integer.BYTES); - - for (Long2ObjectMap.Entry<LongSet> ent : index.long2ObjectEntrySet()) { - buf.putLong(ent.getLongKey()); - buf.putInt(ent.getValue().size()); - for (long l : ent.getValue()) { - buf.putLong(l); - } - - } - //lastPos+=this.getPosition(); - //System.out.println(this+" , "+fc.size()+" , "+counter+" FLUSH"); - counter = 0; - buffers.add(buf); - - } catch (IOException ex) { - Logger.getLogger(Long2LongSetFileMap.class.getName()).log(Level.SEVERE, null, ex); - } - index.clear(); - } - - - - public Iterator<Long2ObjectMap.Entry<LongSet>> entrySetIterator() throws IOException { - flush(); - return new Iterator<Long2ObjectMap.Entry<LongSet>>() { - - private IntArrayList indexes = new IntArrayList(); - private LongArrayList keys = new LongArrayList(); - private IntArrayList sizes = new IntArrayList(); - private IntArrayList lengthsCopy = lengths.clone(); - - private List<MappedByteBuffer> buffersCopy = new ArrayList(buffers); - - { - for (ByteBuffer buf : buffersCopy) { - buf.position(0);//.rewind(); - indexes.add(0); - keys.add(buf.getLong()); - sizes.add(buf.getInt()); - } - - } - - @Override - public boolean hasNext() { - return !indexes.isEmpty(); - } - - public void readEntry() { - - } - - @Override - public Long2ObjectMap.Entry<LongSet> next() { - if (indexes.isEmpty()) { - throw new NoSuchElementException(); - } - - long min = Long.MAX_VALUE; - for (long k : keys) { - if (k < min) { - min = k; - } - } - - LongSet pairs = new LongOpenHashSet(); - - for (int i = 0; i < keys.size(); i++) { - if (keys.getLong(i) == min) { - - ByteBuffer buf = 
buffersCopy.get(i); - - for (int j = 0; j < sizes.getInt(i); j++) { - pairs.add(buf.getLong()); - } - - int nextIdx = indexes.getInt(i) + 1; - if (nextIdx == lengthsCopy.getInt(i)) { - indexes.removeInt(i); - keys.removeLong(i); - sizes.removeInt(i); - buffersCopy.remove(i); - lengthsCopy.removeInt(i); - - } else { - - // read following key and size - keys.set(i, buf.getLong()); - sizes.set(i, buf.getInt()); - // update counters - indexes.set(i, nextIdx); - } - - } - } - // coud be improved by resuing BasicEntry instance - return new AbstractLong2ObjectMap.BasicEntry<>(min, pairs); - - } - - }; - } - - - public void close() throws IOException { - buffers.clear(); - fc.close(); - //data.delete(); - } -} diff --git a/src/main/java/fr/inrialpes/exmo/linkkey/utils/TripleIndex.java b/src/main/java/fr/inrialpes/exmo/linkkey/utils/TripleIndex.java index 7a61f759a364b6988423d0a8ccede6146b2e0822..3381041f46a4babf9d049bfd74007f7bb2823b25 100644 --- a/src/main/java/fr/inrialpes/exmo/linkkey/utils/TripleIndex.java +++ b/src/main/java/fr/inrialpes/exmo/linkkey/utils/TripleIndex.java @@ -21,7 +21,7 @@ package fr.inrialpes.exmo.linkkey.utils; import it.unimi.dsi.fastutil.ints.*; /** - * Index that store triple (a,b,c) as a Maps b Map c + * Index that stores triples (a,b,c) as a map from a to a map from b to the set of c * @author jdavid */ public class TripleIndex extends Int2ObjectOpenHashMap<Int2ObjectOpenHashMap<IntOpenHashSet>> {