diff --git a/src/kdiv/DoubleMapSimilarity.java b/src/kdiv/DoubleMapSimilarity.java new file mode 100644 index 0000000000000000000000000000000000000000..abe4e76788796af43410d73c731df8b59ffb5afa --- /dev/null +++ b/src/kdiv/DoubleMapSimilarity.java @@ -0,0 +1,24 @@ +package kdiv; + +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; + +class DoubleMapSimilarity<T> implements Similarity<T> { + private final Map<T, Map<T, Double>> similarityMap = new HashMap<>(); + + public void addSimilarity(T obj1, T obj2, double similarity) { + similarityMap.computeIfAbsent(obj1, k -> new HashMap<>()).put(obj2, similarity); + + similarityMap.computeIfAbsent(obj2, k -> new HashMap<>()) .put(obj1, similarity); //Store the mirrored entry so lookups are symmetric + } + + @Override + public Double getSimilarity(Object obj1, Object obj2) { + if (obj1 instanceof String && obj2 instanceof String) { + return Optional.ofNullable(similarityMap.get(obj1)).map(m -> m.get(obj2)).orElse(0.0); + } + return null; //NOTE: null (not 0.0) is returned when either key is not a String; the 0.0 default only applies to String keys with no stored value + } + +} \ No newline at end of file diff --git a/src/kdiv/Main.java b/src/kdiv/Main.java index 3fc815ecb684878714b79cca18828f5abab7cadf..009e4930d634d5febee68446d86899218968707c 100644 --- a/src/kdiv/Main.java +++ b/src/kdiv/Main.java @@ -1,8 +1,27 @@ package kdiv; +import java.util.Map; + + public class Main { - public static void main(String[] args) { - System.out.println("Hello, World!"); - } + + static final String UNSTRUCT = "unstructdist"; + static final String LINEAR = "linearstructdist"; + static final String GRAPHSEM = "graphsemdistinit"; + static final String NAMESEM = "namesemdistinit"; + + public static void main(String[] args) throws Exception { + Sample samp = new Sample(); + Map<String, Double> distrib = samp.createSampleDistribution(); + DoubleMapSimilarity<String> similarity = samp.createSampleDistance(GRAPHSEM); + //For this sample data, we expect a positive and decreasing result + for (int i = 0 ; i <= 10 ; i++) { + double 
diversityAti = MathUtils.diversity(distrib, similarity, i); + System.out.println("Diversity at "+ i +" is "+ diversityAti); + } + } } + + + diff --git a/src/kdiv/MathUtils.java b/src/kdiv/MathUtils.java new file mode 100644 index 0000000000000000000000000000000000000000..90d009d8b2197eea0a585191db859b8ae0e7d19f --- /dev/null +++ b/src/kdiv/MathUtils.java @@ -0,0 +1,130 @@ +package kdiv; + +import java.util.HashMap; +import java.util.Map; + +/** + * Provide mathematical tools. + * + * Normalize values to a [0, 1] scale. + * Convert a distance to a similarity using exponential. + * Compute diversity for q in [0, inf[ using prodation (q == 1) or summation (q != 1). + * + */ +public class MathUtils { + + /** + * Converts a given distance to a similarity using the exponential function. + * formula : similarity = exp(-distance) + * + * @param distance The distance value. + * @return The similarity score computed as exp(-distance). + */ + public static double distanceToSimilarity(double distance) { + return Math.exp(-distance); + } + + /** + * Normalize a Map<T, Double> by dividing every value by the sum of all values. + * + * @param <T> key type of the distribution + * @param distrib map from key to a raw weight + * @return a new map holding each value divided by the total, or an empty map when the total is 0.0 + */ + public static <T> Map<T, Double> relativeAbundance(Map<T, Double> distrib) { + Map<T, Double> rab = new HashMap<>(); + double total = 0.0; + + //Count = cardinal + for (Double count : distrib.values()) { + total += count; + } + //Avoid division by zero + if (total == 0.0) { + return rab; + } + for (T key : distrib.keySet()) { + rab.put(key, distrib.get(key) / total); + } + //Relative abundance + return rab; + } + + /** + * Return the weighted power sum that diversity() raises to 1/(1-q) when q!=1. + * Defined in definition 6.1.5 of Leinster's book. 
+ * @param <T> + * @param distrib + * @param sim + * @param exp + * @return + */ + public static <T> double summation(Map<T, Double> distrib, DoubleMapSimilarity<String> similarity , double exp) { + Map<T, Double> rab = relativeAbundance(distrib); + //System.out.println("RAB" + rab.toString()); + double result = 0.0; + + //Loop over each category (cat) + for (T cat : distrib.keySet()) { + if (distrib.get(cat) != 0) { + double innerResult = 0.0; + //Inner summation over each category (ocat) + for (T ocat : distrib.keySet()) { + if (distrib.get(ocat) != 0) { + //Assumes getSimilarity(cat, ocat) is non-null: a null result would throw a NullPointerException when unboxed for distanceToSimilarity. + Double similaritya = (distanceToSimilarity(similarity.getSimilarity(cat, ocat))); + innerResult += rab.get(ocat) * similaritya; + } + } + result += rab.get(cat) * Math.pow(innerResult, exp); + } + } + return result; + } + + /** + * Return the diversity when q==1. + * Defined in definition 6.1.5 of Leinster's book. + * @param <T> category type + * @param distrib map from category to raw weight, normalized here with relativeAbundance + * @param similarity pairwise lookup whose non-null results are passed through distanceToSimilarity + * @return product over categories of (sum over ocat of rab(ocat) * similarity(cat, ocat)) raised to the power -rab(cat) + */ + public static <T> double prodation(Map<T, Double> distrib, DoubleMapSimilarity<String> similarity) { + Map<T, Double> rab = relativeAbundance(distrib); + double result = 1.0; + + //Loop over each category (cat) + for (T cat : distrib.keySet()) { + if (distrib.get(cat) != 0) { + double innerResult = 0.0; + //Inner summation over each category (ocat) + for (T ocat : distrib.keySet()) { + if (distrib.get(ocat) != 0) { + Double similaritya = similarity.getSimilarity(cat, ocat); + innerResult += rab.get(ocat) * (similaritya != null ? (distanceToSimilarity(similaritya)) : 1); //a null lookup counts as distance 0, and exp(0) = 1 + } + } + result *= Math.pow(innerResult, -rab.get(cat)); + } + } + return result; + } + + /** + * Compute diversity and call the right function depending on q. 
+ * @param <T> + * @param distrib + * @param sim + * @param q + * @return + */ + public static <T> double diversity(Map<T, Double> distrib, DoubleMapSimilarity<String> sim, double q) { + if (q==1) { + return prodation(distrib, sim); + } else { + double sum = summation(distrib, sim, q-1); + return Math.pow(sum, 1/(1-q)); + } + } +} diff --git a/src/kdiv/Sample.java b/src/kdiv/Sample.java new file mode 100644 index 0000000000000000000000000000000000000000..eea5b733e97ef4c6af9ec611fc4916f8dac7b9d4 --- /dev/null +++ b/src/kdiv/Sample.java @@ -0,0 +1,152 @@ +package kdiv; + +import java.util.HashMap; +import java.util.Map; + +public class Sample { + + + /** + * Create a sample distribution as a HashMap. For now, the sample is equidistributed. + * @return a new map assigning the weight 0.2 to each of the keys A, B, C, D and E + */ + public Map<String, Double> createSampleDistribution() { + Map<String, Double> distrib = new HashMap<>(); + + //There is (for now) only one distribution, where all the ontologies are equally diverse + //In order to show all distributions used in the python code, we might need a map of maps so that we can store them + distrib.put("A", 0.2); + distrib.put("B", 0.2); + distrib.put("C", 0.2); + distrib.put("D", 0.2); + distrib.put("E", 0.2); + return distrib; + } + + /** + * Creates a sample distance matrix as a nested HashMap. 
+ * Distances are assumed to be normalized between 0 and 1 //Not enforced: linearstructdist stores distances up to 4.0 + * @param graph name of the sample matrix: "unstructdist", "linearstructdist", "graphsemdistinit" or "namesemdistinit" + * @return a DoubleMapSimilarity holding the symmetric pairwise distances between A, B, C, D and E + * @throws Exception if graph is none of the four names above + */ + public DoubleMapSimilarity<String> createSampleDistance(String graph) throws Exception { + DoubleMapSimilarity<String> similarity = new DoubleMapSimilarity<>(); + + if (graph.equals("unstructdist")) { + /* + * + * Used Matrix : {A, B, C, D, E} with unstructdist + * + * A B C D E + * A| 0 | 1.0 | 1.0 | 1.0 | 1.0 | + * B| 1.0 | 0 | 1.0 | 1.0 | 1.0 | + * C| 1.0 | 1.0 | 0 | 1.0 | 1.0 | + * D| 1.0 | 1.0 | 1.0 | 0 | 1.0 | + * E| 1.0 | 1.0 | 1.0 | 1.0 | 0 | + */ + + similarity.addSimilarity("A", "B", 1.0); + similarity.addSimilarity("A", "C", 1.0); + similarity.addSimilarity("A", "D", 1.0); + similarity.addSimilarity("A", "E", 1.0); + + similarity.addSimilarity("B", "C", 1.0); + similarity.addSimilarity("B", "D", 1.0); + similarity.addSimilarity("B", "E", 1.0); + + similarity.addSimilarity("C", "D", 1.0); + similarity.addSimilarity("C", "E", 1.0); + + similarity.addSimilarity("D", "E", 1.0); + + + + } + else if (graph.equals("linearstructdist")) { + /* + * + * Used Matrix : {A, B, C, D, E} with linearstructdist + * + * A B C D E + * A| 0 | 1.0 | 2.0 | 3.0 | 4.0 | + * B| 1.0 | 0 | 1.0 | 2.0 | 3.0 | + * C| 2.0 | 1.0 | 0 | 1.0 | 2.0 | + * D| 3.0 | 2.0 | 1.0 | 0 | 1.0 | + * E| 4.0 | 3.0 | 2.0 | 1.0 | 0 | + */ + similarity.addSimilarity("A", "B", 1.0); + similarity.addSimilarity("A", "C", 2.0); + similarity.addSimilarity("A", "D", 3.0); + similarity.addSimilarity("A", "E", 4.0); + + similarity.addSimilarity("B", "C", 1.0); + similarity.addSimilarity("B", "D", 2.0); + similarity.addSimilarity("B", "E", 3.0); + + similarity.addSimilarity("C", "D", 1.0); + similarity.addSimilarity("C", "E", 2.0); + + similarity.addSimilarity("D", "E", 1.0); + + + } else if (graph.equals("graphsemdistinit")) { + /* + * + * Used Matrix : {A, B, C, D, E} with graphsemdistinit + * + * A B C D E + * A| 0 | 1/3 | 2/3 | 1.0 | 1.0 | + * B| 1/3 | 0 | 
1/3 | 1.0 | 1.0 | + * C| 2/3 | 1/3 | 0 | 1/3 | 2/3 | + * D| 1.0 | 1.0 | 1/3 | 0 | 1/3 | + * E| 1.0 | 1.0 | 2/3 | 1/3 | 0 | + */ + similarity.addSimilarity("A", "B", 1.0/3.0); + similarity.addSimilarity("A", "C", 2.0/3.0); + similarity.addSimilarity("A", "D", 1.0); + similarity.addSimilarity("A", "E", 1.0); + + similarity.addSimilarity("B", "C", 1.0/3.0); + similarity.addSimilarity("B", "D", 1.0); + similarity.addSimilarity("B", "E", 1.0); + + similarity.addSimilarity("C", "D", 1.0/3.0); + similarity.addSimilarity("C", "E", 2.0/3.0); + + similarity.addSimilarity("D", "E", 1.0/3.0); + + + } else if (graph.equals("namesemdistinit")) { + /* + * + * Used Matrix : {A, B, C, D, E} with namesemdistinit + * + * A B C D E + * A| 0 | 4/6 | 2/6 | 2/6 | 3/6 | + * B| 4/6 | 0 | 2/4 | 2/4 | 2/6 | + * C| 2/6 | 2/4 | 0 | 2/4 | 2/6 | + * D| 2/6 | 2/4 | 2/4 | 0 | 4/6 | + * E| 3/6 | 2/6 | 2/6 | 4/6 | 0 | + */ + similarity.addSimilarity("A", "B", 4.0/6.0); + similarity.addSimilarity("A", "C", 2.0/6.0); + similarity.addSimilarity("A", "D", 2.0/6.0); + similarity.addSimilarity("A", "E", 3.0/6.0); + + similarity.addSimilarity("B", "C", 2.0/4.0); + similarity.addSimilarity("B", "D", 2.0/4.0); + similarity.addSimilarity("B", "E", 2.0/6.0); + + similarity.addSimilarity("C", "D", 2.0/4.0); + similarity.addSimilarity("C", "E", 2.0/6.0); + + similarity.addSimilarity("D", "E", 4.0/6.0); + + } else { + throw new Exception("Name of the graph does not exist, error in createSampleDistance"); + } + return similarity; + } + +} diff --git a/src/kdiv/Similarity.java b/src/kdiv/Similarity.java new file mode 100644 index 0000000000000000000000000000000000000000..8ca8a2031701e96a2099880e2a7e5dd82de7be3e --- /dev/null +++ b/src/kdiv/Similarity.java @@ -0,0 +1,11 @@ +package kdiv; + +/** + * The Similarity interface returns the similarity between two categories. + */ +public interface Similarity<T> { + + Double getSimilarity(T obj1, T obj2); + +} +