/**
 * Read-only container pairing a diversity value with the distribution it belongs to.
 *
 * <p>The map passed to the constructor is copied, and the copy is only ever exposed
 * through an unmodifiable view. Changes made to the source map after construction are
 * therefore not visible here, and callers cannot alter the stored distribution through
 * {@link #getDistribution()}.
 *
 * @param <T> type of the categories the distribution is keyed by
 */
public class DiversityResult<T> {
    /** Diversity value supplied at construction time. */
    private final double maxDiversity;

    /** Unmodifiable snapshot of the distribution supplied at construction time. */
    private final Map<T, Double> distribution;

    /**
     * Create a result.
     *
     * @param maxDiversity the diversity value to store
     * @param distribution value per category; copied in its iteration order.
     *                     {@code null} is accepted and stored as an empty map.
     */
    public DiversityResult(double maxDiversity, Map<T, Double> distribution) {
        this.maxDiversity = maxDiversity;
        if (distribution == null) {
            this.distribution = java.util.Collections.<T, Double>emptyMap();
        } else {
            // LinkedHashMap keeps the source's iteration order, so toString() prints the
            // entries in the same order the caller's map would have printed them.
            this.distribution = java.util.Collections.unmodifiableMap(
                    new java.util.LinkedHashMap<T, Double>(distribution));
        }
    }

    /**
     * @return the stored diversity value
     */
    public double getMaxDiversity() {
        return maxDiversity;
    }

    /**
     * @return an unmodifiable view of the stored distribution, never {@code null};
     *         mutating calls throw {@link UnsupportedOperationException}
     */
    public Map<T, Double> getDistribution() {
        return distribution;
    }

    /**
     * @return a two-line description: the diversity value, then the distribution
     */
    @Override
    public String toString() {
        return "Max Diversity: " + maxDiversity + "\nDistribution: " + distribution;
    }
}
similarityMap + * @return + */ + public Set<T> getKey() { + return similarityMap.keySet(); + } + + } \ No newline at end of file diff --git a/src/kdiv/Main.java b/src/kdiv/Main.java index c4de608daecc4893e42de3ef60c225380f97484c..b28fdf69cbd66101fa7c91f1ad16d088e86167ad 100644 --- a/src/kdiv/Main.java +++ b/src/kdiv/Main.java @@ -3,6 +3,10 @@ package kdiv; import java.util.HashMap; import java.util.Map; +/** + * Main used for test now. + */ + public class Main { static final String UNSTRUCT = "unstructdist"; @@ -18,34 +22,47 @@ public class Main { * - Compute the diversity with q = i */ Sample samp = new Sample(); - Map<String, Map<String, Double>> distrib = samp.createSampleDistribution(); - Map<String, DoubleMapSimilarity<String>> similarity = new HashMap<>(); + Map<Object, Map<Object, Double>> distrib = samp.createSampleDistribution(); + Map<Object, DoubleMapSimilarity<Object>> similarity = new HashMap<>(); + similarity.put(UNSTRUCT, samp.createSampleDistance(UNSTRUCT)); similarity.put(LINEAR, samp.createSampleDistance(LINEAR)); similarity.put(GRAPHSEM, samp.createSampleDistance(GRAPHSEM)); similarity.put(NAMESEM, samp.createSampleDistance(NAMESEM)); - //For this sample data, we expect a : positive & decreasing result + //Iterate over each distance matrix and compute max diversity + for (Map.Entry<Object, DoubleMapSimilarity<Object>> distanceMatrix : similarity.entrySet()) { + Object matrix_key = distanceMatrix.getKey(); + DoubleMapSimilarity<Object> distanceMatrix_i = distanceMatrix.getValue(); + + //Compute max diversity + DiversityResult<Object> maxDivResult = MathUtils.maxDiversity(distanceMatrix_i); - - for (Map.Entry<String, DoubleMapSimilarity<String>> distanceMatrix : similarity.entrySet()) { - String matrix_key = distanceMatrix.getKey(); - DoubleMapSimilarity<String> distanceMatrix_i = distanceMatrix.getValue(); - System.out.println("--- MATRIX : "+ matrix_key); - for (Map.Entry<String, Map<String, Double>> entry : distrib.entrySet()) { - String 
distrib_key = entry.getKey(); - Map<String, Double> distrib_i = entry.getValue(); - System.out.println("--- KEY : "+ distrib_key); - for (int i = 0 ; i <= 10 ; i++) { - double diversityAti = MathUtils.diversity(distrib_i, distanceMatrix_i, i); - System.out.println("Diversity at "+ i +" is "+ diversityAti); + //Extract values + double maxDiversity = maxDivResult.getMaxDiversity(); + Map<Object, Double> optimalDistribution = maxDivResult.getDistribution(); + + //Print results + System.out.println("=== MATRIX: " + matrix_key + " ==="); + System.out.println("Max Diversity: " + maxDiversity); + System.out.println("Optimal Distribution: " + optimalDistribution); + System.out.println("------------------------------------\n"); + for (Map.Entry<Object, Map<Object, Double>> entry : distrib.entrySet()) { + Object distrib_key = entry.getKey(); + Map<Object, Double> distrib_i = entry.getValue(); + + System.out.println("--- KEY : "+ distrib_key); + + for (int i = 0 ; i <= 10 ; i++) { + double diversityAti = MathUtils.diversity(distrib_i, distanceMatrix_i, i); + System.out.println("Diversity at q = "+ i +" is "+ diversityAti); + } + } + System.out.println(); + + } } - } - System.out.println(); - } - - } } diff --git a/src/kdiv/MathUtils.java b/src/kdiv/MathUtils.java index 90d009d8b2197eea0a585191db859b8ae0e7d19f..ab931213ac37ee0cb71714cbb85d3f31f16ae828 100644 --- a/src/kdiv/MathUtils.java +++ b/src/kdiv/MathUtils.java @@ -1,7 +1,7 @@ package kdiv; -import java.util.HashMap; -import java.util.Map; +import java.util.*; +import org.apache.commons.math3.linear.*; /** * Provide mathematical tools. @@ -29,7 +29,7 @@ public class MathUtils { * * @param <T> * @param distrib - * @return + * @return A normalized map. 
*/ public static <T> Map<T, Double> relativeAbundance(Map<T, Double> distrib) { Map<T, Double> rab = new HashMap<>(); @@ -59,7 +59,7 @@ public class MathUtils { * @param exp * @return */ - public static <T> double summation(Map<T, Double> distrib, DoubleMapSimilarity<String> similarity , double exp) { + public static <T> double summation(Map<T, Double> distrib, DoubleMapSimilarity<T> similarity , double exp) { Map<T, Double> rab = relativeAbundance(distrib); //System.out.println("RAB" + rab.toString()); double result = 0.0; @@ -68,10 +68,10 @@ public class MathUtils { for (T cat : distrib.keySet()) { if (distrib.get(cat) != 0) { double innerResult = 0.0; + //Inner summation over each category (ocat) for (T ocat : distrib.keySet()) { - if (distrib.get(ocat) != 0) { - //We assume that sim.get(cat).get(ocat) exists. + if (distrib.get(ocat) != 0) { //We assume that sim.get(cat).get(ocat) exists Double similaritya = (distanceToSimilarity(similarity.getSimilarity(cat, ocat))); innerResult += rab.get(ocat) * similaritya; } @@ -90,7 +90,7 @@ public class MathUtils { * @param sim * @return */ - public static <T> double prodation(Map<T, Double> distrib, DoubleMapSimilarity<String> similarity) { + public static <T> double prodation(Map<T, Double> distrib, DoubleMapSimilarity<T> similarity) { Map<T, Double> rab = relativeAbundance(distrib); double result = 1.0; @@ -119,7 +119,7 @@ public class MathUtils { * @param q * @return */ - public static <T> double diversity(Map<T, Double> distrib, DoubleMapSimilarity<String> sim, double q) { + public static <T> double diversity(Map<T, Double> distrib, DoubleMapSimilarity<T> sim, double q) { if (q==1) { return prodation(distrib, sim); } else { @@ -127,4 +127,125 @@ public class MathUtils { return Math.pow(sum, 1/(1-q)); } } + + /** + * Compute the maximum diversity distribution and return both the diversity and the distribution. + * @param similarityMatrix The similarity matrix. 
+ * @return DiversityResult containing max diversity and optimal distribution. + */ + public static <T> DiversityResult<T> maxDiversity(DoubleMapSimilarity<T> similarityMatrix) { + //Parameters initialization + Set<T> elements = similarityMatrix.getKey(); + List<T> elementList = new ArrayList<>(elements); + int n = elementList.size(); + + double maxDiversity = 0; + List<T> bestSubset = null; + RealVector bestWeights = null; + Map<T, Double> bestDistribution = null; + + //Iterate over all non-empty subsets + for (int k = 1; k <= n; k++) { + for (Set<T> subset : generateCombinations(elementList, k)) { + List<T> subsetList = new ArrayList<>(subset); + RealMatrix subMatrix = createSubMatrix(similarityMatrix, subsetList); + + try { + RealVector b = new ArrayRealVector(k, 1.0); + DecompositionSolver solver = new LUDecomposition(subMatrix).getSolver(); + RealVector weights = solver.solve(b); + + if (areNonNegative(weights)) { + double magnitude = weights.getL1Norm(); + + if (magnitude > maxDiversity) { + maxDiversity = magnitude; + bestSubset = subsetList; + bestWeights = weights; + bestDistribution = computeDistribution(bestSubset, bestWeights, maxDiversity); + } + } + } catch (SingularMatrixException e) { + // Ignore singular matrices + } + } + } + + return new DiversityResult<>(maxDiversity, bestDistribution != null ? bestDistribution : new HashMap<>()); + } + + /* + * ---- Helpers for generating subsets + */ + + /** + * Generate all k-sized subsets of a given list. + */ + private static <T> Set<Set<T>> generateCombinations(List<T> elements, int k) { + Set<Set<T>> result = new HashSet<>(); + generateCombinationsHelper(elements, k, 0, new HashSet<>(), result); + return result; + } + + /** + * Recursive helper method to generate combinations. 
+ */ + private static <T> void generateCombinationsHelper(List<T> elements, int k, int index, Set<T> current, Set<Set<T>> result) { + if (current.size() == k) { + result.add(new HashSet<>(current)); + return; + } + for (int i = index; i < elements.size(); i++) { + current.add(elements.get(i)); + generateCombinationsHelper(elements, k, i + 1, current, result); + current.remove(elements.get(i)); + } + } + + /* + * Helpers for matrix manipulation + */ + + + /** + * Create a sub-matrix for the given subset from the similarity matrix. + */ + private static <T> RealMatrix createSubMatrix(DoubleMapSimilarity<T> similarityMatrix, List<T> subset) { + int size = subset.size(); + RealMatrix matrix = new Array2DRowRealMatrix(size, size); + + for (int i = 0; i < size; i++) { + for (int j = 0; j < size; j++) { + double similarity = similarityMatrix.getSimilarity(subset.get(i), subset.get(j)); + double distance = similarity != 0 ? distanceToSimilarity(similarity) : 1.0; + matrix.setEntry(i, j, distance); + } + } + + return matrix; + } + + /** + * Check if all elements in a RealVector are non-negative. + */ + private static boolean areNonNegative(RealVector vector) { + for (int i = 0; i < vector.getDimension(); i++) { + if (vector.getEntry(i) < 0) { + return false; + } + } + return true; + } + + /** + * Compute the probability distribution from weights. 
+ */ + private static <T> Map<T, Double> computeDistribution(List<T> subset, RealVector weights, double maxDiversity) { + Map<T, Double> distribution = new HashMap<>(); + for (int i = 0; i < subset.size(); i++) { + distribution.put(subset.get(i), weights.getEntry(i) / maxDiversity); + } + return distribution; + } + } diff --git a/src/kdiv/Sample.java b/src/kdiv/Sample.java index 5bd6dc5838bb590de282c2485ce7c810148c9fb8..513d2b6b8b65895ac548156163b9cbac659c22ac 100644 --- a/src/kdiv/Sample.java +++ b/src/kdiv/Sample.java @@ -3,20 +3,23 @@ package kdiv; import java.util.HashMap; import java.util.Map; -public class Sample { +/** + * This is a example file of how a similarity between ontolongies can be done + */ +public class Sample { /** * Create a sample distribution as a HashMap. For now, the sample is equidistributed * @return */ - public Map<String, Map<String, Double>> createSampleDistribution() { - Map<String, Map<String, Double>> distrib = new HashMap<>(); + public Map<Object, Map<Object, Double>> createSampleDistribution() { + Map<Object, Map<Object, Double>> distrib = new HashMap<>(); //There is (for now) only a distribution where all the ontology are equally diverse //In order to show all distribution used in the python code, we might have to do a map of map so that we can store them - Map<String, Double> dist1 = new HashMap<>(); + Map<Object, Double> dist1 = new HashMap<>(); dist1.put("A", 0.0); dist1.put("B", 0.0); dist1.put("C", 1.0); @@ -24,7 +27,7 @@ public class Sample { dist1.put("E", 0.0); distrib.put("dist1", dist1); - Map<String, Double> dist2 = new HashMap<>(); + Map<Object, Double> dist2 = new HashMap<>(); dist2.put("A", 0.0); dist2.put("B", 0.5); dist2.put("C", 0.0); @@ -32,7 +35,7 @@ public class Sample { dist2.put("E", 0.0); distrib.put("dist2", dist2); - Map<String, Double> dist3 = new HashMap<>(); + Map<Object, Double> dist3 = new HashMap<>(); dist3.put("A", 0.0); dist3.put("B", 0.2); dist3.put("C", 0.6); @@ -40,7 +43,7 @@ public class 
Sample { dist3.put("E", 0.0); distrib.put("dist3", dist3); - Map<String, Double> dist4 = new HashMap<>(); + Map<Object, Double> dist4 = new HashMap<>(); dist4.put("A", 0.1); dist4.put("B", 0.1); dist4.put("C", 0.6); @@ -48,7 +51,7 @@ public class Sample { dist4.put("E", 0.1); distrib.put("dist4", dist4); - Map<String, Double> dist5 = new HashMap<>(); + Map<Object, Double> dist5 = new HashMap<>(); dist5.put("A", 0.5); dist5.put("B", 0.0); dist5.put("C", 0.0); @@ -56,7 +59,7 @@ public class Sample { dist5.put("E", 0.5); distrib.put("dist5", dist5); - Map<String, Double> dist6 = new HashMap<>(); + Map<Object, Double> dist6 = new HashMap<>(); dist6.put("A", 0.1); dist6.put("B", 0.2); dist6.put("C", 0.4); @@ -64,7 +67,7 @@ public class Sample { dist6.put("E", 0.1); distrib.put("dist6", dist6); - Map<String, Double> dist7 = new HashMap<>(); + Map<Object, Double> dist7 = new HashMap<>(); dist7.put("A", 0.2); dist7.put("B", 0.2); dist7.put("C", 0.2); @@ -82,8 +85,8 @@ public class Sample { * @return * @throws Exception */ - public DoubleMapSimilarity<String> createSampleDistance(String graph) throws Exception { - DoubleMapSimilarity<String> similarity = new DoubleMapSimilarity<>(); + public DoubleMapSimilarity<Object> createSampleDistance(String graph) throws Exception { + DoubleMapSimilarity<Object> similarity = new DoubleMapSimilarity<>(); if (graph.equals("unstructdist")) { /* diff --git a/src/kdiv/Similarity.java b/src/kdiv/Similarity.java index c6646bf1ffea5e6691bff0d7dea2c14d8db76a42..0195ff6fe13cf0f613e5f26e46d86d5170a76dc3 100644 --- a/src/kdiv/Similarity.java +++ b/src/kdiv/Similarity.java @@ -1,7 +1,7 @@ package kdiv; /** - * The Similarity class get the similarity between two categories. + * The Similarity interface to get the similarity between two categories. */ public interface Similarity<T> {