From 84b96b5f46a2d969aa2956e7a4796276ce62229f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Euzenat?= <Jerome.Euzenat@inria.fr>
Date: Thu, 20 Feb 2025 15:50:22 +0100
Subject: [PATCH] Enhancement on printTree and distance for Classifications

- removed the useless 'throws Exception' clause from eqClass
- made printTree print to a given PrintStream
- started support for creating classifications from a String specification (code still commented out)
- implemented a 'semantic' distance between Classifications
---
 .../inria/moex/classapp/Classification.java   | 106 ++++++++++++++----
 1 file changed, 87 insertions(+), 19 deletions(-)

diff --git a/src/fr/inria/moex/classapp/Classification.java b/src/fr/inria/moex/classapp/Classification.java
index 025a801..578e92d 100644
--- a/src/fr/inria/moex/classapp/Classification.java
+++ b/src/fr/inria/moex/classapp/Classification.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) INRIA, 2020-2024
+ * Copyright (C) INRIA, 2020-2025
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -57,6 +57,10 @@ import java.io.PrintStream;
 import java.io.BufferedOutputStream;
 import java.io.FileOutputStream;
 import java.io.File;
+import java.io.ByteArrayOutputStream;
+import java.io.UnsupportedEncodingException;
+//import static java.nio.charset.StandardCharsets.UTF_8;
+import java.nio.charset.StandardCharsets;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -106,18 +110,44 @@ public class Classification implements Iterable<Classification> {
 		}
 	}
 
+	/*
 	public Classification( String specif ) {
-		// If specif == "()"
-		//return this;
-		// else
-		// (xxx a1 a2 a3)
+		if ( !specif.equals( "()" ) ) {
+			// get the feature, set it 
+			subTree = new Classification[NBVALUES];
+			// read the subtree and put in the place
+		}
 	}
 
 	// May not need to be static if call from above?
 	public static Classification read( String specif ) {
-		String newsp = specif.trim();
-		return new Classification();
-	}
+		Classification result;
+		try { 
+			StringReader reader = new StringReader( specif );
+			int c;
+			while((c = reader.read()) != -1) {
+				switch ( c ) {
+				case '(':					
+					while((c = reader.read()) != -1) { if ( c != ' ' && c != '\t' ) break; }
+					if ( c == ')' )	result = new Classification(); break;
+					String ff = (Char)c;
+					while((c = reader.read()) != -1) { if ( c != ' ' && c != '\t' && c != '(' ) ff += (Char)c; }
+					// Analyse ff
+					result = new Classification( ff );
+					result.subTree = new Classification[NBVALUES];
+					// Go ahead...		
+				case ' ': ; break;
+				case '\t': ; break;
+				case ')': ; break;
+				}
+			}
+		} catch ( Exception ex ) { 
+            System.out.println(ex); 
+		}
+		fillPatterns();
+		// This puts the lists of cards in leaves: this is necessary for the game
+		fillLeaves();		
+		}*/
 
 	// JE2024: Taken can be replaced by pattern
 	public Classification( int level, int forks, boolean[] taken, boolean exact ) {
@@ -173,7 +203,7 @@ public class Classification implements Iterable<Classification> {
 		return new Classification( levels, nbforks, new boolean[NBFEATURES], exact );
 	}
 
-	public boolean eqClass( Classification cl ) throws Exception {
+	public boolean eqClass( Classification cl ) {
 		if ( this == cl ) return true;
 		// I must find the meaning of the classification:
 		// could use cards, but must be filled
@@ -328,6 +358,28 @@ public class Classification implements Iterable<Classification> {
 		for ( Classification cl: this ) result++;
     	return result;
     }
+
+	/**
+	 * Ratio of common classes over the total number of classes: 2*common/(size(this)+size(classif2)).
+	 * NOTE: identical classifications yield 1., so larger means closer; meant to be usable on ontologies.
+	 * Not optimised: each class of this is compared (eqClass) with the classes of classif2.
+	 */
+    public double distance( Classification classif2 ) {
+    	int nbClasses = 0;
+    	int nbCommon = 0;
+		if ( this == classif2 ) return 1.;
+		for ( Classification cl: this ) {
+			nbClasses++;
+			for ( Classification cl2: classif2 ) {
+				if ( cl.eqClass( cl2 ) ) {
+					nbCommon++;
+					break;
+				}
+			}
+		}
+		for ( Classification cl2: classif2 ) nbClasses++;
+    	return (2.*nbCommon)/nbClasses;
+    }
 	
 	public int nbForks() {
 		return (nbNodes()-1)/3;
@@ -337,7 +389,7 @@ public class Classification implements Iterable<Classification> {
 	public int nbLeaves() {
 		return (2*nbNodes()+1)/3;
 	}
-	
+
 	// draw a random integer between min and max
 	private int drawForks( int minforks, int maxforks ) {
 		int result = minforks+generator.nextInt( maxforks-minforks+1 );
@@ -606,27 +658,43 @@ public class Classification implements Iterable<Classification> {
 			return result+")";
 		}
 	}
+	// Renders the tree in the printTree( PrintStream ) format and returns it as a String (never null)
+	public String printTreeToString() {
+		final ByteArrayOutputStream baos = new ByteArrayOutputStream();
+		final String UTF_8 = StandardCharsets.UTF_8.name();
+		// try-with-resources: the stream is closed (hence flushed) before the bytes are read back
+		try ( PrintStream ps = new PrintStream( baos, true, UTF_8 ) ) {
+			printTree( ps );
+		} catch ( UnsupportedEncodingException ex ) {
+			// Only raised for an unknown charset name: report it instead of silently returning null
+			throw new IllegalStateException( "UTF-8 charset not supported", ex );
+		}
+		// Decoding through the Charset object raises no checked exception
+		return new String( baos.toByteArray(), StandardCharsets.UTF_8 );
+	}
 
 	// So far everything is generic (features/values are just numbers)
 	// Below the output functions are specific to the Class? game
 
 	// Print the board as a lisp-like structure indicating only the characteristics
 	public void printTree() {
-		System.out.print( " (" );
+		printTree( System.out );
+	}
+	public void printTree( PrintStream out ) {
+		out.print( " (" );
 		if ( feature != null ) {
-			switch (feature) {
-			case COLOR: System.out.print( PlayClass.getString( "col" ) ); break;
-			case SHAPE: System.out.print( PlayClass.getString( "sha" ) ); break;
-			case FILLING: System.out.print( PlayClass.getString( "fil" ) ); break;
-			case NUMBER: System.out.print( PlayClass.getString( "num" ) ); break;
+			switch ( feature ) {
+			case COLOR: out.print( PlayClass.getString( "col" ) ); break;
+			case SHAPE: out.print( PlayClass.getString( "sha" ) ); break;
+			case FILLING: out.print( PlayClass.getString( "fil" ) ); break;
+			case NUMBER: out.print( PlayClass.getString( "num" ) ); break;
 			default: break;
 			}
-			//if ( feature != null ) {
 			for ( Classification sub : subTree ) {
-				sub.printTree();
+				sub.printTree( out );
 			}
 		}
-		System.out.print( ") " );
+		out.print( ") " );
 	}
 
 	// JE: all these could be computed once and for all (because here it is computed many times)
-- 
GitLab