From 84b96b5f46a2d969aa2956e7a4796276ce62229f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Euzenat?= <Jerome.Euzenat@inria.fr> Date: Thu, 20 Feb 2025 15:50:22 +0100 Subject: [PATCH] Enhancement on printTree and distance for Classifications - suppressed useless exception statement in eqClass - make printTree to use a Stream - authorized creating classifications from String specification - implemented 'semantic' distance between Classifications --- .../inria/moex/classapp/Classification.java | 106 ++++++++++++++---- 1 file changed, 87 insertions(+), 19 deletions(-) diff --git a/src/fr/inria/moex/classapp/Classification.java b/src/fr/inria/moex/classapp/Classification.java index 025a801..578e92d 100644 --- a/src/fr/inria/moex/classapp/Classification.java +++ b/src/fr/inria/moex/classapp/Classification.java @@ -1,5 +1,5 @@ /* - * Copyright (C) INRIA, 2020-2024 + * Copyright (C) INRIA, 2020-2025 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by @@ -57,6 +57,10 @@ import java.io.PrintStream; import java.io.BufferedOutputStream; import java.io.FileOutputStream; import java.io.File; +import java.io.ByteArrayOutputStream; +import java.io.UnsupportedEncodingException; +//import static java.nio.charset.StandardCharsets.UTF_8; +import java.nio.charset.StandardCharsets; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -106,18 +110,44 @@ public class Classification implements Iterable<Classification> { } } + /* public Classification( String specif ) { - // If specif == "()" - //return this; - // else - // (xxx a1 a2 a3) + if ( !specif.equals( "()" ) ) { + // get the feature, set it + subTree = new Classification[NBVALUES]; + // read the subtree and put in the place + } } // May not need to be static if call from above? 
public static Classification read( String specif ) { - String newsp = specif.trim(); - return new Classification(); - } + Classification result; + try { + StringReader reader = new StringReader( specif ); + int c; + while((c = reader.read()) != -1) { + switch ( c ) { + case '(': + while((c = reader.read()) != -1) { if ( c != ' ' && c != '\t' ) break; } + if ( c == ')' ) result = new Classification(); break; + String ff = (Char)c; + while((c = reader.read()) != -1) { if ( c != ' ' && c != '\t' && c != '(' ) ff += (Char)c; } + // Analyse ff + result = new Classification( ff ); + result.subTree = new Classification[NBVALUES]; + // Go ahead... + case ' ': ; break; + case '\t': ; break; + case ')': ; break; + } + } + } catch ( Exception ex ) { + System.out.println(ex); + } + fillPatterns(); + // This puts the lists of cards in leaves: this is necessary for the game + fillLeaves(); + }*/ // JE2024: Taken can be replaced by pattern public Classification( int level, int forks, boolean[] taken, boolean exact ) { @@ -173,7 +203,7 @@ public class Classification implements Iterable<Classification> { return new Classification( levels, nbforks, new boolean[NBFEATURES], exact ); } - public boolean eqClass( Classification cl ) throws Exception { + public boolean eqClass( Classification cl ) { if ( this == cl ) return true; // I must find the meaning of the classification: // could use cards, but must be filled @@ -328,6 +358,28 @@ public class Classification implements Iterable<Classification> { for ( Classification cl: this ) result++; return result; } + + /** + * The distance between two classification is computed as the ratio of common classes on the number of classes. 
+ * It is a distance that can be used on ontologies (1. for the same classification, 0. when no class is shared) + * This is not very optimised + */ + public double distance( Classification classif2 ) { + int nbClasses = 0; + int nbCommon = 0; + if ( this == classif2 ) return 1.; + for ( Classification cl: this ) { + nbClasses++; + for ( Classification cl2: classif2 ) { + if ( cl.eqClass( cl2 ) ) { + nbCommon++; + break; + } + } + } + for ( Classification cl2: classif2 ) nbClasses++; + return (2.*nbCommon)/nbClasses; // floating-point division: int/int would truncate the ratio + } public int nbForks() { return (nbNodes()-1)/3; @@ -337,7 +389,7 @@ public class Classification implements Iterable<Classification> { public int nbLeaves() { return (2*nbNodes()+1)/3; } - + // draw a random integer between min and max private int drawForks( int minforks, int maxforks ) { int result = minforks+generator.nextInt( maxforks-minforks+1 ); @@ -606,27 +658,43 @@ public class Classification implements Iterable<Classification> { return result+")"; } } + // Alternative? + public String printTreeToString() { + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + String result = null; + // + final String UTF_8 = StandardCharsets.UTF_8.name(); + try ( PrintStream ps = new PrintStream( baos, true, UTF_8 ) ) { + printTree( ps ); + //new String( baos.toByteArray(), java.nio.charset.StandardCharsets.UTF_8 ); + result = baos.toString( UTF_8 ); + } catch ( UnsupportedEncodingException ex ) { + } + return result; + } // So far everything is generic (features/values are just numbers) // Below the output functions are specific to the Class? 
game // Print the board as a lisp-like structure indicating only the characteristics public void printTree() { - System.out.print( " (" ); + printTree( System.out ); + } + public void printTree( PrintStream out ) { + out.print( " (" ); if ( feature != null ) { - switch (feature) { - case COLOR: System.out.print( PlayClass.getString( "col" ) ); break; - case SHAPE: System.out.print( PlayClass.getString( "sha" ) ); break; - case FILLING: System.out.print( PlayClass.getString( "fil" ) ); break; - case NUMBER: System.out.print( PlayClass.getString( "num" ) ); break; + switch ( feature ) { + case COLOR: out.print( PlayClass.getString( "col" ) ); break; + case SHAPE: out.print( PlayClass.getString( "sha" ) ); break; + case FILLING: out.print( PlayClass.getString( "fil" ) ); break; + case NUMBER: out.print( PlayClass.getString( "num" ) ); break; default: break; } - //if ( feature != null ) { for ( Classification sub : subTree ) { - sub.printTree(); + sub.printTree( out ); } } - System.out.print( ") " ); + out.print( ") " ); } // JE: all these could be computed once and for all (because here it is computed many times) -- GitLab