From b1a8cf5f0b84d29e627ae7e0924a94bef9decdd3 Mon Sep 17 00:00:00 2001 From: Petko Valtchev <Petko.Valtchev@uqam.ca> Date: Fri, 8 Jul 2005 04:05:31 +0000 Subject: [PATCH] quelques corrections sur la similarité WN --- .../exmo/align/ling/JWNLDistances.java | 26 ++++++++++++++----- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/src/fr/inrialpes/exmo/align/ling/JWNLDistances.java b/src/fr/inrialpes/exmo/align/ling/JWNLDistances.java index 10db2a21..400b3543 100644 --- a/src/fr/inrialpes/exmo/align/ling/JWNLDistances.java +++ b/src/fr/inrialpes/exmo/align/ling/JWNLDistances.java @@ -158,7 +158,7 @@ public class JWNLDistances { Dists1s2 = StringDistances.subStringDistance(s1, s2); - if (s1.equals(s2)) { + if (s1.equals(s2) || s1.toLowerCase().equals(s2.toLowerCase())) { //System.out.println(s1+" - "+s2+" = "+ (1-Dists1s2) + "|" + sim); return 1; } @@ -201,8 +201,8 @@ public class JWNLDistances { public double computeBestMatch(String s1, String s2) { Vector s1Tokens = new Vector(); Vector s2Tokens = new Vector(); - tokenizes(s1, s1Tokens); - tokenizes(s2, s2Tokens); + tokenize(s1, s1Tokens); + tokenize(s2, s2Tokens); // tokens storage Vector vg; @@ -469,19 +469,30 @@ public class JWNLDistances { return 0; } - public void tokenizes(String s, Vector sTokens) { + public void tokenize(String s, Vector sTokens) { String str1 = s; // starts on the second character of the string int start = 0; int car = start + 1; while (car < str1.length()) { - while (car < str1.length() && !(str1.charAt(car) < 'Z')) { + while (car < str1.length() && (str1.charAt(car) > 'Z')) { + // PV while (car < str1.length() && !(str1.charAt(car) < 'Z')) { car++; } + // PV : Leave unique capitals with the previous token + if (car < str1.length() - 1 && str1.charAt(car+1) <= 'Z') { + car++; + } else { + if (car == str1.length() - 1) { + car++; + } + } + sTokens.add(str1.substring(start, car)); start = car; car = start + 1; } + // PV: Debug System.out.println("Tokens = "+ sTokens.toString()); 
} /** @@ -632,9 +643,10 @@ public class JWNLDistances { Vector v = new Vector(); JWNLDistances j = new JWNLDistances(); j.Initialize(); - String s1 = "Monograph"; - String s2 = "Book"; + String s1 = "MastersThesis"; + String s2 = "PhDThesis"; System.out.println("Sim = "+ j.computeSimilarity(s1, s2)); System.out.println("SimOld = "+ (1 - j.BasicSynonymDistance(s1, s2))); + System.out.println("SimWN = "+ j.computeBestMatch(s1, s2)); } } -- GitLab