From 37efcdb77039f34e2602b0ccbe57278b4b95d62c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Euzenat?= <Jerome.Euzenat@inria.fr>
Date: Sat, 28 Feb 2009 21:32:43 +0000
Subject: [PATCH] - corrected quotation character in WordNet 3.0

---
 .../align/impl/method/StringDistances.java    | 26 +++++++++++++++----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/src/fr/inrialpes/exmo/align/impl/method/StringDistances.java b/src/fr/inrialpes/exmo/align/impl/method/StringDistances.java
index d5faf26d..cd6cb0da 100644
--- a/src/fr/inrialpes/exmo/align/impl/method/StringDistances.java
+++ b/src/fr/inrialpes/exmo/align/impl/method/StringDistances.java
@@ -327,6 +327,23 @@ public class StringDistances {
 	return 1.0 - (double)metrics.score( s1, s2 );
     }
 	
+    /**
+     * Removes double-quoted passages (quotes included); an unclosed quote drops the rest.
+     * @param s a String (must not be null)
+     * @return s without the quotations enclosed between " characters
+     */
+    public static String stripQuotations( String s ) {
+	StringBuilder result = new StringBuilder( s.length() );
+	int sStart = 0; // start of the current unquoted segment
+	while ( sStart < s.length() ) {
+	    int sOpen = s.indexOf( '"', sStart ); // opening quote, -1 if none
+	    if ( sOpen < 0 ) sOpen = s.length();
+	    result.append( s, sStart, sOpen ); // keep the text outside quotations
+	    int sClose = ( sOpen < s.length() ) ? s.indexOf( '"', sOpen + 1 ) : -1;
+	    sStart = ( sClose < 0 ) ? s.length() : sClose + 1; // resume after closing quote
+	}
+	return result.toString();
+    }
 
     /**
      * JE//: This is independent from WordNet and should go to StringDistances
@@ -340,8 +357,10 @@ public class StringDistances {
      * if it is a suffix
      * otherwise the last letter will be taken as the new token
      * start
+     *
+     * Would be useful to parameterise with stop words as well
      */
-    public static Vector<String> tokenize(String s) {
+    public static Vector<String> tokenize( String s ) {
 	String str1 = s;
 	int sLength = s.length();
 	Vector<String> vTokens = new Vector<String>();
@@ -394,10 +413,7 @@ public class StringDistances {
 		}			
 		tkStart=tkEnd;
 	    }			
-	}
-		
-	// else the standard naming convention will be used
-	else{
+	} else { // else the standard naming convention will be used
 	    // start at the beginning of the string
 	    tkStart = 0;			
 	    tkEnd = tkStart;
-- 
GitLab