From 9b0a5cd6a92a1243615561da646f049d5bfa21b6 Mon Sep 17 00:00:00 2001
From: Benoit Sagot <benoit.sagot@inria.fr>
Date: Wed, 18 Jan 2017 08:28:31 +0000
Subject: [PATCH] git-svn-id:
 https://scm.gforge.inria.fr/authscm/cfourrie/svn/lingwb/MElt/trunk@5737
 dc05b511-7f1d-0410-9f1c-d6f32a2df9e4

---
 bin/MElt_postprocess.pl | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/bin/MElt_postprocess.pl b/bin/MElt_postprocess.pl
index ca6b2cf..426fcd5 100755
--- a/bin/MElt_postprocess.pl
+++ b/bin/MElt_postprocess.pl
@@ -47,21 +47,25 @@ if ($lang eq "zzz" || $no_post_process || $tokeniser_mode) {
 #      s/_ACC_F/\\}/g;
     } else {
       # in non-normalising mode, situations with {} require ignoring original tokens and using splitted forms (e.g. "don't > do n't" for English)
-      if (!/(^| )[^ \/{}]+( |$)/) { # did we tag?
+      my $tmp = $_;
+      $tmp =~ s/{.*?} *//g;
+      if ($tmp =~ /(^| )([^ \/{}]+)( |$)/) { # did we tag?
+	# no we did not tag
+	if ($keep_token_form_distinction == 0) {
+	  if (!$tokeniser_mode) {
+	    s/{ *([^}]*?) *} *([^ ]+?\/[^ \/]+(?:\/[0-9\.]+)?) /process_transition_with_slash($1,$2)." "/ge;
+	  }
+	  s/{[^{}]*} *//g;
+	  if ($tokeniser_mode) {
+	    s/_ACC_O/{/g;
+	    s/_ACC_F/}/g;
+	  }
+	}
+      } else {
 	# yes we did tag
 	s/{ *[^}]+? *} *([^ ]+?\/[^ \/]+(?:\/[0-9\.]+)?) +{} */$1 /g;
 	s/{} *//g;
 	s/{ *([^}]*?) *} *[^ ]+?(\/[^ \/]+(?:\/[0-9\.]+)?) /replace_whitespaces_with_underscores($1).$2." "/ge;
-      } elsif ($keep_token_form_distinction == 0) {
-	# no we did not tag
-	if (!$tokeniser_mode) {
-	  s/{ *([^}]*?) *} *([^ ]+?\/[^ \/]+(?:\/[0-9\.]+)?) /process_transition_with_slash($1,$2)." "/ge;
-	}
-	s/{[^{}]*} //g;
-	if ($tokeniser_mode) {
-	  s/_ACC_O/{/g;
-	  s/_ACC_F/}/g;
-	}
       }
       s/◁/{/g;
       s/▷/}/g;
-- 
GitLab