diff --git a/bin/MElt.in b/bin/MElt.in
index 5a8cd6c678e0881ba8d6fa79e0137e1b6be1e916..603e17c1816cc75fa537242e048e911bd52ec39e 100644
--- a/bin/MElt.in
+++ b/bin/MElt.in
@@ -81,8 +81,7 @@ do case "$o" in
 	C)  CAT_OR_CORRECTOR="${BINDIR}/MElt_normaliser.pl"
 	    DO_TAGGING=0;;
 	M)  DO_TAGGING=0;
-	    SXPIPE_OPTIONS2="-tok -ndd";
-	    POSTPROCESS="${BINDIR}/MElt_postprocess.pl -t";; # tokeniser mode (replace {<something>} _URL by <something>, etc
+	    SXPIPE_OPTIONS2="-tok -ndd";; # tokeniser mode (replace {<something>} _URL by <something>, etc.); POSTPROCESS is chosen further down, in the DO_TAGGING -eq 0 check
 	K)  KEEP_COMMENTS="-ktfd";;
 	L)  DO_LEMMATISE=1;;
 	t)  if [ "z$HANDLE_COMMENTS" = "z-c" ]
@@ -173,7 +172,12 @@ done
 
 if [ $DO_TAGGING -eq 0 ]
 then
-    POSTPROCESS="${BINDIR}/MElt_postprocess.pl -npp"
+    if [ "x$SXPIPE_OPTIONS2" = "x-tok -ndd" ] # must stay identical to the value assigned by the M) option (tokeniser mode)
+    then
+	POSTPROCESS="${BINDIR}/MElt_postprocess.pl -t"
+    else
+	POSTPROCESS="${BINDIR}/MElt_postprocess.pl -npp"
+    fi
 fi
 
 if [ "$MODEL" = "unspecified_model" ]
diff --git a/bin/MElt_postprocess.pl b/bin/MElt_postprocess.pl
index 7052f8315cb5a482eb3adfddcca2537172dc98c4..61415a014704ad0bd34c6709d7fd6ffdbd795775 100755
--- a/bin/MElt_postprocess.pl
+++ b/bin/MElt_postprocess.pl
@@ -51,10 +51,11 @@ if ($lang eq "zzz" || $no_post_process || $tokeniser_mode) {
 	
 	s/{ *([^}]*?) *} *[^ ]+?(\/[^ \/]+(?:\/[0-9\.]+)?) /replace_whitespaces_with_underscores($1).$2." "/ge;
       } elsif ($keep_token_form_distinction == 0) {
+	s/{[^{}]*} //g;
 	if ($tokeniser_mode) {
-	  s/{([^{}]*)} _(?:URL|ROMNUM|NUMBER|NUM)/$1/g;
+	  s/_ACC_O/{/g;
+	  s/_ACC_F/}/g;
 	}
-	s/{[^{}]*} //g;
       }
       s/â—/{/g;
       s/â–·/}/g;
diff --git a/normalisation/en.normalisationdata/ngrams b/normalisation/en.normalisationdata/ngrams
index a98f49e6510ba9b01bb12da36eefa89d7319dc37..4baf95a5ac163602e3a30d75462cb1bc9959a34b 100644
--- a/normalisation/en.normalisationdata/ngrams
+++ b/normalisation/en.normalisationdata/ngrams
@@ -221,10 +221,11 @@ There ain't	There areÂ n't	0
 ain't I	amÂ n't I	0
 I ain't	I amÂ n't	0
 i ain't	I amÂ n't	0
+you ain't	you areÂ n't	0
+we ain't	we areÂ n't	0
 ain't	isÂ n't	0
 wanna	wantÂ to	0
 gonna	goingÂ to	0
-ain't	isÂ n't	0
 Gonna	GoingÂ to	0
 Wanna	WantÂ to	0
 I gotta	I haveÂ gotÂ to	0
@@ -412,4 +413,8 @@ yea	yeah	48
 yess	yes	11
 yo	you	45
 yu	you	50
-yuh	you	11
+([^ ]+in)'	$1g	0
+jes'	just	0
+cannae	canÂ n't	0
+'em	them	0
+las'	last	0
diff --git a/sxpipe-melt/MElt_finalise_tokenisation.pl b/sxpipe-melt/MElt_finalise_tokenisation.pl
index e293cb2ba14f83ba1e84932854e3a11ad958b6e4..d6f1ec1fe25f69d317f28d3536fb73a701b9a571 100644
--- a/sxpipe-melt/MElt_finalise_tokenisation.pl
+++ b/sxpipe-melt/MElt_finalise_tokenisation.pl
@@ -17,12 +17,12 @@ while (1) {
 while (<>) {
   chomp;
   s/_ACC_([OF])/_ACC\1/g;
-  s/(} *_[A-Za-z_]+[A-Za-z])_[^_]+_/\1/g;
+  s/(} *_[A-Za-z_]+[A-Za-z])_[^_]+_( |$)/\1\2/g;
 
   if ($tokeniser_mode) {
     s/{([^{}]*)} *_(?:SENT_BOUND) //g;
     s/(^| ){([^{}]*)} *_(?:SENT_BOUND)$//g;
-    s/{([^{}]*)} *_(?:ROMNUM|NUM|URL|EMAIL|META[^ ]*)( |$)/"{".$1."} ".remove_blanks($1).$2/ge;
+    s/{([^{}]*)} *_(?:ROMNUM|NUM|URL|EMAIL|SMILEY|META[^ ]*)( |$)/"{".$1."} ".remove_blanks($1).$2/ge;
     s/{([^{}]*)} *_NPREF /$1/g;
   } else {
     s/{([0-9 ]+)([^}]*)} *_NUM( |$)/"{$1$2} ".remove_blanks($1).$3/ge;
@@ -31,13 +31,16 @@ while (<>) {
   s/{([^{}]*)} _XML {/{_<_\1_>_/g;
   s/} *([^ {}]+) *{([^{}]+)} _XML/_<_\2_>_} \1/g;
 
-  s/_ACC([OF])/_ACC_\1/g;
 
   if ($tokeniser_mode) {
-    s/_/ /g;
-    s/(^|[^\\])\\(.)/$1$2/g;
     s/(^|[^\\])\\(.)/$1$2/g;
+    s/_(UNDERSCORE|ACC[OF])/ÊƒÉ™Æ™Ê€É›Æ É±ÉœÊ‚Ê‚ÉÊ¥$1/g;
+    s/_/ /g;
+    s/ÊƒÉ™Æ™Ê€É›Æ É±ÉœÊ‚Ê‚ÉÊ¥UNDERSCORE/_/g;
+    s/ÊƒÉ™Æ™Ê€É›Æ É±ÉœÊ‚Ê‚ÉÊ¥/_/g;
   }
+  
+  s/_ACC([OF])/_ACC_\1/g;
 
   print "$_\n";
 }
diff --git a/sxpipe-melt/caponlysentences.pl b/sxpipe-melt/caponlysentences.pl
index 2fe2fb0b85e1556f430e867b2f4eb503ff5ea74c..1f3e43a9d83366852004fd16873844787f3a9fa8 100755
--- a/sxpipe-melt/caponlysentences.pl
+++ b/sxpipe-melt/caponlysentences.pl
@@ -191,6 +191,8 @@ if ($lang =~ /^(de|ja|ko|zh|tw|ar)$/) {
       unless ($decap =~ /[A-ZÐÐ‘Ð’Ð“Ð”Ð•ÐÐ–Ð—Ð˜Ð™Ð†ÃŒÐšÐ›ÐœÐÐžÐŸÐ Ð¡Ð¢Ð£Ð¤Ð¥Ð¦Ð§Ð¨Ð©ÐªÐ«Ð¬Ñ¢ÐÐ®Ð¯Ñ²Ñ´Ó˜Ò’ÒšÒ¢Ó¨Ò°Ò®ÒºÄ°ÃÃ€Ã‚Ã„Ä„ÃƒÄ‚Ã…Ä†ÄŒÃ‡ÄŽÃ‰ÃˆÃŠÃ‹Ä˜ÄšÄžÃŒÃÃŽÄ¨Ä¬ÃÄ¹Ä½ÅÅƒÃ‘Å‡Ã’Ã“Ã”Ã•Ã–Ã˜Å”Å˜ÅšÅ ÅžÅ¤Å¢Ã™ÃšÃ›Å¨ÃœÇ“á»²ÃÅ¶Ã¿Å¹áºÅ»Å½a-zÐ°Ð±Ð²Ð³Ð´ÐµÑ‘Ð¶Ð·Ð¸Ð¹Ñ–Ã¬ÐºÐ»Ð¼Ð½Ð¾Ð¿Ñ€ÑÑ‚ÑƒÑ„Ñ…Ñ†Ñ‡ÑˆÑ‰ÑŠÑ‹ÑŒÑ£ÑÑŽÑÑ³ÑµÓ™Ò“Ò›Ò£Ó©Ò±Ò¯Ò»Ä°Ã¡Ã Ã¢Ã¤Ä…Ã£ÄƒÃ¥Ä‡ÄÃ§ÄÃ©Ã¨ÃªÃ«Ä™Ä›ÄŸÃ¬ÃÃ®Ä©ÄÃ¯ÄºÄ¾Å‚Å„Ã±ÅˆÃ²Ã³Ã´ÃµÃ¶Ã¸Å•Å™Å›Å¡ÅŸÅ¥Å£Ã¹ÃºÃ»Å©Ã¼Ç”á»³Ã½Å·Ã¿Åºáº‘Å¼Å¾_0-9-]{4}[\.\?\!] $/o && /^[A-ZÐÐ‘Ð’Ð“Ð”Ð•ÐÐ–Ð—Ð˜Ð™Ð†ÃŒÐšÐ›ÐœÐÐžÐŸÐ Ð¡Ð¢Ð£Ð¤Ð¥Ð¦Ð§Ð¨Ð©ÐªÐ«Ð¬Ñ¢ÐÐ®Ð¯Ñ²Ñ´Ó˜Ò’ÒšÒ¢Ó¨Ò°Ò®ÒºÄ°ÃÃ€Ã‚Ã„Ä„ÃƒÄ‚Ã…Ä†ÄŒÃ‡ÄŽÃ‰ÃˆÃŠÃ‹Ä˜ÄšÄžÃŒÃÃŽÄ¨Ä¬ÃÄ¹Ä½ÅÅƒÃ‘Å‡Ã’Ã“Ã”Ã•Ã–Ã˜Å”Å˜ÅšÅ ÅžÅ¤Å¢Ã™ÃšÃ›Å¨ÃœÇ“á»²ÃÅ¶Ã¿Å¹áºÅ»Å½a-zÐ°Ð±Ð²Ð³Ð´ÐµÑ‘Ð¶Ð·Ð¸Ð¹Ñ–Ã¬ÐºÐ»Ð¼Ð½Ð¾Ð¿Ñ€ÑÑ‚ÑƒÑ„Ñ…Ñ†Ñ‡ÑˆÑ‰ÑŠÑ‹ÑŒÑ£ÑÑŽÑÑ³ÑµÓ™Ò“Ò›Ò£Ó©Ò±Ò¯Ò»Ä°Ã¡Ã Ã¢Ã¤Ä…Ã£ÄƒÃ¥Ä‡ÄÃ§ÄÃ©Ã¨ÃªÃ«Ä™Ä›ÄŸÃ¬ÃÃ®Ä©ÄÃ¯ÄºÄ¾Å‚Å„Ã±ÅˆÃ²Ã³Ã´ÃµÃ¶Ã¸Å•Å™Å›Å¡ÅŸÅ¥Å£Ã¹ÃºÃ»Å©Ã¼Ç”á»³Ã½Å·Ã¿Åºáº‘Å¼Å¾_0-9-]/o) {
 	if ($lang eq "tr" && $c eq "I") {
 	  $c = "Ä±";
+	} elsif ($lang eq "en" && $c eq "I" && $decap =~ /(^| )$/ && /^[ ,]/) {
+	  $c = "I";
 	} else {
 	  $c = ext_lc($c);
 	}
diff --git a/sxpipe-melt/gl_format.pl b/sxpipe-melt/gl_format.pl
index 028b25a1a34399f7ad79343cdc4c9b234f735eeb..bcb11e43f6deda6f3d85c3b82a0862d380137cd7 100755
--- a/sxpipe-melt/gl_format.pl
+++ b/sxpipe-melt/gl_format.pl
@@ -21,8 +21,8 @@ while (<>) {
     s/\s*$/ /o;
 
     # reconnaissance
-    s/(\s)_UNDERSCORE([^\s_]+)_UNDERSCORE(\s)/$1\{_UNDERSCORE$2\_UNDERSCORE\} $2$3/o;
-    s/(\s)\*([^ _\*]+)\*(\s)/$1\{*$2\*\} $2$3/o;
+    s/(\s)_UNDERSCORE([^\s_{}]+)_UNDERSCORE(\s)/$1\{_UNDERSCORE$2\_UNDERSCORE\} $2$3/o;
+    s/(\s)\*([^ _\*{}]+)\*(\s)/$1\{*$2\*\} $2$3/o;
 
     # sortie
     s/^ //o;
diff --git a/sxpipe-melt/gl_number.pl b/sxpipe-melt/gl_number.pl
index 6cc4defb1450dc71080a2db2075d67a7edac19d0..a5c567a5eede4e5c833fe8536eb415d61ab33104 100755
--- a/sxpipe-melt/gl_number.pl
+++ b/sxpipe-melt/gl_number.pl
@@ -220,10 +220,10 @@ while (<>) {
     
     # remet M. en mode no word segmentation au lieu de _META_TEXTUAL_GN _SENT_BOUND
     if ($no_sw) {
-      s/{(Mr\.?|M\.|Mme|Miss|Mrs\.?|Sir|Lady)}_META_TEXTUAL_GN/\1/g;
+      s/{(Mr\.?|M\.|Mme|Miss|Mrs\.?|Sir|Lady|Sgt\.?)}_META_TEXTUAL_GN/\1/g;
     }
     
-    s/((?:Mr\.?|M\.|Mme|Miss|Mrs\.?|Sir|Lady) )\{([A-ZÃ‰ÃƒÃ€Ã‚ÃŠÃ›ÃŽÃ”Ã„Ã‹ÃœÃÃ–Ã‡])\}_META_TEXTUAL_GN\{\.\}_META_TEXTUAL_PONCT/$1$2$3./g;
+    s/((?:Mr\.?|M\.|Mme|Miss|Mrs\.?|Sir|Lady|Sgt\.?) )\{([A-ZÃ‰ÃƒÃ€Ã‚ÃŠÃ›ÃŽÃ”Ã„Ã‹ÃœÃÃ–Ã‡])\}_META_TEXTUAL_GN\{\.\}_META_TEXTUAL_PONCT/$1$2$3./g;
     s/$listnumprefix\{([G-ZÃ‰ÃƒÃ€Ã‚ÃŠÃ›ÃŽÃ”Ã„Ã‹ÃœÃÃ–Ã‡])\}_META_TEXTUAL_GN\{\.\}_META_TEXTUAL_PONCT/$1$2./g; # \1 est dans listnumprefix
     if ($lang =~ /^(?:fr|en|es|pt|it|ro)$/) {
       s/ ((?:pp|[pnv])\.?) \{(\d+)\}_META_TEXTUAL_GN\{\.\}_META_TEXTUAL_PONCT/ $1 $2$3./g;
@@ -234,10 +234,15 @@ while (<>) {
     }
 
     if ($lang eq "en") { # le mot "a" ne peut terminer une phrase... donc on va dire artificiellement que " a[.\)...]" dÃ©signe tjs un _META_TEXTUAL_truc
-#      s/ ([Aa])([\.\)\/\]\-\Â])(?=[^0-9])/ \{$1\}_META_TEXTUAL_GN\{$2\}_META_TEXTUAL_PONCT/go; # a. TROP GREEDY
-      s/ ([Aa])([\.\)\]])( *)(?=[^0-9mM ])/ \{$1\}_META_TEXTUAL_GN\{$2\}_META_TEXTUAL_PONCT\3/go; # a. ATTENTION, TRES RISQUÃ‰
+      s/ ([Aa])(\.)( *)(?=[^0-9mM ])/ \{$1\}_META_TEXTUAL_GN\{$2\}_META_TEXTUAL_PONCT\3/go; # " a." is marked as _META_TEXTUAL_GN + _META_TEXTUAL_PONCT -- WARNING: VERY RISKY
+      unless ($less_lists) {
+	s/ ([Aa])([\)\]])( *)(?=[^0-9mM ])/ \{$1\}_META_TEXTUAL_GN\{$2\}_META_TEXTUAL_PONCT\3/go; # same marking for " a)" and " a]", skipped when $less_lists is set -- WARNING: VERY RISKY
+      }
       # ... sauf dans certains contextes gauches
       s/((?:Mr\.?|M\.|Mme|Miss|Mrs\.?|Sir|Lady) )\{([Aa])\}_META_TEXTUAL_GN\{\.\}_META_TEXTUAL_PONCT/\1\2\3./g;
+      # ... and except in certain right-hand contexts (an immediately following "." or capital letter plus ".")
+      s/\{([Aa])\}_META_TEXTUAL_GN\{\.\}_META_TEXTUAL_PONCT(\.)/\1.\2/g;
+      s/\{A\}_META_TEXTUAL_GN\{\.\}_META_TEXTUAL_PONCT([A-Z]\.)/A.\1/g;
     }
     
     if ($less_lists) {
diff --git a/sxpipe-melt/rebuild_easy_tags.pl b/sxpipe-melt/rebuild_easy_tags.pl
index 3c55d18ff489422ce2c2495e74ba394f96604c67..1378f6e0fec7e5fdfd7914dcbd5458029d138fdd 100755
--- a/sxpipe-melt/rebuild_easy_tags.pl
+++ b/sxpipe-melt/rebuild_easy_tags.pl
@@ -7,8 +7,6 @@ binmode STDERR, ":utf8";
 
 $| = 1;
 
-$e=0;
-
 # Construction des commentaires au format XML issu de Easy, i.e. { <F id=\"E$iF$j\">token</F> }
 # En entrÃ©e, chaque mot peut ne pas avoir de commentaire, ou avoir un commentaire non-XML: {token} mot
 # Il peut y avoir plusieurs tokens dans un mÃªme commentaire : { <F id=\"E$iF$j\">token1</F> <F id=\"E$iF$j+1\">token2</F> <F id=\"E$iF$j+2\">token3</F> }
@@ -26,7 +24,12 @@ $e=0;
 #            {a} b _REGLUE_c _UNSPLIT_d ==> { <F id="E1F1">ac</F> } b { <F id="E1F1">ac</F> } c { <F id="E1F1">ac</F> } d
 # Il n'est pas prÃ©vu que _(REGLUE|UNSPLIT)_b ait dÃ©jÃ  un commentaire Ã  lui. Si c'est le cas, la sortie est incorrecte.
 
-$no_sf = 0; # no split forms (i.e., EASy sous-mots ou FTB sous-mots)
+
+my $MElt_tokeniser_mode = 0; # set by -tok: do not replace _UNDERSCORE by _
+my $no_sf = 0; # set by -no_sf: no split forms (i.e., EASy sub-words or FTB sub-words)
+
+my $e=0;
+
 if ($lang =~ /^(fa|ckb)$/) {
   $no_sf = 1;
 }
@@ -36,6 +39,7 @@ while (1) {
     if (/^$/) {last;}
     elsif (/^-l$/ || /^-lang$/i) {$lang=shift;}
     elsif (/^-no_sf$/) {$no_sf=1;}
+    elsif (/^-tok$/) {$MElt_tokeniser_mode=1;}
 }
 
 while (<>) {
@@ -87,7 +91,7 @@ while (<>) {
     s/^ +//g;
     s/ +$//g;
     if ($commentaire && $_!~/^(_REGLUE_|_UNSPLIT_)/) {
-      $tobeprinted =~ s/_UNDERSCORE/_/g;
+      $tobeprinted =~ s/_UNDERSCORE/_/g unless $MElt_tokeniser_mode;
       $line .= "$tobeprinted";
       $tobeprinted="";
     }
@@ -133,7 +137,7 @@ while (<>) {
       $tobeprinted.=" $_ ";
     }
   }
-  $tobeprinted =~ s/_UNDERSCORE/_/g;
+  $tobeprinted =~ s/_UNDERSCORE/_/g unless $MElt_tokeniser_mode;
 
   $line .= $tobeprinted;
 
diff --git a/sxpipe-melt/segmenteur.pl b/sxpipe-melt/segmenteur.pl
index 09429b42a6694f8172eb29a96ad8a1d9c6f20917..b88092650b7e669e72d58c81da1de9588dc57fef 100755
--- a/sxpipe-melt/segmenteur.pl
+++ b/sxpipe-melt/segmenteur.pl
@@ -249,6 +249,7 @@ while (<STDIN>) {
 
     my $inputline = $_;
     my $line = "";
+
     if ($xml) {
       while ($inputline =~ s/(<[^>]*){([^}>]+)} _[^ {}<>]+ /$1$2/g) {}
       while ($inputline =~ s/^(.*?)(<[^>]+>(?:\s*<[^>]+>)*)//) {
@@ -458,12 +459,12 @@ sub tokenize_sequence {
 	$sq = s/(?<=[Ss])\'(?=[ ,;?\!:\"â€œâ€\)\(\*\#<>\[\]\%\/\\\=\+\Â«\Â»â€”â€“\Ë\&\`\.])/'/g;
 	if ($sq == 0 && $lq == 0 && $rq_no_s == 0) {
 	} elsif ($sq == 0 && $lq == $rq_no_s) {
-	  s/(?<=[ \(\[])([\'\`])([^ \'](?:[^\'])*?[^ \'sS])\'(?=[ ,;?\!:\"â€œâ€\)\(\*\#<>\[\]\%\/\\\=\+\Â«\Â»â€”â€“\Ë\&\`\.])/ {\1} ` \2 ' /g; # les apostrophes peuvent servir Ã  quoter...
+	  s/(?<=[ \(\[])([\'\`])(?!em )([^ \'](?:[^\'])*?[^ \'sS])\'(?=[ ,;?\!:\"â€œâ€\)\(\*\#<>\[\]\%\/\\\=\+\Â«\Â»â€”â€“\Ë\&\`\.])/ {\1} ` \2 ' /g; # les apostrophes peuvent servir Ã  quoter...
 	} elsif ($sq == 1 && $lq == $rq_no_s) {
-	  s/(?<=[ \(\[])(['\`])([^ '](?:[^']|[sS]')*?[^ 'sS])'(?=[ ,;?\!:\"â€œâ€\)\(\*\#<>\[\]\%\/\\\=\+\Â«\Â»â€”â€“\Ë\&\`\.])/ {\1} ` \2 ' /g; # les apostrophes peuvent servir Ã  quoter...
+	  s/(?<=[ \(\[])(['\`])(?!em )([^ '](?:[^']|[sS]')*?[^ 'sS])'(?=[ ,;?\!:\"â€œâ€\)\(\*\#<>\[\]\%\/\\\=\+\Â«\Â»â€”â€“\Ë\&\`\.])/ {\1} ` \2 ' /g; # les apostrophes peuvent servir Ã  quoter...
 	} else {
 	  $_ = reverse($_);
-	  s/(?<=[ ,;?\!:\"â€œâ€\)\(\*\#<>\[\]\%\/\\\=\+\Â«\Â»â€”â€“\Ë\&\`\.])'([^ '](?:[^']|'[sS])*?[^ '])(['\`])(?=[ \(\[])/ ' \1 ` \}\2\{ /g; # les apostrophes peuvent servir Ã  quoter...
+	  s/(?<=[ ,;?\!:\"â€œâ€\)\(\*\#<>\[\]\%\/\\\=\+\Â«\Â»â€”â€“\Ë\&\`\.])'(?!em )([^ '](?:[^']|'[sS])*?[^ '])(['\`])(?=[ \(\[])/ ' \1 ` \}\2\{ /g; # les apostrophes peuvent servir Ã  quoter...
 	  $_ = reverse($_);
 	  s/{`} ` /` /g;
 	}
@@ -676,40 +677,49 @@ sub tokenize_sequence {
 
   if ($lang eq "en") {
     if ($expand_contractions) {
+      #TODO: port to this branch the improvements made to the !$expand_contractions branch
       s/(?<=[^\}]) ([cC]a)n't / {\1n't} \1n _UNSPLIT_not /goi;      
       s/(?<=[^\}]) ([Ww])on't / {\1on't} \1ill _UNSPLIT_not /goi;      
       s/(?<=[^\}]) ([^ _][^ ]*)n't / {\1n't} \1 _UNSPLIT_not /goi;      
       s/(?<=[^\}]) ([Ii])'m / {\1'm} I _UNSPLIT_am /goi;      
       s/(?<=[^\}]) ([Yy]ou|[Ww]e)'re / {\1're} \1 _UNSPLIT_are /goi;      
       s/(?<=[^\}]) (I|you|we|they|should|would)'(ve) / {\1've} \1 _UNSPLIT_have /goi;      
-      s/(?<=[^\}]) (I|you|he|she|we|they|there)'(d) / {\1'd} \1 _UNSPLIT_would /goi;      
-      s/(?<=[^\}]) (I|you|he|she|we|they|there)'(ll) / {\1'll} \1 _UNSPLIT_will /goi;      
+      s/(?<=[^\}]) (I|you|he|she|we|they|there|this|that|it)'(d) / {\1'd} \1 _UNSPLIT_would /goi;      
+      s/(?<=[^\}]) (I|you|he|she|we|they|there|this|that|it)'(ll) / {\1'll} \1 _UNSPLIT_will /goi;      
       s/(?<=[^\}]) (they)'(re) / {\1're} \1 _UNSPLIT_are /goi;
       s/(?<=[^\}]) ([^ ]*[^ s_])'s / {\1's} \1 _UNSPLIT_'s /goi;
       s/(?<=[^\}]) ([^ _][^ ]*s)'(?=[a-z] )/ \1 _REGLUE___APOS__/goi;
-      s/(?<=[^\}]) ([^ _][^ ]*s)'(?!s |\}.)/ {\1'} \1 _UNSPLIT_'s /goi;
+      s/(?<=[^\}]) ([^ _][^ ]*s)'(?![SsDd] |[lL][lL]|\}.)/ {\1'} \1 _UNSPLIT_'s /goi;
       s/__APOS__/'/g;
     } else {
-      s/(?<=[^\}]) ([cC]a)n't / {\1n't} \1n _UNSPLIT_n't /goi;      
+      #preliminary corrections
+      s/(?<=[^\}]) ((?:some|every|any|no)(?:one|body))(s)' / {$1} $1 _REGLUE_'$2 /goi;
+      #processing
+      s/(?<=[^\}]) ([cC][aA])([Nn])('[tT]) / {$1$2$3} $1$2 _UNSPLIT_$2$3 /go;
+      s/(?<=[^\}]) WON'T / {WON'T} WILL _UNSPLIT_N'T /go;
       s/(?<=[^\}]) ([Ww])on't / {\1on't} \1ill _UNSPLIT_n't /goi;
-      s/(?<=[^\}]) ([^_ ][^ ])n't / {\1n't} \1 _UNSPLIT_n't /goi;      
-      s/(?<=[^\}]) ([Ii])'m / {\1} I 'm /goi;      
-      s/(?<=[^\}]) ([Yy]ou|[Ww]e)'re / \1 're /goi;      
-      s/(?<=[^\}]) (I|you|we|they|should|would)'(ve) / \1 '\2 /goi;      
-      s/(?<=[^\}]) (I|you|he|she|we|they|there)'(d|ll) / \1 '\2 /goi;      
+      s/(?<=[^\}]) ([^_ ][^ ])N'T / {\1N'T} \1 _UNSPLIT_N'T /go;
+      s/(?<=[^\}]) ([^_ ][^ ])n't / {\1n't} \1 _UNSPLIT_n't /goi;
+      s/(?<=[^\}]) ([Ii])'([mMdD]|[lL][lL]) / {$1} I '$2 /go;
+      s/(?<=[^\}]) (YOU|WE)'RE / \1 'RE /go;
+      s/(?<=[^\}]) ([Yy][Oo][Uu]|[Ww][Ee])'re / \1 're /goi;      
+      s/(?<=[^\}]) (you|we|they|should|would)'(ve) / \1 '\2 /goi;
+      s/(?<=[^\}]) (you|he|she|we|they|there|this|that|it)'(d|ll) / \1 '\2 /goi;
       s/(?<=[^\}]) (they)'(re) / \1 '\2 /goi;
-      s/(?<=[^\}]) ([^ ]*[^ s_])'s / \1 's /goi;
-      s/(?<=[^\}]) ([^ ]*[^ s_]){''} " ((?:_REGLUE_)?)s / {\1''s} \1 \2's /goi;
+      s/(?<=[^\}]) ([^ ]*[^ s_])'([sS]) / $1 '$2 /go;
+      s/(?<=[^\}]) ([^ ]*[^ s_]){''} " ((?:_REGLUE_)?)([smd]|re|ve|ll) / {\1''\3} \1 \2'\3 /goi; # X''s, X''m, X''ll...: the {..} original-token comment keeps the clitic actually typed
       s/(?<=[^\}]) ([^ _][^ ]*s)'(?=[a-z] )/ \1 _REGLUE___APOS__/goi;
-      s/(?<=[^\}]) ([^ _][^ ]*s)'(?!s |\}.)/ \1 {'} 's /goi;
+      s/(?<=[^\}]) (jes|las)'(?= )/ \1__APOS__/goi; # jes' = jus' = just
+      s/(?<=[^\}]) ([^ _][^ ]*)(s)'(?![SsDd] |[lL][lL]|\}.)/ \1\2 {'} '\2 /go;
       s/__APOS__/'/g;
+      s/([a-z]{2,})'(ll) /$1 '$2 /goi;
     }
   } elsif ($lang eq "fr") {
     s/(?<=[^\}]) ([Ss]) ' (\S+)/ {\1 '} \1' \2/goi;
     s/(?<=[^\}]) (\') / {$1} " /go;
     s/ ([ldnmst]) ([aeÃ©iouy]\S+)/ {\1} \1' \2/goi;
   }
-  
+
   s/(?<=[^\}]) \'([^ ]+)\' / {'\1'} ' \1 ' /goi;
 
   if (!$no_sw) {
@@ -792,6 +802,8 @@ sub tokenize_sequence {
     s/(?<=[^\}])([- ])([Rr])([eÃ©]num[eÃ©]ration) /$1\{$2$3} $2Ã©munÃ©ration /go;
     s/(?<=[^\}])([- ])c (est|ets) /$1\{c} c' \{$2} est /go;
   } elsif ($lang eq "en") {
+    #dots for frequent abbrevs
+    s/\b(mrs?|sgt|ms|dr|prof|lt) \. ([A-Z])/$1. $2/goi;
     #abrÃ©viations courantes
     s/(?<=[^\}])([- ])(acct(?: ?\.)?) /$1\{$2} account /g;
     s/(?<=[^\}])([- ])(addl(?: ?\.)?) /$1\{$2} additional /g;
@@ -826,7 +838,7 @@ sub tokenize_sequence {
     s/(?<=[^\}])([- ])(co(?: ?\.)?) /$1\{$2} company /g;
     s/(?<=[^\}])([- ])(hr(?: ?\.)?) /$1\{$2} hour /g;
     s/(?<=[^\}])([- ])(hrs(?: ?\.)?) /$1\{$2} hours /g;
-    s/(?<=[^\}])([- ])(mo(?: ?\.)?) /$1\{$2} month /g;
+    #s/(?<=[^\}])([- ])(mo(?: ?\.)?) /$1\{$2} month /g;
     #s/(?<=[^\}])([- ])(mon(?: ?\.)?) /$1\{$2} Monday /g;
     #s/(?<=[^\}])([- ])(tue(?: ?\.)?) /$1\{$2} Tuesday /g;
     #s/(?<=[^\}])([- ])(wed(?: ?\.)?) /$1\{$2} Wednesday /g;
@@ -838,8 +850,8 @@ sub tokenize_sequence {
     s/(?<=[^\}])([- ])(abt) /$1\{$2} about /g;
     s/(?<=[^\}])([- ])(jr(?: ?\.)?) /$1\{$2} junior /g;
     s/(?<=[^\}])([- ])(jnr(?: ?\.)?) /$1\{$2} junior /g;
-    s/(?<=[^\}])([- ])(mo(?: ?\.)?) /$1\{$2} month /g;
-    s/(?<=[^\}])([- ])(mos(?: ?\.)?) /$1\{$2} months /g;
+    #s/(?<=[^\}])([- ])(mo(?: ?\.)?) /$1\{$2} month /g;
+    #s/(?<=[^\}])([- ])(mos(?: ?\.)?) /$1\{$2} months /g;
     s/(?<=[^\}])([- ])(sr(?: ?\.)?) /$1\{$2} senior /g;
     s/(?<=[^\}])([- ])(co-op) /$1\{$2} cooperative  /g;
     s/(?<=[^\}])([- ])(co(?: ?\.)?) /$1\{$2} company /g;
diff --git a/sxpipe-melt/sxpipe-melt-light.conf.in b/sxpipe-melt/sxpipe-melt-light.conf.in
index 59d40c3b9795aee852929f9820435daf7d32d481..c8483d539d087c42b4398401aad3847da729ee53 100644
--- a/sxpipe-melt/sxpipe-melt-light.conf.in
+++ b/sxpipe-melt/sxpipe-melt-light.conf.in
@@ -91,7 +91,7 @@ cmd = $melt/remove_inner_ne.pl
 desc = desencapsulation des entitÃ©es nommÃ©es
 
 [caponly]
-cmd = $melt/caponlysentences.pl
+cmd = $melt/caponlysentences.pl -l $lang
 desc = minusculisation des phrases (presque) entiÃ¨rement en majuscules
 
 [segment]
@@ -100,7 +100,7 @@ options = -a -ca -af=@pkgdatadir@/pctabr -p=r $*
 desc = segmentation
 
 [rebuild]
-cmd = $melt/rebuild_easy_tags.pl -no_sf
+cmd = $melt/rebuild_easy_tags.pl -no_sf $*
 
 [sxpipe2melt]
 cmd = $melt/sxpipe2melt.pl
diff --git a/sxpipe-melt/sxpipe-melt.conf.in b/sxpipe-melt/sxpipe-melt.conf.in
index 1704e481e6b79eb4262328647179b021209aa852..b22ade221ac7a51c828093001457ea2fff88820a 100644
--- a/sxpipe-melt/sxpipe-melt.conf.in
+++ b/sxpipe-melt/sxpipe-melt.conf.in
@@ -100,7 +100,7 @@ options = -l $lang -ll $*
 desc = reconnaissance des nombres
 
 [caponly]
-cmd = $melt/caponlysentences.pl
+cmd = $melt/caponlysentences.pl -l $lang
 desc = minusculisation des phrases (presque) entiÃ¨rement en majuscules
 
 [segment]