diff --git a/README b/README
index d937dfff16be1caa6f47afc2a05fcbbffb079606..26ff19ae52c26c211b85466011d84e4fcdb77fd7 100644
--- a/README
+++ b/README
@@ -110,3 +110,8 @@ VINF	   infinitive verb form
 VPP	   past participle
 VPR	   present participle
 VS	   subjunctive verb form
+
+When using normalization options, other tags may appear:
+- when using -n, Y means "non-last token of a multi-token unit", X means "multiword/multitag token"
+- when using -N, Y means "non-last token of a multi-token unit", and multiword/multitag tokens are annotated with tags of the form T1+T2+...+Tn (e.g. chépa/CLS+V+ADV)
+
diff --git a/bin/MElt-eval.in b/bin/MElt-eval.in
index 3efe0b769c80db0e690ce27686d03c2837212a1e..c8f9d7ab1494a0f4051d54ed2b8fc66b05632366 100644
--- a/bin/MElt-eval.in
+++ b/bin/MElt-eval.in
@@ -5,32 +5,63 @@ PYTHONPATH=@pkgpythondir@:$PYTHONPATH
 BINDIR=@bindir@
 MODEL=unspecified_model
 LOWERCAPOPTION=
+NORMALIZEOPTION=
+LANGUAGE=
 NE=0
+REMOVE_SXPIPE_COMMENTS_OR_CAT="cat"
+
+# Output filter used when normalization is on: drops SxPipe comments ({...} and
+# the whitespace that follows). It is a function, not a command string, because
+# quotes stored inside a variable stay literal when the variable is expanded.
+remove_sxpipe_comments() {
+    perl -pe 's/\{.*?\}\s*//g;'
+}
 
-while getopts :m:Sdhv o
+while getopts :m:l:nNSdhv o
 do case "$o" in
 	v)  echo $VERSION && exit 1;;
 	h)  echo "Usage: MElt-eval [ -dS ] -m trained-model-name eval-corpus" \
             && echo "  -m	[/path/to/model] Use the MElt model given instead of the default one (which is the model 'fr' for French, trained on the FTB and the Lefff, installed in @datadir@/fr)" \
-            && echo "-S process named entities before tagging, using SxPipe grammars distributed with MElt (the model must have been trained with the -S option)" \
-            && echo "-h	Output this help message" \
-            && echo "-v	Output MElt version" \
+            && echo "  -S process named entities before tagging, using SxPipe grammars distributed with MElt (the model must have been trained with the -S option)" \
+            && echo "  -n	Use -n option for temporary normalization (requires -l)" \
+            && echo "  -N	Use -N option for temporary normalization (requires -l)" \
+            && echo "  -l	Specify language for de-noisification options" \
+            && echo "  -h	Output this help message" \
+            && echo "  -v	Output MElt version" \
             && exit 1;;
-	m)  MODEL=$OPTARG
-	    ;;
+	m)  MODEL=$OPTARG;;
+	l)  LANGUAGE=$OPTARG;;
 	S)  NE=1;;
+	n)  NORMALIZEOPTION="-n"
+	    REMOVE_SXPIPE_COMMENTS_OR_CAT="remove_sxpipe_comments"
+	    ;;
+	N)  NORMALIZEOPTION="-N"
+	    REMOVE_SXPIPE_COMMENTS_OR_CAT="remove_sxpipe_comments"
+	    ;;
 	d)  LOWERCAPOPTION="-d";;
 	\?)
 	    echo "Invalid option: -$OPTARG" >&2
 	    exit 1
 	    ;;
-	:) # ici $OPTARG==d
+	:)
-	    echo "Error: value needed after '-r' or '-C'" >&2
+	    echo "Error: value needed after '-$OPTARG'" >&2
 	    exit 4
 	    ;;
     esac
 done
 
+if [ "x$NORMALIZEOPTION" != "x" ]
+then
+    if [ "x$LANGUAGE" = "x" ]
+    then
+	echo "Error: the language must be specified using option -l if the normalizer (-n or -N) is used" >&2
+	exit 4
+    fi
+    # Completed only after parsing, so that -l may be given before or after -n/-N
+    NORMALIZEOPTION="${NORMALIZEOPTION} -l ${LANGUAGE} -c"
+fi
+
+
 shift $(($OPTIND - 1)) # so that $1 is indeed the first argument AFTER those already parsed by getopts
 
 # usage: MElt-train trained-model-name eval-corpus [option for MElt]
@@ -39,13 +70,13 @@ MODEL_BASE=$(basename "${MODEL}")
 if [ $NE -eq 1 ]
 then
     echo "Evaluating model $MODEL_BASE on corpus $1 (named-entity-aware setup)" >&2
-    echo "cat \"$1\" | perl @bindir@/brown2txt.pl -S | tee \"$1.txt\" | MElt -S ${LOWERCAPOPTION} -m \"$MODEL\" > \"$1.txt.melted.$MODEL_BASE\"" >&2
-    cat "$1" | perl -pe "s/_A_ANONYMISER//g" | perl @bindir@/brown2txt.pl | tee "$1.txt" | MElt -S ${LOWERCAPOPTION} -m "$MODEL" > "$1.txt.melted.$MODEL_BASE"
+    echo "cat \"$1\" | perl @bindir@/brown2txt.pl -S | tee \"$1.txt\" | MElt -S ${LOWERCAPOPTION} ${NORMALIZEOPTION} -m \"$MODEL\" | ${REMOVE_SXPIPE_COMMENTS_OR_CAT} > \"$1.txt.melted.$MODEL_BASE\"" >&2
+    cat "$1" | perl -pe "s/_A_ANONYMISER//g" | perl @bindir@/brown2txt.pl | tee "$1.txt" | MElt -S ${NORMALIZEOPTION} ${LOWERCAPOPTION} -m "$MODEL" | ${REMOVE_SXPIPE_COMMENTS_OR_CAT} > "$1.txt.melted.$MODEL_BASE"
     PYTHONPATH=$PYTHONPATH python @bindir@/eval_brown.py -p "$1.txt.melted.$MODEL_BASE" -g "$1" -d "$MODEL/tag_dict.json"
 else
     echo "Evaluating model $MODEL_BASE on corpus $1" >&2
-    echo "cat \"$1\" | perl @bindir@/brown2txt.pl | tee \"$1.txt\" | MElt -r ${LOWERCAPOPTION} -m \"$MODEL\" > \"$1.txt.melted.$MODEL_BASE\"" >&2
-    cat "$1" | perl -pe "s/_A_ANONYMISER//g" | perl @bindir@/brown2txt.pl | tee "$1.txt" | MElt -r ${LOWERCAPOPTION} -m "$MODEL" > "$1.txt.melted.$MODEL_BASE"
+    echo "cat \"$1\" | perl @bindir@/brown2txt.pl | tee \"$1.txt\" | MElt ${NORMALIZEOPTION} -r ${LOWERCAPOPTION} -m \"$MODEL\" | ${REMOVE_SXPIPE_COMMENTS_OR_CAT} > \"$1.txt.melted.$MODEL_BASE\"" >&2
+    cat "$1" | perl -pe "s/_A_ANONYMISER//g" | perl @bindir@/brown2txt.pl | tee "$1.txt" | MElt ${NORMALIZEOPTION} -r ${LOWERCAPOPTION} -m "$MODEL" | ${REMOVE_SXPIPE_COMMENTS_OR_CAT} > "$1.txt.melted.$MODEL_BASE"
     PYTHONPATH=$PYTHONPATH python @bindir@/eval_brown.py -p "$1.txt.melted.$MODEL_BASE" -g "$1" -d "$MODEL/tag_dict.json"
 fi
 
diff --git a/bin/MElt-train.in b/bin/MElt-train.in
index ae7c7a84ef1234062491611cc38cdce250ee3b98..24d640a121d87b2be067ca77c301ca3082719792 100644
--- a/bin/MElt-train.in
+++ b/bin/MElt-train.in
@@ -21,12 +21,14 @@ FEATURE_SELECTION_PARAMETERS=
 while getopts :f:r:C:m:sSDMvh o
 do case "$o" in
 	v)  echo $VERSION && exit 1;;
-	h)  echo "Usage: MElt-train [ -rmMCS ] trained-model-name lexicon training-corpus test-corpus" \
+	h)  echo "Usage: MElt-train [ -rmsvhfDMCS ] trained-model-name lexicon training-corpus test-corpus" \
             && echo "-r	[n_reruns] Number of successive runs for MegaM (used only in the multiclass MaxEnt setting)" \
 	    && echo "-C [multiclass|multitron] Classifier type: multiclass MaxEnt (often a bit better, much slower to train) or multitron (multiclass perceptron, faster to train, default)" \
             && echo "-m	[n_iter] Maximum number of iterations for each run in MegaM (default is 100 for multiclass and 40 for multitron)" \
-            && echo "-S train a named-entity-aware tagger, using SxPipe grammars distributed with MElt" \
-            && echo "-D dump megam model" \
+            && echo "-f [parameters] Use specified values as parameters (default is win:2,pwin:2,lex_wd:1,lex_lhs:1,lex_rhs:1,pln:4,sln:4,rpln:1,rsln:0,ffthrsld:2,norm:0 - see bin/MElt_tagger.py.in for details)" \
+            && echo "-S Train a named-entity-aware tagger, using SxPipe grammars distributed with MElt" \
+            && echo "-s Do not train the lemmatizer" \
+            && echo "-D Dump megam model" \
             && echo "-h	Output this help message" \
             && echo "-v	Output MElt version" \
             && exit 1;;
diff --git a/bin/MElt_XML_preprocessor.pl b/bin/MElt_XML_preprocessor.pl
index 621acaea53277775e865b81421717bb6279e9fed..02bc3a8573f91a39910932aa6e2c5ba97d651062 100644
--- a/bin/MElt_XML_preprocessor.pl
+++ b/bin/MElt_XML_preprocessor.pl
@@ -37,3 +37,9 @@ while (<>) {
 
   print $_;
 }
+
+__END__
+
+<a>
+  ceci est un <b>superbe</b> texte
+</a>
diff --git a/bin/MElt_explore_featureset_space.pl b/bin/MElt_explore_featureset_space.pl
index 9f06f886570715499f71a90aa217bb3daf95662d..22740911a66aeac6f6022335a59991f1f1da54cc 100644
--- a/bin/MElt_explore_featureset_space.pl
+++ b/bin/MElt_explore_featureset_space.pl
@@ -30,15 +30,15 @@ if (0) { # default
   $features{ffthrsld} = "1,2,3";
   $features{norm} = "0";
 } else { # expÃ©s rÃ©duites mais multiclass PERCEO
-  $features{lex_lhs} = "0";
+  $features{lex_lhs} = "1";
   $features{lex_rhs} = "1";
   $features{lex_wd} = "1";
-  $features{pln} = "3,4";
+  $features{pln} = "3,4,5";
+  $features{sln} = "4,5,6,7";
   $features{pwin} = "2";
   $features{win} = "2";
-  $features{sln} = "4,5,6";
-  $features{rpln} = "0,1";
-  $features{rsln} = "0,1,2";
+  $features{rpln} = "0,1,2,3";
+  $features{rsln} = "0,1,2,3";
   $features{ffthrsld} = "1,2";
   $features{norm} = "0";
 }
@@ -51,6 +51,7 @@ $lang = shift || "fr";
 $modeltype = shift || "multitron";
 $maxiter = shift || 0;
 $is_NE_aware = shift || 0;
+$dryrun = shift || 0;
 
 $maxiter = 40 if $maxiter == 0;
 
@@ -92,6 +93,8 @@ for (@options) {
 }
 print STDERR "$n\n";
 
+exit (0) if $dryrun;
+
 for (@options) {
   unless (-d "$lang-$modeltype$modeltypesuffix-$NE_option-$_") {
     system("MElt-train -s -C $modeltype -m $maxiter $NE_option -f $_ $lang-$modeltype$modeltypesuffix-$NE_option-$_ $lex $train $test && rm $lang-$modeltype$modeltypesuffix-$NE_option-$_.trainingdata && rm -f $lang-$modeltype$modeltypesuffix-$NE_option-$_/*.json && rm -f $lang-$modeltype$modeltypesuffix-$NE_option-$_/*.npy && rm -f $lang-$modeltype$modeltypesuffix-$NE_option-$_/*.ftl") == 0 or die "";
diff --git a/bin/MElt_tagger.py.in b/bin/MElt_tagger.py.in
index 3d082d64e9c4a4aab4f5feca7bcea43a20e0ea9e..9e792a813d809faa37b0bf43df8e80d3b44baaf9 100644
--- a/bin/MElt_tagger.py.in
+++ b/bin/MElt_tagger.py.in
@@ -490,6 +490,8 @@ class BrownReader(CorpusReader):
             wasCapOnly = CAPONLYLINE_RE.match(line)
         token_list = []
         for item in line.split(' '):
+            if item == '':
+                continue
             wdtag = WD_TAG_RE.match(item)
             if (wdtag):
                 wd,tag = wdtag.groups()
@@ -498,7 +500,7 @@ class BrownReader(CorpusReader):
                 else:
                     token_list.append( (wd,tag) )
             else:
-                print >> sys.stderr, "Warning: Incorrect token/tag pair: \""+item+"\""+" --- line: "+line
+                print >> sys.stderr, ("Warning: Incorrect token/tag pair: \""+item+"\""+" --- line: "+line).encode('utf8')
         return token_list
 
 
@@ -1111,7 +1113,7 @@ def debug_n_best_sequence(n_best_sequences):
 
 def tag_dict(file_path):
     tag_dict = defaultdict(dict)
-    for s in BrownReader(file_path,encoding="utf-8"):
+    for s in BrownReader(file_path):
         for wd,tag in s:
             tag_dict[wd][tag] = 1
     return tag_dict
@@ -1120,7 +1122,7 @@ def tag_dict(file_path):
 
 def word_list(file_path,t=5):
     word_ct = {}
-    for s in BrownReader(file_path,encoding="utf-8"):
+    for s in BrownReader(file_path):
         for wd,tag in s:
             word_ct[wd] =  word_ct.get(wd,0) + 1
     filtered_wd_list = {} 
diff --git a/bin/build_tag_dict.py b/bin/build_tag_dict.py
index fd5811fb0265e9d31d87c8ded17ec2063eee2378..bd787bf3573f1c68a961da0464dbf1fadaab4802 100644
--- a/bin/build_tag_dict.py
+++ b/bin/build_tag_dict.py
@@ -52,6 +52,8 @@ class BrownReader():
             wasCapOnly = CAPONLYLINE_RE.match(line)
         token_list = []
         for item in line.split(' '):
+            if item == '':
+                continue
             wdtag = WD_TAG_RE.match(item)
             if (wdtag):
                 wd,tag = wdtag.groups()
diff --git a/bin/eval_brown.py b/bin/eval_brown.py
index e89b8880278dda7506a639c059cc70a827a7131f..2fa49a89228c828b1e18789deaeff0ea00b9fa17 100644
--- a/bin/eval_brown.py
+++ b/bin/eval_brown.py
@@ -144,9 +144,9 @@ class AccuracySink(ResultSink):
         print "Confusion matrix"
         print
         print '\tas'
-        print 'is\t',"\t".join(self.tags.keys())
+        print 'is\t',"\t".join(self.tags.keys()).encode('utf8')
         for tag1 in self.tags.keys():
-            print tag1,'\t',
+            print tag1.encode('utf8'),'\t',
             for tag2 in self.tags.keys():
                 print self.matrix.get((tag1,tag2),0),'\t',
             print            
@@ -177,7 +177,7 @@ class AccuracySink(ResultSink):
             f = 0.0
             if r+p:
                 f = (2*r*p)/(r+p)
-            print "%10s | %10s %10s %10s | %10s %10s %10s" %(tag, round(r,3), round(p,3), round(f,3), corr, pred, total)
+            print "%10s | %10s %10s %10s | %10s %10s %10s" %(tag.encode('utf8'), round(r,3), round(p,3), round(f,3), corr, pred, total)
         print "-"*80
         return
     
@@ -197,7 +197,7 @@ def compare_files( gold_file, pred_file, sink, known_words={}, quiet=True, encod
                 pwd,ptag = pred_s[i]
             except IndexError:
                 ptag = "UNK"
-                print >> sys.stderr, "Warning: Missing prediction for Sentence #%s, token %s" %(s_ct,i)
+                print >> sys.stderr, "Warning: Missing prediction for Sentence #%s, token %s" %(s_ct,i)
             # update sinks
             sink.update(gwd,gtag,ptag,known_words)
             # errors
diff --git a/configure.ac b/configure.ac
index 565ed571c46a02f17e35db58d31642e914f7d711..00b082bb6609a8cdfe0c6b047997605de88064e2 100644
--- a/configure.ac
+++ b/configure.ac
@@ -12,11 +12,6 @@ if test "x$PERL" = x; then
 	AC_MSG_ERROR(perl not found)
 fi
 
-# look for dicos
-# AC_PATH_ALEXINA
-# AC_PATH_LEFFF
-# AM_CONDITIONAL(HAVE_LEFFF, test -n "$lefffdir")
-
 AC_CONFIG_FILES(	data/Makefile
 			bin/Makefile
 			pkgpythonlib/Makefile
diff --git a/data/Makefile.am b/data/Makefile.am
index 8202e74bfc0c807133943da8649ad26351de2a2c..b92b487306c4412b9e622295ac12efab3668c219 100644
--- a/data/Makefile.am
+++ b/data/Makefile.am
@@ -189,8 +189,7 @@ lefff.ftb4tags: update_lefff_data
 %.reg: %
 	cat $* | perl ../bin/lexicon2features.pl > $*.data
 	cut -f1,2 $*.data > $*.data.1,2
-	megam.opt -nc -maxi 20 multiclass $*.data.1,2 > $*.model
-#	megam.opt -nc -maxi 20 multitron $*.data.1,2 > $*.model
+	megam.opt -nc -maxi 100 -repeat 5 multiclass $*.data.1,2 > $*.model
 	cat $* | perl -pe "s/ /_/g" > $*.us
 	cat $*.data | perl ../bin/megam_predict.pl $*.model | perl ../bin/pred2reg.pl | paste $* - | cut -f1,2,3,6 > $@
 
diff --git a/data/lex2ftb4cats.pl b/data/lex2ftb4cats.pl
index fc89dc0577f4967f3762222435c455a4e736462c..f03d7b5938bfe0928717129bba9d366796d1f60d 100755
--- a/data/lex2ftb4cats.pl
+++ b/data/lex2ftb4cats.pl
@@ -7,6 +7,63 @@ use utf8;
 
 # cat /usr/local/share/enlex/?.lex /usr/local/share/enlex/ponct.lex /usr/local/share/enlex/{lostandfound,addons}.lex | recode l1..u8 | grep -v "@?" | perl data/lex2ftb4cats.pl -l en -ms -o data/enlex_ms
 
+# cat /usr/local/share/enlex/?.lex /usr/local/share/enlex/ponct.lex /usr/local/share/enlex/{lostandfound,addons}.lex | recode l1..u8 | grep -v "@?" | perl lex2ftb4cats.pl -l en -ms -o enlex.ms.meltlex
+# cat /usr/local/share/enlex/?.lex /usr/local/share/enlex/ponct.lex /usr/local/share/enlex/{lostandfound,addons}.lex | recode l1..u8 | grep -v "@?" | perl lex2ftb4cats.pl -l en -nms -o enlex.meltlex
+
+# cat /usr/local/share/delex/*.lex | recode l1..u8 | perl lex2ftb4cats.pl -l de -ms -o delex.ms.meltlex
+# cat /usr/local/share/delex/*.lex | recode l1..u8 | perl lex2ftb4cats.pl -l de -nms -o delex.meltlex
+
+# cat /usr/local/share/leffe/*.lex | recode l1..u8 | perl lex2ftb4cats.pl -l es -ms -o leffe.ms.meltlex
+# cat /usr/local/share/leffe/*.lex | recode l1..u8 | perl lex2ftb4cats.pl -l es -nms -o leffe.meltlex
+
+# cat /usr/local/share/saldo/*.lex | recode l1..u8 | perl lex2ftb4cats.pl -l sv -ms -o saldo.ms.meltlex
+# cat /usr/local/share/saldo/*.lex | recode l1..u8 | perl lex2ftb4cats.pl -l sv -nms -o saldo.meltlex
+
+# cat /usr/local/share/sloleks/*.lex | recode l2..u8 | perl lex2ftb4cats.pl -l si -ms -o sloleks.ms.meltlex
+# cat /usr/local/share/sloleks/*.lex | recode l2..u8 | perl lex2ftb4cats.pl -l si -nms -o sloleks.meltlex
+# cat /usr/local/share/sloleks/*.lex | recode l2..u8 | perl lex2ftb4cats.pl -l si -lms -o sloleks.ms2.meltlex
+
+# cat /usr/local/share/lefff/*.lex | recode l1..u8 | perl lex2ftb4cats.pl -l fr -ms -o lefff.ms.meltlex
+# cat /usr/local/share/lefff/*.lex | recode l1..u8 | perl lex2ftb4cats.pl -l fr -nms -o lefff.meltlex
+# cat /usr/local/share/lefff/*.lex | recode l1..u8 | perl lex2ftb4cats.pl -l fr -lms -o lefff.ms2.meltlex
+
+# cat /usr/local/share/multext-east-4-0-ro/*.lex | recode l2..u8 | perl lex2ftb4cats.pl -l ro -ms -o multext-east-4-0-ro.ms.meltlex
+# cat /usr/local/share/multext-east-4-0-ro/*.lex | recode l2..u8 | perl lex2ftb4cats.pl -l ro -nms -o multext-east-4-0-ro.meltlex
+# cat /usr/local/share/multext-east-4-0-ro/*.lex | recode l2..u8 | perl lex2ftb4cats.pl -l ro -lms -o multext-east-4-0-ro.ms2.meltlex
+
+# cat /usr/local/share/multext-east-4-0-et/*.lex | perl lex2ftb4cats.pl -l et -ms -o multext-east-4-0-et.ms.meltlex
+# cat /usr/local/share/multext-east-4-0-et/*.lex | perl lex2ftb4cats.pl -l et -nms -o multext-east-4-0-et.meltlex
+# cat /usr/local/share/multext-east-4-0-et/*.lex | perl lex2ftb4cats.pl -l et -lms -o multext-east-4-0-et.ms2.meltlex
+
+# cat /usr/local/share/multext-east-4-0-bg/*.lex | perl lex2ftb4cats.pl -l bg -ms -o multext-east-4-0-bg.ms.meltlex
+# cat /usr/local/share/multext-east-4-0-bg/*.lex | perl lex2ftb4cats.pl -l bg -nms -o multext-east-4-0-bg.meltlex
+# cat /usr/local/share/multext-east-4-0-bg/*.lex | perl lex2ftb4cats.pl -l bg -lms -o multext-east-4-0-bg.ms2.meltlex
+
+# cat /usr/local/share/multext-east-4-0-hu/*.lex | recode l2..u8 | perl lex2ftb4cats.pl -l hu -ms -o multext-east-4-0-hu.ms.meltlex
+# cat /usr/local/share/multext-east-4-0-hu/*.lex | recode l2..u8 | perl lex2ftb4cats.pl -l hu -nms -o multext-east-4-0-hu.meltlex
+# cat /usr/local/share/multext-east-4-0-hu/*.lex | recode l2..u8 | perl lex2ftb4cats.pl -l hu -lms -o multext-east-4-0-hu.ms2.meltlex
+
+# cat /usr/local/share/delex/*.lex | recode l1..u8 | perl lex2ftb4cats.pl -l de -ms -o delex.ms.meltlex
+# cat /usr/local/share/delex/*.lex | recode l1..u8 | perl lex2ftb4cats.pl -l de -nms -o delex.meltlex
+
+# cat /usr/local/share/inmdb/*.lex | recode l1..u8 | perl lex2ftb4cats.pl -l ga -ms -o inmdb.ms.meltlex
+# cat /usr/local/share/inmdb/*.lex | recode l1..u8 | perl lex2ftb4cats.pl -l ga -nms -o inmdb.meltlex
+
+# cat /usr/local/share/leffga/*.lex | recode l1..u8 | perl lex2ftb4cats.pl -l gl -ms -o leffga.ms.meltlex
+# cat /usr/local/share/leffga/*.lex | recode l1..u8 | perl lex2ftb4cats.pl -l gl -nms -o leffga.meltlex
+
+# cat /usr/local/share/dela_gr/*.lex | perl lex2ftb4cats.pl -l el -ms -o dela_gr.ms.meltlex
+# cat /usr/local/share/dela_gr/*.lex | perl lex2ftb4cats.pl -l el -nms -o dela_gr.meltlex
+
+# cat /usr/local/share/pollex/*.lex | recode l2..u8 | perl lex2ftb4cats.pl -l sk -ms -o pollex.ms.meltlex
+# cat /usr/local/share/pollex/*.lex | recode l2..u8 | perl lex2ftb4cats.pl -l sk -nms -o pollex.meltlex
+
+# cat /usr/local/share/enlex/?.lex  /usr/local/share/enlex/ponct.lex /usr/local/share/enlex/{lostandfound,addons}.lex | recode l1..u8 | grep -v "@?" | perl lex2ftb4cats.pl -l en -ms -o enlex.ms.meltlex
+# cat /usr/local/share/enlex/?.lex  /usr/local/share/enlex/ponct.lex /usr/local/share/enlex/{lostandfound,addons}.lex | recode l1..u8 | grep -v "@?" | perl lex2ftb4cats.pl -l en -nms -o enlex.meltlex
+
+# cat /usr/local/share/morph_it/*.lex | recode l1..u8 | perl lex2ftb4cats.pl -l it -ms -o morph_it.ms.meltlex
+# cat /usr/local/share/morph_it/*.lex | recode l1..u8 | perl lex2ftb4cats.pl -l it -nms -o morph_it.meltlex
+
 
 $output_file = "lefff.ftb4tags";
 $stdout = 0;
@@ -294,7 +351,7 @@ unless ($all) {
   }
   while (<TMP>) {
     chomp;
-    /^(.*?)\t(.*?)((?:__.*?)?)\t(.*)$/ || die;
+    /^(.*?)\t(.*?)((?:__.*?)?)\t(.*)$/ || die $_;
     $f = $1;
     $cat = $2;
     $synt = $3;
diff --git a/data/normalization/en/ngrams b/data/normalization/en/ngrams
index 89322bc0fb769535bd7103a4c9e75c88a87ad219..2d80d217d59cbbf8fe1c528cd0934e049f1c0c5c 100644
--- a/data/normalization/en/ngrams
+++ b/data/normalization/en/ngrams
@@ -25,8 +25,7 @@
 456	alot	aÂ lot
 426	pls	please
 408	ng	ng
-382	gon	gon
-328	rly	rly
+328	rly	really
 326	mmbtu	mmbtu
 279	mws	mws
 263	approx.	approximately
@@ -61,7 +60,7 @@
 110	didnt	didÂ n't
 110	gty	gty
 107	whÂ =Â en	when
-105	recieved	recieved
+105	recieved	received
 104	thurs	thursday
 103	tommorrow	tomorrow
 100	passcode	code
@@ -84,7 +83,7 @@
 82	ent	ent
 81	([^ {}]+)iÂ =Â ze	$1ize
 81	mid-	mid-
-79	recieve	recieve
+79	recieve	receive
 79	([^ {}]+)Â =Â ry	$1ry
 78	conf.	conf.
 78	rebook	rebook
@@ -95,7 +94,7 @@
 75	co-operation	cooperation
 73	cashout	cash-out
 72	mistransmission	transmission
-70	plc	plc
+70	plc	company
 69	chnages	changes
 69	non-refundable	refundable
 67	username	name
@@ -105,10 +104,10 @@
 65	demarc	demarc
 64	re-send	re-send
 64	non-binding	non-binding
-63	tx	tx
-62	seperate	seperate
+63	tx	thanks
+62	seperate	separate
 62	post-id	post-id
-62	plz	plz
+62	plz	please
 61	undercollections	collections
 60	taht	that
 60	pre-petition	petition
@@ -214,3 +213,17 @@
 0	&Â #Â 039Â ;Â ve	've
 0	&Â #Â 039Â ;Â d	'd
 0	&Â #Â (\d+)Â ;	&#$1;
+0	w/o	without
+0	they ain't	they areÂ n't
+0	there ain't	there areÂ n't
+0	They ain't	They areÂ n't
+0	There ain't	There areÂ n't
+0	ain't I	amÂ n't I
+0	I ain't	I amÂ n't
+0	i ain't	I amÂ n't
+0	ain't	isÂ n't
+0	wanna	want to
+0	gonna	going to
+0	ain't	isÂ n't
+0	Gonna	GoingÂ to
+0	Wanna	WantÂ to
diff --git a/data/normalization/fr/ngrams b/data/normalization/fr/ngrams
index 8ab2c4f2ea53f1cfd8506958664b6f04fcc9e24f..b055d10bf30793ed00f82670c137b23d3e56aa02 100644
--- a/data/normalization/fr/ngrams
+++ b/data/normalization/fr/ngrams
@@ -1105,13 +1105,13 @@ ek twa		19
 7 aprem		19
 Slt c	salut c'Â est	19
 gro bizou		19
-ala maizon		19
-a taleur		19
+ala maizon	Ã Â la maison	19
+a taleur	Ã  toutÂ Ã Â l'Â heure	19
 Slt sava	salut Ã§aÂ va	19
 rien .		19
 Ou lÃ©		19
 chÃ©rie !		19
-bone soirÃ©		19
+bone soirÃ©	bonne soirÃ©e	19
 le match		19
 pr twa		19
 ton num		19
@@ -1711,7 +1711,7 @@ rÃ©p		14
 Kosa		14
 malad	malade	14
 ? alor		14
-me bÃ©gnÃ©		14
+me bÃ©gnÃ©	baignais	14
 ds ma		14
 fai ?		14
 lui a		14
@@ -1925,7 +1925,7 @@ la ou		13
 mi tÃ©		13
 bien sur		13
 Bsr	bonsoir	13
-je voulÃ©		13
+je voulÃ©	je voulais	13
 parti a		13
 stp ,		13
 Cava	Ã‡aÂ va	13
@@ -2285,10 +2285,9 @@ Koman	Comment	12
 koi toi ?	quoi toi ?	12
 ma mÃ¨r	ma mÃ¨re	12
 pu d		12
-o moin		12
+o moin	au moins	12
 suis a la	suis Ã  la	12
 pa entendu		12
-soirÃ© .		12
 di rien		12
 fr koi		12
 chance pr		12
@@ -2540,7 +2539,7 @@ d choz		11
 soirÃ¨	soirÃ©e	11
 ce mat		11
 g +		11
-toi sava		11
+toi sava	toi Ã§aÂ va	11
 Merci a		11
 JE	Je	11
 JespÃ¨r	j'Â espÃ¨re	11
@@ -2554,63 +2553,63 @@ je rentre		11
 et mi		11
 jtai	jeÂ t'Â ai	11
 Sui	Suis	11
-ms bon		11
-on vera		11
+ms bon	mais bon	11
+on vera	on verra	11
 a toi et	Ã  toi et	11
-1 truc		10
+1 truc	un truc	10
 ai pa		10
 sa pa		10
-fÃ©r		10
+fÃ©r	faire	10
 je vous		10
-dsl g		10
+dsl g	dÃ©solÃ© j'Â ai	10
 c tou	c'Â est tout	10
 dsl	dÃ©solÃ©	10
 bonne journÃ©		10
 _ACC_F lÃ©		10
 ce jour		10
-ki te		10
-gt en		10
+ki te	qui te	10
+gt en	j'Â Ã©tais en	10
 gros bisou		10
-jc ke		10
-i ve		10
-pensÃ© a		10
-bne fete		10
+jc ke	jeÂ sais que	10
+i ve	il veut	10
+pensÃ© a	pensÃ© Ã 	10
+bne fete	bonne fÃªte	10
 TfÃ©	TuÂ fais	10
-c po		10
+c po	c'Â est pas	10
 ? _SENT_BOUND		10
-si ca		10
+si ca	si Ã§a	10
 _ACC_F lol		10
 me prendre		10
-Ã© moi		10
+Ã© moi	et moi	10
 se voit		10
 prenom	prÃ©nom	10
-trÃ¨ for		10
+trÃ¨ for	trÃ¨s fort	10
 pa avoir		10
-a ki		10
+a ki	Ã  qui	10
 le devoir		10
 zuma		10
-ki vien		10
-en tt		10
+ki vien	qui vient	10
+en tt	en tout	10
 svoi		10
 toi oci	toi aussi	10
 emmene	emmÃ¨ne	10
 anne	annÃ©e	10
 kosa ou		10
 biz !		10
-mÃ© el		10
+mÃ© el	mais elle	10
 momt	moment	10
 x		10
 moin .		10
 2 ma		10
 pour une		10
 pa ton		10
-Bne soirÃ©e		10
+Bne soirÃ©e	Bonne soirÃ©e	10
 Ã© d		10
 ben g		10
-c bn		10
+c bn	c'Â est bon	10
 Papa		10
 maman Ã©		10
-etwa ?		10
+etwa	etÂ toi	10
 ek sa		10
 sa a		10
 Hey !		10
@@ -2621,28 +2620,27 @@ un moune		10
 toi ta		10
 . tu va	. tu vas	10
 t ek		10
-nimporte koi		10
+nimporte koi	n'Â importe quoi	10
 tro cool		10
-a coz		10
+a coz	Ã  cause	10
 Snif		10
 gro bizou	gros bisous	10
-Coucou c		10
+Coucou c	Coucou c'Â est	10
 etwa	etÂ toi	10
 peu .		10
 J espÃ¨re que	J' espÃ¨re que	10
-chÃ© mwa		10
+chÃ© mwa	chez moi	10
 aime ma		10
-ce sOir		10
-A ouÃ©		10
-Tu a		10
-kan c		10
+ce sOir	ce soir	10
+A ouÃ©	ah ouais	10
+Tu a	Tu as	10
+kan c	quand c'Â est	10
 c chian	c'Â est chiant	10
-jpense a		10
+jpense a	jeÂ pense Ã 	10
 toute la		10
 pa ni		10
 kelkun	quelqu'un	10
-WE		10
-javÃ© oubliÃ©		10
+WE	week-end	10
 OuÃ¨	Oui	10
 tro !		10
 el !		10
@@ -3748,20 +3746,20 @@ news .		8
 Kmt		8
 bÃ©gnÃ© Ã©		8
 dit ke		8
-jmen		8
+jmen	je m'Â en	8
 ma chambre		8
 ke je v	que je vais	8
-enf1		8
+enf1	enfin	8
 veu pa		8
 c mol		8
-komence		8
+komence	commence	8
 peur !		8
 pa tro tar	pas trop tard	8
 ? bien		8
 aussi je		8
 bien la		8
 cÂ t	c'Â Ã©tait	8
-pa kome		8
+pa kome	pas comme	8
 kan il		8
 recu .		8
 soir Ã©		8
@@ -3785,6 +3783,7 @@ Anz !		8
 fac !		8
 film !		8
 t !		8
+presk	presque	8
 preske	presque	8
 ptite	petite	8
 ma fait	m'Â a fait	8