diff --git a/bin/MElt_tagger.py.in b/bin/MElt_tagger.py.in
index dc5692ceeb26ee144c8ea61dcd57696afde98e04..8c9c406552efa615ad0f084864f37a4aca23e34b 100644
--- a/bin/MElt_tagger.py.in
+++ b/bin/MElt_tagger.py.in
@@ -93,7 +93,6 @@ outfile = codecs.getwriter(options.encoding)(sys.stdout)
 if options.output_file:
     outfile = codecs.open( options.output_file, "w", options.encoding )
 if options.ZH:
-    import sxparser as sxp
     from DagInstance import DagInstance
 
 
@@ -278,7 +277,7 @@ class POSTagger:
         # print >> sys.stderr, "Best tok seq:", [(t.string,t.label) for t in best_sequence]
         return best_sequence
 
-    def tag_token_dag_v2(self, tokens, feat_options={}, beam_size=3):
+    def tag_token_dag_v2(self, tokens, feat_options={}, beam_size=5):
         ''' N-best breadth-first search for the best tag sequence for each sentence'''
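+        # beam_size caps how many candidate tag sequences the search keeps alive at each step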
         #Dag mode only allowed with window of length 2
         assert(feat_options.get('win',2) == 2)
@@ -323,10 +322,12 @@ class POSTagger:
                                  cache=self.cache )
                         inst.fv = cached_inst.fv[:]
                         inst.get_sequential_features()
+                        # print "debug",token.string.encode("utf8")," ".join(sorted(inst.fv)).encode("utf8")
                         label_pr_distrib = self.classifier.class_distribution(inst.fv)
                         # extend sequence j with current token
                         for (cl,pr) in label_pr_distrib:
                             # make sure that cl is a legal tag
+                            #print cl, pr, token.string.encode("utf8")
                             if legit_tags1 or legit_tags2:
                                 if (cl not in legit_tags1) and (cl not in legit_tags2):
                                     continue
@@ -499,7 +500,7 @@ class POSTagger:
                     tokens.append( token )
             tagged_tokens = self.tag_token_dag_v2( tokens,
                                                      feat_options=feat_options,
-                                                     beam_size=beam_size )
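+                                                     # DAGs branch, so the n-best search gets twice the usual beam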
+                                                     beam_size=2*beam_size )
             if (print_probas):
                 tagged_sent = " ".join( [tok.__pstr__() for tok in tagged_tokens] )
             else:
@@ -598,9 +599,26 @@ class WeightedReader(CorpusReader):
         return (w,tokens)
 ###########################DAGReader#######################
 
-from sxparser import sxyacc
+class DagToken(object):
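+    """Lightweight replacement for sxparser.Token in DAG mode: surface form,
+    POS tag, plus optional semantic and comment annotations."""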
+    def __init__(self,form,pos="UNK",sem=None,com=None):
+        self.forme = form
+        self.semantique = sem
+        self.commentaire = com
+        self.pos = pos
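+    # renders the token as "{commentaire} forme__POS [|semantique|]", omitting absent parts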
+    def to_string(self):
+        acc = u""
+        if self.commentaire:
+            acc += "{%s} " % (self.commentaire,)
+        acc += "%s__%s " % (self.forme,self.pos)
+        if self.semantique:
+            acc += "[|%s|] " % (self.semantique,)
+        return acc
+
 
 class DAGReader(CorpusReader):
+    """
+    reads a corpus written as a sequence of udags
+    """
     def __init__(self,infile, encoding='utf8'):
         if type(infile) == str or type(infile) == unicode :
             self.stream = codecs.open(infile,'r',encoding)
@@ -608,19 +626,31 @@ class DAGReader(CorpusReader):
         else :
             self.allowSeek = False #(may be stdin)
             self.stream = infile
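+        # a token_desc looks like "{comment} form [|semantics|]", where the
+        # comment and semantics parts are optional; groups capture the bare content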
+        self.re_token = re.compile(r"(?:\{(?P<com>[^}]+)\} )?(?P<tok>[^ ]+)(?: \[\|(?P<sem>[^|]+)\|\])?")
         return
         
     def __iter__(self):
         return self
     def next(self):
-        line = self.stream.readline()
-        if (line == ''):
-            if self.allowSeek:
-                self.stream.seek(0)
+        dag = []
+        line = self.stream.readline()
+        if line == '' and self.allowSeek:
+            self.stream.seek(0)  # EOF: rewind so the corpus can be iterated again
+        if line.strip() != "##DAG BEGIN":
             raise StopIteration
-        dag = sxyacc.parse(line.strip())
-        l = sxp.fsa_of_dag(dag)
-        return l
+        line = self.stream.readline().strip()
+        while line != "##DAG END":
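+            # each edge line reads "source<TAB>token_desc<TAB>target" with 1-based node ids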
+            (source, token_desc, target) = line.split("\t")
+            match = self.re_token.match(token_desc)
+            if match is None:
+                print "problem with", line
+                raise StopIteration
+            token = DagToken(match.group('tok'),com=match.group('com'),sem=match.group('sem'))
+            dag.append((int(source) - 1, int(target) - 1, token))
+            line = self.stream.readline()
+            if line == '':  # readline() returns '' only at true EOF
+                if self.allowSeek:
+                    self.stream.seek(0)
+                raise StopIteration
+            line = line.strip()
+        return dag
     
 
 ############################ my_token.py ############################
@@ -628,7 +658,7 @@ class DAGReader(CorpusReader):
 
 class Token:
     def __init__(self, string=None, wasCap=0, pos=None, label=None, proba=None, comment=None, label_pr_distrib=[],index=None,position=None):
-        if type(string) is tuple and isinstance(string[2],sxp.Token) : #DAG
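+        # DAG edges come in as (source_node, target_node, DagToken) triples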
+        if type(string) is tuple and isinstance(string[2],DagToken) : #DAG
             self.string = string[2].forme
             self.position = tuple(string[0:2])
             self.tokobj = string[2]
@@ -663,7 +693,8 @@ class Token:
             r += "%s__%s" %(self.string,self.label)
             if self.tokobj.semantique != "":
                 r += "[|%s|] " %(self.tokobj.semantique,)
-            return r
+            #return r
+            return "%s%s/%s" %(self.comment, self.string, self.label)
         if (self.wasCap):
             return "%s%s/%s" %(self.comment,self.string.upper(),self.label)
         else:
@@ -991,20 +1022,22 @@ class Instance:
         # word string-based features
         if word in self.cache:
             # if wd has been seen, use cache
-            self.add_cached_features(self.cache[word])
+            pass
+            #self.add_cached_features(self.cache[word])
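+            # cached lexical features are skipped, in line with the PACTE note below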
         else:
+            pass
             # word string
-            self.add('wd',word)
+            # NOTE: disabled for PACTE: self.add('wd',word)
             # suffix/prefix
-            wd_ln = len(word)
-            if pln > 0:
-                for i in range(1,pln+1):
-                    if wd_ln >= i:
-                        self.add('pref%i' %i, word[:i])
-            if sln > 0:
-                for i in range(1,sln+1):
-                    if wd_ln >= i:
-                        self.add('suff%i' %i, word[-i:], val)
+            #wd_ln = len(word)
+            #if pln > 0:
+            #    for i in range(1,pln+1):
+            #        if wd_ln >= i:
+            #            self.add('pref%i' %i, word[:i])
+            #if sln > 0:
+            #    for i in range(1,sln+1):
+            #        if wd_ln >= i:
+            #            self.add('suff%i' %i, word[-i:], val)
         # regex-based features
         self.add( 'nb', number.search(word) != None )
         self.add( 'hyph', hyphen.search(word) != None )
diff --git a/pkgpythonlib/sxlexer.py b/pkgpythonlib/sxlexer.py
index 3545a923c4d21070eb830f9d04f56c6005758be6..319ba1923aa7224d1690d0620b8b698319ab3de8 100644
--- a/pkgpythonlib/sxlexer.py
+++ b/pkgpythonlib/sxlexer.py
@@ -6,7 +6,8 @@ tokens = [ 'WORD','SEM_G', 'SEM_D']
 t_ignore = ' '
 t_SEM_G = r'\[\|'
 t_SEM_D = r'\|\]'
-t_WORD = r'[^_{}[\]\|\(\) ]+'
+t_WORD = r'[^{}[\]\|\(\) ]+'
+#t_WORD = r'[^_{}[\]\|\(\) ]+'
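+# underscores are lexed as part of WORD tokens rather than as literals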
 
 def t_error(t):
     t.type = t.value[0]
@@ -14,7 +15,8 @@ def t_error(t):
     t.lexer.skip(1)
     return t
 
-literals = '_{}|()'
+literals = '{}|()'
+#literals = '_{}|()'
 
 lex.lex()
 
diff --git a/pkgpythonlib/sxparser.py b/pkgpythonlib/sxparser.py
index b7bc8bb10dbeb993c846d2d27417374e2b1a9818..6acd3ff3fa2ed337d563fd314dc34cf29b060e04 100644
--- a/pkgpythonlib/sxparser.py
+++ b/pkgpythonlib/sxparser.py
@@ -128,9 +128,13 @@ def p_no_pos(p):
     p[0] = ('Forme',{"Forme":p[1], "POS":""})
 
 def p_special_char(p):
-    '''special_char : '_' WORD
-                    | '_' WORD '_' WORD'''
-    p[0] = ('Forme',{"Forme": "".join(p[1:]), "POS":"escaped"})
+    '''special_char : special_char '_' WORD
+                    | '_' WORD'''
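+    # left recursion folds each additional '_' WORD pair into the same Forme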
+    if len(p)>3:
+        p[1][1]["Forme"] += "".join(p[2:])
+        p[0] = ('Forme',p[1][1])
+    else:
+        p[0] = ('Forme',{"Forme": "".join(p[1:]), "POS":"escaped"})
 
 def p_wordlist(p):
     '''wordlist : wordlist WORD