Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 89be5dc8 authored by Benoît Sagot
Browse files

one more feature + several commented (tried) features

git-svn-id: https://scm.gforge.inria.fr/authscm/cfourrie/svn/lingwb/metagger/trunk@2796 dc05b511-7f1d-0410-9f1c-d6f32a2df9e4
parent 7d181c7f
No related branches found
No related tags found
No related merge requests found
......@@ -6,6 +6,7 @@ import re
# Regexes for word-form feature computation.
# Raw string literals are used wherever the pattern contains a backslash, so
# that "\d" and "\-" reach the regex engine verbatim instead of being parsed
# as (invalid) Python string escapes, which newer interpreters warn about.
number = re.compile(r"\d")          # a digit character
hyphen = re.compile(r"\-")          # a literal hyphen
underscore = re.compile("_")        # a literal underscore
upper = re.compile("[A-Z]")         # an ASCII uppercase letter
allcaps = re.compile("^[A-Z]+$")    # ASCII uppercase letters only
......@@ -63,25 +64,33 @@ class Instance:
self.add('pref2',word[:2])
self.add('pref3',word[:3])
self.add('pref4',word[:4])
# self.add('pref5',word[:5]) dégrade
self.add('suff1',word[-1])
self.add('suff2',word[-2:])
self.add('suff3',word[-3:])
self.add('suff4',word[-4:])
# self.add('suff5',word[-5:]) dégrade
#### word form features
self.get_wf_features(word, index)
#### lefff tags
if word in lefff:
self.add_lefff_features(word,lefff)
# else:
# self.add("lefff","unknown") dégrade
return
def get_conx_features(self,index,tokens,lefff,w=2):
# for p in range(-w,w+1):
for p in range(-w,w+1):
if p == 0: continue # skip current wd
absp = index+p
word = self.get_conx_wd(tokens,absp)
#### lexical features
self.add('wd%s' %p,word)
# self.add( 'nb%s' %p, number.search(word) != None)
uc = upper.search(word)
self.add( 'uc%s' %p, uc != None)
#### lefff tags
if word in lefff:
self.add_lefff_features(word,lefff,feat_suffix=p)
......@@ -91,10 +100,12 @@ class Instance:
def get_wf_features(self, word, index):
    """Record the word-form (surface shape) features of ``word``.

    Each feature is stored through ``self.add(name, value)`` with a
    boolean value:

    * ``nb``   -- ``number`` matches somewhere in the word
    * ``hyph`` -- ``hyphen`` matches somewhere in the word
    * ``uc``   -- ``upper`` matches somewhere in the word
    * ``niuc`` -- same as ``uc``, but only true when ``index > 0``
    * ``auc``  -- ``allcaps`` matches the word

    :param word: the word form to inspect.
    :param index: position of the word; here it is only compared with 0.
    """
    self.add('nb', number.search(word) is not None)
    self.add('hyph', hyphen.search(word) is not None)
    # Tried and left disabled:
    # self.add('underscore', underscore.search(word) is not None)
    # Search once and reuse the result for both uppercase features.
    has_upper = upper.search(word) is not None
    self.add('uc', has_upper)
    self.add('niuc', has_upper and index > 0)
    self.add('auc', allcaps.match(word) is not None)
    # Tried and left disabled:
    # self.add('niauc', allcaps.match(word) is not None and index > 0)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment