Commit 00713268 authored by Pascal Denis

changes to *.in files

git-svn-id: https://scm.gforge.inria.fr/authscm/cfourrie/svn/lingwb/metagger/trunk@2686 dc05b511-7f1d-0410-9f1c-d6f32a2df9e4
parent 385def6a
@@ -5,17 +5,20 @@ AUTOMAKE_OPTIONS = foreign
 bin_SCRIPTS = build_word_list.py \
     build_tag_dict.py \
     learning_curves.py \
-    pos_tag.py
+    pos_tag.py \
+    eval.py
 CLEANFILES = build_word_list.py \
     build_tag_dict.py \
     learning_curves.py \
-    pos_tag.py
+    pos_tag.py \
+    eval.py
 BUILT_SOURCES = build_word_list.py \
     build_tag_dict.py \
     learning_curves.py \
-    pos_tag.py
+    pos_tag.py \
+    eval.py
 EDIT = perl -pe "s|\@alexinadir\@|$(alexinadir)|g;\
     s|\@datadir\@|\$(pkgdatadir)|g; \
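The EDIT variable above is what acts on the *.in files named in the commit message: each installed script is presumably generated from a matching template (e.g. eval.py.in) by piping it through this perl filter, which swaps autoconf-style @placeholders@ for configured paths. A minimal Python sketch of that substitution step, with illustrative placeholder values rather than the real configure output:

# Hedged sketch of the EDIT substitution applied to a *.in template;
# the values below are assumptions, not the real configure-time paths.
def edit_template(text, substitutions):
    """Replace each autoconf-style @key@ placeholder with its value."""
    for key, value in substitutions.items():
        text = text.replace("@%s@" % key, value)
    return text

template = "ALEXINA_DIR = '@alexinadir@'\nDATA_DIR = '@datadir@'\n"
print(edit_template(template, {"alexinadir": "/usr/share/alexina",
                               "datadir": "/usr/share/metagger"}))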
File moved
File moved
@@ -12,7 +12,6 @@ from collections import defaultdict
 from metagger.mytoken import Token
 from metagger.megam_classifier import MegamClassifier
 from metagger.instance import Instance
-from metagger.result_sink import AccuracySink
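The dropped import reflects evaluation moving out of the tagger itself, presumably into the new eval.py script. Previously apply() pushed every gold/predicted tag pair into an AccuracySink; the real class lives in metagger.result_sink and is not shown in this diff, so the following accumulator is only a hedged sketch of its apparent interface, reconstructed from the removed sink.update(t.pos, t.label, t.label_pr_distrib) call:

class AccuracySink(object):
    """Sketch of an AccuracySink-style accumulator; the real
    metagger.result_sink implementation may differ."""
    def __init__(self):
        self.correct = 0
        self.total = 0

    def update(self, gold, predicted, label_pr_distrib=None):
        # Count a token as correct when the predicted label matches the
        # gold tag; the probability distribution is accepted for
        # interface compatibility but unused in this sketch.
        self.total += 1
        if gold == predicted:
            self.correct += 1

    def accuracy(self):
        return float(self.correct) / self.total if self.total else 0.0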
@@ -40,7 +39,7 @@ class POSTagger:
-    def train_model(self,_file):
+    def train_model(self,_file, model_path="./model.megam"):
         print >> sys.stderr, "Generating training data..."
         train_inst_file = self.generate_training_data(_file)
         print >> sys.stderr, "Data file: %s" %train_inst_file
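The new model_path keyword makes the output location of the trained MEGAM model a caller choice instead of a hard-coded path. Hypothetical usage, with file names that are placeholders only:

tagger = POSTagger()                                     # constructor args assumed
tagger.train_model("train.txt")                          # writes ./model.megam
tagger.train_model("train.txt", model_path="fr.megam")   # explicit model file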
@@ -82,7 +81,7 @@ class POSTagger:
     def tag_token_sequence(self, tokens):
         ''' N-best breadth search for the best tag sequence for each sentence'''
         # maintain N-best sequences of tagged tokens
-        sequences = [([],0.0)] # log prob.
+        sequences = [([],0.0)]  # log prob.
         for i in range(len(tokens)):
             token = tokens[i]
             n_best_sequences = []
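tag_token_sequence scans the sentence left to right, keeping the N best partial tag sequences together with their summed log-probabilities and extending each one with every candidate tag of the next token. A generic, self-contained sketch of that N-best breadth search, where candidate_tags stands in for the MEGAM classifier's label distribution and is purely an assumption:

import math

def n_best_tag_search(tokens, candidate_tags, n=5):
    """Extend every kept partial sequence with every candidate tag,
    then prune back to the n highest-scoring sequences."""
    sequences = [([], 0.0)]  # (tag sequence, summed log prob.)
    for token in tokens:
        extended = []
        for tags, logp in sequences:
            for tag, prob in candidate_tags(token, tags):
                extended.append((tags + [tag], logp + math.log(prob)))
        extended.sort(key=lambda pair: pair[1], reverse=True)
        sequences = extended[:n]  # prune to the n best
    return sequences[0][0]  # tags of the best complete sequence

With n=1 this degenerates to greedy tagging; a larger n trades speed for a better chance of recovering from an early tagging mistake.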
@@ -119,20 +118,20 @@ class POSTagger:
         return best_sequence
-    def apply(self, _file, sink, outfile=''):
+    def apply(self, _file, outfile=''):
-        # open output file
-        out = sys.stdout
-        if outfile:
-            out = codecs.open( outfile, 'w' )
         # process sentences
         for line in codecs.open(_file, 'r', 'latin-1'):
             line = line.strip()
             tokens = []
             for item in line.split():
-                token = Token()
-                token.set( item )
+                token = Token( string=item )
                 tokens.append( token )
             tagged_tokens = self.tag_token_sequence( tokens )
-            for t in tagged_tokens:
-                sink.update(t.pos,t.label,t.label_pr_distrib)
+            out = sys.stdout
+            if outfile:
+                out = codecs.open( outfile, 'w' )
             # print tagged sentence to output file
             tagged_sent = " ".join( [tok.__str__() for tok in tagged_tokens] )
             print >> out, tagged_sent
         return
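With the sink parameter gone, apply() is now a pure tagging pass: read a latin-1 input file, tag each sentence, and print it to stdout or the given output file. Note also the new single-step Token( string=item ) constructor replacing the two-step Token()/set() idiom. Hypothetical driver code for the new signature; file names are placeholders:

tagger = POSTagger()                             # constructor args assumed
tagger.apply("input.txt", outfile="tagged.txt")  # omit outfile to print to stdout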