Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 02ef556e authored by Pascal Denis's avatar Pascal Denis
Browse files

adding empty lefff

git-svn-id: https://scm.gforge.inria.fr/authscm/cfourrie/svn/lingwb/metagger/trunk@2700 dc05b511-7f1d-0410-9f1c-d6f32a2df9e4
parent 0dc85a36
No related branches found
No related tags found
No related merge requests found
...@@ -2,26 +2,29 @@ ...@@ -2,26 +2,29 @@
AUTOMAKE_OPTIONS = foreign AUTOMAKE_OPTIONS = foreign
bin_SCRIPTS = build_word_list.py \ bin_SCRIPTS = build_word_list.py \
build_tag_dict.py \ build_tag_dict.py \
learning_curves.py \ learning_curves.py \
pos_tag.py \ pos_tag.py \
lemmatizer.pl \ lemmatizer.pl \
eval.py eval.py \
run_learning_curves_exp.py
CLEANFILES = build_word_list.py \ CLEANFILES = build_word_list.py \
build_tag_dict.py \ build_tag_dict.py \
learning_curves.py \ learning_curves.py \
pos_tag.py \ pos_tag.py \
lemmatizer.pl \ lemmatizer.pl \
eval.py eval.py \
run_learning_curves_exp.py
BUILT_SOURCES = build_word_list.py \ BUILT_SOURCES = build_word_list.py \
build_tag_dict.py \ build_tag_dict.py \
learning_curves.py \ learning_curves.py \
pos_tag.py \ pos_tag.py \
lemmatizer.pl \ lemmatizer.pl \
eval.py eval.py \
run_learning_curves_exp.py
EDIT = perl -pe "s|\@datadir\@|\$(pkgdatadir)|g; \ EDIT = perl -pe "s|\@datadir\@|\$(pkgdatadir)|g; \
s|\@libdir\@|\$(pkglibdir)|g; \ s|\@libdir\@|\$(pkglibdir)|g; \
......
#!/usr/bin/env python
import sys
import os
import re
import optparse
from metagger.pos_tagger import POSTagger
from metagger.result_sink import AccuracySink, compare_files
from metagger.utils import tag_dict, word_list
#### I/O: command-line interface for the learning-curve experiment driver.
# Expects one positional argument (the input file to tag) plus the four
# options below; `options` and `infile` are read by the rest of the script.
usage = "usage: %prog [options] <input_file>"
parser = optparse.OptionParser(usage=usage)
parser.add_option("-t", "--training", action="store", help="training data directory")
parser.add_option("-l", "--lefff", action="store", help="lefff directory")
parser.add_option("-v", "--values", action="store", help="different values for precision priors")
parser.add_option("-g", "--gold", action="store", help="gold file")
(options, args) = parser.parse_args()
infile = args[0]
# Candidate prior precisions to sweep, e.g. [1, 0.1, 0.01, 0.001, 10, 100]
values = [1]
if options.values:
    # BUG FIX: parse as float, not int -- precision priors are fractional
    # (see the example list above); int("0.1") raises ValueError.
    values = map(float, options.values.split(','))
#### gather training files (hidden files excluded), ordered smallest-first
train_files = sorted(
    (os.path.join(options.training, name)
     for name in os.listdir(options.training)
     if not name.startswith('.')),
    key=os.path.getsize)
#### gather lefff files (hidden files excluded), ordered smallest-first
lefff_files = sorted(
    (os.path.join(options.lefff, name)
     for name in os.listdir(options.lefff)
     if not name.startswith('.')),
    key=os.path.getsize)
# Open the experiment log and emit the column header; flushed eagerly so
# partial results survive a crash mid-sweep.
log_filename = "learning_curves_exp.log"
log_file = open(log_filename, 'w')
log_file.write("%15s %15s %15s | %15s %15s\n"
               % ("Train. size", "Lefff size", "Prior prec.", "Acc.", "Unk. Acc."))
log_file.flush()
#### run one experiment per (training file, lefff file, prior precision)
# Each experiment trains a fresh POSTagger, tags `infile`, and logs overall
# and unknown-word accuracy against the gold file.
exp_ct = 0
for tf in train_files:
    # training-set size is encoded in the file name,
    # e.g. "ftb4+mergeC+undocpd_1_<size>."
    ftb_size = re.search(r"ftb4\+mergeC\+undocpd\_1\_(\d+)\.",tf).group(1)
    # tag dictionary and known-word list are derived once per training file
    tags = tag_dict( tf )
    wds = word_list( tf )
    for lf in lefff_files:
        # lefff size is encoded in the file name, e.g. "lefff_<size>."
        lefff_size = re.search(r"lefff\_(\d+)\.",lf).group(1)
        # NOTE(review): file_infix ignores v, so model/prediction files are
        # overwritten across prior-precision values; only the log keeps all
        # scores -- confirm this is intended.
        file_infix = "tb%s-lefff%s" %(ftb_size,lefff_size)
        for v in values:
            exp_ct += 1
            print >> sys.stderr, "Exp #%s: train size: %s, lefff size: %s, lambda: %s" %(exp_ct, ftb_size, lefff_size, v)
            # initialize tagger
            pos_tagger = POSTagger()
            # load data
            pos_tagger.tag_dict = tags
            pos_tagger.known_words = wds
            pos_tagger.load_lefff( lf )
            # train model
            modelfile = file_infix+'.megam'
            # BUG FIX: pass the current prior precision v (was hard-coded to
            # 1, which made the sweep over `values` a training no-op even
            # though v was reported in the log's "Prior prec." column)
            pos_tagger.train_model( tf, prior_prec=v, model_path=modelfile )
            # apply tagger
            predfile = file_infix+'.pred'
            pos_tagger.apply( infile, outfile=predfile )
            # eval: overall accuracy and unknown-word accuracy
            sink = AccuracySink()
            unk_sink = AccuracySink() # unknown words
            compare_files( options.gold, predfile, sink, unk_sink, pos_tagger.known_words)
            print >> log_file, "%15s %15s %15s | %15s %15s" %(ftb_size,lefff_size,v,sink.score(),unk_sink.score())
            # flush so results are on disk even if a later experiment crashes
            log_file.flush()
log_file.close()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment