From 9d9daaf7c7da2c0ff3e48c8934b5522f1628afc3 Mon Sep 17 00:00:00 2001 From: Pascal Denis <pascal.denis@alpage> Date: Thu, 23 Jul 2009 09:12:23 +0000 Subject: [PATCH] extra option for eval in pos_tag.py git-svn-id: https://scm.gforge.inria.fr/authscm/cfourrie/svn/lingwb/metagger/trunk@2727 dc05b511-7f1d-0410-9f1c-d6f32a2df9e4 --- bin/eval.py.in | 7 ++++--- bin/pos_tag.py.in | 12 ++++++++---- src/metagger/megam_classifier.py.in | 6 ++++-- src/metagger/pos_tagger.py.in | 12 ++++++++---- src/metagger/result_sink.py.in | 8 ++++++-- 5 files changed, 30 insertions(+), 15 deletions(-) diff --git a/bin/eval.py.in b/bin/eval.py.in index ce1a966..77df955 100644 --- a/bin/eval.py.in +++ b/bin/eval.py.in @@ -13,6 +13,7 @@ parser = optparse.OptionParser(usage=usage) parser.add_option("-g", "--gold", action="store", help="gold file") parser.add_option("-p", "--predictions", action="store", help="predictions file") parser.add_option("-d", "--dictionary", action="store", help="dictionary") +parser.add_option("-e", "--errors", action="store_true", help="show errors",default=False) (options, args) = parser.parse_args() if not (options.gold and options.predictions): @@ -33,7 +34,7 @@ unk_sink = AccuracySink() # unkown words ############ compare gold/pred files ######################################## -compare_files( options.gold, options.predictions, sink, unk_sink, known_words) +compare_files( options.gold, options.predictions, sink, unk_sink, known_words, quiet=not options.errors) ############ print out scores ######################################## @@ -41,8 +42,8 @@ sink.rpf() print sink.confusion() print -print "\nOverall Acc:", sink.score() -print "Unk wds Acc:", unk_sink.score() +print "\nOverall Acc: %s (%s/%s)" %(sink.score(),sink.correct,sink.total) +print "Unk wds Acc: %s (%s/%s)" %(unk_sink.score(),unk_sink.correct,unk_sink.total) diff --git a/bin/pos_tag.py.in b/bin/pos_tag.py.in index 4a4e9e9..4786b1e 100644 --- a/bin/pos_tag.py.in +++ b/bin/pos_tag.py.in @@ -5,7 +5,7 @@ import sys import codecs import optparse from metagger.pos_tagger import POSTagger - +from metagger.result_sink import AccuracySink, compare_files # Import Psyco if available try: @@ -25,7 +25,8 @@ parser.add_option("-p", "--prior_prec", action="store", help="set precision of g parser.add_option("-w", "--word_list", action="store", help="read in word_list", default='') parser.add_option("-d", "--tag_dict", action="store", help="read in tag dictionary", default='') parser.add_option("-l", "--lefff", action="store", help="read in Lefff DB", default='') -parser.add_option("-o", "--output_file", action="store", help="output file", default='') +parser.add_option("-o", "--output_file", action="store", help="output file", default='pos_tagger.out') +parser.add_option("-g", "--gold_file", action="store", help="reference file") (options, args) = parser.parse_args() infile = args[0] @@ -67,5 +68,8 @@ else: pos_tagger.apply( infile, beam_size=options.beam_size, outfile=options.output_file ) - - +############## eval ################################## +if options.gold_file: + sink = AccuracySink() + compare_files( options.gold_file, options.output_file, sink ) + print "Acc: %s (%s/%s)" %(sink.score(),sink.correct,sink.total) diff --git a/src/metagger/megam_classifier.py.in b/src/metagger/megam_classifier.py.in index dfcea61..a2d3f76 100755 --- a/src/metagger/megam_classifier.py.in +++ b/src/metagger/megam_classifier.py.in @@ -51,7 +51,7 @@ class MegamClassifier: def train( self, datafile, paramfile=tempfile.mktemp(), \ - prior_prec=1, repeat=4, maxit=100, bias=True, quiet=True ): + prior_prec=1, repeat=2, maxit=100, bias=True, quiet=True ): """ simple call to megam executable for multiclass classification with some relevant options: @@ -79,7 +79,9 @@ class MegamClassifier: #rc = os.spawnv(os.P_WAIT, megam_exec_path, proc) #if rc == 127: # raise Exception("Error while trying to execute "+" ".join(proc)) - os.system( " ".join(proc) ) + proc_str = " ".join(proc) + print >> sys.stderr, proc + os.system( proc_str ) print >> sys.stderr, "Megam parameters dumped into file %s" %self.paramfile # load model from output param file self.load_model() diff --git a/src/metagger/pos_tagger.py.in b/src/metagger/pos_tagger.py.in index 52b8143..28dc285 100755 --- a/src/metagger/pos_tagger.py.in +++ b/src/metagger/pos_tagger.py.in @@ -121,13 +121,15 @@ class POSTagger: return best_sequence - def apply(self, infile, beam_size=3, outfile='', encoding='latin-1'): + def apply(self, infile, beam_size=3, outfile='pos_tagger.out', encoding='latin-1'): + print >> sys.stderr, "Applying tagger on %s" %infile # open output file - out = sys.stdout - if outfile: - out = codecs.open( outfile, 'w', encoding ) + out = codecs.open( outfile, 'w', encoding ) # process sentences + s_ct = 0 for line in codecs.open( infile, 'r', encoding ): + s_ct += 1 + os.write(1, "%s" %"\b"*len(str(s_ct))+str(s_ct)) wds = line.strip().split() tokens = [] for wd in wds: @@ -137,6 +139,8 @@ class POSTagger: # print tagged sentence to output file tagged_sent = " ".join( [tok.__str__() for tok in tagged_tokens] ) print >> out, tagged_sent + # close file + out.close() return diff --git a/src/metagger/result_sink.py.in b/src/metagger/result_sink.py.in index 8b2afc7..8b70203 100644 --- a/src/metagger/result_sink.py.in +++ b/src/metagger/result_sink.py.in @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- +import sys from metagger.corpus_reader import BrownReader class ResultSink: @@ -85,7 +86,7 @@ class AccuracySink(ResultSink): -def compare_files( gold_file, pred_file, sink, unk_sink, known_words={}): +def compare_files( gold_file, pred_file, sink, unk_sink=None, known_words={}, quiet=True): gold = BrownReader( gold_file ) pred = BrownReader( pred_file ) s_ct = 0 @@ -102,7 +103,10 @@ def compare_files( gold_file, pred_file, sink, unk_sink, known_words={}): print >> sys.stderr, "Warning: Missing prediction for Sentence #%s" %s_ct # update sinks sink.update(gtag,ptag) - if not gwd in known_words: + if not quiet: + if ptag<>gtag: + print >> sys.stderr, "%s: %s <==> *%s" %(gwd,gtag,ptag) + if unk_sink and not gwd in known_words: unk_sink.update(gtag,ptag) return -- GitLab