diff --git a/bin/baseline_tagger.pl b/bin/baseline_tagger.pl
new file mode 100755
index 0000000000000000000000000000000000000000..74ef67a29737fbfac18099077043046afd02c195
--- /dev/null
+++ b/bin/baseline_tagger.pl
@@ -0,0 +1,138 @@
+#!/usr/bin/perl
+
+# baseline_tagger.pl -- most-frequent-category baseline tagger.
+#
+# Usage: baseline_tagger.pl TRAINING_CORPUS [LEXICON] [INPUT_FILE ...]
+#
+# TRAINING_CORPUS holds one sentence per line, written as space-separated
+# "form/category" tokens.  LEXICON is optional; it is read as tab-separated
+# lines whose first two columns are a form and a category.  The text to tag
+# comes from the remaining INPUT_FILE arguments, or from standard input when
+# there are none; a "/category" suffix on an input token is discarded.
+#
+# Every input token is printed as "form/category".  The category is, in
+# order of preference:
+#   1. the category seen most often with that form in the training corpus;
+#   2. among the lexicon categories of the form (as written, then
+#      lower-cased), the one most frequent in the training corpus;
+#   3. the most frequent category of the training corpus.
+
+use strict;
+use warnings;
+
+my $training_corpus = shift @ARGV;
+die "Please provide a training corpus"
+  unless defined $training_corpus && $training_corpus ne "";
+
+my $lexicon_file = shift @ARGV;
+$lexicon_file = "" unless defined $lexicon_file;
+
+# Count how often each form carries each category, and how often each
+# category occurs overall.
+my %form_cat2occ;    # form => { category => occurrences }
+my %cat2occ;         # category => occurrences
+
+open(my $train_fh, '<', $training_corpus)
+  or die "Could not open training corpus $training_corpus: $!";
+while (my $sentence = <$train_fh>) {
+  chomp $sentence;
+  $sentence =~ s/^ +//;
+  $sentence =~ s/ +$//;
+  for my $token (split / +/, $sentence) {
+    # The first group is greedy, so the category is what follows the LAST
+    # slash and a form may itself contain "/".
+    $token =~ /^(.*)\/(.*)$/ or die "Format error: $token";
+    $form_cat2occ{$1}{$2}++;
+    $cat2occ{$2}++;
+  }
+}
+close($train_fh);
+
+# Optional lexicon: form => { category => 1 }.
+my %lexicon;
+if ($lexicon_file ne "") {
+  open(my $lexicon_fh, '<', $lexicon_file)
+    or die "Could not open $lexicon_file: $!";
+  while (my $entry = <$lexicon_fh>) {
+    # Every space becomes "_" and a hyphen between two non-blank characters
+    # becomes "_-_" (presumably to match how the corpora write multi-word
+    # and hyphenated forms -- TODO confirm).
+    # NOTE(review): matches cannot overlap, so in "a-b-c" only the first
+    # hyphen is rewritten.
+    $entry =~ s/ /_/g;
+    $entry =~ s/(\S)-(\S)/${1}_-_${2}/g;
+    # Skip lines that lack two tab-terminated columns: after a failed match
+    # $1 and $2 would still hold the groups of an earlier match.
+    next unless $entry =~ /^(.*?)\t(.*?)\t/;
+    $lexicon{$1}{$2} = 1;
+  }
+  close($lexicon_fh);
+}
+
+# For every training form keep its most frequent category.  Categories are
+# visited in sorted order so that ties are resolved the same way on every
+# run instead of depending on hash ordering.
+my %form2baseline_cat;
+for my $form (keys %form_cat2occ) {
+  my $occ_of   = $form_cat2occ{$form};
+  my $best_occ = 0;
+  for my $cat (sort keys %$occ_of) {
+    next unless $occ_of->{$cat} > $best_occ;
+    $best_occ = $occ_of->{$cat};
+    $form2baseline_cat{$form} = $cat;
+  }
+}
+%form_cat2occ = ();    # the per-form counts are no longer needed
+
+# Categories from most to least frequent; equal counts fall back to
+# alphabetical order, again to keep the output reproducible.
+my @ordered_cats =
+  sort { $cat2occ{$b} <=> $cat2occ{$a} || $a cmp $b } keys %cat2occ;
+
+# Reduce every lexicon entry to the single category that ranks highest in
+# the training corpus; a form whose categories never occur there gets no
+# entry.
+my %disambiguated_lexicon;
+for my $form (keys %lexicon) {
+  for my $cat (@ordered_cats) {
+    next unless exists $lexicon{$form}{$cat};
+    $disambiguated_lexicon{$form} = $cat;
+    last;
+  }
+}
+%lexicon = ();
+
+# Fallback category for unknown forms.  An empty training corpus provides
+# none: report it and keep the historical behaviour of an empty category.
+my $best_cat = $ordered_cats[0];
+if (!defined $best_cat) {
+  warn "No tagged token found in $training_corpus\n";
+  $best_cat = "";
+}
+
+# Tag the input, one sentence per line.
+# NOTE(review): no I/O encoding layer is set, so lc() may leave accented
+# capitals unchanged -- confirm the encoding of the corpora.
+while (my $sentence = <>) {
+  chomp $sentence;
+  $sentence =~ s/^ +//;
+  $sentence =~ s/ +$//;
+  my @tagged;
+  for my $token (split / +/, $sentence) {
+    # Drop an existing "/category" suffix.  A token without any slash is
+    # taken as is; $1 is only meaningful after a successful match.
+    my $form = ($token =~ /^(.*)\/.*$/) ? $1 : $token;
+    my $cat;
+    if (exists $form2baseline_cat{$form}) {
+      $cat = $form2baseline_cat{$form};
+    } elsif (exists $disambiguated_lexicon{$form}) {
+      $cat = $disambiguated_lexicon{$form};
+    } elsif (exists $disambiguated_lexicon{lc $form}) {
+      $cat = $disambiguated_lexicon{lc $form};
+    } else {
+      $cat = $best_cat;
+    }
+    push @tagged, "$form/$cat";
+  }
+  print join(" ", @tagged), "\n";
+}