diff --git a/src/metagger/Makefile.am b/src/metagger/Makefile.am
index 3f796738f353ab3de31374829b7290da47e4a5e9..7963196eb92e5a0e79f049d907e1df45fbae7805 100644
--- a/src/metagger/Makefile.am
+++ b/src/metagger/Makefile.am
@@ -7,21 +7,27 @@ pkgpython_PYTHON = 	__init__.py		\
 			instance.py		\
 			mytoken.py		\
 			megam_classifier.py	\
-			result_sink.py 
+			result_sink.py 		\
+			utils.py		\
+			corpus_reader.py
 
 CLEANFILES = 		__init__.py		\
 			pos_tagger.py		\
 			instance.py		\
 			mytoken.py		\
 			megam_classifier.py	\
-			result_sink.py
+			result_sink.py 		\
+			utils.py		\
+			corpus_reader.py
 
 BUILT_SOURCES = 	__init__.py		\
 			pos_tagger.py		\
 			instance.py		\
 			mytoken.py		\
 			megam_classifier.py	\
-			result_sink.py
+			result_sink.py 		\
+			utils.py		\
+			corpus_reader.py
 
 EDIT = perl -pe "s|\@alexinadir\@|$(alexinadir)|g;	\
 		s|\@datadir\@|\$(pkgdatadir)|g; 	\
diff --git a/src/metagger/corpus_reader.py b/src/metagger/corpus_reader.py
new file mode 100644
index 0000000000000000000000000000000000000000..b354f3340ff06412200f407b85c984e103fee16c
--- /dev/null
+++ b/src/metagger/corpus_reader.py
@@ -0,0 +1,46 @@
+""" Corpus reader """
+
+import sys
+import codecs
+import re
+
+WD_TAG_RE = re.compile(r'^(.+?)/(.+)$')  # one "word/TAG" token; lazy 1st group => split at the earliest usable '/'
+
+
+class CorpusReader:  # common base type for the corpus readers in this module
+    pass  # empty body: defines no attributes or methods of its own
+
+
+class BrownReader(CorpusReader):
+    """Iterate over a Brown-format corpus, yielding each line as a list of (word, tag) tuples.
+
+    Sample lines ("<E9>" presumably stands for byte 0xE9, e-acute in Latin-1):
+    Le/DET prix/NC de/P vente/NC du/P+D journal/NC n'/ADV a/V pas/ADV <E9>t<E9>/VPP divulgu<E9>/VPP ./PONCT
+    Le/DET cabinet/NC Arthur/NPP Andersen/NPP ,/PONCT administrateur/NC judiciaire/ADJ du/P+D titre/NC depuis/P l'/DET effondrement/NC de/P l'/DET empire/NC Maxwell/NPP en/P d<E9>cembre/NC 1991/NC ,/PONCT a/V indiqu<E9>/VPP que/CS les/DET nouveaux/ADJ propri<E9>taires/NC allaient/V poursuivre/VINF la/DET publication/NC ./PONCT
+    """
+
+    def __init__(self, infile, encoding='latin-1'):
+        """Open `infile` for reading, decoded with `encoding`."""
+        self.stream = codecs.open(infile, 'r', encoding)
+
+    def __iter__(self):
+        """Return self; the reader is its own iterator."""
+        return self
+
+    def next(self):
+        """Return the next non-blank line as a list of (word, tag) tuples.
+
+        Raises StopIteration at end of file, after rewinding the stream;
+        a token that does not match WD_TAG_RE raises AttributeError.
+        """
+        while True:
+            line = self.stream.readline()
+            if line == '':  # only true EOF is empty; a blank line still carries its '\n'
+                # rewind so the same reader can be iterated again
+                self.stream.seek(0)
+                raise StopIteration
+            tokens = line.split()
+            if tokens:  # skip blank lines instead of mistaking them for EOF
+                return [WD_TAG_RE.match(tok).groups() for tok in tokens]
+
+    __next__ = next  # Python 3 spelling of the iterator protocol
diff --git a/src/metagger/corpus_reader.py.in b/src/metagger/corpus_reader.py.in
new file mode 100644
index 0000000000000000000000000000000000000000..b354f3340ff06412200f407b85c984e103fee16c
--- /dev/null
+++ b/src/metagger/corpus_reader.py.in
@@ -0,0 +1,46 @@
+""" Corpus reader """
+
+import sys
+import codecs
+import re
+
+WD_TAG_RE = re.compile(r'^(.+?)/(.+)$')  # one "word/TAG" token; lazy 1st group => split at the earliest usable '/'
+
+
+class CorpusReader:  # common base type for the corpus readers in this module
+    pass  # empty body: defines no attributes or methods of its own
+
+
+class BrownReader(CorpusReader):
+    """Iterate over a Brown-format corpus, yielding each line as a list of (word, tag) tuples.
+
+    Sample lines ("<E9>" presumably stands for byte 0xE9, e-acute in Latin-1):
+    Le/DET prix/NC de/P vente/NC du/P+D journal/NC n'/ADV a/V pas/ADV <E9>t<E9>/VPP divulgu<E9>/VPP ./PONCT
+    Le/DET cabinet/NC Arthur/NPP Andersen/NPP ,/PONCT administrateur/NC judiciaire/ADJ du/P+D titre/NC depuis/P l'/DET effondrement/NC de/P l'/DET empire/NC Maxwell/NPP en/P d<E9>cembre/NC 1991/NC ,/PONCT a/V indiqu<E9>/VPP que/CS les/DET nouveaux/ADJ propri<E9>taires/NC allaient/V poursuivre/VINF la/DET publication/NC ./PONCT
+    """
+
+    def __init__(self, infile, encoding='latin-1'):
+        """Open `infile` for reading, decoded with `encoding`."""
+        self.stream = codecs.open(infile, 'r', encoding)
+
+    def __iter__(self):
+        """Return self; the reader is its own iterator."""
+        return self
+
+    def next(self):
+        """Return the next non-blank line as a list of (word, tag) tuples.
+
+        Raises StopIteration at end of file, after rewinding the stream;
+        a token that does not match WD_TAG_RE raises AttributeError.
+        """
+        while True:
+            line = self.stream.readline()
+            if line == '':  # only true EOF is empty; a blank line still carries its '\n'
+                # rewind so the same reader can be iterated again
+                self.stream.seek(0)
+                raise StopIteration
+            tokens = line.split()
+            if tokens:  # skip blank lines instead of mistaking them for EOF
+                return [WD_TAG_RE.match(tok).groups() for tok in tokens]
+
+    __next__ = next  # Python 3 spelling of the iterator protocol