...
 
Commits (3)
......@@ -22,8 +22,6 @@
../ZEN/minimap.cmi
../ZEN/minimap.cmx : ../ZEN/share.cmx ../ZEN/lexmap.cmx ../ZEN/deco.cmx \
../ZEN/minimap.cmi
../ZEN/zen_lexer.cmo :
../ZEN/zen_lexer.cmx :
control.cmo :
control.cmx :
version.cmo :
......@@ -32,8 +30,10 @@ date.cmo : version.cmo
date.cmx : version.cmx
canon.cmo : ../ZEN/word.cmo
canon.cmx : ../ZEN/word.cmx
transduction.cmo : ../ZEN/zen_lexer.cmo
transduction.cmx : ../ZEN/zen_lexer.cmx
min_lexer.cmo :
min_lexer.cmx :
transduction.cmo : ../ZEN/zen_lexer.cmo min_lexer.cmo
transduction.cmx : ../ZEN/zen_lexer.cmx min_lexer.cmx
encode.cmo : ../ZEN/word.cmo transduction.cmo phonetics.cmo canon.cmo
encode.cmx : ../ZEN/word.cmx transduction.cmx phonetics.cmx canon.cmx
order.cmo :
......@@ -61,8 +61,7 @@ index.cmx : web.cmx ../ZEN/trie.cmx ../ZEN/list2.cmx ../ZEN/gen.cmx \
phonetics.cmo : ../ZEN/list2.cmo canon.cmo
phonetics.cmx : ../ZEN/list2.cmx canon.cmx
int_sandhi.cmo : ../ZEN/word.cmo phonetics.cmo encode.cmo canon.cmo
int_sandhi.cmx : ../ZEN/word.cmx ../ZEN/zen_lexer.cmx phonetics.cmx encode.cmx \
canon.cmx
int_sandhi.cmx : ../ZEN/word.cmx phonetics.cmx encode.cmx canon.cmx
skt_morph.cmi :
morphology.cmi : ../ZEN/word.cmo skt_morph.cmi ../ZEN/lexmap.cmo
naming.cmo : ../ZEN/word.cmo web.cmo skt_morph.cmi ../ZEN/gen.cmo encode.cmo \
......@@ -70,12 +69,11 @@ naming.cmo : ../ZEN/word.cmo web.cmo skt_morph.cmi ../ZEN/gen.cmo encode.cmo \
naming.cmx : ../ZEN/word.cmx web.cmx skt_morph.cmi ../ZEN/gen.cmx encode.cmx \
../ZEN/deco.cmx
inflected.cmi : ../ZEN/word.cmo skt_morph.cmi naming.cmo morphology.cmi \
int_sandhi.cmi ../ZEN/deco.cmx ../ZEN/zen_lexer.cmx
../ZEN/deco.cmo
inflected.cmo : ../ZEN/word.cmo skt_morph.cmi phonetics.cmo naming.cmo \
morphology.cmi ../ZEN/lexmap.cmo encode.cmo ../ZEN/deco.cmo inflected.cmi
inflected.cmx : ../ZEN/word.cmx skt_morph.cmi phonetics.cmx naming.cmx \
morphology.cmi ../ZEN/lexmap.cmx encode.cmx ../ZEN/deco.cmx inflected.cmi \
../ZEN/zen_lexer.cmx
morphology.cmi ../ZEN/lexmap.cmx encode.cmx ../ZEN/deco.cmx inflected.cmi
sandhi.cmo : ../ZEN/word.cmo phonetics.cmo ../ZEN/list2.cmo encode.cmo \
canon.cmo
sandhi.cmx : ../ZEN/word.cmx phonetics.cmx ../ZEN/list2.cmx encode.cmx \
......
This diff is collapsed.
......@@ -4,7 +4,7 @@
(* *)
(* Gérard Huet *)
(* *)
(* ©2017 Institut National de Recherche en Informatique et en Automatique *)
(* ©2018 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************)
(* A simple lexer recognizing idents formed from ASCII letters and integers
......
(**************************************************************************)
(* *)
(* The Zen Computational Linguistics Toolkit *)
(* *)
(* Gérard Huet *)
(* *)
(* ©2007 Institut National de Recherche en Informatique et en Automatique *)
(**************************************************************************)
(* A very simple lexer recognizing 1 character idents and integers
and skipping spaces and comments between [%] and eol;
used for various transduction tasks with Camlp4 Grammars.
It is a copy of ZEN/zen_lexer.ml in order to simplify dependencies. *)
(*i module Min_lexer = struct i*)
open Camlp4.PreCast;
open Format;
module Loc = Loc; (* Using the PreCast Loc *)
module Error = Camlp4.Struct.EmptyError; (* Dummy Error module *)
(* Token component of the lexer: the token type, the keyword filter and
   the printing and matching helpers. *)
module Token = struct
  module Loc = Loc;

  (* The four kinds of lexemes produced by this lexer. *)
  type t =
    [ KEYWORD of string
    | LETTER of string
    | INT of int
    | EOI
    ];

  module Error = Error;

  module Filter = struct
    type token_filter = Camlp4.Sig.stream_filter t Loc.t;

    (* A filter is represented by the predicate that accepts keywords. *)
    type t = string -> bool;

    value mk is_kwd = is_kwd;

    (* Copies the token stream lazily. A KEYWORD whose text is rejected
       by [is_kwd] aborts with [Failure]; every other token is passed
       through unchanged. *)
    value rec filter is_kwd = parser
      [ [: `((KEYWORD text, _) as item); rest :] ->
          if not (is_kwd text) then failwith ("Undefined token: " ^ text)
          else [: `item; filter is_kwd rest :]
      | [: `item; rest :] -> [: `item; filter is_kwd rest :]
      | [: :] -> [: :]
      ];

    (* The three hooks below ignore their arguments and do nothing. *)
    value define_filter _ _ = ();
    value keyword_added _ _ _ = ();
    value keyword_removed _ _ = ();
  end;

  (* Printable form of a token: constructor name followed by its payload. *)
  value to_string tok =
    match tok with
    [ KEYWORD text -> sprintf "KEYWORD %S" text
    | LETTER text -> sprintf "LETTER %S" text
    | INT n -> sprintf "INT %d" n
    | EOI -> "EOI"
    ];

  (* Prints [to_string tok] on the formatter [ppf]. *)
  value print ppf tok = pp_print_string ppf (to_string tok);

  (* True exactly when [tok] is the keyword spelled [kwd]. *)
  value match_keyword kwd tok =
    match tok with
    [ KEYWORD text -> text = kwd
    | _ -> False
    ];

  (* Text carried by a token; EOI yields the empty string. *)
  value extract_string tok =
    match tok with
    [ KEYWORD text | LETTER text -> text
    | INT n -> string_of_int n
    | EOI -> ""
    ];
end;
open Token
;
(* The string buffering machinery. *) (*i ddr + np i*)
(* Appends character [c] to [buf] and returns that same buffer, so that
   the call can be nested inside a larger expression. *)
value store buf c =
  let () = Buffer.add_char buf c in
  buf
;
(* Accumulates a run of decimal digits into [buf]. When the next
   character is not a digit, or the stream is exhausted, that character
   is left unread and the contents of [buf] are returned as a string. *)
value rec number buf =
  parser
  [ [: `('0'..'9' as digit); rest :] ->
      let extended = store buf digit in
      number extended rest
  | [: :] -> Buffer.contents buf
  ]
;
(* Consumes the rest of a comment. Characters are discarded up to and
   including the first newline, byte 26 or byte 12. If the stream ends
   before any of these is seen, the comment simply ends there: a comment
   on the last line of the input does not need a trailing newline.
   Always returns unit. *)
value rec skip_to_eol =
  parser
  [ [: `'\n' | '\026' | '\012' :] -> ()
  | [: `_ ; s :] -> skip_to_eol s
  | [: :] -> ()   (* end of input also terminates the comment *)
  ]
;
(* Character-stream tokenizer. Applied to a char stream, it returns
   [(tok, (bp, ep))] where [bp] and [ep] are the stream positions just
   before and just after the token.

   Blanks (space, newline, carriage return, tab, byte 26, byte 12) and
   comments introduced by a percent sign are skipped before the token
   start position is taken. Comments are skipped here rather than in
   the single-token parser, so blanks that follow a comment are skipped
   too instead of being returned as one-character keywords.

   At end of input the result is [(EOI, (bp, succ bp))]. *)
value next_token_fun =
  (* Reads exactly one token; blanks and comments are handled below.
     Letters are a..z, A..Z and bytes 192..246 and 248..255; underscore
     is not accepted as a letter. Any other character that is not a
     digit becomes a one-character KEYWORD. *)
  let token =
    parser
    [ [: `('a'..'z' | 'A'..'Z' | '\192'..'\246' | '\248'..'\255' as c) :] ->
        LETTER (String.make 1 c)
    | [: `('0'..'9' as c); s = number (store (Buffer.create 80) c) :] ->
        INT (int_of_string s)
    | [: `c :] -> KEYWORD (String.make 1 c)
    ] in
  let rec next_token_loc =
    parser bp
    [ [: `' ' | '\n' | '\r' | '\t' | '\026' | '\012'; s :] -> next_token_loc s
    | [: `'%'; _ = skip_to_eol; s :] -> next_token_loc s
    | [: tok = token :] ep -> (tok, (bp, ep))
    | [: _ = Stream.empty :] -> (EOI, (bp, succ bp))
    ] in
  next_token_loc
;
(* Lexer entry point. [mk () init_loc cstrm] is a stream of located
   tokens read on demand from the character stream [cstrm]. Each token
   location is [init_loc] with its stop moved by the end offset of the
   token and its start moved by the begin offset. The generator never
   returns [None]. *)
value mk () init_loc cstrm =
  let next_located _ =
    let (tok, (bp, ep)) = next_token_fun cstrm in
    let stop_moved = Loc.move `stop ep init_loc in
    Some (tok, Loc.move `start bp stop_moved)
  in
  Stream.from next_located
;
(*i end; i*)
......@@ -11,9 +11,9 @@
open Camlp4.PreCast; (* MakeGram Loc *)
module Gram = MakeGram Zen_lexer
module Gram = MakeGram Min_lexer
;
open Zen_lexer.Token
open Min_lexer.Token
;
value transducer trad t =
try Gram.parse_string trad Loc.ghost t with
......
......@@ -396,10 +396,9 @@ XML_MORPHOLOGY=$(SL_XML_MORPHOLOGY) $(WX_XML_MORPHOLOGY) $(XML_DTD)
all:
test -e $(ZEN) && echo "Zen toolkit detected" && exit 0 \
|| echo "You should first install the Zen library at $(ZEN)" && exit 1
cd $(ZEN); make
cd $(ZEN) && $(MAKE)
test -e $(DATA) && echo "Heritage_Resources detected" && exit 0 \
|| echo "You should first install Heritage_Resources at $(DATA)" && exit 1
cd $(ZEN) && $(MAKE) all
ln -sf $(ZEN) ZEN
cp -Rp $(HERITAGE) . # local copy of DICO from Resources/DICO
cp -Rp $(MW) . # local copy of MW
......