diff --git a/.gitignore b/.gitignore
index 7145b1409705be21130f5038ce7afe1fe1169e15..8e25f8f853c331029dcb56fd4b97e0b5bb5b9934 100644
--- a/.gitignore
+++ b/.gitignore
@@ -74,6 +74,9 @@ why3.conf
 /bin/why3session.opt
 /bin/why3session.byte
 /bin/why3session
+/bin/why3wc.opt
+/bin/why3wc.byte
+/bin/why3wc
 
 # /doc/
 /doc/version.tex
@@ -165,6 +168,9 @@ pvsbin/
 # /src/session
 /src/session/xml.ml
 
+# /src/tools
+/src/tools/why3wc.ml
+
 # /plugins/tptp/
 /plugins/tptp/tptp_lexer.ml
 /plugins/tptp/tptp_parser.ml
diff --git a/Makefile.in b/Makefile.in
index 69eba8789101ac804b5395f350cef6ac500001d7..bb170acfcefd596f85c60345226db82e2db8f15f 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -1299,6 +1299,55 @@ install_no_local::
 	cp -f lib/why3-cpulimit$(EXE) $(LIBDIR)/why3/why3-cpulimit$(EXE)
 	cp -f lib/why3-call-pvs $(LIBDIR)/why3/why3-call-pvs
 
+########
+# why3wc
+########
+
+WHY3WCGENERATED = src/tools/why3wc.ml
+
+WHY3WC_FILES = why3wc
+WHY3WCMODULES = $(addprefix src/tools/, $(WHY3WC_FILES))
+
+WHY3WCDEP = $(addsuffix .dep, $(WHY3WCMODULES))
+WHY3WCCMO = $(addsuffix .cmo, $(WHY3WCMODULES))
+WHY3WCCMX = $(addsuffix .cmx, $(WHY3WCMODULES))
+
+$(WHY3WCDEP): DEPFLAGS += -I src/tools
+$(WHY3WCCMO) $(WHY3WCCMX): INCLUDES += -I src/tools
+
+$(WHY3WCDEP): $(WHY3WCGENERATED)
+
+byte: bin/why3wc.byte
+opt: bin/why3wc.opt
+
+bin/why3wc.opt: $(WHY3WCCMX)
+	$(if $(QUIET),@echo 'Linking $@' &&) \
+	$(OCAMLOPT) $(OFLAGS) -o $@ $(OLINKFLAGS) $^
+
+bin/why3wc.byte: $(WHY3WCCMO)
+	$(if $(QUIET),@echo 'Linking $@' &&) \
+	$(OCAMLC) $(BFLAGS) -o $@ $(BLINKFLAGS) $^
+
+bin/why3wc: bin/why3wc.@OCAMLBEST@
+	ln -sf why3wc.@OCAMLBEST@ $@
+
+ifneq "$(MAKECMDGOALS)" "clean"
+include $(WHY3WCDEP)
+endif
+
+depend: $(WHY3WCDEP)
+
+clean::
+	rm -f $(WHY3WCGENERATED)
+	rm -f src/tools/*.cm[iox] src/tools/*.o
+	rm -f src/tools/*.annot src/tools/*.dep src/tools/*~
+	rm -f bin/why3wc.byte bin/why3wc.opt bin/why3wc
+
+install_no_local::
+	cp -f bin/why3wc.@OCAMLBEST@ $(BINDIR)/why3wc$(EXE)
+
+install_local: bin/why3wc
+
 #########
 # why3doc
 #########
diff --git a/src/tools/why3wc.mll b/src/tools/why3wc.mll
new file mode 100644
index 0000000000000000000000000000000000000000..6aa14cc8b323c29a9fdc28415550c4630fdace7b
--- /dev/null
+++ b/src/tools/why3wc.mll
@@ -0,0 +1,322 @@
+
+(** Count spec/code tokens/lines and comment lines in Why3 source files *)
+
+{
+  open Lexing
+  open Format
+
+  let skip_header = ref true
+  let tokens = ref false
+  let factor = ref false (* TODO: what kinds of de Bruijn factors? *)
+  let files = Queue.create ()
+
+  (** counters *)
+
+  type counter =
+    { mutable spec: int; mutable code: int; mutable comment: int }
+
+  let new_counter () =
+    { spec = 0; code = 0; comment = 0 }
+
+  let (+=) c1 c2 =
+    c1.spec <- c1.spec + c2.spec;
+    c1.code <- c1.code + c2.code;
+    c1.comment <- c1.comment + c2.comment
+
+  let reset c =
+    c.spec <- 0; c.code <- 0; c.comment <- 0
+
+  let current_line = new_counter ()
+  let current_file = new_counter ()
+  let grand_total = new_counter ()
+
+  let comment_depth = ref 0
+
+  let reset_counters () =
+    reset current_line; reset current_file; comment_depth := 0
+
+  let update_totals () =
+    grand_total += current_file
+
+  (** state of the automaton *)
+
+  type state = Nothing | Spec | Code
+
+  let state = ref Nothing
+
+  let add n = if !tokens then n+1 else 1
+
+  let add_token () = match !state with
+    | Nothing -> ()
+    | Spec -> current_line.spec <- add current_line.spec
+    | Code -> current_line.code <- add current_line.code
+
+  let new_line () =
+    current_file += current_line; reset current_line
+
+  let add_comment n =
+    current_line.comment <- current_line.comment + n + 1;
+    if n > 0 then new_line ()
+
+  let add_string n =
+    add_token ();
+    if n > 0 then new_line ()
+
+}
+
+(** regexps (mostly from src/parser/lexer.mll) *)
+
+let space = [' ' '\t' '\r']
+let ident = ['a'-'z' 'A'-'Z' '_' '0'-'9' '\'']+
+
+let spec =
+  "type" | "constant" | "function" | "predicate" |
+  "inductive" | "coinductive" | "axiom" | "lemma" | "goal" | "meta"
+
+let code =
+  "let" | "val" | "exception"
+
+let annotation =
+  "requires" | "ensures" | "returns" | "raises" | "reads" | "writes" |
+  "variant" | "invariant" | "assert" | "assume" | "check"
+
+let digit = ['0'-'9']
+let hexadigit = ['0'-'9' 'a'-'f' 'A'-'F']
+let exponent = ['e' 'E'] (['-' '+']? digit+)
+let pexponent = ['p' 'P'] (['-' '+']? digit+)
+let number =
+    ['0'-'9'] ['0'-'9' '_']*
+  | '0' ['x' 'X'] (['0'-'9' 'A'-'F' 'a'-'f']['0'-'9' 'A'-'F' 'a'-'f' '_']*)
+  | '0' ['o' 'O'] (['0'-'7'] ['0'-'7' '_']*)
+  | '0' ['b' 'B'] (['0'-'1'] ['0'-'1' '_']*)
+  | digit+ exponent
+  | digit+ '.' digit* exponent?
+  | digit* '.' digit+ exponent?
+  | '0' ['x' 'X'] hexadigit+ pexponent
+  | '0' ['x' 'X'] hexadigit+ '.' hexadigit*
+  | '0' ['x' 'X'] hexadigit* '.' hexadigit+ pexponent?
+
+let op_char_1 = ['=' '<' '>' '~']
+let op_char_2 = ['+' '-']
+let op_char_3 = ['*' '/' '%']
+let op_char_4 = ['!' '$' '&' '?' '@' '^' '.' ':' '|' '#']
+let op_char_34 = op_char_3 | op_char_4
+let op_char_234 = op_char_2 | op_char_34
+let op_char_1234 = op_char_1 | op_char_234
+let op_char_pref = ['!' '?']
+
+let token =
+    number
+  | "->" | "<-" | "<->" | "&&" | "||" | "/\\" | "\\/" | "<>"
+  | op_char_pref op_char_4*
+  | op_char_1234* op_char_1 op_char_1234*
+  | op_char_234* op_char_2 op_char_234*
+  | op_char_34* op_char_3 op_char_34*
+  | op_char_4+
+
+(** lexer entries; the entry point is [scan] *)
+
+rule scan = parse
+  | space+
+      { scan lexbuf }
+  | "(*" space* '\n'?
+      { add_comment (comment 0 lexbuf); scan lexbuf }
+  | '"'
+      { add_string (string 0 lexbuf); scan lexbuf }
+  | '\n' space* '\n' (* at least one blank line *)
+      { new_line ();
+        state := Nothing;
+        scan lexbuf }
+  | '\n'
+      { new_line ();
+        scan lexbuf }
+  | spec
+      { if !state = Nothing then state := Spec;
+        add_token (); scan lexbuf }
+  | code
+      { if !state = Nothing then state := Code;
+        add_token (); scan lexbuf }
+  | "diverges" (* no curly braces for this annotation *)
+      { current_line.spec <- add current_line.spec;
+        scan lexbuf }
+  | "mutable" (* rather imprecise, but better than nothing *)
+      { state := Code; scan lexbuf }
+  | annotation
+      { let oldst = !state in (* most likely already Code *)
+        state := Spec; add_token (); start_annotation lexbuf;
+        state := oldst; scan lexbuf }
+  | ident
+      { add_token (); scan lexbuf }
+  | token (* try to be precise wrt Why3 tokens *)
+      { add_token (); scan lexbuf }
+  | _ (* otherwise, the fallback is to count single characters *)
+      { add_token (); scan lexbuf }
+  | eof
+      { }
+
+(** looks for a left curly brace, then scans the annotation until the
+    matching right curly brace *)
+
+and start_annotation = parse
+  | space+
+      { start_annotation lexbuf }
+  | "(*"
+      { add_comment (comment 0 lexbuf); start_annotation lexbuf }
+  | "{"
+      { scan_annotation lexbuf }
+  | _
+      { add_token () (* no curly brace, we give up *) }
+  | eof
+      { }
+
+(** scans an annotation until we find the matching curly brace *)
+
+and scan_annotation = parse
+  | space+
+      { scan_annotation lexbuf }
+  | "{" (* nested curly braces (e.g. records) *)
+      { scan_annotation lexbuf; scan_annotation lexbuf }
+  | "}"
+      { () }
+  | "(*" space* '\n'?
+      { add_comment (comment 0 lexbuf); scan_annotation lexbuf }
+  | '"'
+      { add_string (string 0 lexbuf); scan_annotation lexbuf }
+  | ('\n' | space)* '\n'
+      { new_line (); scan_annotation lexbuf }
+  | ident
+      { add_token (); scan_annotation lexbuf }
+  | token
+      { add_token (); scan_annotation lexbuf }
+  | _
+      { add_token (); scan_annotation lexbuf }
+  | eof
+      { }
+
+(** [string n] reads a string until its end and returns n + the number
+    of newlines it contains. *)
+
+and string n = parse
+  | '"'                     { n }
+  | '\\' ('\\' | 'n' | '"') { string n lexbuf }
+  | '\n'                    { string (n + 1) lexbuf }
+  | _                       { string n lexbuf }
+  | eof                     { n }
+
+(** [comment n] reads a comment until its end and returns n + the number
+    of newlines it contains. *)
+
+and comment n = parse
+  | ('\n' | space)* "*)" { n }
+  | "(*"                  { comment (comment n lexbuf) lexbuf }
+  | '"'                   { comment (string n lexbuf) lexbuf }
+  | '\n'+                 { comment (n + 1) lexbuf }
+  | _                     { comment n lexbuf }
+  | eof                   { n }
+
+(** [read_header] is used to skip the possible header at
+    the beginning of files (unless option -a is specified).
+    It stops whenever it encounters an empty line or any character outside
+    a comment. In the latter case, it correctly resets the lexer position
+    on that character (decreasing [lex_curr_pos] by 1). *)
+
+and read_header = parse
+  | "(*"   { skip_header_comment lexbuf; skip_until_nl lexbuf;
+             read_header lexbuf }
+  | "\n"   { () }
+  | space+ { read_header lexbuf }
+  | _      { lexbuf.lex_curr_pos <- lexbuf.lex_curr_pos - 1 }
+  | eof    { () }
+
+and skip_header_comment = parse
+  | "*)" { () }
+  | _    { skip_header_comment lexbuf }
+  | eof  { () }
+
+and skip_until_nl = parse
+  | '\n' { () }
+  | _    { skip_until_nl lexbuf }
+  | eof  { () }
+
+{
+
+  (** printing results *)
+
+  let legend =
+    let todo = ref true in
+    fun () -> if !todo then begin
+      printf "    spec     code comments@."; todo := false end
+
+  let print_file ?file c =
+    legend ();
+    printf "%8d" c.spec;
+    (* if not !code_only then *) printf " %8d" c.code;
+    printf " %8d" c.comment;
+    (match file with Some f -> printf " %s" f | _ -> ());
+    if !factor && c.code > 0 then begin
+      let f = float c.spec /. float c.code in
+      printf " (%.2f)" f
+    end;
+    printf "@."
+
+  let print_totals () =
+    print_file ~file:"total" grand_total
+
+  (** processing channels/files *)
+
+  let process_channel ch =
+    let lb = Lexing.from_channel ch in
+    reset_counters ();
+    if !skip_header then read_header lb;
+    scan lb
+
+  let process_file f =
+    try
+      let ch = open_in f in
+      process_channel ch;
+      close_in ch;
+      print_file ~file:f current_file;
+      update_totals ()
+    with Sys_error s ->
+      flush stdout; eprintf "why3wc: %s@." s
+
+  (** Command line parsing and entry point *)
+
+  let spec = Arg.align [
+    "-t", Arg.Set tokens, " Count tokens";
+    "--tokens", Arg.Set tokens, " same as -t";
+    "-l", Arg.Clear tokens, " Count lines (default)";
+    "--lines", Arg.Clear tokens, " same as -l";
+    "-f", Arg.Set factor, " Print spec/code ratio";
+    "--factor", Arg.Set factor, " same as -f";
+    "-a", Arg.Clear skip_header, " Count heading comments as well";
+    "--do-not-skip-header", Arg.Clear skip_header, " same as -a";
+  ]
+
+  let add_file file =
+    if not (Sys.file_exists file) then begin
+      eprintf "%s: no such file@." file; exit 1
+    end;
+    Queue.add file files
+
+  let usage = "why3wc [options] files...\n\
+    \n\
+    Counts tokens/lines in Why3 source files.\n\
+    Assumes source files to be lexically well-formed.\n\
+    If no source file is given, standard input is analyzed.\n"
+
+  let () = Arg.parse spec add_file usage
+
+  let () =
+    let n = Queue.length files in
+    if n = 0 then begin process_channel stdin; print_file current_file end
+    else if n = 1 then process_file (Queue.pop files)
+    else begin Queue.iter process_file files; print_totals () end
+
+}
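
Illustration (not part of the patch; the WhyML fragment below is hypothetical, and its exact syntax may differ slightly between Why3 versions). It shows how the lexer above classifies source lines: the heading comment is skipped by [read_header] unless -a/--do-not-skip-header is given; the "function" and "lemma" declarations are counted in the spec column; the "let" and its body are counted as code, except for the "ensures" clause, which is scanned by [start_annotation] and therefore counted as spec; any other (* ... *) comment goes to the comments column.

    (* heading comment: skipped unless -a is given *)
    module Example
      use import int.Int

      (* counted as a comment line *)
      function double (x: int) : int = x + x

      lemma double_nonneg: forall x: int. x >= 0 -> double x >= 0

      let double_impl (x: int) : int
        ensures { result = double x }
      = x + x
    end

Running bin/why3wc on such a file prints one line per file with the spec, code, and comment counts; with -f it also prints the spec/code ratio, and with -t it counts tokens instead of lines.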