ErrorReporting.ml 9.01 KB
Newer Older
1
module Make
2
  (I : IncrementalEngine.EVERYTHING)
3 4 5 6 7 8 9 10 11 12 13 14
  (User : sig

    (* In order to submit artificial tokens to the parser, we need a function
       that converts a terminal symbol to a token. Unfortunately, we cannot
       (in general) auto-generate this code, because it requires making up
       semantic values of arbitrary OCaml types. *)

    val terminal2token: _ I.terminal -> I.token

  end)
= struct

15
  open General
16 17 18
  open I
  open User

19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
  (* ------------------------------------------------------------------------ *)

  (* Explanations. *)

  type explanation = {
    item: item;
    past: (xsymbol * Lexing.position * Lexing.position) list
  }

  let item explanation =
    explanation.item

  let past explanation =
    explanation.past

  let future explanation =
    let prod, index = explanation.item in
    let rhs = rhs prod in
    drop index rhs

  let goal explanation =
    let prod, _ = explanation.item in
    lhs prod

  (* ------------------------------------------------------------------------ *)

45 46 47 48 49 50 51 52 53 54 55
  (* [items_current env] assumes that [env] is not an initial state (which
     implies that the stack is non-empty). Under this assumption, it extracts
     the automaton's current state, i.e., the LR(1) state found in the top
     stack cell. It then goes through [items] so as to obtain the LR(0) items
     associated with this state. *)

  let items_current env : item list =
    (* Get the current state. *)
    match Lazy.force (stack env) with
    | Nil ->
        (* If we get here, then the stack is empty, which means the parser
POTTIER Francois's avatar
POTTIER Francois committed
56
           is in an initial state. This should not happen. *)
57
        invalid_arg "items_current" (* TEMPORARY it DOES happen! *)
58 59 60 61 62 63 64
    | Cons (Element (current, _, _, _), _) ->
        (* Extract the current state out of the top stack element, and
           convert it to a set of LR(0) items. Returning a set of items
           instead of an ['a lr1state] is convenient; returning [current]
           would require wrapping it in an existential type. *)
        items current

65 66
  (* [is_shift_item t item] determines whether [item] justifies a shift
     transition along the terminal symbol [t]. *)
67

68
  let is_shift_item (t : _ terminal) (prod, index) : bool =
69 70 71 72 73 74 75 76
    let rhs = rhs prod in
    let length = List.length rhs in
    assert (0 < index && index <= length);
    (* We test that there is one symbol after the bullet and this symbol
       is [t] or can generate a word that begins with [t]. (Note that we
       don't need to worry about the case where this symbol is nullable
       and [t] is generated by the following symbol. In that situation,
       we would have to reduce before we can shift [t].) *)
77
    index < length && xfirst (List.nth rhs index) t
78

79 80 81 82 83 84 85 86 87 88
  let compare_explanations x1 x2 =
    let c = compare_items x1.item x2.item in
    (* TEMPORARY checking that if [c] is 0 then the positions are the same *)
    assert (
      c <> 0 || List.for_all2 (fun (_, start1, end1) (_, start2, end2) ->
        start1.Lexing.pos_cnum = start2.Lexing.pos_cnum &&
        end1.Lexing.pos_cnum = end2.Lexing.pos_cnum
      ) x1.past x2.past
    );
    c
89

90
  (* [marry past stack] TEMPORARY comment *)
91 92 93 94 95 96 97 98 99 100

  let rec marry past stack =
    match past, stack with
    | [], _ ->
        []
    | symbol :: past, lazy (Cons (Element (s, _, startp, endp), stack)) ->
        assert (compare_symbols symbol (X (incoming_symbol s)) = 0);
        (symbol, startp, endp) :: marry past stack
    | _ :: _, lazy Nil ->
        assert false
101

102
  (* [investigate t checkpoint] assumes that [checkpoint] has been obtained by
103 104 105
     offering the terminal symbol [t] to the parser. It runs the parser,
     through an arbitrary number of reductions, until the parser either
     accepts this token (i.e., shifts) or rejects it (i.e., signals an
106 107
     error). If the parser decides to shift, then the shift items found
     in the LR(1) state before the shift are used to produce new explanations. *)
108 109 110 111

  (* It is desirable that the semantic actions be side-effect free, or
     that their side-effects be harmless (replayable). *)

112 113
  let rec investigate (t : _ terminal) (checkpoint : _ checkpoint) explanations =
    match checkpoint with
114 115 116 117 118 119
    | Shifting (env, _, _) ->
        (* The parser is about to shift, which means it is willing to
           consume the terminal symbol [t]. In the state before the
           transition, look at the items that justify shifting [t].
           We view these items as explanations: they explain what
           we have read and what we expect to read. *)
120
        let stack = stack env in
121
        List.fold_left (fun explanations item ->
122 123 124 125 126
          if is_shift_item t item then
            let prod, index = item in
            let rhs = rhs prod in
            {
              item = item;
127
              past = List.rev (marry (List.rev (take index rhs)) stack)
128 129 130
            } :: explanations
          else
            explanations
131
        ) explanations (items_current env)
132
          (* TEMPORARY [env] may be an initial state! violating [item_current]'s precondition *)
133
    | AboutToReduce _ ->
134
        (* The parser wishes to reduce. Just follow. *)
135
        investigate t (resume checkpoint) explanations
136 137 138 139 140 141 142 143 144 145 146 147 148
    | HandlingError _ ->
        (* The parser fails, which means the terminal symbol [t] does
           not make sense at this point. Thus, no new explanations of
           what the parser expects need be produced. *)
        explanations
    | InputNeeded _
    | Accepted _
    | Rejected ->
        (* None of these cases can arise. Indeed, after a token is submitted
           to it, the parser must shift, reduce, or signal an error, before
           it can request another token or terminate. *)
        assert false

149
  (* [investigate pos checkpoint] assumes that [checkpoint] is of the form
150 151 152 153 154 155
     [InputNeeded _].  For every terminal symbol [t], it investigates
     how the parser reacts when fed the symbol [t], and returns a list
     of explanations. The position [pos] is where a syntax error was
     detected; it is used when manufacturing dummy tokens. This is
     important because the position of the dummy token may end up in
     the explanations that we produce. *)
156

157
  let investigate pos (checkpoint : _ checkpoint) : explanation list =
158 159 160 161 162 163 164 165
    weed compare_explanations (
      foreach_terminal_but_error (fun symbol explanations ->
        match symbol with
        | X (N _) -> assert false
        | X (T t) ->
            (* Build a dummy token for the terminal symbol [t]. *)
            let token = (terminal2token t, pos, pos) in
            (* Submit it to the parser. Accumulate explanations. *)
166
            investigate t (offer checkpoint token) explanations
167 168 169 170 171 172 173 174 175 176 177 178 179 180 181
      ) []
    )

  (* TEMPORARY copied from engine.ml; move/share with [Convert] *)

  type reader =
    unit -> token * Lexing.position * Lexing.position

  let wrap (lexer : Lexing.lexbuf -> token) (lexbuf : Lexing.lexbuf) : reader =
    fun () ->
      let token = lexer lexbuf in
      let startp = lexbuf.Lexing.lex_start_p
      and endp = lexbuf.Lexing.lex_curr_p in
      token, startp, endp

182
  (* The following is a custom version of the loop found in [Engine]. It
183 184 185
     drives the parser in the usual way, but records the last [InputNeeded]
     checkpoint. If a syntax error is detected, it goes back to this
     checkpoint and analyzes it in order to produce a meaningful
186
     diagnostic. *)
187

188
  exception Error of (Lexing.position * Lexing.position) * explanation list
189 190 191

  (* TEMPORARY why loop-style? we should offer a simplified incremental API *)

192 193 194
  type 'a checkpoint = {
    inputneeded: 'a I.checkpoint;
    current: 'a I.checkpoint
195 196
  }

197
  let rec loop (read : reader) ({ inputneeded; current } : 'a checkpoint) : 'a =
198
    match current with
199
    | InputNeeded _ ->
200 201
        (* Update the last recorded [InputNeeded] checkpoint. *)
        let inputneeded = current in
202 203
        let triple = read() in
        let current = offer current triple in
204
        loop read { inputneeded; current }
205 206 207
    | Shifting _
    | AboutToReduce _ ->
        let current = resume current in
208
        loop read { inputneeded; current }
209 210 211 212 213
    | HandlingError env ->
        (* The parser signals a syntax error. Note the position of the
           problematic token, which is useful. Then, go back to the
           checkpoint and investigate. *)
        let (startp, _) as positions = positions env in
214
        raise (Error (positions, investigate startp inputneeded))
215 216 217 218 219 220 221
    | Accepted v ->
        v
    | Rejected ->
        (* The parser rejects this input. This cannot happen, because
           we stop as soon as the parser reports [HandlingError]. *)
        assert false

222
  let entry (start : 'a I.checkpoint) lexer lexbuf =
223 224 225 226 227
    (* The parser cannot accept or reject before it asks for the very first
       character of input. (Indeed, we statically reject a symbol that
       generates the empty language or the singleton language {epsilon}.)
       So, [start] must be [InputNeeded _]. *)
    assert (match start with InputNeeded _ -> true | _ -> false);
228
    loop (wrap lexer lexbuf) { inputneeded = start; current = start }
229

POTTIER Francois's avatar
POTTIER Francois committed
230 231 232
  (* TEMPORARY could also publish a list of the terminal symbols that
     do not cause an error *)

233
end