ErrorReporting.ml 6.59 KB
Newer Older
1
module Make
2
  (I : IncrementalEngine.EVERYTHING)
3 4 5 6 7 8 9 10 11 12 13 14
  (User : sig

    (* In order to submit artificial tokens to the parser, we need a function
       that converts a terminal symbol to a token. Unfortunately, we cannot
       (in general) auto-generate this code, because it requires making up
       semantic values of arbitrary OCaml types. *)

    val terminal2token: _ I.terminal -> I.token

  end)
= struct

15
  open General
16 17 18
  open I
  open User

19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
  (* ------------------------------------------------------------------------ *)

  (* Explanations. *)

  type explanation = {
    item: item;
    past: (xsymbol * Lexing.position * Lexing.position) list
  }

  let item explanation =
    explanation.item

  let past explanation =
    explanation.past

  let future explanation =
    let prod, index = explanation.item in
    let rhs = rhs prod in
    drop index rhs

  let goal explanation =
    let prod, _ = explanation.item in
    lhs prod

  (* ------------------------------------------------------------------------ *)

45 46 47 48 49 50 51 52 53 54 55
  (* [items_current env] assumes that [env] is not an initial state (which
     implies that the stack is non-empty). Under this assumption, it extracts
     the automaton's current state, i.e., the LR(1) state found in the top
     stack cell. It then goes through [items] so as to obtain the LR(0) items
     associated with this state. *)

  let items_current env : item list =
    (* Get the current state. *)
    match Lazy.force (stack env) with
    | Nil ->
        (* If we get here, then the stack is empty, which means the parser
POTTIER Francois's avatar
POTTIER Francois committed
56
           is in an initial state. This should not happen. *)
57
        invalid_arg "items_current" (* TEMPORARY it DOES happen! *)
58 59 60 61 62 63 64
    | Cons (Element (current, _, _, _), _) ->
        (* Extract the current state out of the top stack element, and
           convert it to a set of LR(0) items. Returning a set of items
           instead of an ['a lr1state] is convenient; returning [current]
           would require wrapping it in an existential type. *)
        items current

65 66
  (* [is_shift_item t item] determines whether [item] justifies a shift
     transition along the terminal symbol [t]. *)
67

68
  let is_shift_item (t : _ terminal) (prod, index) : bool =
69 70 71 72 73 74 75 76
    let rhs = rhs prod in
    let length = List.length rhs in
    assert (0 < index && index <= length);
    (* We test that there is one symbol after the bullet and this symbol
       is [t] or can generate a word that begins with [t]. (Note that we
       don't need to worry about the case where this symbol is nullable
       and [t] is generated by the following symbol. In that situation,
       we would have to reduce before we can shift [t].) *)
77
    index < length && xfirst (List.nth rhs index) t
78

79 80 81 82 83 84 85 86 87 88
  let compare_explanations x1 x2 =
    let c = compare_items x1.item x2.item in
    (* TEMPORARY checking that if [c] is 0 then the positions are the same *)
    assert (
      c <> 0 || List.for_all2 (fun (_, start1, end1) (_, start2, end2) ->
        start1.Lexing.pos_cnum = start2.Lexing.pos_cnum &&
        end1.Lexing.pos_cnum = end2.Lexing.pos_cnum
      ) x1.past x2.past
    );
    c
89

90
  (* [marry past stack] TEMPORARY comment *)
91 92 93 94 95 96 97 98 99 100

  let rec marry past stack =
    match past, stack with
    | [], _ ->
        []
    | symbol :: past, lazy (Cons (Element (s, _, startp, endp), stack)) ->
        assert (compare_symbols symbol (X (incoming_symbol s)) = 0);
        (symbol, startp, endp) :: marry past stack
    | _ :: _, lazy Nil ->
        assert false
101

POTTIER Francois's avatar
POTTIER Francois committed
102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122
  (* [accumulate t env explanations] is called if the parser decides to shift
     the test token [t]. The parameter [env] describes the parser configuration
     before it shifts this token. (Some reductions have taken place.) We use the
     shift items found in [env] to produce new explanations. *)

  let accumulate (t : _ terminal) env explanations =
    (* The parser is about to shift, which means it is willing to
       consume the terminal symbol [t]. In the state before the
       transition, look at the items that justify shifting [t].
       We view these items as explanations: they explain what
       we have read and what we expect to read. *)
    let stack = stack env in
    List.fold_left (fun explanations item ->
      if is_shift_item t item then
        let prod, index = item in
        let rhs = rhs prod in
        {
          item = item;
          past = List.rev (marry (List.rev (take index rhs)) stack)
        } :: explanations
      else
123
        explanations
POTTIER Francois's avatar
POTTIER Francois committed
124 125 126
    ) explanations (items_current env)
      (* TEMPORARY [env] may be an initial state!
         violating [item_current]'s precondition *)
127

128
  (* [investigate pos checkpoint] assumes that [checkpoint] is of the form
129 130 131 132 133 134
     [InputNeeded _].  For every terminal symbol [t], it investigates
     how the parser reacts when fed the symbol [t], and returns a list
     of explanations. The position [pos] is where a syntax error was
     detected; it is used when manufacturing dummy tokens. This is
     important because the position of the dummy token may end up in
     the explanations that we produce. *)
135

136
  let investigate pos (checkpoint : _ checkpoint) : explanation list =
137 138 139 140 141 142 143 144
    weed compare_explanations (
      foreach_terminal_but_error (fun symbol explanations ->
        match symbol with
        | X (N _) -> assert false
        | X (T t) ->
            (* Build a dummy token for the terminal symbol [t]. *)
            let token = (terminal2token t, pos, pos) in
            (* Submit it to the parser. Accumulate explanations. *)
POTTIER Francois's avatar
POTTIER Francois committed
145 146
            let checkpoint = offer checkpoint token in
            I.loop_test (accumulate t) checkpoint explanations
147 148 149
      ) []
    )

POTTIER Francois's avatar
POTTIER Francois committed
150 151 152
  (* We drive the parser in the usual way, but records the last [InputNeeded]
     checkpoint. If a syntax error is detected, we go back to this checkpoint
     and analyze it in order to produce a meaningful diagnostic. *)
153

154
  exception Error of (Lexing.position * Lexing.position) * explanation list
155

156
  let entry (start : 'a I.checkpoint) lexer lexbuf =
POTTIER Francois's avatar
POTTIER Francois committed
157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172
    let fail (inputneeded : 'a I.checkpoint) (checkpoint : 'a I.checkpoint) =
      (* The parser signals a syntax error. Note the position of the
         problematic token, which is useful. Then, go back to the
         last [InputNeeded] checkpoint and investigate. *)
      match checkpoint with
      | HandlingError env ->
          let (startp, _) as positions = positions env in
          raise (Error (positions, investigate startp inputneeded))
      | _ ->
          assert false
    in
    I.loop_handle_undo
      (fun v -> v)
      fail
      (lexer_lexbuf_to_supplier lexer lexbuf)
      start
173

POTTIER Francois's avatar
POTTIER Francois committed
174 175 176
  (* TEMPORARY could also publish a list of the terminal symbols that
     do not cause an error *)

177
end