invariant.ml 25.5 KB
 POTTIER Francois committed Mar 24, 2017 1 2 3 4 5 6 7 8 9 10 11 12 13 ``````(******************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier, Inria Paris *) (* Yann Régis-Gianas, PPS, Université Paris Diderot *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under the *) (* terms of the GNU General Public License version 2, as described in the *) (* file LICENSE. *) (* *) (******************************************************************************) `````` fpottier committed Mar 01, 2013 14 15 16 17 `````` (* This module discovers information about the shape and content of the stack in each of the automaton's states. *) open Grammar `````` fpottier committed Mar 02, 2013 18 ``````module C = Conflict (* artificial dependency; ensures that [Conflict] runs first *) `````` fpottier committed Mar 01, 2013 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 `````` (* ------------------------------------------------------------------------ *) (* Compute a lower bound on the height of the stack at every state. At the same time, compute which symbols are held in this stack prefix. *) (* In order to compute (a lower bound on) the height of the stack at a state [s], we examine the LR(0) items that compose [s]. For each item, if the bullet is at position [pos], then we can be assured that the height of the stack is at least [pos]. Thus, we compute the maximum of [pos] over all items (of which there is at least one). *) (* The set of items that we use is not closed, but this does not matter; the items that would be added by the closure would not add any information regarding the height of the stack, since the bullet is at position 0 in these items. *) (* Instead of computing just the stack height, we compute, in the same manner, which symbols are on the stack at a state [s]. This is an array of symbols whose length is the height of the stack at [s]. By convention, the top of the stack is the end of the array. *) (* We first compute and tabulate this information at the level of the LR(0) automaton. *) let stack_symbols : Lr0.node -> Symbol.t array = let dummy = `````` POTTIER Francois committed Dec 11, 2014 45 `````` Array.make 0 (Symbol.T Terminal.sharp) `````` fpottier committed Mar 01, 2013 46 47 48 `````` in Misc.tabulate Lr0.n (fun node -> Item.Set.fold (fun item accu -> `````` POTTIER Francois committed Dec 04, 2014 49 `````` let _prod, _nt, rhs, pos, _length = Item.def item in `````` fpottier committed Mar 01, 2013 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 `````` if pos > Array.length accu then Array.sub rhs 0 pos else accu ) (Lr0.items node) dummy ) (* Then, it is easy to extend it to the LR(1) automaton. *) let stack_symbols (node : Lr1.node) : Symbol.t array = stack_symbols (Lr0.core (Lr1.state node)) let stack_height (node : Lr1.node) : int = Array.length (stack_symbols node) (* ------------------------------------------------------------------------ *) (* Above, we have computed a prefix of the stack at every state. We have computed the length of this prefix and the symbols that are held in this prefix of the stack. Now, compute which states may be held in this prefix. *) (* In order to compute this information, we perform an analysis of the automaton, via a least fixed fixed point computation. *) (* It is worth noting that it would be possible to use an analysis based on a least fixed point computation to discover at the same time the length of the stack prefix, the symbols that it contains, and the states that it may contain. This alternate approach, which was used until 2012/08/25, would lead us to discovering a richer invariant, that is, potentially longer prefixes. This extra information, however, was useless; computing it was a waste of time. Hence, as of 2012/08/25, the height of the stack prefix and the symbols that it contains are predicted (see above), and the least fixed computation is used only to populate these prefixes of predictable length with state information. *) (* By the way, this least fixed point analysis remains the most costly computation throughout this module. *) (* Vectors of sets of states. *) module StateVector = struct type property = Lr1.NodeSet.t list let empty = [] let rec equal v1 v2 = match v1, v2 with | [], [] -> true | states1 :: v1, states2 :: v2 -> Lr1.NodeSet.equal states1 states2 && equal v1 v2 | _, _ -> (* Because all heights are known ahead of time, we are able `````` POTTIER Francois committed Apr 28, 2016 104 `````` to (and careful to) compare only vectors of equal length. *) `````` fpottier committed Mar 01, 2013 105 106 107 108 109 `````` assert false let rec join v1 v2 = match v1, v2 with | [], [] -> `````` POTTIER Francois committed Apr 28, 2016 110 `````` [] `````` fpottier committed Mar 01, 2013 111 `````` | states1 :: v1, states2 :: v2 -> `````` POTTIER Francois committed Apr 28, 2016 112 113 `````` Lr1.NodeSet.union states1 states2 :: join v1 v2 `````` fpottier committed Mar 01, 2013 114 115 `````` | _, _ -> (* Because all heights are known ahead of time, we are able `````` POTTIER Francois committed Apr 28, 2016 116 117 `````` to (and careful to) compare only vectors of equal length. *) assert false `````` fpottier committed Mar 01, 2013 118 119 120 121 `````` let push v x = x :: v `````` POTTIER Francois committed Jan 18, 2015 122 123 `````` let truncate = MenhirLib.General.take `````` fpottier committed Mar 01, 2013 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 `````` end (* In order to perform the fixed point computation, we must extend our type of vectors with a bottom element. This element will not appear in the least fixed point, provided every state of the automaton is reachable. *) module StateLattice = struct type property = | Bottom | NonBottom of StateVector.property let bottom = Bottom let empty = NonBottom StateVector.empty let equal v1 v2 = match v1, v2 with | Bottom, Bottom -> true | NonBottom v1, NonBottom v2 -> StateVector.equal v1 v2 | _, _ -> false let join v1 v2 = match v1, v2 with | Bottom, v | v, Bottom -> v | NonBottom v1, NonBottom v2 -> NonBottom (StateVector.join v1 v2) let push v x = match v with | Bottom -> `````` POTTIER Francois committed Apr 29, 2016 163 `````` Bottom `````` fpottier committed Mar 01, 2013 164 165 166 167 168 169 170 171 `````` | NonBottom v -> NonBottom (StateVector.push v x) let truncate h v = match v with | Bottom -> Bottom | NonBottom v -> `````` POTTIER Francois committed Apr 28, 2016 172 `````` NonBottom (StateVector.truncate h v) `````` fpottier committed Mar 01, 2013 173 174 175 176 177 178 179 180 181 182 183 184 185 `````` let is_maximal _ = false end open StateLattice (* Define the fixed point. *) let stack_states : Lr1.node -> property = let module F = `````` POTTIER Francois committed Jul 02, 2015 186 187 188 `````` Fix.Make (Maps.PersistentMapsToImperativeMaps(Lr1.NodeMap)) (StateLattice) `````` fpottier committed Mar 01, 2013 189 190 191 192 193 194 195 196 197 198 `````` in F.lfp (fun node (get : Lr1.node -> property) -> (* We use the fact that a state has incoming transitions if and only if it is not a start state. *) match Lr1.incoming_symbol node with | None -> `````` POTTIER Francois committed Apr 28, 2016 199 `````` assert (Lr1.predecessors node = []); `````` fpottier committed Mar 01, 2013 200 201 `````` assert (stack_height node = 0); `````` POTTIER Francois committed Apr 28, 2016 202 203 `````` (* If [node] is a start state, then the stack at [node] may be (in fact, must be) the empty stack. *) `````` fpottier committed Mar 01, 2013 204 `````` `````` POTTIER Francois committed Apr 28, 2016 205 `````` empty `````` fpottier committed Mar 01, 2013 206 `````` `````` POTTIER Francois committed Dec 04, 2014 207 `````` | Some _symbol -> `````` fpottier committed Mar 01, 2013 208 `````` `````` POTTIER Francois committed Apr 28, 2016 209 210 211 212 213 214 `````` (* If [node] is not a start state, then include the contribution of every incoming transition. We compute a join over all predecessors. The contribution of one predecessor is the abstract value found at this predecessor, extended with a new cell for this transition, and truncated to the stack height at [node], so as to avoid obtaining a vector that is longer than expected/necessary. *) `````` fpottier committed Mar 01, 2013 215 `````` `````` POTTIER Francois committed Apr 28, 2016 216 `````` let height = stack_height node in `````` fpottier committed Mar 01, 2013 217 `````` `````` POTTIER Francois committed Apr 28, 2016 218 219 220 221 222 223 `````` List.fold_left (fun v predecessor -> join v (truncate height (push (get predecessor) (Lr1.NodeSet.singleton predecessor)) ) ) bottom (Lr1.predecessors node) `````` fpottier committed Mar 01, 2013 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 `````` ) (* If every state is reachable, then the least fixed point must be non-bottom everywhere, so we may view it as a function that produces a vector of sets of states. *) let stack_states (node : Lr1.node) : StateVector.property = match stack_states node with | Bottom -> (* apparently this node is unreachable *) assert false | NonBottom v -> v (* ------------------------------------------------------------------------ *) (* From the above information, deduce, for each production, the states that may appear in the stack when this production is reduced. *) (* We are careful to produce a vector of states whose length is exactly that of the production [prod]. *) let production_states : Production.index -> StateLattice.property = Production.tabulate (fun prod -> `````` POTTIER Francois committed Apr 06, 2017 248 `````` let nodes = Lr1.production_where prod in `````` fpottier committed Mar 01, 2013 249 250 251 `````` let height = Production.length prod in Lr1.NodeSet.fold (fun node accu -> join accu `````` POTTIER Francois committed Apr 29, 2016 252 `````` (truncate height `````` POTTIER Francois committed Apr 28, 2016 253 254 `````` (NonBottom (stack_states node)) ) `````` fpottier committed Mar 01, 2013 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 `````` ) nodes bottom ) (* ------------------------------------------------------------------------ *) (* We now determine which states must be represented, that is, explicitly pushed onto the stack. For simplicity, a state is either always represented or never represented. More fine-grained strategies, where a single state is sometimes pushed onto the stack and sometimes not pushed, depending on which outgoing transition is being taken, are conceivable, but quite tricky, and probably not worth the trouble. (1) If two states are liable to appear within a single stack cell, then one is represented if and only if the other is represented. This ensures that the structure of stacks is known everywhere and that we can propose types for stacks. (2) If a state [s] has an outgoing transition along nonterminal symbol [nt], and if the [goto] table for symbol [nt] has more than one target, then state [s] is represented. (3) If a stack cell contains more than one state and if at least one of these states is able to handle the [error] token, then these states are represented. (4) If the semantic action associated with a production mentions the [\$syntaxerror] keyword, then the state that is being reduced to (that is, the state that initiated the recognition of this production) is represented. (Indeed, it will be passed as an argument to [errorcase].) *) (* Data. *) let rep : bool UnionFind.point array = Array.init Lr1.n (fun _ -> UnionFind.fresh false) (* Getter. *) let represented state = rep.(Lr1.number state) (* Setters. *) let represent state = `````` POTTIER Francois committed Nov 30, 2017 299 `````` UnionFind.set (represented state) true `````` fpottier committed Mar 01, 2013 300 301 302 303 304 305 306 307 308 309 `````` let represents states = represent (Lr1.NodeSet.choose states) (* Enforce condition (1) above. *) let share (v : StateVector.property) = List.iter (fun states -> let dummy = UnionFind.fresh false in Lr1.NodeSet.iter (fun state -> `````` POTTIER Francois committed Nov 30, 2017 310 `````` UnionFind.union dummy (represented state) `````` fpottier committed Mar 01, 2013 311 312 313 314 315 316 317 318 319 320 `````` ) states ) v let () = Lr1.iter (fun node -> share (stack_states node) ); Production.iter (fun prod -> match production_states prod with | Bottom -> `````` POTTIER Francois committed Apr 28, 2016 321 `````` () `````` fpottier committed Mar 01, 2013 322 `````` | NonBottom v -> `````` POTTIER Francois committed Apr 28, 2016 323 `````` share v `````` fpottier committed Mar 01, 2013 324 325 326 327 328 329 `````` ) (* Enforce condition (2) above. *) let () = Nonterminal.iter (fun nt -> `````` POTTIER Francois committed Apr 29, 2016 330 `````` let count = `````` fpottier committed Mar 01, 2013 331 `````` Lr1.targets (fun count _ _ -> `````` POTTIER Francois committed Apr 28, 2016 332 `````` count + 1 `````` fpottier committed Mar 01, 2013 333 334 335 336 `````` ) 0 (Symbol.N nt) in if count > 1 then Lr1.targets (fun () sources _ -> `````` POTTIER Francois committed Apr 28, 2016 337 `````` List.iter represent sources `````` fpottier committed Mar 01, 2013 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 `````` ) () (Symbol.N nt) ) (* Enforce condition (3) above. *) let handler state = try let _ = SymbolMap.find (Symbol.T Terminal.error) (Lr1.transitions state) in true with Not_found -> try let _ = TerminalMap.lookup Terminal.error (Lr1.reductions state) in true with Not_found -> false let handlers states = Lr1.NodeSet.exists handler states let () = Lr1.iter (fun node -> let v = stack_states node in List.iter (fun states -> if Lr1.NodeSet.cardinal states >= 2 && handlers states then `````` POTTIER Francois committed Apr 28, 2016 362 `````` represents states `````` fpottier committed Mar 01, 2013 363 364 365 366 367 368 369 370 371 372 `````` ) v ) (* Enforce condition (4) above. *) let () = Production.iterx (fun prod -> if Action.has_syntaxerror (Production.action prod) then match production_states prod with | Bottom -> `````` POTTIER Francois committed Apr 28, 2016 373 `````` () `````` fpottier committed Mar 01, 2013 374 `````` | NonBottom v -> `````` POTTIER Francois committed Apr 06, 2017 375 `````` let sites = Lr1.production_where prod in `````` POTTIER Francois committed Apr 28, 2016 376 377 378 379 380 381 `````` let length = Production.length prod in if length = 0 then Lr1.NodeSet.iter represent sites else let states = List.nth v (length - 1) in represents states `````` fpottier committed Mar 01, 2013 382 383 384 385 386 `````` ) (* Define accessors. *) let represented state = `````` POTTIER Francois committed Nov 30, 2017 387 `````` UnionFind.get (represented state) `````` fpottier committed Mar 01, 2013 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 `````` let representeds states = if Lr1.NodeSet.is_empty states then assert false else represented (Lr1.NodeSet.choose states) (* Statistics. *) let () = Error.logC 1 (fun f -> let count = Lr1.fold (fun count node -> if represented node then count + 1 else count ) 0 in Printf.fprintf f "%d out of %d states are represented.\n" count Lr1.n ) (* ------------------------------------------------------------------------ *) (* Accessors for information about the stack. *) (* We describe a stack prefix as a list of cells, where each cell is a pair of a symbol and a set of states. The top of the stack is the head of the list. *) type cell = Symbol.t * Lr1.NodeSet.t type word = cell list (* This auxiliary function converts a stack-as-an-array (top of stack at the right end) to a stack-as-a-list (top of stack at list head). *) let convert a = let n = Array.length a in let rec loop i accu = if i = n then accu else loop (i + 1) (a.(i) :: accu) in loop 0 [] (* [stack s] describes the stack when the automaton is in state [s]. *) let stack node : word = List.combine (convert (stack_symbols node)) (stack_states node) (* [prodstack prod] describes the stack when production [prod] is about to be reduced. *) let prodstack prod : word = match production_states prod with | Bottom -> (* This production is never reduced. *) assert false | NonBottom v -> List.combine `````` POTTIER Francois committed Apr 28, 2016 447 448 `````` (convert (Production.rhs prod)) v `````` fpottier committed Mar 01, 2013 449 450 451 452 453 454 455 456 457 `````` (* [gotostack nt] is the structure of the stack when a shift transition over nonterminal [nt] is about to be taken. It consists of just one cell. *) let gotostack : Nonterminal.t -> word = Nonterminal.tabulate (fun nt -> let sources = Lr1.targets (fun accu sources _ -> `````` POTTIER Francois committed Apr 28, 2016 458 `````` List.fold_right Lr1.NodeSet.add sources accu `````` fpottier committed Mar 01, 2013 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 `````` ) Lr1.NodeSet.empty (Symbol.N nt) in [ Symbol.N nt, sources ] ) let fold f accu w = List.fold_right (fun (symbol, states) accu -> f accu (representeds states) symbol states ) w accu let fold_top f accu w = match w with | [] -> accu | (symbol, states) :: _ -> f (representeds states) symbol `````` POTTIER Francois committed Oct 01, 2015 476 477 478 ``````let print (w : word) = let b = Buffer.create 64 in fold (fun () _represented symbol _states -> `````` POTTIER Francois committed Oct 31, 2018 479 `````` Buffer.add_char b ' '; `````` 480 `````` Buffer.add_string b (Symbol.print symbol) `````` POTTIER Francois committed Oct 01, 2015 481 482 483 `````` ) () w; Buffer.contents b `````` fpottier committed Mar 01, 2013 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 ``````(* ------------------------------------------------------------------------ *) (* Explain how the stack should be deconstructed when an error is found. We sometimes have a choice as too how many stack cells should be popped. Indeed, several cells in the known suffix of the stack may physically hold a state. If neither of these states handles errors, then we could jump to either. (Indeed, if we jump to one that's nearer, it will in turn pop further stack cells and jump to one that's farther.) In the interests of code size, we should pop as few stack cells as possible. So, we jump to the topmost represented state in the known suffix. *) type state = | Represented | UnRepresented of Lr1.node type instruction = | Die | DownTo of word * state let rewind node : instruction = let w = stack node in let rec rewind w = match w with | [] -> `````` POTTIER Francois committed Apr 28, 2016 512 513 514 515 `````` (* I believe that every stack description either is definite (that is, ends with [TailEmpty]) or contains at least one represented state. Thus, if we find an empty [w], this means that the stack is definitely empty. *) `````` fpottier committed Mar 01, 2013 516 517 518 519 520 `````` Die | ((_, states) as cell) :: w -> `````` POTTIER Francois committed Apr 28, 2016 521 `````` if representeds states then `````` fpottier committed Mar 01, 2013 522 `````` `````` POTTIER Francois committed Apr 28, 2016 523 524 `````` (* Here is a represented state. We will pop this cell and no more. *) `````` fpottier committed Mar 01, 2013 525 `````` `````` POTTIER Francois committed Apr 28, 2016 526 `````` DownTo ([ cell ], Represented) `````` fpottier committed Mar 01, 2013 527 `````` `````` POTTIER Francois committed Apr 28, 2016 528 `````` else if handlers states then begin `````` fpottier committed Mar 01, 2013 529 `````` `````` POTTIER Francois committed Apr 28, 2016 530 531 532 533 `````` (* Here is an unrepresented state that can handle errors. The cell must hold a singleton set of states, so we know which state to jump to, even though it isn't represented. *) `````` fpottier committed Mar 01, 2013 534 `````` `````` POTTIER Francois committed Apr 28, 2016 535 536 537 `````` assert (Lr1.NodeSet.cardinal states = 1); let state = Lr1.NodeSet.choose states in DownTo ([ cell ], UnRepresented state) `````` fpottier committed Mar 01, 2013 538 `````` `````` POTTIER Francois committed Apr 28, 2016 539 540 `````` end else `````` fpottier committed Mar 01, 2013 541 `````` `````` POTTIER Francois committed Apr 28, 2016 542 543 `````` (* Here is an unrepresented state that does not handle errors. Pop this cell and look further. *) `````` fpottier committed Mar 01, 2013 544 `````` `````` POTTIER Francois committed Apr 28, 2016 545 546 547 548 549 `````` match rewind w with | Die -> Die | DownTo (w, st) -> DownTo (cell :: w, st) `````` fpottier committed Mar 01, 2013 550 551 552 553 554 `````` in rewind w (* ------------------------------------------------------------------------ *) `````` POTTIER Francois committed Nov 11, 2015 555 `````` `````` 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 ``````(* Machinery for the computation of which symbols must keep track of their start or end positions. *) open Keyword type variable = Symbol.t * where (* WhereStart or WhereEnd *) module M : Fix.IMPERATIVE_MAPS with type key = variable = struct type key = variable type 'data t = { mutable startp: 'data SymbolMap.t; mutable endp: 'data SymbolMap.t; } open SymbolMap let create() = { startp = empty; endp = empty } let clear m = m.startp <- empty; m.endp <- empty let add (sym, where) data m = match where with | WhereStart -> m.startp <- add sym data m.startp | WhereEnd -> m.endp <- add sym data m.endp | WhereSymbolStart -> assert false let find (sym, where) m = match where with | WhereStart -> find sym m.startp | WhereEnd -> find sym m.endp | WhereSymbolStart -> assert false let iter f m = iter (fun sym -> f (sym, WhereStart)) m.startp; iter (fun sym -> f (sym, WhereEnd)) m.endp end (* ------------------------------------------------------------------------ *) `````` POTTIER Francois committed Nov 11, 2015 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 ``````(* We now determine which positions must be kept track of. For simplicity, we do this on a per-symbol basis. That is, for each symbol, either we never keep track of position information, or we always do. In fact, we do distinguish start and end positions. This leads to computing two sets of symbols -- those that keep track of their start position and those that keep track of their end position. A symbol on the right-hand side of a production must keep track of its (start or end) position if that position is explicitly requested by a semantic action. Furthermore, if the left-hand symbol of a production must keep track of its start (resp. end) position, then the first (resp. last) symbol of its right-hand side (if there is one) must do so as well. That is, unless the right-hand side is empty. *) `````` 614 615 ``````(* 2015/11/11. When a production [prod] is reduced, the top stack cell may be consulted for its end position. This implies that this cell must exist `````` POTTIER Francois committed Nov 11, 2015 616 617 `````` and must store an end position! Now, when does this happen? `````` 618 619 `````` 1- This happens if [prod] is an epsilon production and the left-hand symbol of the production, [nt prod], keeps track of its start or end position. `````` POTTIER Francois committed Nov 11, 2015 620 621 622 623 624 625 626 627 628 629 630 `````` 2- This happens if the semantic action explicitly mentions the keyword [\$endpos(\$0)]. Now, if this happens, what should we do? a- If this happens in a state [s] whose incoming symbol is [sym], then [sym] must keep track of its end position. b- If this happens in an initial state, where the stack may be empty, then the sentinel cell at the bottom of the stack must contain an end position. Point (b) doesn't concern us here, but point (a) does. We must implement the `````` 631 632 `````` constraint (1) \/ (2) -> (a). Point (b) is taken care of in the code back-end, where, for simplicity, we always create a sentinel cell. *) `````` 633 `````` `````` 634 635 636 637 ``````(* I will say that this is a lot more sophisticated than I would like. The code back-end has been known for its efficiency and I am trying to maintain this property -- in particular, I would like to keep track of no positions at all, if the user doesn't use any position keyword. But I am suffering. *) `````` fpottier committed Mar 01, 2013 638 `````` `````` 639 640 ``````module S = FixSolver.Make(M)(Boolean) `````` fpottier committed Mar 01, 2013 641 `````` `````` 642 643 ``````let record_ConVar, record_VarVar, solve = S.create() `````` fpottier committed Mar 01, 2013 644 645 `````` let () = `````` 646 647 648 649 650 `````` (* We gather the constraints explained above in two loops. The first loop looks at every (non-start) production [prod]. The second loop looks at every (non-initial) state [s]. *) `````` fpottier committed Mar 01, 2013 651 `````` Production.iterx (fun prod -> `````` 652 653 `````` let nt, rhs = Production.def prod `````` fpottier committed Mar 01, 2013 654 655 `````` and ids = Production.identifiers prod and action = Production.action prod in `````` 656 657 658 659 660 661 662 663 664 665 666 `````` let length = Array.length rhs in if length > 0 then begin (* If [nt] keeps track of its start position, then the first symbol in the right-hand side must do so as well. *) record_VarVar (Symbol.N nt, WhereStart) (rhs.(0), WhereStart); (* If [nt] keeps track of its end position, then the last symbol in the right-hand side must do so as well. *) record_VarVar (Symbol.N nt, WhereEnd) (rhs.(length - 1), WhereEnd) end; `````` fpottier committed Mar 01, 2013 667 668 `````` KeywordSet.iter (function | SyntaxError -> `````` 669 `````` () `````` POTTIER Francois committed Nov 04, 2015 670 `````` | Position (Before, _, _) -> `````` 671 672 `````` (* Doing nothing here because [\$endpos(\$0)] is dealt with in the second loop. *) `````` POTTIER Francois committed Nov 04, 2015 673 `````` () `````` 674 675 676 `````` | Position (Left, _, _) -> (* [\$startpos] and [\$endpos] have been expanded away. *) assert false `````` Nicolás Ojeda Bär committed Jun 26, 2018 677 678 679 `````` | Position (_, _, FlavorLocation) -> (* [\$loc] and [\$sloc] have been expanded away. *) assert false `````` 680 681 682 `````` | Position (RightNamed _, WhereSymbolStart, _) -> (* [\$symbolstartpos(x)] does not exist. *) assert false `````` fpottier committed Mar 01, 2013 683 `````` | Position (RightNamed id, where, _) -> `````` 684 685 686 `````` (* If the semantic action mentions [\$startpos(\$i)], then the [i]-th symbol in the right-hand side must keep track of its start position. Similarly for end positions. *) `````` POTTIER Francois committed Apr 28, 2016 687 688 `````` Array.iteri (fun i id' -> if id = id' then `````` 689 `````` record_ConVar true (rhs.(i), where) `````` POTTIER Francois committed Apr 28, 2016 690 `````` ) ids `````` fpottier committed Mar 01, 2013 691 `````` ) (Action.keywords action) `````` 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 `````` ); (* end of loop on productions *) Lr1.iterx (fun s -> (* Let [sym] be the incoming symbol of state [s]. *) let sym = Misc.unSome (Lr1.incoming_symbol s) in (* Condition (1) in the long comment above (2015/11/11). If an epsilon production [prod] can be reduced in state [s], if its left-hand side [nt] keeps track of its start or end position, then [sym] must keep track of its end position. *) TerminalMap.iter (fun _ prods -> let prod = Misc.single prods in let nt, rhs = Production.def prod in let length = Array.length rhs in if length = 0 then begin record_VarVar (Symbol.N nt, WhereStart) (sym, WhereEnd); record_VarVar (Symbol.N nt, WhereEnd) (sym, WhereEnd) end ) (Lr1.reductions s); (* Condition (2) in the long comment above (2015/11/11). If a production can be reduced in state [s] and mentions [\$endpos(\$0)], then [sym] must keep track of its end position. *) if Lr1.has_beforeend s then record_ConVar true (sym, WhereEnd) `````` fpottier committed Mar 01, 2013 719 720 `````` ) `````` 721 722 723 724 725 726 727 728 729 730 731 732 ``````let track : variable -> bool = solve() let startp symbol = track (symbol, WhereStart) let endp symbol = track (symbol, WhereEnd) let for_every_symbol (f : Symbol.t -> unit) : unit = Terminal.iter (fun t -> f (Symbol.T t)); Nonterminal.iter (fun nt -> f (Symbol.N nt)) `````` fpottier committed Mar 01, 2013 733 `````` `````` 734 735 736 737 ``````let sum_over_every_symbol (f : Symbol.t -> bool) : int = let c = ref 0 in for_every_symbol (fun sym -> if f sym then c := !c + 1); !c `````` fpottier committed Mar 01, 2013 738 739 740 741 742 743 `````` let () = Error.logC 1 (fun f -> Printf.fprintf f "%d out of %d symbols keep track of their start position.\n\ %d out of %d symbols keep track of their end position.\n" `````` 744 745 `````` (sum_over_every_symbol startp) (Terminal.n + Nonterminal.n) (sum_over_every_symbol endp) (Terminal.n + Nonterminal.n)) `````` fpottier committed Mar 01, 2013 746 747 748 749 750 751 752 753 754 755 756 `````` (* ------------------------------------------------------------------------- *) (* Miscellaneous. *) let universal symbol = Lr1.fold (fun universal s -> universal && (if represented s then SymbolMap.mem symbol (Lr1.transitions s) else true) ) true (* ------------------------------------------------------------------------ *) (* Discover which states can peek at an error. These are the states `````` POTTIER Francois committed Dec 03, 2014 757 758 `````` where an error token may be on the stream. These are the states that are targets of a reduce action on [error]. *) `````` fpottier committed Mar 01, 2013 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 `````` (* 2012/08/25 I am optimizing this code, whose original version I found had quadratic complexity. The problem is as follows. We can easily iterate over all states to find which states [s] have a reduce action on error. What we must find out, then, is into which state [t] this reduce action takes us. This is not easy to predict, as it depends on the contents of the stack. The original code used an overapproximation, as follows: if the reduction concerns a production whose head symbol is [nt], then all of the states that have an incoming transition labeled [nt] are potential targets. The new version of the code below relies on the same approximation, but uses two successive loops instead of two nested loops. *) let errorpeekers = (* First compute a set of symbols [nt]... *) let nts : SymbolSet.t = Lr1.fold (fun nts node -> try `````` POTTIER Francois committed Apr 28, 2016 776 777 778 779 `````` let prods = TerminalMap.lookup Terminal.error (Lr1.reductions node) in let prod = Misc.single prods in let nt = Production.nt prod in SymbolSet.add (Symbol.N nt) nts `````` fpottier committed Mar 01, 2013 780 `````` with Not_found -> `````` POTTIER Francois committed Apr 28, 2016 781 `````` nts `````` fpottier committed Mar 01, 2013 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 `````` ) SymbolSet.empty in (* ... then compute the set of all target states of all transitions labeled by some symbol in the set [nt]. *) SymbolSet.fold (fun nt errorpeekers -> Lr1.targets (fun errorpeekers _ target -> Lr1.NodeSet.add target errorpeekers ) errorpeekers nt ) nts Lr1.NodeSet.empty let errorpeeker node = Lr1.NodeSet.mem node errorpeekers (* ------------------------------------------------------------------------ *) let () = Time.tick "Constructing the invariant"``````