From a0f93d9f2e4d6fa64338552aecd349065c8f25ad Mon Sep 17 00:00:00 2001 From: Jean Privat Date: Thu, 18 Jul 2024 10:54:03 -0400 Subject: [PATCH 1/8] nitcc: Gram do the state analysis in a 2nd step Signed-off-by: Jean Privat --- contrib/nitcc/src/grammar.nit | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/contrib/nitcc/src/grammar.nit b/contrib/nitcc/src/grammar.nit index 0ddb646c73..1d24fc55f5 100644 --- a/contrib/nitcc/src/grammar.nit +++ b/contrib/nitcc/src/grammar.nit @@ -149,7 +149,10 @@ class Gram #print state automaton.states.add(state) - state.analysis + # Extends the core + for i in state.items.to_a do + state.extends(i) + end var nexts = new HashMap[Element, LRState] for i in state.items do @@ -198,6 +201,9 @@ class Gram next.ins.add t end end + for state in automaton.states do + state.analysis + end return automaton end @@ -1020,10 +1026,6 @@ class LRState # Compute guards and conflicts fun analysis do - # Extends the core - for i in items.to_a do - extends(i) - end # Collect action and conflicts for i in items do From fa6de054f0b4c56da558756e0d8bc7a02dbe01db Mon Sep 17 00:00:00 2001 From: Jean Privat Date: Thu, 18 Jul 2024 10:56:28 -0400 Subject: [PATCH 2/8] nitcc: add Production sample Signed-off-by: Jean Privat --- contrib/nitcc/src/grammar.nit | 63 +++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/contrib/nitcc/src/grammar.nit b/contrib/nitcc/src/grammar.nit index 1d24fc55f5..0b0a48e743 100644 --- a/contrib/nitcc/src/grammar.nit +++ b/contrib/nitcc/src/grammar.nit @@ -43,6 +43,10 @@ class Gram if a == last then res.append(" ;\n") else res.append(" |\n") end if p.is_nullable then res.append "\t// is nullable\n" + if p.sample_alternative != null then + res.append "\t// sample: {p.sample_to_s}\n" + end + if not p.firsts.is_empty then res.append "\t// firsts:\n" for x in p.firsts do res.append "\t// {x}\n" @@ -207,6 +211,37 @@ class Gram return automaton end + fun compute_sample_length + do + 
loop + var changed = false + for p in prods do + for a in p.alts do + if a.phony then continue + var sample_length = 0 + for e in a.elems do + if e isa Token then + sample_length += 1 + else if e isa Production then + var e_len = e.sample_length + if e_len == null then continue label alts + sample_length += e_len + else + abort + end + end + var e_len = p.sample_length + if e_len == null or e_len > sample_length then + p.sample_length = sample_length + p.sample_alternative = a + changed = true + end + end label alts + end + if not changed then break + end + end + # Compute `nullables`, `firsts` and `afters` of productions fun analyse do @@ -237,6 +272,8 @@ class Gram if not changed then break end + compute_sample_length + loop var changed = false for p in prods do @@ -352,6 +389,24 @@ class Production # Is the production nullable var is_nullable = false + # The length (in tokens) of the smallest sample + var sample_length: nullable Int = null + + # The alternative used as the smallest sample + var sample_alternative: nullable Alternative = null + + redef fun sample_to_s: String + do + var alt = sample_alternative.as(not null) + var res = new Buffer + for e in alt.elems do + if not res.is_empty then res.add ' ' + res.append(e.sample_to_s) + end + return res.to_s + end + + # The first tokens of the production var firsts = new HashSet[Item] @@ -504,6 +559,9 @@ abstract class Element var name: String redef fun to_s do return name + # An example of a string + fun sample_to_s: String is abstract + private var acname_cache: nullable String = null # The mangled name of the element @@ -530,6 +588,11 @@ class Token var shifts = new ArraySet[LRState] # States of the LR automaton that reduce on self in the lookahead(1) var reduces = new ArraySet[LRState] + + redef fun sample_to_s + do + return to_s + end end # From 39eb5b42b336eea1413c7cdcdab2f957151a575c Mon Sep 17 00:00:00 2001 From: Jean Privat Date: Thu, 18 Jul 2024 11:12:11 -0400 Subject: [PATCH 3/8] nitcc: LRState 
remove cname Signed-off-by: Jean Privat --- contrib/nitcc/src/grammar.nit | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/contrib/nitcc/src/grammar.nit b/contrib/nitcc/src/grammar.nit index 0b0a48e743..98683d62c4 100644 --- a/contrib/nitcc/src/grammar.nit +++ b/contrib/nitcc/src/grammar.nit @@ -764,11 +764,11 @@ private class Generator add "class Parser_{name}" add "\tsuper Parser" - add "\tredef fun start_state do return state_{states.first.cname}" + add "\tredef fun start_state do return state_{states.first.number}" add "end" for s in states do - add "private fun state_{s.cname}: LRState{s.cname} do return once new LRState{s.cname}" + add "private fun state_{s.number}: LRState{s.number} do return once new LRState{s.number}" end for p in gram.prods do add "private fun goto_{p.cname}: Goto_{p.cname} do return once new Goto_{p.cname}" @@ -784,7 +784,7 @@ private class Generator if not s.need_guard then continue add "\t# guarded action for state {s.name}" add "\t# {s.shifts.length} shift(s) and {s.reduces.length} reduce(s)" - add "\tprivate fun action_s{s.cname}(parser: Parser) do" + add "\tprivate fun action_s{s.number}(parser: Parser) do" if s.reduces.length != 1 then add "\t\tparser.parse_error" else @@ -804,14 +804,14 @@ private class Generator add "\tsuper NToken" for s in t.shifts do if not s.need_guard then continue - add "\tredef fun action_s{s.cname}(parser) do" + add "\tredef fun action_s{s.number}(parser) do" gen_shift_to_nit(s, t) add "\tend" end for s in t.reduces do if not s.need_guard then continue if s.reduces.length <= 1 then continue - add "\tredef fun action_s{s.cname}(parser) do" + add "\tredef fun action_s{s.number}(parser) do" add "\t\treduce_{s.guarded_reduce[t].first.alt.cname}(parser)" #gen_reduce_to_nit(s.guarded_reduce[t].first.alt) add "\tend" @@ -823,7 +823,7 @@ private class Generator add "redef class LRGoto" for s in states do if s.gotos.length <= 1 then continue - add "\tprivate fun 
goto_s{s.cname}(parser: Parser) do abort" + add "\tprivate fun goto_s{s.number}(parser: Parser) do abort" end add "end" @@ -832,7 +832,7 @@ private class Generator add "\tsuper LRGoto" for s in p.gotos do if s.gotos.length <= 1 then continue - add "\tredef fun goto_s{s.cname}(parser) do" + add "\tredef fun goto_s{s.number}(parser) do" gen_goto_to_nit(s, p) add "\tend" end @@ -888,7 +888,7 @@ private class Generator for s in states do add "# State {s.name}" - add "private class LRState{s.cname}" + add "private class LRState{s.number}" add "\tsuper LRState" add "\tredef fun to_s do return \"{s.name.escape_to_nit}\"" @@ -907,7 +907,7 @@ private class Generator add "\tredef fun action(parser) do" if s.need_guard then - add "\t\tparser.peek_token.action_s{s.cname}(parser)" + add "\t\tparser.peek_token.action_s{s.number}(parser)" else if s.reduces.length == 1 then add "\t\treduce_{s.reduces.first.cname}(parser)" #gen_reduce_to_nit(s.reduces.first) @@ -919,7 +919,7 @@ private class Generator if not s.gotos.is_empty then add "\tredef fun goto(parser, goto) do" if s.gotos.length > 1 then - add "\t\tgoto.goto_s{s.cname}(parser)" + add "\t\tgoto.goto_s{s.number}(parser)" else gen_goto_to_nit(s, s.gotos.first) end @@ -935,14 +935,14 @@ private class Generator fun gen_shift_to_nit(s: LRState, t: Token) do var dest = s.trans(t) - add "\t\tparser.shift(state_{dest.cname})" + add "\t\tparser.shift(state_{dest.number})" end fun gen_goto_to_nit(s: LRState, p: Production) do var dest = s.trans(p) - add "\t\tparser.push(state_{dest.cname})" + add "\t\tparser.push(state_{dest.number})" end fun gen_reduce_to_nit(alt: Alternative) @@ -1012,8 +1012,6 @@ class LRState # Name of the automaton (short part from the start) var name: String - # Mangled name - var cname: String is lazy do return name.to_cmangle # Number var number = -1 From ada17582dfd9806ce6457068d2ed670e913c2e56 Mon Sep 17 00:00:00 2001 From: Jean Privat Date: Thu, 18 Jul 2024 13:16:32 -0400 Subject: [PATCH 4/8] nitcc: LRState 
instead of a name, use a real prefix of elements Signed-off-by: Jean Privat --- contrib/nitcc/src/grammar.nit | 43 +++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/contrib/nitcc/src/grammar.nit b/contrib/nitcc/src/grammar.nit index 98683d62c4..1dcbc5736f 100644 --- a/contrib/nitcc/src/grammar.nit +++ b/contrib/nitcc/src/grammar.nit @@ -137,7 +137,7 @@ class Gram analyse - var first = new LRState("Start") + var first = new LRState first.number = 0 for i in start.start_state do first.add(i) @@ -165,13 +165,10 @@ class Gram if nexts.has_key(e) then nexts[e].add(i.avance) else - var name - if state == automaton.states.first then - name = e.to_s - else - name = "{state.name} {e}" - end - var next = new LRState(name) + var next = new LRState + next.prev = state + next.prefix.add_all(state.prefix) + next.prefix.add(e) nexts[e] = next next.add(i.avance) end @@ -185,6 +182,10 @@ class Gram var new_state = true for n in seen do if n == next then + if next.prefix.length < n.prefix.length then + n.prefix = next.prefix + n.prev = next.prev + end next = n new_state = false break @@ -611,7 +612,7 @@ class LRAutomaton var res = new Array[String] res.add "* LRAutomaton: {states.length} states\n" for s in states do - res.add "s{s.number} {s.name}\n" + res.add "STATE {s}\n" res.add "\tCORE\n" for i in s.core do res.add "\t\t{i}\n" @@ -700,7 +701,7 @@ class LRAutomaton f.write("node[shape=Mrecord,height=0];\n") for s in states do - f.write "s{s.number} [label=\"{s.number} {s.name.escape_to_dot}|" + f.write "s{s.number} [label=\"{s.to_s.escape_to_dot}|" for i in s.core do f.write "{i.to_s.escape_to_dot}\\l" end @@ -782,7 +783,7 @@ private class Generator add "redef class NToken" for s in states do if not s.need_guard then continue - add "\t# guarded action for state {s.name}" + add "\t# guarded action for state {s}" add "\t# {s.shifts.length} shift(s) and {s.reduces.length} reduce(s)" add "\tprivate fun action_s{s.number}(parser: 
Parser) do" if s.reduces.length != 1 then @@ -887,11 +888,11 @@ private class Generator end for s in states do - add "# State {s.name}" + add "# State {s}" add "private class LRState{s.number}" add "\tsuper LRState" - add "\tredef fun to_s do return \"{s.name.escape_to_nit}\"" + add "\tredef fun to_s do return \"{s.to_s.escape_to_nit}\"" var err = new Array[String] for t in s.outs do @@ -1009,9 +1010,13 @@ end # A state in a LR automaton class LRState - # Name of the automaton (short part from the start) - var name: String + # Shortest prefix to go to this state + # Is empty for the start state + var prefix = new Array[Element] + # The previous node according to the prefix + # Is null for the start state + var prev: nullable LRState = null # Number var number = -1 @@ -1038,7 +1043,7 @@ class LRState redef fun ==(o) do return o isa LRState and core == o.core redef fun hash do return items.length - redef fun to_s do return items.join(" ; ") + redef fun to_s do return "{number} {prefix.join(" ")}" # Add and item in the core fun add(i: Item): Bool @@ -1120,7 +1125,7 @@ class LRState var removed_reduces = new Array[Token] for t, a in guarded_reduce do if a.length > 1 then - print "REDUCE/REDUCE Conflict on state {self.number} {self.name} for token {t}:" + print "REDUCE/REDUCE Conflict on state {self} for token {t}:" for i in a do print "\treduce: {i}" conflicting_items.add_all a end @@ -1153,12 +1158,12 @@ class LRState end end if confs.is_empty then - print "Automatic Dangling on state {self.number} {self.name} for token {t}:" + print "Automatic Dangling on state {self} for token {t}:" print "\treduce: {ri}" for r in ress do print r removed_reduces.add t else - print "SHIFT/REDUCE Conflict on state {self.number} {self.name} for token {t}:" + print "SHIFT/REDUCE Conflict on state {self} for token {t}:" print "\treduce: {ri}" for i in guarded_shift[t] do print "\tshift: {i}" removed_reduces.add t From 5f295b4342f96d6ec490daa1daa43052ffdc4fe5 Mon Sep 17 00:00:00 2001 
From: Jean Privat Date: Thu, 18 Jul 2024 14:59:34 -0400 Subject: [PATCH 5/8] nitcc: add LREngine to simulate an LR automaton and build CST Signed-off-by: Jean Privat --- contrib/nitcc/src/grammar.nit | 132 ++++++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) diff --git a/contrib/nitcc/src/grammar.nit b/contrib/nitcc/src/grammar.nit index 1dcbc5736f..f6027eff42 100644 --- a/contrib/nitcc/src/grammar.nit +++ b/contrib/nitcc/src/grammar.nit @@ -1204,6 +1204,138 @@ class LRState end end +# Execution engine simulator on a LR automaton. +# It has a stack of LR states and AST nodes. +class LREngine + # The stack of states, starts with the start state + var state_stack = new Array[nullable LRState] + + # A stack of CST nodes; reduced productions and shifted tokens are pushed onto it + var node_stack = new Array[CSTNode] + + # The sequence of elements shifted by `start` to reach its initial state + var past = new Array[Element] + + # The sequence of elements shifted since the first state + var future = new Array[Element] + + # The current state (top of the stack) + fun state: nullable LRState do return state_stack.last + + # Initialize the engine on a given `state`. + # A consistent `past` is created to reach `state`. + # Subsequent shifts will be added to the `future`. + fun start(state: LRState) + do + var start = state + loop + var prev = start.prev + if prev == null then break + start = prev + end + state_stack.add(start) + for e in state.prefix do shift(e) + assert self.state == state + + var tmp = future + future = past + past = tmp + end + + # Perform a shift on `e` for the current `state` and add it to `future`. + # Both tokens and productions can be shifted. + # If the shift is impossible, the current state becomes null. + fun shift(e: Element) + do + state_stack.add state.trans(e) + future.add(e) + node_stack.add(new CSTNode(e)) + end + + # Perform a reduction on an alternative `a` on the current state. 
+ fun reduce(a: Alternative) + do + assert can_reduce(a) + var len = a.elems.length + for i in [0..len[ do state_stack.pop + var node = new CSTNode(a.prod) + for i in [0..len[ do node.children.unshift(node_stack.pop) + node_stack.add(node) + + # TODO something smart when accepting + if a.prod.accept then + state_stack.add null + return + end + + state_stack.add state.trans(a.prod) + end + + # Return true if the elements in the stack are compatible with the reduction. + fun can_reduce(alternative: Alternative): Bool + do + var idx = node_stack.length - alternative.elems.length + if idx < 0 then return false + for i in [0..alternative.elems.length[ do + if alternative.elems[i] != node_stack[idx+i].element then return false + end + return true + end + + # Try to shift on the current state. + # If not doable, try to reduce something, then shift. + # Return true if a shift was done. + fun try_shift(e: Element): Bool + do + var s = state.trans(e) + if s == null then + if not try_reduce then + return false + end + return try_shift(e) + end + shift(e) + return true + end + + # Try to reduce something on the current state. 
+ fun try_reduce: Bool + do + for i in state.core do + # Filter out items that are not reduction + if i.next != null then continue + if can_reduce(i.alt) then + reduce(i.alt) + return true + end + end + return false + end + + fun tree: CSTNode do return node_stack.last +end + +# A CST node of the LREngine +class CSTNode + var element: Element + var children = new Array[CSTNode] + + fun dump(prefix: nullable String): String + do + if prefix == null then prefix = "" + var res = element.to_s + "\n" + if children.length == 0 then return res + var p2 = prefix + "│ " + for c in [0..children.length-1[ do + res += prefix + "├╴" + res += children[c].dump(p2) + end + res += prefix + "└╴" + res += children.last.dump(prefix + " ") + return res + end +end + # A transition in a LR automaton class LRTransition # The origin state From b0243a0c4375346afe32ac80c5e82140fa4a3eab Mon Sep 17 00:00:00 2001 From: Jean Privat Date: Thu, 18 Jul 2024 15:04:48 -0400 Subject: [PATCH 6/8] nitcc: show example of S/R R/R conflict (and possible ambiguities) Signed-off-by: Jean Privat --- contrib/nitcc/src/grammar.nit | 100 ++++++++++++++++++++++++++++++++-- 1 file changed, 96 insertions(+), 4 deletions(-) diff --git a/contrib/nitcc/src/grammar.nit b/contrib/nitcc/src/grammar.nit index f6027eff42..fc9e13280a 100644 --- a/contrib/nitcc/src/grammar.nit +++ b/contrib/nitcc/src/grammar.nit @@ -630,6 +630,9 @@ class LRAutomaton res.add "\t\t\t{i2}\n" end end + var engine = new LREngine + engine.start(s) + res.add "\tPOSSIBLE EXIT {engine.find_exit}\n" res.add "\tTRANSITIONS {s.outs.length}\n" for t in s.outs do res.add "\t\t{t.elem} |-> s{t.to.number}\n" @@ -1089,10 +1092,25 @@ class LRState # Is the state LR0? 
fun is_lr0: Bool do return reduces.length <= 1 and shifts.is_empty or reduces.is_empty + # The item that reduces the most + # Is used by `find_exit` + fun exit_item: Item + do + var exit_candidate = core.first + for i in core do + if i.alt.prod.accept then return i + if i.pos > exit_candidate.pos then + exit_candidate = i + else if i.pos == exit_candidate.pos and i.alt.elems.length < exit_candidate.alt.elems.length then + exit_candidate = i + end + end + return exit_candidate + end + # Compute guards and conflicts fun analysis do - # Collect action and conflicts for i in items do var n = i.next @@ -1125,9 +1143,35 @@ class LRState var removed_reduces = new Array[Token] for t, a in guarded_reduce do if a.length > 1 then + print "---" print "REDUCE/REDUCE Conflict on state {self} for token {t}:" - for i in a do print "\treduce: {i}" + print "A possible past: {prefix}" conflicting_items.add_all a + var worst_exit = null + for i in a do + var engine = new LREngine + engine.start(self) + engine.reduce(i.alt) + var exit = engine.find_exit + if worst_exit == null or exit.length > worst_exit.length then worst_exit = exit + end + var amb = 0 + for i in a do + var engine = new LREngine + engine.start(self) + engine.reduce(i.alt) + for e in worst_exit.as(not null) do + if not engine.try_shift(e) then break + end + print "REDUCE on item: {i}" + var exit = engine.find_exit + print "A possible future: {exit}" + print engine.tree.dump + if exit == worst_exit then amb += 1 + end + if amb > 1 then + print "AMBIGUITY detected: same elements, different trees" + end end if guarded_shift.has_key(t) then var ri = a.first @@ -1158,17 +1202,46 @@ class LRState end end if confs.is_empty then + print "---" print "Automatic Dangling on state {self} for token {t}:" print "\treduce: {ri}" for r in ress do print r removed_reduces.add t else + print "---" print "SHIFT/REDUCE Conflict on state {self} for token {t}:" - print "\treduce: {ri}" - for i in guarded_shift[t] do print "\tshift: {i}" + 
print "A possible past: {prefix}" removed_reduces.add t conflicting_items.add_all a conflicting_items.add_all guarded_shift[t] + + var worst_exit = null + for i in guarded_shift[t] do + print "SHIFT on item: {i}" + var engine = new LREngine + engine.start(self) + for e in i.future do engine.shift(e) + var exit = engine.find_exit + print "A possible future: {exit}" + print engine.tree.dump + if worst_exit == null or exit.length < worst_exit.length then + worst_exit = exit + end + end + var engine = new LREngine + engine.start(self) + engine.reduce(ri.alt) + for e in worst_exit.as(not null) do + if not engine.try_shift(e) then break + end + var reduce_exit = engine.find_exit + print "REDUCE on item: {ri}" + var exit = engine.find_exit + print "A possible future: {exit}" + print engine.tree.dump + if exit == worst_exit then + print "AMBIGUITY detected: same elements, different trees" + end end end end @@ -1313,6 +1386,16 @@ class LREngine end fun tree: CSTNode do return node_stack.last + + fun find_exit: Array[Element] + do + while state != null do + var item = state.exit_item + for e in item.future do shift(e) + reduce(item.alt) + end + return future + end end # A CST node of the LREngine @@ -1401,4 +1484,13 @@ class Item var res = new Item(alt, pos+1) return res end + + fun future: Array[Element] + do + var res = new Array[Element] + for i in [pos .. 
alt.elems.length[ do + res.add alt.elems[i] + end + return res + end end From b1dddbf08f0cf27ac33674e9bbe87f34b6ff8aed Mon Sep 17 00:00:00 2001 From: Jean Privat Date: Thu, 18 Jul 2024 15:40:43 -0400 Subject: [PATCH 7/8] nitcc: add new tests of grammars (some with conflicts) Signed-off-by: Jean Privat --- contrib/nitcc/tests/amb.input | 1 + contrib/nitcc/tests/amb.sablecc | 4 ++++ contrib/nitcc/tests/amb2.input | 1 + contrib/nitcc/tests/amb2.sablecc | 6 +++++ contrib/nitcc/tests/amb3.input | 1 + contrib/nitcc/tests/amb3.sablecc | 5 ++++ contrib/nitcc/tests/calc0.input | 1 + contrib/nitcc/tests/calc0.sablecc | 11 +++++++++ contrib/nitcc/tests/eq.input | 1 + contrib/nitcc/tests/eq.sablecc | 9 +++++++ contrib/nitcc/tests/eq2.input | 1 + contrib/nitcc/tests/eq2.sablecc | 10 ++++++++ contrib/nitcc/tests/eq3.input | 1 + contrib/nitcc/tests/eq3.sablecc | 10 ++++++++ contrib/nitcc/tests/geom.input | 1 + contrib/nitcc/tests/geom.sablecc | 8 +++++++ contrib/nitcc/tests/lg.input | 1 + contrib/nitcc/tests/lg.input2 | 1 + contrib/nitcc/tests/lg.sablecc | 6 +++++ contrib/nitcc/tests/lg2.input | 1 + contrib/nitcc/tests/lg2.input2 | 1 + contrib/nitcc/tests/lg2.sablecc | 5 ++++ contrib/nitcc/tests/sav/amb.input.res | 4 ++++ contrib/nitcc/tests/sav/amb.res | 1 + contrib/nitcc/tests/sav/amb2.input.res | 10 ++++++++ contrib/nitcc/tests/sav/amb2.res | 1 + contrib/nitcc/tests/sav/amb3.input.res | 6 +++++ contrib/nitcc/tests/sav/amb3.res | 1 + contrib/nitcc/tests/sav/calc0.input.res | 23 ++++++++++++++++++ contrib/nitcc/tests/sav/eq.input.res | 13 ++++++++++ contrib/nitcc/tests/sav/eq.res | 1 + contrib/nitcc/tests/sav/eq2.input.res | 13 ++++++++++ contrib/nitcc/tests/sav/eq2.res | 1 + contrib/nitcc/tests/sav/eq3.input.res | 13 ++++++++++ contrib/nitcc/tests/sav/eq3.res | 1 + contrib/nitcc/tests/sav/geom.input.res | 32 +++++++++++++++++++++++++ contrib/nitcc/tests/sav/lg.input.res | 19 +++++++++++++++ contrib/nitcc/tests/sav/lg.input2.res | 19 +++++++++++++++ 
contrib/nitcc/tests/sav/lg.res | 1 + contrib/nitcc/tests/sav/lg2.input.res | 15 ++++++++++++ contrib/nitcc/tests/sav/lg2.input2.res | 17 +++++++++++++ contrib/nitcc/tests/sav/lg2.res | 1 + 42 files changed, 278 insertions(+) create mode 100644 contrib/nitcc/tests/amb.input create mode 100644 contrib/nitcc/tests/amb.sablecc create mode 100644 contrib/nitcc/tests/amb2.input create mode 100644 contrib/nitcc/tests/amb2.sablecc create mode 100644 contrib/nitcc/tests/amb3.input create mode 100644 contrib/nitcc/tests/amb3.sablecc create mode 100644 contrib/nitcc/tests/calc0.input create mode 100644 contrib/nitcc/tests/calc0.sablecc create mode 100644 contrib/nitcc/tests/eq.input create mode 100644 contrib/nitcc/tests/eq.sablecc create mode 100644 contrib/nitcc/tests/eq2.input create mode 100644 contrib/nitcc/tests/eq2.sablecc create mode 100644 contrib/nitcc/tests/eq3.input create mode 100644 contrib/nitcc/tests/eq3.sablecc create mode 100644 contrib/nitcc/tests/geom.input create mode 100644 contrib/nitcc/tests/geom.sablecc create mode 100644 contrib/nitcc/tests/lg.input create mode 100644 contrib/nitcc/tests/lg.input2 create mode 100644 contrib/nitcc/tests/lg.sablecc create mode 100644 contrib/nitcc/tests/lg2.input create mode 100644 contrib/nitcc/tests/lg2.input2 create mode 100644 contrib/nitcc/tests/lg2.sablecc create mode 100644 contrib/nitcc/tests/sav/amb.input.res create mode 100644 contrib/nitcc/tests/sav/amb.res create mode 100644 contrib/nitcc/tests/sav/amb2.input.res create mode 100644 contrib/nitcc/tests/sav/amb2.res create mode 100644 contrib/nitcc/tests/sav/amb3.input.res create mode 100644 contrib/nitcc/tests/sav/amb3.res create mode 100644 contrib/nitcc/tests/sav/calc0.input.res create mode 100644 contrib/nitcc/tests/sav/eq.input.res create mode 100644 contrib/nitcc/tests/sav/eq.res create mode 100644 contrib/nitcc/tests/sav/eq2.input.res create mode 100644 contrib/nitcc/tests/sav/eq2.res create mode 100644 contrib/nitcc/tests/sav/eq3.input.res create mode 
100644 contrib/nitcc/tests/sav/eq3.res create mode 100644 contrib/nitcc/tests/sav/geom.input.res create mode 100644 contrib/nitcc/tests/sav/lg.input.res create mode 100644 contrib/nitcc/tests/sav/lg.input2.res create mode 100644 contrib/nitcc/tests/sav/lg.res create mode 100644 contrib/nitcc/tests/sav/lg2.input.res create mode 100644 contrib/nitcc/tests/sav/lg2.input2.res create mode 100644 contrib/nitcc/tests/sav/lg2.res diff --git a/contrib/nitcc/tests/amb.input b/contrib/nitcc/tests/amb.input new file mode 100644 index 0000000000..074d1eeb40 --- /dev/null +++ b/contrib/nitcc/tests/amb.input @@ -0,0 +1 @@ +id diff --git a/contrib/nitcc/tests/amb.sablecc b/contrib/nitcc/tests/amb.sablecc new file mode 100644 index 0000000000..bbe895fe49 --- /dev/null +++ b/contrib/nitcc/tests/amb.sablecc @@ -0,0 +1,4 @@ +Grammar amb; +Parser +Ignored #10, #32; +e = 'id' | e | e; diff --git a/contrib/nitcc/tests/amb2.input b/contrib/nitcc/tests/amb2.input new file mode 100644 index 0000000000..7d5c7b2b5e --- /dev/null +++ b/contrib/nitcc/tests/amb2.input @@ -0,0 +1 @@ +-0+ diff --git a/contrib/nitcc/tests/amb2.sablecc b/contrib/nitcc/tests/amb2.sablecc new file mode 100644 index 0000000000..ad1d48f5a9 --- /dev/null +++ b/contrib/nitcc/tests/amb2.sablecc @@ -0,0 +1,6 @@ +Grammar amb; +Parser +Ignored #10, #32; +e = '-' a | b '+' | '0'; +a = e ; +b = e ; diff --git a/contrib/nitcc/tests/amb3.input b/contrib/nitcc/tests/amb3.input new file mode 100644 index 0000000000..b2901ea97c --- /dev/null +++ b/contrib/nitcc/tests/amb3.input @@ -0,0 +1 @@ +a b diff --git a/contrib/nitcc/tests/amb3.sablecc b/contrib/nitcc/tests/amb3.sablecc new file mode 100644 index 0000000000..bdc84b7444 --- /dev/null +++ b/contrib/nitcc/tests/amb3.sablecc @@ -0,0 +1,5 @@ +Grammar amb; +Parser +Ignored #10, #32; +e = x 'b' | 'a' x ; +x = 'a' | 'b'; diff --git a/contrib/nitcc/tests/calc0.input b/contrib/nitcc/tests/calc0.input new file mode 100644 index 0000000000..0523cb5cd7 --- /dev/null +++ 
b/contrib/nitcc/tests/calc0.input @@ -0,0 +1 @@ +1+2+3*(4+5) diff --git a/contrib/nitcc/tests/calc0.sablecc b/contrib/nitcc/tests/calc0.sablecc new file mode 100644 index 0000000000..87386e79dd --- /dev/null +++ b/contrib/nitcc/tests/calc0.sablecc @@ -0,0 +1,11 @@ +Grammar calc; +Lexer +n = ('0'..'9')+ ; +Parser +Ignored #10, #32; +e + = e '+' e + | e '*' e + | '(' e ')' + | n + ; diff --git a/contrib/nitcc/tests/eq.input b/contrib/nitcc/tests/eq.input new file mode 100644 index 0000000000..e3280dd06b --- /dev/null +++ b/contrib/nitcc/tests/eq.input @@ -0,0 +1 @@ +id=id=id diff --git a/contrib/nitcc/tests/eq.sablecc b/contrib/nitcc/tests/eq.sablecc new file mode 100644 index 0000000000..7f1e196143 --- /dev/null +++ b/contrib/nitcc/tests/eq.sablecc @@ -0,0 +1,9 @@ +Grammar eq; +Parser +Ignored #10, #32; +e = + var '=' e | + var ; +c = + e '=' e ; +var = 'id' ; diff --git a/contrib/nitcc/tests/eq2.input b/contrib/nitcc/tests/eq2.input new file mode 100644 index 0000000000..e3280dd06b --- /dev/null +++ b/contrib/nitcc/tests/eq2.input @@ -0,0 +1 @@ +id=id=id diff --git a/contrib/nitcc/tests/eq2.sablecc b/contrib/nitcc/tests/eq2.sablecc new file mode 100644 index 0000000000..7c5db34ec1 --- /dev/null +++ b/contrib/nitcc/tests/eq2.sablecc @@ -0,0 +1,10 @@ +Grammar eq; +Parser +Ignored #10, #32; +e = + var '=' e | + 'assert' c | + var ; +c = + e '=' e ; +var = 'id' ; diff --git a/contrib/nitcc/tests/eq3.input b/contrib/nitcc/tests/eq3.input new file mode 100644 index 0000000000..e3280dd06b --- /dev/null +++ b/contrib/nitcc/tests/eq3.input @@ -0,0 +1 @@ +id=id=id diff --git a/contrib/nitcc/tests/eq3.sablecc b/contrib/nitcc/tests/eq3.sablecc new file mode 100644 index 0000000000..33bb02da82 --- /dev/null +++ b/contrib/nitcc/tests/eq3.sablecc @@ -0,0 +1,10 @@ +Grammar eq; +Parser +Ignored #10, #32; +e = + var '=' e | + c | + var ; +c = + e '=' e ; +var = 'id' ; diff --git a/contrib/nitcc/tests/geom.input b/contrib/nitcc/tests/geom.input new file mode 100644 index 
0000000000..4b169b1f5a --- /dev/null +++ b/contrib/nitcc/tests/geom.input @@ -0,0 +1 @@ +triangle 0x0 0@0 centre circle 0x0 0@0 diff --git a/contrib/nitcc/tests/geom.sablecc b/contrib/nitcc/tests/geom.sablecc new file mode 100644 index 0000000000..8026a54e76 --- /dev/null +++ b/contrib/nitcc/tests/geom.sablecc @@ -0,0 +1,8 @@ +Grammar geom; +Parser +Ignored #10, #32; +forme = + 'triangle' point point point | + 'circle' point point ; +point = num 'x' num | num '@' num | 'centre' forme; +num = '0'; diff --git a/contrib/nitcc/tests/lg.input b/contrib/nitcc/tests/lg.input new file mode 100644 index 0000000000..20afe73e05 --- /dev/null +++ b/contrib/nitcc/tests/lg.input @@ -0,0 +1 @@ +<0><0>> diff --git a/contrib/nitcc/tests/lg.input2 b/contrib/nitcc/tests/lg.input2 new file mode 100644 index 0000000000..9b09d7781c --- /dev/null +++ b/contrib/nitcc/tests/lg.input2 @@ -0,0 +1 @@ +<0><0>0 diff --git a/contrib/nitcc/tests/lg.sablecc b/contrib/nitcc/tests/lg.sablecc new file mode 100644 index 0000000000..c2a1e74804 --- /dev/null +++ b/contrib/nitcc/tests/lg.sablecc @@ -0,0 +1,6 @@ +Grammar lg; +Parser +Ignored #10, #32; +e = e '<' a | e '>' a | a ; +a = n | '<' e '>' ; +n = '0' ; diff --git a/contrib/nitcc/tests/lg2.input b/contrib/nitcc/tests/lg2.input new file mode 100644 index 0000000000..20afe73e05 --- /dev/null +++ b/contrib/nitcc/tests/lg2.input @@ -0,0 +1 @@ +<0><0>> diff --git a/contrib/nitcc/tests/lg2.input2 b/contrib/nitcc/tests/lg2.input2 new file mode 100644 index 0000000000..9b09d7781c --- /dev/null +++ b/contrib/nitcc/tests/lg2.input2 @@ -0,0 +1 @@ +<0><0>0 diff --git a/contrib/nitcc/tests/lg2.sablecc b/contrib/nitcc/tests/lg2.sablecc new file mode 100644 index 0000000000..4277f3c56f --- /dev/null +++ b/contrib/nitcc/tests/lg2.sablecc @@ -0,0 +1,5 @@ +Grammar lg; +Parser +Ignored #10, #32; +e = e '<' e | e '>' e | n | '<' e '>' ; +n = '0' ; diff --git a/contrib/nitcc/tests/sav/amb.input.res b/contrib/nitcc/tests/sav/amb.input.res new file mode 100644 index 
0000000000..e8b4706101 --- /dev/null +++ b/contrib/nitcc/tests/sav/amb.input.res @@ -0,0 +1,4 @@ +Start + e_0 + 'id'@(1:1-1:3) + Eof@(2:1-2:1)='' diff --git a/contrib/nitcc/tests/sav/amb.res b/contrib/nitcc/tests/sav/amb.res new file mode 100644 index 0000000000..05b05f7966 --- /dev/null +++ b/contrib/nitcc/tests/sav/amb.res @@ -0,0 +1 @@ +Error: there is conflicts diff --git a/contrib/nitcc/tests/sav/amb2.input.res b/contrib/nitcc/tests/sav/amb2.input.res new file mode 100644 index 0000000000..39f373159e --- /dev/null +++ b/contrib/nitcc/tests/sav/amb2.input.res @@ -0,0 +1,10 @@ +Start + e_1 + b + e_0 + '-'@(1:1-1:2) + a + e_2 + '0'@(1:2-1:3) + '+'@(1:3-1:4) + Eof@(2:1-2:1)='' diff --git a/contrib/nitcc/tests/sav/amb2.res b/contrib/nitcc/tests/sav/amb2.res new file mode 100644 index 0000000000..05b05f7966 --- /dev/null +++ b/contrib/nitcc/tests/sav/amb2.res @@ -0,0 +1 @@ +Error: there is conflicts diff --git a/contrib/nitcc/tests/sav/amb3.input.res b/contrib/nitcc/tests/sav/amb3.input.res new file mode 100644 index 0000000000..1c07b4c2cc --- /dev/null +++ b/contrib/nitcc/tests/sav/amb3.input.res @@ -0,0 +1,6 @@ +Start + e_1 + 'a'@(1:1-1:2) + x_1 + 'b'@(1:3-1:4) + Eof@(2:1-2:1)='' diff --git a/contrib/nitcc/tests/sav/amb3.res b/contrib/nitcc/tests/sav/amb3.res new file mode 100644 index 0000000000..05b05f7966 --- /dev/null +++ b/contrib/nitcc/tests/sav/amb3.res @@ -0,0 +1 @@ +Error: there is conflicts diff --git a/contrib/nitcc/tests/sav/calc0.input.res b/contrib/nitcc/tests/sav/calc0.input.res new file mode 100644 index 0000000000..fb9d423071 --- /dev/null +++ b/contrib/nitcc/tests/sav/calc0.input.res @@ -0,0 +1,23 @@ +Start + e_0 + e_3 + n@(1:1-1:2)='1' + '+'@(1:2-1:3) + e_0 + e_3 + n@(1:3-1:4)='2' + '+'@(1:4-1:5) + e_1 + e_3 + n@(1:5-1:6)='3' + '*'@(1:6-1:7) + e_2 + '('@(1:7-1:8) + e_0 + e_3 + n@(1:8-1:9)='4' + '+'@(1:9-1:10) + e_3 + n@(1:10-1:11)='5' + ')'@(1:11-1:12) + Eof@(2:1-2:1)='' diff --git a/contrib/nitcc/tests/sav/eq.input.res 
b/contrib/nitcc/tests/sav/eq.input.res new file mode 100644 index 0000000000..7e18e33f4b --- /dev/null +++ b/contrib/nitcc/tests/sav/eq.input.res @@ -0,0 +1,13 @@ +Start + e_0 + var + 'id'@(1:1-1:3) + '='@(1:3-1:4) + e_0 + var + 'id'@(1:4-1:6) + '='@(1:6-1:7) + e_1 + var + 'id'@(1:7-1:9) + Eof@(2:1-2:1)='' diff --git a/contrib/nitcc/tests/sav/eq.res b/contrib/nitcc/tests/sav/eq.res new file mode 100644 index 0000000000..05b05f7966 --- /dev/null +++ b/contrib/nitcc/tests/sav/eq.res @@ -0,0 +1 @@ +Error: there is conflicts diff --git a/contrib/nitcc/tests/sav/eq2.input.res b/contrib/nitcc/tests/sav/eq2.input.res new file mode 100644 index 0000000000..b7528645f3 --- /dev/null +++ b/contrib/nitcc/tests/sav/eq2.input.res @@ -0,0 +1,13 @@ +Start + e_0 + var + 'id'@(1:1-1:3) + '='@(1:3-1:4) + e_0 + var + 'id'@(1:4-1:6) + '='@(1:6-1:7) + e_2 + var + 'id'@(1:7-1:9) + Eof@(2:1-2:1)='' diff --git a/contrib/nitcc/tests/sav/eq2.res b/contrib/nitcc/tests/sav/eq2.res new file mode 100644 index 0000000000..05b05f7966 --- /dev/null +++ b/contrib/nitcc/tests/sav/eq2.res @@ -0,0 +1 @@ +Error: there is conflicts diff --git a/contrib/nitcc/tests/sav/eq3.input.res b/contrib/nitcc/tests/sav/eq3.input.res new file mode 100644 index 0000000000..b7528645f3 --- /dev/null +++ b/contrib/nitcc/tests/sav/eq3.input.res @@ -0,0 +1,13 @@ +Start + e_0 + var + 'id'@(1:1-1:3) + '='@(1:3-1:4) + e_0 + var + 'id'@(1:4-1:6) + '='@(1:6-1:7) + e_2 + var + 'id'@(1:7-1:9) + Eof@(2:1-2:1)='' diff --git a/contrib/nitcc/tests/sav/eq3.res b/contrib/nitcc/tests/sav/eq3.res new file mode 100644 index 0000000000..05b05f7966 --- /dev/null +++ b/contrib/nitcc/tests/sav/eq3.res @@ -0,0 +1 @@ +Error: there is conflicts diff --git a/contrib/nitcc/tests/sav/geom.input.res b/contrib/nitcc/tests/sav/geom.input.res new file mode 100644 index 0000000000..516cc64e5d --- /dev/null +++ b/contrib/nitcc/tests/sav/geom.input.res @@ -0,0 +1,32 @@ +Start + forme_0 + 'triangle'@(1:1-1:9) + point_0 + num + '0'@(1:10-1:11) + 
'x'@(1:11-1:12) + num + '0'@(1:12-1:13) + point_1 + num + '0'@(1:14-1:15) + '@'@(1:15-1:16) + num + '0'@(1:16-1:17) + point_2 + 'centre'@(1:18-1:24) + forme_1 + 'circle'@(1:25-1:31) + point_0 + num + '0'@(1:32-1:33) + 'x'@(1:33-1:34) + num + '0'@(1:34-1:35) + point_1 + num + '0'@(1:36-1:37) + '@'@(1:37-1:38) + num + '0'@(1:38-1:39) + Eof@(2:1-2:1)='' diff --git a/contrib/nitcc/tests/sav/lg.input.res b/contrib/nitcc/tests/sav/lg.input.res new file mode 100644 index 0000000000..36b1985027 --- /dev/null +++ b/contrib/nitcc/tests/sav/lg.input.res @@ -0,0 +1,19 @@ +Start + e_2 + a_1 + '<'@(1:1-1:2) + e_1 + e_2 + a_0 + n + '0'@(1:2-1:3) + '>'@(1:3-1:4) + a_1 + '<'@(1:4-1:5) + e_2 + a_0 + n + '0'@(1:5-1:6) + '>'@(1:6-1:7) + '>'@(1:7-1:8) + Eof@(2:1-2:1)='' diff --git a/contrib/nitcc/tests/sav/lg.input2.res b/contrib/nitcc/tests/sav/lg.input2.res new file mode 100644 index 0000000000..f825a5aa70 --- /dev/null +++ b/contrib/nitcc/tests/sav/lg.input2.res @@ -0,0 +1,19 @@ +NParserError@(2:1-2:1)='' +Nodes[Node] + '<'@(1:1-1:2) + e_2 + a_0 + n + '0'@(1:2-1:3) + '>'@(1:3-1:4) + '<'@(1:4-1:5) + e_1 + e_2 + a_0 + n + '0'@(1:5-1:6) + '>'@(1:6-1:7) + a_0 + n + '0'@(1:7-1:8) + Eof@(2:1-2:1)='' diff --git a/contrib/nitcc/tests/sav/lg.res b/contrib/nitcc/tests/sav/lg.res new file mode 100644 index 0000000000..05b05f7966 --- /dev/null +++ b/contrib/nitcc/tests/sav/lg.res @@ -0,0 +1 @@ +Error: there is conflicts diff --git a/contrib/nitcc/tests/sav/lg2.input.res b/contrib/nitcc/tests/sav/lg2.input.res new file mode 100644 index 0000000000..30257925c5 --- /dev/null +++ b/contrib/nitcc/tests/sav/lg2.input.res @@ -0,0 +1,15 @@ +NParserError@(2:1-2:1)='' +Nodes[Node] + '<'@(1:1-1:2) + e_2 + n + '0'@(1:2-1:3) + '>'@(1:3-1:4) + e_3 + '<'@(1:4-1:5) + e_2 + n + '0'@(1:5-1:6) + '>'@(1:6-1:7) + '>'@(1:7-1:8) + Eof@(2:1-2:1)='' diff --git a/contrib/nitcc/tests/sav/lg2.input2.res b/contrib/nitcc/tests/sav/lg2.input2.res new file mode 100644 index 0000000000..f6b978e66b --- /dev/null +++ 
b/contrib/nitcc/tests/sav/lg2.input2.res @@ -0,0 +1,17 @@ +NParserError@(2:1-2:1)='' +Nodes[Node] + '<'@(1:1-1:2) + e_2 + n + '0'@(1:2-1:3) + '>'@(1:3-1:4) + '<'@(1:4-1:5) + e_1 + e_2 + n + '0'@(1:5-1:6) + '>'@(1:6-1:7) + e_2 + n + '0'@(1:7-1:8) + Eof@(2:1-2:1)='' diff --git a/contrib/nitcc/tests/sav/lg2.res b/contrib/nitcc/tests/sav/lg2.res new file mode 100644 index 0000000000..05b05f7966 --- /dev/null +++ b/contrib/nitcc/tests/sav/lg2.res @@ -0,0 +1 @@ +Error: there is conflicts From a45fd3d85891dc1ef17d9be11caa5e2925915964 Mon Sep 17 00:00:00 2001 From: Jean Privat Date: Sat, 20 Jul 2024 11:39:42 -0400 Subject: [PATCH 8/8] nitcc: grammar make find_exit not loop Signed-off-by: Jean Privat --- contrib/nitcc/src/grammar.nit | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/contrib/nitcc/src/grammar.nit b/contrib/nitcc/src/grammar.nit index fc9e13280a..f4b724b791 100644 --- a/contrib/nitcc/src/grammar.nit +++ b/contrib/nitcc/src/grammar.nit @@ -1092,22 +1092,6 @@ class LRState # Is the state LR0? 
fun is_lr0: Bool do return reduces.length <= 1 and shifts.is_empty or reduces.is_empty - # The item that reduces the most - # Is used by `find_exit` - fun exit_item: Item - do - var exit_candidate = core.first - for i in core do - if i.alt.prod.accept then return i - if i.pos > exit_candidate.pos then - exit_candidate = i - else if i.pos == exit_candidate.pos and i.alt.elems.length < exit_candidate.alt.elems.length then - exit_candidate = i - end - end - return exit_candidate - end - # Compute guards and conflicts fun analysis do @@ -1389,8 +1373,17 @@ class LREngine fun find_exit: Array[Element] do - while state != null do - var item = state.exit_item + var set = new HashSet[LRState] + loop + var state = self.state + if state == null then break + if set.has(state) then + # We are looping, just abort + break + end + set.add(state) + # Heuristic, the first item is an accepting one or something that exit without looping + var item = state.core.first for e in item.future do shift(e) reduce(item.alt) end