Skip to content

Commit

Permalink
Implement redundant epsilon removing optimization
Browse files Browse the repository at this point in the history
* Introduced `RedundantEpsilonRemover` that performs the following ATN optimizations (described below):
  * Removal of a single incoming epsilon transition
  * Removal of a single outgoing epsilon transition with several incoming transitions
* Removed all optimizations from `ParserATNFactory` since they are useless, not fully correct, and are now performed in a separate optimization step
* Introduced `ATNOptimizerHelper` that does the following:
  * Calculates incoming transitions that are used in ATN optimizers
  * Tracks replacements of states that are removed during optimization. Old states are replaced with the new ones in the final step (`updateAstNodes`)
  * Compresses the array of ATN states (removes null items left by previous optimization steps, `compressStates`)
* Fixed the previous `ATNOptimizer` and renamed it to `SetMerger`. Now it considers incoming transitions, and `ATNOptimizerHelper` accurately tracks replacements.

The implemented optimizations decrease the size of the ATN, especially for lexers, and should improve the performance of generated parsers because of the decreased number of method calls. Also, they don't affect runtime code except for the interpreter part (which is buggy anyway).

Signed-off-by: Ivan Kochurkin <[email protected]>
  • Loading branch information
KvanTTT authored and ericvergnaud committed Feb 21, 2024
1 parent e09711b commit 0eb852b
Show file tree
Hide file tree
Showing 18 changed files with 1,078 additions and 957 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,5 @@ s
1
2
3
1 [13 6]
1 [8 0]

Original file line number Diff line number Diff line change
Expand Up @@ -23,20 +23,16 @@ public void testEpsilonAltSubrule() throws Exception {
"a : A (B | ) C ;\n";
Grammar g = new Grammar(gtext);
String atnText =
"RuleStart_a_0->s2\n"+
"s2-A->BlockStart_5\n"+
"BlockStart_5->s3\n"+
"BlockStart_5->s4\n"+
"s3-B->BlockEnd_6\n"+
"s4->BlockEnd_6\n"+
"BlockEnd_6->s7\n"+
"s7-C->s8\n"+
"s8->RuleStop_a_1\n"+
"RuleStop_a_1-EOF->s9\n";
"RuleStart_a_0-A->BlockStart_3\n" +
"BlockStart_3->s2\n" +
"BlockStart_3->BlockEnd_4\n" +
"s2-B->BlockEnd_4\n" +
"BlockEnd_4-C->RuleStop_a_1\n" +
"RuleStop_a_1-EOF->s5\n";
RuntimeTestUtils.checkRuleATN(g, "a", atnText);

ATN atn = g.getATN();
int blkStartStateNumber = 5;
int blkStartStateNumber = 3;
IntervalSet tokens = atn.getExpectedTokens(blkStartStateNumber, null);
assertEquals("{B, C}", tokens.toString(g.getTokenNames()));
}
Expand All @@ -47,19 +43,16 @@ public void testEpsilonAltSubrule() throws Exception {
"a : A B? C ;\n";
Grammar g = new Grammar(gtext);
String atnText =
"RuleStart_a_0->s2\n"+
"s2-A->BlockStart_4\n"+
"BlockStart_4->s3\n"+
"BlockStart_4->BlockEnd_5\n"+
"s3-B->BlockEnd_5\n"+
"BlockEnd_5->s6\n"+
"s6-C->s7\n"+
"s7->RuleStop_a_1\n"+
"RuleStop_a_1-EOF->s8\n";
"RuleStart_a_0-A->BlockStart_3\n" +
"BlockStart_3->s2\n" +
"BlockStart_3->BlockEnd_4\n" +
"s2-B->BlockEnd_4\n" +
"BlockEnd_4-C->RuleStop_a_1\n" +
"RuleStop_a_1-EOF->s5\n";
RuntimeTestUtils.checkRuleATN(g, "a", atnText);

ATN atn = g.getATN();
int blkStartStateNumber = 4;
int blkStartStateNumber = 3;
IntervalSet tokens = atn.getExpectedTokens(blkStartStateNumber, null);
assertEquals("{B, C}", tokens.toString(g.getTokenNames()));
}
Expand All @@ -71,31 +64,28 @@ public void testEpsilonAltSubrule() throws Exception {
"b : B | ;";
Grammar g = new Grammar(gtext);
String atnText =
"RuleStart_a_0->s4\n"+
"s4-b->RuleStart_b_2\n"+
"s5-A->s6\n"+
"s6->RuleStop_a_1\n"+
"RuleStop_a_1-EOF->s11\n";
"RuleStart_a_0-b->RuleStart_b_2\n" +
"s4-A->RuleStop_a_1\n" +
"RuleStop_a_1-EOF->s8\n";
RuntimeTestUtils.checkRuleATN(g, "a", atnText);
atnText =
"RuleStart_b_2->BlockStart_9\n"+
"BlockStart_9->s7\n"+
"BlockStart_9->s8\n"+
"s7-B->BlockEnd_10\n"+
"s8->BlockEnd_10\n"+
"BlockEnd_10->RuleStop_b_3\n"+
"RuleStop_b_3->s5\n";
"RuleStart_b_2->BlockStart_6\n" +
"BlockStart_6->s5\n" +
"BlockStart_6->BlockEnd_7\n" +
"s5-B->BlockEnd_7\n" +
"BlockEnd_7->RuleStop_b_3\n" +
"RuleStop_b_3->s4\n";
RuntimeTestUtils.checkRuleATN(g, "b", atnText);

ATN atn = g.getATN();

// From the start of 'b' with empty stack, can only see B and EOF
int blkStartStateNumber = 9;
int blkStartStateNumber = 6;
IntervalSet tokens = atn.getExpectedTokens(blkStartStateNumber, ParserRuleContext.EMPTY);
assertEquals("{<EOF>, B}", tokens.toString(g.getTokenNames()));

// Now call from 'a'
tokens = atn.getExpectedTokens(blkStartStateNumber, new ParserRuleContext(ParserRuleContext.EMPTY, 4));
tokens = atn.getExpectedTokens(blkStartStateNumber, new ParserRuleContext(ParserRuleContext.EMPTY, 0));
assertEquals("{A, B}", tokens.toString(g.getTokenNames()));
}

Expand All @@ -111,28 +101,26 @@ public void testEpsilonAltSubrule() throws Exception {
" ;\n";
Grammar g = new Grammar(gtext);
String atnText =
"RuleStart_expr_2->BlockStart_13\n"+
"BlockStart_13->s7\n"+
"BlockStart_13->s12\n"+
"s7-action_1:-1->s8\n"+
"s12-ID->BlockEnd_14\n"+
"s8-L->s9\n"+
"BlockEnd_14->StarLoopEntry_20\n"+
"s9-expr->RuleStart_expr_2\n"+
"StarLoopEntry_20->StarBlockStart_18\n"+
"StarLoopEntry_20->s21\n"+
"s10-R->s11\n"+
"StarBlockStart_18->s15\n"+
"s21->RuleStop_expr_3\n"+
"s11->BlockEnd_14\n"+
"s15-2 >= _p->s16\n"+
"RuleStop_expr_3->s5\n"+
"RuleStop_expr_3->s10\n"+
"RuleStop_expr_3->BlockEnd_19\n"+
"s16-PLUS->s17\n"+
"s17-expr->RuleStart_expr_2\n"+
"BlockEnd_19->StarLoopBack_22\n"+
"StarLoopBack_22->StarLoopEntry_20\n";
"RuleStart_expr_2->BlockStart_10\n" +
"BlockStart_10->s5\n" +
"BlockStart_10->s9\n" +
"s5-action_1:-1->s6\n" +
"s9-ID->BlockEnd_11\n" +
"s6-L->s7\n" +
"BlockEnd_11->StarLoopEntry_16\n" +
"s7-expr->RuleStart_expr_2\n" +
"StarLoopEntry_16->StarBlockStart_14\n" +
"StarLoopEntry_16->s17\n" +
"s8-R->BlockEnd_11\n" +
"StarBlockStart_14-2 >= _p->s12\n" +
"s17->RuleStop_expr_3\n" +
"s12-PLUS->s13\n" +
"RuleStop_expr_3->s4\n" +
"RuleStop_expr_3->s8\n" +
"RuleStop_expr_3->BlockEnd_15\n" +
"s13-expr->RuleStart_expr_2\n" +
"BlockEnd_15->StarLoopBack_18\n" +
"StarLoopBack_18->StarLoopEntry_16\n";
RuntimeTestUtils.checkRuleATN(g, "expr", atnText);

ATN atn = g.getATN();
Expand All @@ -142,14 +130,14 @@ public void testEpsilonAltSubrule() throws Exception {
// System.out.println(dot);

// Simulate call stack after input '(x' from rule s
ParserRuleContext callStackFrom_s = new ParserRuleContext(null, 4);
ParserRuleContext callStackFrom_expr = new ParserRuleContext(callStackFrom_s, 9);
int afterID = 14;
ParserRuleContext callStackFrom_s = new ParserRuleContext(null, 0);
ParserRuleContext callStackFrom_expr = new ParserRuleContext(callStackFrom_s, 7);
int afterID = 11;
IntervalSet tokens = atn.getExpectedTokens(afterID, callStackFrom_expr);
assertEquals("{R, PLUS}", tokens.toString(g.getTokenNames()));

// Simulate call stack after input '(x' from within rule expr
callStackFrom_expr = new ParserRuleContext(null, 9);
callStackFrom_expr = new ParserRuleContext(null, 7);
tokens = atn.getExpectedTokens(afterID, callStackFrom_expr);
assertEquals("{R, PLUS}", tokens.toString(g.getTokenNames()));
}
Expand Down
4 changes: 3 additions & 1 deletion runtime/Java/src/main/java/org/antlr/v5/runtime/atn/ATN.java
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,10 @@ public void addState(ATNState state) {
states.add(state);
}

public void removeState(ATNState state) {
public ATNState removeState(ATNState state) {
ATNState removingState = states.get(state.stateNumber);
states.set(state.stateNumber, null); // just free mem, don't shift states in list
return removingState;
}

public int defineDecisionState(DecisionState s) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,8 @@ else if (state instanceof StarLoopbackState) {

ATNState endState;
Transition excludeTransition = null;
if (atn.ruleToStartState[i].isLeftRecursiveRule) {
RuleStartState ruleStartState = atn.ruleToStartState[i];
if (ruleStartState.isLeftRecursiveRule) {
// wrap from the beginning of the rule to the StarLoopEntryState
endState = null;
for (ATNState state : atn.states) {
Expand All @@ -302,7 +303,7 @@ else if (state instanceof StarLoopbackState) {
continue;
}

if (maybeLoopEndState.epsilonOnlyTransitions && maybeLoopEndState.transition(0).target instanceof RuleStopState) {
if (maybeLoopEndState.onlyHasEpsilonTransitions() && maybeLoopEndState.transition(0).target instanceof RuleStopState) {
endState = state;
break;
}
Expand Down Expand Up @@ -332,19 +333,29 @@ else if (state instanceof StarLoopbackState) {
}

// all transitions leaving the rule start state need to leave blockStart instead
while (atn.ruleToStartState[i].getNumberOfTransitions() > 0) {
Transition transition = atn.ruleToStartState[i].removeTransition(atn.ruleToStartState[i].getNumberOfTransitions() - 1);
while (ruleStartState.getNumberOfTransitions() > 0) {
Transition transition = ruleStartState.removeTransition(ruleStartState.getNumberOfTransitions() - 1);
bypassStart.addTransition(transition);
}

// link the new states
atn.ruleToStartState[i].addTransition(new EpsilonTransition(bypassStart));
ruleStartState.addTransition(new EpsilonTransition(bypassStart));
bypassStop.addTransition(new EpsilonTransition(endState));

ATNState matchState = new BasicState();
atn.addState(matchState);
matchState.addTransition(new AtomTransition(bypassStop, atn.ruleToTokenType[i]));
bypassStart.addTransition(new EpsilonTransition(matchState));

if (bypassStart.onlyHasEpsilonTransitions()) {
bypassStart.addTransition(new EpsilonTransition(matchState));
} else {
ATNState matchState2 = new BasicState();
atn.addState(matchState2);
matchState2.addTransition(bypassStart.transition(0));

bypassStart.removeTransition(0);
bypassStart.addTransition(new EpsilonTransition(matchState));
bypassStart.addTransition(new EpsilonTransition(matchState2));
}
}

if (deserializationOptions.isVerifyATN()) {
Expand Down Expand Up @@ -398,7 +409,7 @@ protected void markPrecedenceDecisions(ATN atn) {
if (atn.ruleToStartState[state.ruleIndex].isLeftRecursiveRule) {
ATNState maybeLoopEndState = state.transition(state.getNumberOfTransitions() - 1).target;
if (maybeLoopEndState instanceof LoopEndState) {
if (maybeLoopEndState.epsilonOnlyTransitions && maybeLoopEndState.transition(0).target instanceof RuleStopState) {
if (maybeLoopEndState.onlyHasEpsilonTransitions() && maybeLoopEndState.transition(0).target instanceof RuleStopState) {
((StarLoopEntryState)state).isPrecedenceDecision = true;
}
}
Expand Down
50 changes: 41 additions & 9 deletions runtime/Java/src/main/java/org/antlr/v5/runtime/atn/ATNState.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.function.Predicate;

/**
* The following images show the relation of states and
Expand Down Expand Up @@ -118,7 +119,7 @@ public abstract class ATNState {

public int ruleIndex; // at runtime, we don't have Rule objects

public boolean epsilonOnlyTransitions = false;
private Boolean epsilonOnlyTransitions = null;

/** Track the transitions emanating from this ATN state. */
protected final List<Transition> transitions =
Expand Down Expand Up @@ -159,12 +160,8 @@ public void addTransition(Transition e) {
}

public void addTransition(int index, Transition e) {
if (transitions.isEmpty()) {
epsilonOnlyTransitions = e.isEpsilon();
}
else if (epsilonOnlyTransitions != e.isEpsilon()) {
if (epsilonOnlyTransitions != null && epsilonOnlyTransitions != e.isEpsilon()) {
System.err.format(Locale.getDefault(), "ATN state %d has both epsilon and non-epsilon transitions.\n", stateNumber);
epsilonOnlyTransitions = false;
}

boolean alreadyPresent = false;
Expand All @@ -184,24 +181,59 @@ else if ( t.isEpsilon() && e.isEpsilon() ) {
}
if ( !alreadyPresent ) {
transitions.add(index, e);
recalculateEpsilonOnlyTransitions();
}
}

public Transition transition(int i) { return transitions.get(i); }

public void setTransition(int i, Transition e) {
transitions.set(i, e);
transitions.remove(i);
recalculateEpsilonOnlyTransitions();
if (epsilonOnlyTransitions != null && epsilonOnlyTransitions != e.isEpsilon()) {
System.err.format(Locale.getDefault(), "ATN state %d has both epsilon and non-epsilon transitions.\n", stateNumber);
}
transitions.add(i, e);
recalculateEpsilonOnlyTransitions();
}

public Transition removeTransition(int index) {
return transitions.remove(index);
Transition result = transitions.remove(index);
recalculateEpsilonOnlyTransitions();
return result;
}

/**
 * Removes the given transition from this state's transition list and then
 * refreshes the cached epsilon-only flag.
 *
 * @param transition the transition to remove
 * @return the result of {@code transitions.remove(transition)}, i.e. whether
 *         the list contained the transition
 */
public boolean removeTransition(Transition transition) {
    final boolean wasPresent = transitions.remove(transition);
    // The flag is recomputed unconditionally, even when nothing was removed.
    recalculateEpsilonOnlyTransitions();
    return wasPresent;
}

/**
 * Finds the first transition of this state, in list order, that satisfies
 * the given predicate.
 *
 * @param predicate the condition a transition has to meet
 * @return the first matching transition, or {@code null} when none matches
 */
public Transition getTransition(Predicate<Transition> predicate) {
    final java.util.Optional<Transition> firstMatch =
            transitions.stream().filter(predicate).findFirst();
    return firstMatch.isPresent() ? firstMatch.get() : null;
}

/**
 * Looks up the position of the given transition in this state's transition list.
 *
 * @param transition the transition to locate
 * @return the index reported by {@code transitions.indexOf(transition)};
 *         {@code -1} when the list does not contain it
 */
public int getTransitionIndex(Transition transition) {
    final int position = transitions.indexOf(transition);
    return position;
}

public abstract int getStateType();

public final boolean onlyHasEpsilonTransitions() {
return epsilonOnlyTransitions;
return epsilonOnlyTransitions != null && epsilonOnlyTransitions;
}

/**
 * Recomputes the cached {@code epsilonOnlyTransitions} flag from the current
 * transition list: {@code null} when there are no transitions, otherwise
 * whether every transition reports {@code isEpsilon()}.
 */
private void recalculateEpsilonOnlyTransitions() {
    if (transitions.isEmpty()) {
        // No transitions: the flag is left undefined.
        epsilonOnlyTransitions = null;
        return;
    }

    boolean everyTransitionIsEpsilon = true;
    for (Transition candidate : transitions) {
        if (!candidate.isEpsilon()) {
            // One non-epsilon transition settles the answer; stop scanning.
            everyTransitionIsEpsilon = false;
            break;
        }
    }
    epsilonOnlyTransitions = everyTransitionIsEpsilon;
}

public void setRuleIndex(int ruleIndex) { this.ruleIndex = ruleIndex; }

/**
 * Removes every outgoing transition from this state.
 *
 * <p>The cached epsilon-only flag is recomputed afterwards, as the other
 * mutators of the transition list do. Without this the flag would keep the
 * value it had before the clear: {@code onlyHasEpsilonTransitions()} could
 * still report {@code true} for a state with no transitions, and the next
 * {@code addTransition} could print a spurious "both epsilon and non-epsilon
 * transitions" warning.
 */
public void clearTransitions() {
    transitions.clear();
    // Keep the cached flag in sync with the now-empty list.
    recalculateEpsilonOnlyTransitions();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,6 @@
* 32-bit int/chars in the ATN serialization, this is no longer necessary.
*/
public abstract class CodePointTransitions {
/**
 * Return new {@link AtomTransition}.
 *
 * <p>Convenience wrapper: delegates to {@code createWithCodePointRange} with
 * {@code codePoint} as both the lower and the upper bound of the range.
 *
 * @param target the state passed through as the transition target
 * @param codePoint the single code point used for both range bounds
 * @return the transition produced by {@code createWithCodePointRange}
 */
public static Transition createWithCodePoint(ATNState target, int codePoint) {
return createWithCodePointRange(target, codePoint, codePoint);
}

/** Return new {@link AtomTransition} if range represents one atom else {@link SetTransition}. */
public static Transition createWithCodePointRange(ATNState target, int codePointFrom, int codePointTo) {
return codePointFrom == codePointTo
Expand Down
Loading

0 comments on commit 0eb852b

Please sign in to comment.