How to solve such ambiguity ? #1812
-
I don't know if I should call it an ambiguity, but let's say I want both of these expressions to
It does give proper results, but I don't think it should be done that way (also, the diagram looks ugly as hell).
|
Beta Was this translation helpful? Give feedback.
Replies: 4 comments 4 replies
-
I believe there's an alternative to this solution that is a bit easier to read, and that doesn't require two const nextParenthesis = tokenMatcher($.LA(1), LParen);
$.OR([
{
ALT: () => $.CONSUME(MultiplicationOperator)
},
{
GATE: () => nextParenthesis,
ALT: EMPTY_ALT()
}
]); |
Beta Was this translation helpful? Give feedback.
-
Thanks, $.MANY(() => {
$.OR([
{
ALT: ()=>{
op = $.CONSUME(MultiplicationOperator)
rhsVal = $.SUBRULE2($.atomicExpression)
if (tokenMatcher(op, Multi)) {
value *= rhsVal
} else { // op instanceof Div
value /= rhsVal
}
}},
{
ALT: ()=>{
rhsVal = $.SUBRULE($.parenthesisExpression)
value *= rhsVal
}
}
])
}) (function calculatorExample() {
// ----------------- lexer -----------------
// Short aliases for the chevrotain APIs used by this example.
const createToken = chevrotain.createToken;
const tokenMatcher = chevrotain.tokenMatcher;
const Lexer = chevrotain.Lexer;
const EmbeddedActionsParser = chevrotain.EmbeddedActionsParser;

// using the NA pattern marks this Token class as 'irrelevant' for the Lexer.
// AdditionOperator defines a Tokens hierarchy but only leafs in this hierarchy
// define actual Tokens that can appear in the text
const AdditionOperator = createToken({name: "AdditionOperator", pattern: Lexer.NA});
const Plus = createToken({name: "Plus", pattern: /\+/, categories: AdditionOperator});
const Minus = createToken({name: "Minus", pattern: /-/, categories: AdditionOperator});

// Same category arrangement for the multiplicative operators.
const MultiplicationOperator = createToken({name: "MultiplicationOperator", pattern: Lexer.NA});
const Multi = createToken({name: "Multi", pattern: /\*/, categories: MultiplicationOperator});
const Div = createToken({name: "Div", pattern: /\//, categories: MultiplicationOperator});

const LParen = createToken({name: "LParen", pattern: /\(/});
const RParen = createToken({name: "RParen", pattern: /\)/});

// A lone "0" or an integer without a leading zero.
// The former pattern /[1-9]\d*/ could not match the literal 0 at all, so
// inputs such as "0 + 5" or "power(2, 0)" were rejected by the lexer.
// Every input the old pattern accepted is still tokenized identically.
const NumberLiteral = createToken({name: "NumberLiteral", pattern: /0|[1-9]\d*/});

const PowerFunc = createToken({name: "PowerFunc", pattern: /power/});
const Comma = createToken({name: "Comma", pattern: /,/});

const WhiteSpace = createToken({
  name: "WhiteSpace",
  pattern: /\s+/,
  group: Lexer.SKIPPED
});

// whitespace is normally very common so it is placed first to speed up the lexer
const allTokens = [WhiteSpace,
  Plus, Minus, Multi, Div, LParen, RParen,
  NumberLiteral, AdditionOperator, MultiplicationOperator,
  PowerFunc, Comma];

const CalculatorLexer = new Lexer(allTokens);
/**
 * Calculator parser with embedded actions: every rule returns the numeric
 * value of the text it matched, so parsing and evaluation happen in one pass.
 *
 * Rule chain, from lowest to highest precedence:
 *   expression -> additionExpression -> multiplicationExpression -> atomicExpression
 * In addition to explicit `*` and `/`, a parenthesised expression that
 * directly follows an operand is treated as a multiplication.
 */
class Calculator extends EmbeddedActionsParser {
  constructor() {
    super(allTokens);

    // `$` keeps the rule bodies below close to grammar notation.
    const $ = this;

    // Entry rule; delegates to the lowest-precedence binary rule.
    // Tip: put a `debugger;` statement in a rule body and open the browser
    // dev tools to step through the parsing flow.
    $.RULE("expression", () => $.SUBRULE($.additionExpression));

    // Lowest precedence, so it comes first in the rule chain: the deeper a
    // binary expression sits in the parse tree, the tighter it binds.
    $.RULE("additionExpression", () => {
      let total = $.SUBRULE($.multiplicationExpression);

      $.MANY(() => {
        // AdditionOperator is a token category, so this consumes either
        // a Plus or a Minus token.
        const operator = $.CONSUME(AdditionOperator);
        // The "2" suffix distinguishes this call site from the SUBRULE above,
        // which targets the same rule within this rule body.
        const operand = $.SUBRULE2($.multiplicationExpression);

        // tokenMatcher plays the role of `instanceof` for token types.
        total = tokenMatcher(operator, Plus) ? total + operand : total - operand;
      });

      return total;
    });

    // Explicit `*` / `/` operators plus implicit multiplication by a
    // following parenthesised expression.
    $.RULE("multiplicationExpression", () => {
      let product = $.SUBRULE($.atomicExpression);

      $.MANY(() => {
        $.OR([
          {
            // <operator> <atomicExpression>
            ALT: () => {
              const operator = $.CONSUME(MultiplicationOperator);
              const operand = $.SUBRULE2($.atomicExpression);
              product = tokenMatcher(operator, Multi)
                ? product * operand
                : product / operand; // otherwise the operator is a Div
            }
          },
          {
            // "(" expression ")" with no operator in front: multiply.
            ALT: () => {
              const operand = $.SUBRULE($.parenthesisExpression);
              product = product * operand;
            }
          }
        ]);
      });

      return product;
    });

    // Highest precedence: the leaves of the expression parse tree.
    $.RULE("atomicExpression", () => {
      return $.OR([
        {ALT: () => $.SUBRULE($.parenthesisExpression)},
        {
          ALT: () => {
            const literal = $.CONSUME(NumberLiteral);
            return parseInt(literal.image, 10);
          }
        },
        {ALT: () => $.SUBRULE($.powerFunction)}
      ]);
    });

    // "(" expression ")" -> value of the inner expression.
    $.RULE("parenthesisExpression", () => {
      $.CONSUME(LParen);
      const inner = $.SUBRULE($.expression);
      $.CONSUME(RParen);
      return inner;
    });

    // "power" "(" expression "," expression ")" -> base raised to exponent.
    $.RULE("powerFunction", () => {
      $.CONSUME(PowerFunc);
      $.CONSUME(LParen);
      const base = $.SUBRULE($.expression);
      $.CONSUME(Comma);
      const exponent = $.SUBRULE2($.expression);
      $.CONSUME(RParen);
      return Math.pow(base, exponent);
    });

    // very important to call this after all the rules have been defined.
    // otherwise the parser may not work correctly as it will lack information
    // derived during the self analysis phase.
    this.performSelfAnalysis();
  }
}
// for the playground to work the returned object must contain these fields
return {
lexer: CalculatorLexer,
parser: Calculator,
defaultRule: "expression"
};
}()) |
Beta Was this translation helpful? Give feedback.
-
Hello @4silvertooth Not directly related, but you may be interested in knowing an alternative approach to building
The main problem with modeling the precedence in the grammar is that it creates very deep parse trees, hence On the other hand removing the precedence would also mandate having a separate "evaluation" step for the value. |
Beta Was this translation helpful? Give feedback.
-
Hi, https://github.com/4silvertooth/QwikTape The parser is here Not playground friendly as it depends on QuickJs for BigNum. |
Beta Was this translation helpful? Give feedback.
I believe there's an alternative to this solution that is a bit easier to read, and that doesn't require two `CONSUME(MultiplicationOperator)` calls. Putting your requirement into other words: the parser has to read a `*` token if the token afterwards isn't a `(`, but if it is a `(`, we know that we don't have to parse another `*`: