Skip to content

Commit

Permalink
More parsing fixes and tons of error message improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
bdw429s committed Apr 21, 2024
1 parent 57f62ba commit 50e012e
Show file tree
Hide file tree
Showing 14 changed files with 216 additions and 82 deletions.
8 changes: 5 additions & 3 deletions src/main/antlr/BoxScriptGrammar.g4
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ function:
eos*;

// Declared arguments for a function
functionParamList: functionParam (COMMA functionParam)*;
functionParamList: functionParam (COMMA functionParam)* COMMA?;

// required String param1="default" inject="something"
functionParam: (REQUIRED)? (type)? identifier (
Expand Down Expand Up @@ -94,10 +94,12 @@ functionOrStatement: function | statement;

// import java:foo.bar.Baz as myAlias;
importStatement:
IMPORT (prefix = identifier COLON)? fqn (DOT STAR)? (
IMPORT (prefix = identifier COLON)? importFQN (
AS alias = identifier
)? eos?;

importFQN: fqn (DOT STAR)?;

// property name="foo" type="string" default="bar" inject="something";
property:
javadoc? (preannotation)* PROPERTY postannotation* eos;
Expand Down Expand Up @@ -208,7 +210,7 @@ assignmentRight: expression;
argumentList:
(namedArgument | positionalArgument) (
COMMA (namedArgument | positionalArgument)
)*;
)* COMMA?;

/*
func( foo = bar, baz = qux )
Expand Down
35 changes: 15 additions & 20 deletions src/main/antlr/CFScriptGrammar.g4
Original file line number Diff line number Diff line change
Expand Up @@ -117,31 +117,30 @@ classOrInterface: boxClass | interface;
script: importStatement* functionOrStatement* | EOF;

// import java:foo.bar.Baz as myAlias;
importStatement: IMPORT fqn eos?;
importStatement: IMPORT importFQN eos?;

importFQN: fqn (DOT STAR)?;

// include "myFile.bxm";
include: INCLUDE expression;

// class {}
boxClass:
importStatement* javadoc? (preannotation)* ABSTRACT? boxClassName postannotation* LBRACE
property* functionOrStatement* RBRACE;
importStatement* javadoc? ABSTRACT? boxClassName postannotation* LBRACE property*
functionOrStatement* RBRACE;

boxClassName: CLASS_NAME;

interface:
importStatement* javadoc? (preannotation)* INTERFACE postannotation* LBRACE interfaceFunction*
RBRACE;
importStatement* javadoc? INTERFACE postannotation* LBRACE interfaceFunction* RBRACE;

// TODO: default method implementations
interfaceFunction: (preannotation)* functionSignature (
postannotation
)* eos;
interfaceFunction: functionSignature ( postannotation)* eos;

// public String myFunction( String foo, String bar )
functionSignature:
javadoc? (preannotation)* accessModifier? STATIC? returnType? FUNCTION identifier LPAREN
functionParamList? RPAREN;
javadoc? accessModifier? STATIC? returnType? FUNCTION identifier LPAREN functionParamList?
RPAREN;

// UDF
function:
Expand All @@ -150,17 +149,13 @@ function:
eos*;

// Declared arguments for a function
functionParamList: functionParam (COMMA functionParam)*;
functionParamList: functionParam (COMMA functionParam)* COMMA?;

// required String param1="default" inject="something"
functionParam: (REQUIRED)? (type)? identifier (
EQUALSIGN expression
)? postannotation*;

// @MyAnnotation "value". This is BL specific, so it's disabled in the CF grammar, but defined here
// in the base grammar for better rule reuse.
preannotation: AT fqn (literalExpression)*;

// foo=bar baz="bum"
postannotation:
key = identifier (
Expand Down Expand Up @@ -191,11 +186,10 @@ type:
| ANY;

// Allow any statement or a function. TODO: This may need to be changed if functions are allowed inside of functions
functionOrStatement: function | statement;
functionOrStatement: function | importStatement | statement;

// property name="foo" type="string" default="bar" inject="something";
property:
javadoc? (preannotation)* PROPERTY postannotation* eos;
property: javadoc? PROPERTY postannotation* eos;

// /** Comment */
javadoc: JAVADOC_COMMENT;
Expand All @@ -221,7 +215,8 @@ statementBlock: LBRACE (statement)* RBRACE eos?;
statement:
// This will "eat" random extra ; at the start of statements
eos* (
do
function
| do
| for
| if
| switch
Expand Down Expand Up @@ -285,7 +280,7 @@ assignmentRight: expression;
argumentList:
(namedArgument | positionalArgument) (
COMMA (namedArgument | positionalArgument)
)*;
)* COMMA?;

/*
func( foo = bar, baz = qux )
Expand Down
3 changes: 1 addition & 2 deletions src/main/antlr/CFTemplateGrammar.g4
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ attributeName:
| INTERFACE
| FUNCTION
| ARGUMENT
| SCRIPT
| RETURN
| IF
| ELSE
Expand Down Expand Up @@ -133,7 +132,7 @@ component:
// <cfproperty name="..."> (zero or more)
(whitespace? property)*
// code in pseudo-constructor
statements
topLevelStatements
// </cfcomponent>
COMPONENT_OPEN SLASH_PREFIX COMPONENT COMPONENT_CLOSE;

Expand Down
3 changes: 1 addition & 2 deletions src/main/antlr/CFTemplateLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,6 @@ INTERFACE: 'interface';
FUNCTION: 'function';
ARGUMENT: 'argument';

SCRIPT: 'script' -> pushMode(XFSCRIPT);

// return may or may not have an expression, so eat any leading whitespace now so it doesn't give us an expression part that's just a space
RETURN:
'return' [ \t\r\n]* -> pushMode(EXPRESSION_MODE_COMPONENT);
Expand Down Expand Up @@ -128,6 +126,7 @@ fragment DIGIT: [0-9];
fragment COMPONENT_NameChar:
COMPONENT_NameStartChar
| '_'
| '-'
| DIGIT
| ':';

Expand Down
21 changes: 2 additions & 19 deletions src/main/antlr/DocGrammar.g4
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,6 @@ descriptionLineNoSpaceNoAt:
| NAME
| STAR
| SLASH
| BRACE_OPEN
| BRACE_CLOSE
| JAVADOC_START;

descriptionNewline: NEWLINE;
Expand All @@ -46,7 +44,7 @@ blockTag: SPACE? AT blockTagName SPACE? blockTagContent*;

blockTagName: NAME;

blockTagContent: blockTagText | inlineTag | NEWLINE;
blockTagContent: blockTagText | NEWLINE;

blockTagText: blockTagTextElement+;

Expand All @@ -56,19 +54,4 @@ blockTagTextElement:
| SPACE
| STAR
| SLASH
| BRACE_OPEN
| BRACE_CLOSE
| JAVADOC_START;

inlineTag:
INLINE_TAG_START inlineTagName SPACE* inlineTagContent? BRACE_CLOSE;

inlineTagName: NAME;

inlineTagContent: braceContent+;

braceExpression: BRACE_OPEN braceContent* BRACE_CLOSE;

braceContent: braceExpression | braceText (NEWLINE* braceText)*;

braceText: TEXT_CONTENT | NAME | SPACE | STAR | SLASH | NEWLINE;
| JAVADOC_START;
10 changes: 2 additions & 8 deletions src/main/antlr/DocLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ NEWLINE:

SPACE: (' ' | '\t')+;

TEXT_CONTENT: ~[\n\r\t @*{}/a-zA-Z]+;
TEXT_CONTENT: ~[\n\r\t */a-zA-Z]+;

AT: '@';

Expand All @@ -19,10 +19,4 @@ SLASH: '/';

JAVADOC_START: '/**' STAR*;

JAVADOC_END: SPACE? STAR* '*/';

INLINE_TAG_START: '{@';

BRACE_OPEN: '{';

BRACE_CLOSE: '}';
JAVADOC_END: SPACE? STAR* '*/';
Original file line number Diff line number Diff line change
Expand Up @@ -91,4 +91,29 @@ public Token findPreviousToken( int type ) {
}
return null;
}

/**
 * Find the opening token closest to the end of the input that has no matching closing token.
 *
 * The lexer is reset and every token is collected, then the tokens are walked from last to first
 * while keeping a running balance of closers versus openers.
 *
 * @param start the token type that opens a pair (for example a left brace or left parenthesis)
 * @param end   the token type that closes a pair
 *
 * @return the unmatched opening token, or null if none is found
 */
public Token findUnclosedToken( int start, int end ) {
	// Closing tokens seen so far (scanning backwards) that have not yet been paired with an opener
	int pendingClosers = 0;
	reset();
	var allTokens = getAllTokens();
	int idx = allTokens.size();
	while ( --idx >= 0 ) {
		Token candidate = allTokens.get( idx );
		int type = candidate.getType();
		if ( type == start ) {
			pendingClosers--;
			// An opener with nothing left to pair with is the unclosed one
			if ( pendingClosers < 0 ) {
				return candidate;
			}
		} else if ( type == end ) {
			pendingClosers++;
		}
	}
	return null;
}
}
34 changes: 26 additions & 8 deletions src/main/java/ortus/boxlang/compiler/parser/BoxScriptParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@
import ortus.boxlang.parser.antlr.BoxScriptGrammar.ParamContext;
import ortus.boxlang.parser.antlr.BoxScriptGrammar.PreannotationContext;
import ortus.boxlang.parser.antlr.BoxScriptLexer;
import ortus.boxlang.parser.antlr.CFScriptLexer;
import ortus.boxlang.runtime.BoxRuntime;
import ortus.boxlang.runtime.components.ComponentDescriptor;
import ortus.boxlang.runtime.services.ComponentService;
Expand Down Expand Up @@ -227,10 +228,10 @@ public ParsingResult parse( String code, Boolean classOrInterface ) throws IOExc
* @see BoxExpression
*/
public ParsingResult parseExpression( String code ) throws IOException {
InputStream inputStream = IOUtils.toInputStream( code, StandardCharsets.UTF_8 );
InputStream inputStream = IOUtils.toInputStream( code, StandardCharsets.UTF_8 );

BoxScriptLexer lexer = new BoxScriptLexer( CharStreams.fromStream( inputStream ) );
BoxScriptGrammar parser = new BoxScriptGrammar( new CommonTokenStream( lexer ) );
BoxScriptLexerCustom lexer = new BoxScriptLexerCustom( CharStreams.fromStream( inputStream ) );
BoxScriptGrammar parser = new BoxScriptGrammar( new CommonTokenStream( lexer ) );
addErrorListeners( lexer, parser );
// var t = lexer.nextToken();
// while ( t.getType() != Token.EOF ) {
Expand All @@ -243,6 +244,14 @@ public ParsingResult parseExpression( String code ) throws IOException {
BoxExpression ast = toAst( null, parseTree );
return new ParsingResult( ast, issues );
}
Token unclosedParen = lexer.findUnclosedToken( CFScriptLexer.LPAREN, CFScriptLexer.RPAREN );
if ( unclosedParen != null ) {
issues.clear();
issues
.add( new Issue( "Unclosed parenthesis [(] on line " + ( unclosedParen.getLine() + this.startLine ),
createOffsetPosition( unclosedParen.getLine(),
unclosedParen.getCharPositionInLine(), unclosedParen.getLine(), unclosedParen.getCharPositionInLine() + 1 ) ) );
}
return new ParsingResult( null, issues );
}

Expand All @@ -259,10 +268,10 @@ public ParsingResult parseExpression( String code ) throws IOException {
* @see BoxStatement
*/
public ParsingResult parseStatement( String code ) throws IOException {
InputStream inputStream = IOUtils.toInputStream( code, StandardCharsets.UTF_8 );
InputStream inputStream = IOUtils.toInputStream( code, StandardCharsets.UTF_8 );

BoxScriptLexer lexer = new BoxScriptLexer( CharStreams.fromStream( inputStream ) );
BoxScriptGrammar parser = new BoxScriptGrammar( new CommonTokenStream( lexer ) );
BoxScriptLexerCustom lexer = new BoxScriptLexerCustom( CharStreams.fromStream( inputStream ) );
BoxScriptGrammar parser = new BoxScriptGrammar( new CommonTokenStream( lexer ) );
addErrorListeners( lexer, parser );
BoxScriptGrammar.FunctionOrStatementContext parseTree = parser.functionOrStatement();

Expand Down Expand Up @@ -344,6 +353,15 @@ protected BoxNode parserFirstStage( InputStream stream, Boolean classOrInterface

// Don't attempt to build AST if there are parsing issues
if ( !issues.isEmpty() ) {
Token unclosedBrace = lexer.findUnclosedToken( BoxScriptLexer.LBRACE, BoxScriptLexer.RBRACE );
if ( unclosedBrace != null ) {
issues.clear();
issues.add(
new Issue( "Unclosed curly brace [{] on line " + ( unclosedBrace.getLine() + this.startLine ),
createOffsetPosition( unclosedBrace.getLine(),
unclosedBrace.getCharPositionInLine(), unclosedBrace.getLine(), unclosedBrace.getCharPositionInLine() + 1 ) ) );
return null;
}
return null;
}

Expand Down Expand Up @@ -428,12 +446,12 @@ private BoxNode toAst( File file, BoxClassContext component ) {
private BoxImport toAst( File file, BoxScriptGrammar.ImportStatementContext rule ) {
BoxExpression expr = null;
BoxIdentifier alias = null;
if ( rule.fqn() != null ) {
if ( rule.importFQN() != null ) {
String prefix = "";
if ( rule.prefix != null ) {
prefix = rule.prefix.getText() + ":";
}
expr = new BoxFQN( prefix + rule.fqn().getText(), getPosition( rule.fqn() ), getSourceText( rule.fqn() ) );
expr = new BoxFQN( prefix + rule.importFQN().getText(), getPosition( rule.importFQN() ), getSourceText( rule.importFQN() ) );
}
if ( rule.alias != null ) {
BoxExpression tmp = toAst( file, rule.alias );
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,4 +129,29 @@ public Token nextToken() {
return nextToken;
}

/**
 * Scan the token stream backwards for an opening token that is never closed.
 *
 * Resets the lexer, gathers all tokens, and then iterates from the final token towards the first.
 * Each closing token raises the nesting depth and each opening token lowers it; the first opening
 * token that drives the depth negative is the one returned.
 *
 * @param start the token type of the opening delimiter
 * @param end   the token type of the closing delimiter
 *
 * @return the unmatched opening token, or null when no such token exists
 */
public Token findUnclosedToken( int start, int end ) {
	int depth = 0;
	reset();
	var lexed = getAllTokens();
	for ( int pos = lexed.size() - 1; pos >= 0; pos-- ) {
		Token current = lexed.get( pos );
		int kind = current.getType();
		if ( kind != start ) {
			// Only closers affect the balance here; every other token type is ignored
			if ( kind == end ) {
				depth++;
			}
			continue;
		}
		depth--;
		if ( depth < 0 ) {
			return current;
		}
	}
	return null;
}

}
Loading

0 comments on commit 50e012e

Please sign in to comment.