More parsing fixes and tons of error message improvements

ortus-boxlang · Apr 21, 2024 · 50e012e · 50e012e
1 parent 57f62ba
commit 50e012e
Show file tree

Hide file tree

Showing 14 changed files with 216 additions and 82 deletions.
diff --git a/src/main/antlr/BoxScriptGrammar.g4 b/src/main/antlr/BoxScriptGrammar.g4
@@ -49,7 +49,7 @@ function:
 	eos*;
 
 // Declared arguments for a function
-functionParamList: functionParam (COMMA functionParam)*;
+functionParamList: functionParam (COMMA functionParam)* COMMA?;
 
 // required String param1="default" inject="something"
 functionParam: (REQUIRED)? (type)? identifier (
@@ -94,10 +94,12 @@ functionOrStatement: function | statement;
 
 // import java:foo.bar.Baz as myAlias;
 importStatement:
-	IMPORT (prefix = identifier COLON)? fqn (DOT STAR)? (
+	IMPORT (prefix = identifier COLON)? importFQN (
 		AS alias = identifier
 	)? eos?;
 
+importFQN: fqn (DOT STAR)?;
+
 // property name="foo" type="string" default="bar" inject="something";
 property:
 	javadoc? (preannotation)* PROPERTY postannotation* eos;
@@ -208,7 +210,7 @@ assignmentRight: expression;
 argumentList:
 	(namedArgument | positionalArgument) (
 		COMMA (namedArgument | positionalArgument)
-	)*;
+	)* COMMA?;
 
 /*
  func( foo = bar, baz = qux )

diff --git a/src/main/antlr/CFScriptGrammar.g4 b/src/main/antlr/CFScriptGrammar.g4
@@ -117,31 +117,30 @@ classOrInterface: boxClass | interface;
 script: importStatement* functionOrStatement* | EOF;
 
 // import java:foo.bar.Baz as myAlias;
-importStatement: IMPORT fqn eos?;
+importStatement: IMPORT importFQN eos?;
+
+importFQN: fqn (DOT STAR)?;
 
 // include "myFile.bxm";
 include: INCLUDE expression;
 
 // class {}
 boxClass:
-	importStatement* javadoc? (preannotation)* ABSTRACT? boxClassName postannotation* LBRACE
-		property* functionOrStatement* RBRACE;
+	importStatement* javadoc? ABSTRACT? boxClassName postannotation* LBRACE property*
+		functionOrStatement* RBRACE;
 
 boxClassName: CLASS_NAME;
 
 interface:
-	importStatement* javadoc? (preannotation)* INTERFACE postannotation* LBRACE interfaceFunction*
-		RBRACE;
+	importStatement* javadoc? INTERFACE postannotation* LBRACE interfaceFunction* RBRACE;
 
 // TODO: default method implementations
-interfaceFunction: (preannotation)* functionSignature (
-		postannotation
-	)* eos;
+interfaceFunction: functionSignature ( postannotation)* eos;
 
 // public String myFunction( String foo, String bar )
 functionSignature:
-	javadoc? (preannotation)* accessModifier? STATIC? returnType? FUNCTION identifier LPAREN
-		functionParamList? RPAREN;
+	javadoc? accessModifier? STATIC? returnType? FUNCTION identifier LPAREN functionParamList?
+		RPAREN;
 
 // UDF
 function:
@@ -150,17 +149,13 @@ function:
 	eos*;
 
 // Declared arguments for a function
-functionParamList: functionParam (COMMA functionParam)*;
+functionParamList: functionParam (COMMA functionParam)* COMMA?;
 
 // required String param1="default" inject="something"
 functionParam: (REQUIRED)? (type)? identifier (
 		EQUALSIGN expression
 	)? postannotation*;
 
-// @MyAnnotation "value". This is BL specific, so it's disabled in the CF grammar, but defined here
-// in the base grammar for better rule reuse.
-preannotation: AT fqn (literalExpression)*;
-
 // foo=bar baz="bum"
 postannotation:
 	key = identifier (
@@ -191,11 +186,10 @@ type:
 	| ANY;
 
 // Allow any statement or a function.  TODO: This may need to be changed if functions are allowed inside of functions
-functionOrStatement: function | statement;
+functionOrStatement: function | importStatement | statement;
 
 // property name="foo" type="string" default="bar" inject="something";
-property:
-	javadoc? (preannotation)* PROPERTY postannotation* eos;
+property: javadoc? PROPERTY postannotation* eos;
 
 // /** Comment */
 javadoc: JAVADOC_COMMENT;
@@ -221,7 +215,8 @@ statementBlock: LBRACE (statement)* RBRACE eos?;
 statement:
 	// This will "eat" random extra ; at the start of statements
 	eos* (
-		do
+		function
+		| do
 		| for
 		| if
 		| switch
@@ -285,7 +280,7 @@ assignmentRight: expression;
 argumentList:
 	(namedArgument | positionalArgument) (
 		COMMA (namedArgument | positionalArgument)
-	)*;
+	)* COMMA?;
 
 /*
  func( foo = bar, baz = qux )

diff --git a/src/main/antlr/CFTemplateGrammar.g4 b/src/main/antlr/CFTemplateGrammar.g4
@@ -57,7 +57,6 @@ attributeName:
 	| INTERFACE
 	| FUNCTION
 	| ARGUMENT
-	| SCRIPT
 	| RETURN
 	| IF
 	| ELSE
@@ -133,7 +132,7 @@ component:
 	// <cfproperty name="..."> (zero or more)
 	(whitespace? property)*
 	// code in pseudo-constructor
-	statements
+	topLevelStatements
 	// </cfcomponent>
 	COMPONENT_OPEN SLASH_PREFIX COMPONENT COMPONENT_CLOSE;
 

diff --git a/src/main/antlr/CFTemplateLexer.g4 b/src/main/antlr/CFTemplateLexer.g4
@@ -84,8 +84,6 @@ INTERFACE: 'interface';
 FUNCTION: 'function';
 ARGUMENT: 'argument';
 
-SCRIPT: 'script' -> pushMode(XFSCRIPT);
-
 // return may or may not have an expression, so eat any leading whitespace now so it doesn't give us an expression part that's just a space
 RETURN:
 	'return' [ \t\r\n]* -> pushMode(EXPRESSION_MODE_COMPONENT);
@@ -128,6 +126,7 @@ fragment DIGIT: [0-9];
 fragment COMPONENT_NameChar:
 	COMPONENT_NameStartChar
 	| '_'
+	| '-'
 	| DIGIT
 	| ':';
 

diff --git a/src/main/antlr/DocGrammar.g4 b/src/main/antlr/DocGrammar.g4
@@ -34,8 +34,6 @@ descriptionLineNoSpaceNoAt:
 	| NAME
 	| STAR
 	| SLASH
-	| BRACE_OPEN
-	| BRACE_CLOSE
 	| JAVADOC_START;
 
 descriptionNewline: NEWLINE;
@@ -46,7 +44,7 @@ blockTag: SPACE? AT blockTagName SPACE? blockTagContent*;
 
 blockTagName: NAME;
 
-blockTagContent: blockTagText | inlineTag | NEWLINE;
+blockTagContent: blockTagText | NEWLINE;
 
 blockTagText: blockTagTextElement+;
 
@@ -56,19 +54,4 @@ blockTagTextElement:
 	| SPACE
 	| STAR
 	| SLASH
-	| BRACE_OPEN
-	| BRACE_CLOSE
-	| JAVADOC_START;
-
-inlineTag:
-	INLINE_TAG_START inlineTagName SPACE* inlineTagContent? BRACE_CLOSE;
-
-inlineTagName: NAME;
-
-inlineTagContent: braceContent+;
-
-braceExpression: BRACE_OPEN braceContent* BRACE_CLOSE;
-
-braceContent: braceExpression | braceText (NEWLINE* braceText)*;
-
-braceText: TEXT_CONTENT | NAME | SPACE | STAR | SLASH | NEWLINE;
+	| JAVADOC_START;
diff --git a/src/main/antlr/DocLexer.g4 b/src/main/antlr/DocLexer.g4
@@ -9,7 +9,7 @@ NEWLINE:
 
 SPACE: (' ' | '\t')+;
 
-TEXT_CONTENT: ~[\n\r\t @*{}/a-zA-Z]+;
+TEXT_CONTENT: ~[\n\r\t */a-zA-Z]+;
 
 AT: '@';
 
@@ -19,10 +19,4 @@ SLASH: '/';
 
 JAVADOC_START: '/**' STAR*;
 
-JAVADOC_END: SPACE? STAR* '*/';
-
-INLINE_TAG_START: '{@';
-
-BRACE_OPEN: '{';
-
-BRACE_CLOSE: '}';
+JAVADOC_END: SPACE? STAR* '*/';
diff --git a/src/main/java/ortus/boxlang/compiler/parser/BoxScriptLexerCustom.java b/src/main/java/ortus/boxlang/compiler/parser/BoxScriptLexerCustom.java
@@ -91,4 +91,29 @@ public Token findPreviousToken( int type ) {
 		}
 		return null;
 	}
+
+	/**
+	 * Reset this lexer, re-tokenize the input, and scan backward for an unmatched opening token.
+	 *
+	 * @param start token type of the opening delimiter, e.g. LBRACE
+	 * @param end   token type of the matching closing delimiter, e.g. RBRACE
+	 * @return the last opening token with no later matching close, or null if none is found
+	 */
+	public Token findUnclosedToken( int start, int end ) {
+		int count = 0;
+		reset();
+		var tokens = getAllTokens();
+		for ( int i = tokens.size() - 1; i >= 0; i-- ) {
+			Token t = tokens.get( i );
+			if ( t.getType() == start ) {
+				count--;
+			} else if ( t.getType() == end ) {
+				count++;
+			}
+			if ( count < 0 ) {
+				return t;
+			}
+		}
+		return null;
+	}
 }
diff --git a/src/main/java/ortus/boxlang/compiler/parser/BoxScriptParser.java b/src/main/java/ortus/boxlang/compiler/parser/BoxScriptParser.java
@@ -113,6 +113,7 @@
 import ortus.boxlang.parser.antlr.BoxScriptGrammar.ParamContext;
 import ortus.boxlang.parser.antlr.BoxScriptGrammar.PreannotationContext;
 import ortus.boxlang.parser.antlr.BoxScriptLexer;
+import ortus.boxlang.parser.antlr.CFScriptLexer;
 import ortus.boxlang.runtime.BoxRuntime;
 import ortus.boxlang.runtime.components.ComponentDescriptor;
 import ortus.boxlang.runtime.services.ComponentService;
@@ -227,10 +228,10 @@ public ParsingResult parse( String code, Boolean classOrInterface ) throws IOExc
 	 * @see BoxExpression
 	 */
 	public ParsingResult parseExpression( String code ) throws IOException {
-		InputStream			inputStream	= IOUtils.toInputStream( code, StandardCharsets.UTF_8 );
+		InputStream				inputStream	= IOUtils.toInputStream( code, StandardCharsets.UTF_8 );
 
-		BoxScriptLexer		lexer		= new BoxScriptLexer( CharStreams.fromStream( inputStream ) );
-		BoxScriptGrammar	parser		= new BoxScriptGrammar( new CommonTokenStream( lexer ) );
+		BoxScriptLexerCustom	lexer		= new BoxScriptLexerCustom( CharStreams.fromStream( inputStream ) );
+		BoxScriptGrammar		parser		= new BoxScriptGrammar( new CommonTokenStream( lexer ) );
 		addErrorListeners( lexer, parser );
 		// var t = lexer.nextToken();
 		// while ( t.getType() != Token.EOF ) {
@@ -243,6 +244,14 @@ public ParsingResult parseExpression( String code ) throws IOException {
 			BoxExpression ast = toAst( null, parseTree );
 			return new ParsingResult( ast, issues );
 		}
+		Token unclosedParen = lexer.findUnclosedToken( CFScriptLexer.LPAREN, CFScriptLexer.RPAREN );
+		if ( unclosedParen != null ) {
+			issues.clear();
+			issues
+			    .add( new Issue( "Unclosed parenthesis [(] on line " + ( unclosedParen.getLine() + this.startLine ),
+			        createOffsetPosition( unclosedParen.getLine(),
+			            unclosedParen.getCharPositionInLine(), unclosedParen.getLine(), unclosedParen.getCharPositionInLine() + 1 ) ) );
+		}
 		return new ParsingResult( null, issues );
 	}
 
@@ -259,10 +268,10 @@ public ParsingResult parseExpression( String code ) throws IOException {
 	 * @see BoxStatement
 	 */
 	public ParsingResult parseStatement( String code ) throws IOException {
-		InputStream			inputStream	= IOUtils.toInputStream( code, StandardCharsets.UTF_8 );
+		InputStream				inputStream	= IOUtils.toInputStream( code, StandardCharsets.UTF_8 );
 
-		BoxScriptLexer		lexer		= new BoxScriptLexer( CharStreams.fromStream( inputStream ) );
-		BoxScriptGrammar	parser		= new BoxScriptGrammar( new CommonTokenStream( lexer ) );
+		BoxScriptLexerCustom	lexer		= new BoxScriptLexerCustom( CharStreams.fromStream( inputStream ) );
+		BoxScriptGrammar		parser		= new BoxScriptGrammar( new CommonTokenStream( lexer ) );
 		addErrorListeners( lexer, parser );
 		BoxScriptGrammar.FunctionOrStatementContext	parseTree	= parser.functionOrStatement();
 
@@ -344,6 +353,15 @@ protected BoxNode parserFirstStage( InputStream stream, Boolean classOrInterface
 
 		// Don't attempt to build AST if there are parsing issues
 		if ( !issues.isEmpty() ) {
+			Token unclosedBrace = lexer.findUnclosedToken( BoxScriptLexer.LBRACE, BoxScriptLexer.RBRACE );
+			if ( unclosedBrace != null ) {
+				issues.clear();
+				issues.add(
+				    new Issue( "Unclosed curly brace [{] on line " + ( unclosedBrace.getLine() + this.startLine ),
+				        createOffsetPosition( unclosedBrace.getLine(),
+				            unclosedBrace.getCharPositionInLine(), unclosedBrace.getLine(), unclosedBrace.getCharPositionInLine() + 1 ) ) );
+				return null;
+			}
 			return null;
 		}
 
@@ -428,12 +446,12 @@ private BoxNode toAst( File file, BoxClassContext component ) {
 	private BoxImport toAst( File file, BoxScriptGrammar.ImportStatementContext rule ) {
 		BoxExpression	expr	= null;
 		BoxIdentifier	alias	= null;
-		if ( rule.fqn() != null ) {
+		if ( rule.importFQN() != null ) {
 			String prefix = "";
 			if ( rule.prefix != null ) {
 				prefix = rule.prefix.getText() + ":";
 			}
-			expr = new BoxFQN( prefix + rule.fqn().getText(), getPosition( rule.fqn() ), getSourceText( rule.fqn() ) );
+			expr = new BoxFQN( prefix + rule.importFQN().getText(), getPosition( rule.importFQN() ), getSourceText( rule.importFQN() ) );
 		}
 		if ( rule.alias != null ) {
 			BoxExpression tmp = toAst( file, rule.alias );

diff --git a/src/main/java/ortus/boxlang/compiler/parser/CFScriptLexerCustom.java b/src/main/java/ortus/boxlang/compiler/parser/CFScriptLexerCustom.java
@@ -129,4 +129,29 @@ public Token nextToken() {
 		return nextToken;
 	}
 
+	/**
+	 * Reset this lexer, re-tokenize the input, and scan backward for an unmatched opening token.
+	 *
+	 * @param start token type of the opening delimiter, e.g. LBRACE
+	 * @param end   token type of the matching closing delimiter, e.g. RBRACE
+	 * @return the last opening token with no later matching close, or null if none is found
+	 */
+	public Token findUnclosedToken( int start, int end ) {
+		int count = 0;
+		reset();
+		var tokens = getAllTokens();
+		for ( int i = tokens.size() - 1; i >= 0; i-- ) {
+			Token t = tokens.get( i );
+			if ( t.getType() == start ) {
+				count--;
+			} else if ( t.getType() == end ) {
+				count++;
+			}
+			if ( count < 0 ) {
+				return t;
+			}
+		}
+		return null;
+	}
+
 }