Skip to content

Commit

Permalink
Finalize separation of parsers
Browse files Browse the repository at this point in the history
  • Loading branch information
bdw429s committed Mar 27, 2024
1 parent 8037850 commit 0cd0123
Show file tree
Hide file tree
Showing 78 changed files with 5,384 additions and 744 deletions.
2 changes: 1 addition & 1 deletion modules/test/bifs/Hello.cfc → modules/test/bifs/Hello.bx
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
*/
@BoxBIF 'moduleHelloWorld'
@BoxMember { "string" : { name : "foo" }, "array" : {} }
component{
class{

/**
* The execution of this BIF with amazing BoxLang arguments
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* No aliases
*/
@BoxMember "string"
component{
class{

/**
* The execution of this BIF with amazing BoxLang arguments
Expand Down
101 changes: 49 additions & 52 deletions src/main/antlr/BaseScriptGrammar.g4
Original file line number Diff line number Diff line change
Expand Up @@ -9,30 +9,18 @@ options {
public ortus.boxlang.runtime.services.ComponentService componentService = ortus.boxlang.runtime.BoxRuntime.getInstance().getComponentService();
}

// This is the top level rule, which allow imports always, followed by a component, or an interface, or just a bunch of statements.
script:
importStatement* (
boxClass
| interface
| functionOrStatement*
)
| EOF;

// marks the end of simple statements (no body)
eos: SEMICOLON;

// TODO: This belongs only in the BL grammar. import java:foo.bar.Baz as myAlias;
importStatement:
IMPORT (prefix = identifier COLON)? fqn (DOT STAR)? (
AS alias = identifier
)? eos?;
// This is the top level rule, which allows a component, or an interface, or just a bunch of statements.
script: ( boxClass | interface | functionOrStatement*) | EOF;

// include "myFile.cfm";
// include "myFile.bxm";
include: INCLUDE expression;

// class {}
boxClass:
javadoc? (preannotation)* ABSTRACT? COMPONENT postannotation* LBRACE property*
javadoc? (preannotation)* ABSTRACT? CLASS_NAME postannotation* LBRACE property*
functionOrStatement* RBRACE;

interface:
Expand Down Expand Up @@ -60,7 +48,8 @@ functionParam: (REQUIRED)? (type)? identifier (
EQUALSIGN expression
)? postannotation*;

// @MyAnnotation "value" true
// @MyAnnotation "value". This is BL specific, so it's disabled in the CF grammar, but defined here
// in the base grammar for better rule reuse.
preannotation: AT fqn (literalExpression)*;

// foo=bar baz="bum"
Expand All @@ -84,7 +73,7 @@ type:
NUMERIC
| STRING
| BOOLEAN
| COMPONENT
| CLASS_NAME
| INTERFACE
| ARRAY
| STRUCT
Expand Down Expand Up @@ -164,29 +153,13 @@ component:
// http url="google.com" {}
(componentName componentAttributes statementBlock)
// http url="google.com";
| (componentName componentAttributes eos)
// cfhttp( url="google.com" ){} -- Only needed for CF parser
| (
prefixedIdentifier LPAREN delimitedComponentAttributes? RPAREN statementBlock
)
// cfhttp( url="google.com" ) -- Only needed for CF parser
| (
prefixedIdentifier LPAREN delimitedComponentAttributes? RPAREN
);

// cfSomething
prefixedIdentifier: PREFIXEDIDENTIFIER;
| (componentName componentAttributes eos);

// foo="bar" baz="bum" qux
componentAttributes: (componentAttribute)*;

componentAttribute: identifier (EQUALSIGN expression)?;

// foo="bar", baz="bum"
delimitedComponentAttributes: (componentAttribute) (
COMMA componentAttribute
)*;

/*
++foo
foo++
Expand Down Expand Up @@ -351,8 +324,7 @@ reservedKeyword:
| CASE
| CASTAS
| CATCH
| CLASS
| COMPONENT
| CLASS_NAME
| CONTAIN
| CONTAINS
| CONTINUE
Expand Down Expand Up @@ -418,9 +390,7 @@ reservedKeyword:
| LE
| NEQ
| NOT
| OR
| PREFIX
| PREFIXEDIDENTIFIER;
| OR;

// ANY NEW LEXER RULES IN DEFAULT MODE FOR WORDS NEED ADDED HERE

Expand All @@ -430,7 +400,7 @@ scope: REQUEST | VARIABLES | SERVER;

/*
```
<cfset components="here">
<bx:set components="here">
```
*/
componentIsland:
Expand Down Expand Up @@ -501,8 +471,8 @@ structMember:
identifier (COLON | EQUALSIGN) expression
| stringLiteral (COLON | EQUALSIGN) expression;

// +foo -bar
unary: (MINUS | PLUS | BITWISE_COMPLEMENT) expression;
// +foo -bar b~baz
unary: (MINUS | PLUS | bitwiseCompliment) expression;

// new java:String( param1 )
new:
Expand Down Expand Up @@ -536,11 +506,12 @@ notTernaryExpression:
| notTernaryExpression (STAR | SLASH | PERCENT | BACKSLASH) notTernaryExpression
| notTernaryExpression (PLUS | MINUS | MOD) notTernaryExpression
| notTernaryExpression (
BITWISE_SIGNED_LEFT_SHIFT
| BITWISE_SIGNED_RIGHT_SHIFT
| BITWISE_UNSIGNED_RIGHT_SHIFT
bitwiseSignedLeftShift
| bitwiseSignedRightShift
| bitwiseUnsignedRightShift
) notTernaryExpression
| notTernaryExpression (XOR | INSTANCEOF) notTernaryExpression
| notTernaryExpression XOR notTernaryExpression
| notTernaryExpression instanceOf notTernaryExpression
| notTernaryExpression (AMPERSAND notTernaryExpression)+
| notTernaryExpression (
eq
Expand All @@ -559,18 +530,44 @@ notTernaryExpression:
| NOT CONTAINS
| TEQ
) notTernaryExpression // Comparison
| notTernaryExpression BITWISE_AND notTernaryExpression // Bitwise AND operator
| notTernaryExpression BITWISE_XOR notTernaryExpression // Bitwise XOR operator
| notTernaryExpression BITWISE_OR notTernaryExpression // Bitwise OR operator
| notTernaryExpression bitwiseAnd notTernaryExpression // Bitwise AND operator
| notTernaryExpression bitwiseXOR notTernaryExpression // Bitwise XOR operator
| notTernaryExpression bitwiseOr notTernaryExpression // Bitwise OR operator
| notTernaryExpression ELVIS notTernaryExpression // Elvis operator
| notTernaryExpression IS notTernaryExpression // IS operator
| notTernaryExpression CASTAS notTernaryExpression // CastAs operator
| notTernaryExpression INSTANCEOF notTernaryExpression // InstanceOf operator
| notTernaryExpression castAs notTernaryExpression
| notTernaryExpression DOES NOT CONTAIN notTernaryExpression
| notOrBang notTernaryExpression
| notTernaryExpression (and | or) notTernaryExpression;
// Logical

// foo b<< bar
bitwiseSignedLeftShift: BITWISE_SIGNED_LEFT_SHIFT;

// foo b>> bar
bitwiseSignedRightShift: BITWISE_SIGNED_RIGHT_SHIFT;

// foo b>>> bar
bitwiseUnsignedRightShift: BITWISE_UNSIGNED_RIGHT_SHIFT;

// foo b& bar
bitwiseAnd: BITWISE_AND;

// foo b^ bar
bitwiseXOR: BITWISE_XOR;

// foo b| bar
bitwiseOr: BITWISE_OR;

// b~baz
bitwiseCompliment: BITWISE_COMPLEMENT;

// foo castAs bar
castAs: CASTAS;

// foo instanceOf bar
instanceOf: INSTANCEOF;

and: AND | AMPAMP;

eq: EQ | EQUAL | EQEQ;
Expand Down
7 changes: 3 additions & 4 deletions src/main/antlr/BaseScriptLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@ BREAK: 'BREAK';
CASE: 'CASE';
CASTAS: 'CASTAS';
CATCH: 'CATCH';
CLASS: 'CLASS';
COMPONENT: 'COMPONENT';
CONTAIN: 'CONTAIN';
CONTAINS: 'CONTAINS';
CONTINUE: 'CONTINUE';
Expand Down Expand Up @@ -78,6 +76,9 @@ WHEN: 'WHEN';
WHILE: 'WHILE';
XOR: 'XOR';

// ALWAYS OVERRIDDEN IN CF OR BOX LEXERS
CLASS_NAME: 'sdf';

AND: 'AND';
AMPAMP: '&&';

Expand Down Expand Up @@ -144,7 +145,6 @@ MODEQUAL: '%=';
PLUS: '+';
PLUSPLUS: '++';
TEQ: '===';
PREFIX: 'CF';

// BITWISE OPERATORS
BITWISE_OR: 'b|';
Expand Down Expand Up @@ -185,7 +185,6 @@ FLOAT_LITERAL_DECIMAL_ONLY_E_NOTATION: DOT DIGIT+ E_NOTATION;
FLOAT_LITERAL_DECIMAL_ONLY: DOT DIGIT+;

INTEGER_LITERAL: DIGIT+;
PREFIXEDIDENTIFIER: PREFIX IDENTIFIER;
IDENTIFIER: [a-z_$]+ ( [_]+ | [a-z]+ | DIGIT)*;

COMPONENT_ISLAND_START: '```' -> pushMode(componentIsland);
Expand Down
24 changes: 8 additions & 16 deletions src/main/antlr/BaseTemplateLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,6 @@ COMMENT: '<!---' .*? '--->' -> channel(HIDDEN);

WS: (' ' | '\t' | '\r'? '\n')+;

SCRIPT_OPEN: '<cfscript' .*? '>' -> pushMode(XFSCRIPT);

OUTPUT_START:
'<cfoutput' -> pushMode(POSSIBLE_COMPONENT), pushMode(COMPONENT_MODE), pushMode(OUTPUT_MODE);
COMPONENT_OPEN: '<' -> pushMode(POSSIBLE_COMPONENT);

HASHHASH: '##' -> type(CONTENT_TEXT);
Expand All @@ -49,9 +45,8 @@ CONTENT_TEXT: ~[<#]+;
// *********************************************************************************************************************
mode POSSIBLE_COMPONENT;

PREFIX: 'cf' -> pushMode(COMPONENT_MODE);
SLASH_PREFIX: '/cf' -> pushMode(END_COMPONENT);
ANY: . -> type(CONTENT_TEXT), popMode;
// This mode is overridden in the CF and Box lexers
IGNORED: [.];

// *********************************************************************************************************************
mode COMPONENT_MODE;
Expand Down Expand Up @@ -108,6 +103,12 @@ fragment COMPONENT_NameChar:

fragment COMPONENT_NameStartChar: [a-z_];

// *********************************************************************************************************************
mode XFSCRIPT;

// This mode is overridden in the CF and Box lexers
IGNORED2: [.];

// *********************************************************************************************************************
mode OUTPUT_MODE;

Expand Down Expand Up @@ -155,15 +156,6 @@ COMPONENT_NAME2:
COMPONENT_CLOSE2:
'>' -> popMode, popMode, type(COMPONENT_CLOSE);

// *********************************************************************************************************************
mode XFSCRIPT;

fragment COMPONENT_WHITESPACE2: [ \t\r\n]*;
SCRIPT_END_BODY:
'</' COMPONENT_WHITESPACE2 'cfscript' COMPONENT_WHITESPACE2 '>' -> popMode;

SCRIPT_BODY: .+?;

// *********************************************************************************************************************
mode ATTVALUE;

Expand Down
17 changes: 16 additions & 1 deletion src/main/antlr/BoxScriptGrammar.g4
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,19 @@ options {
tokenVocab = BoxScriptLexer;
}

import BaseScriptGrammar;
import BaseScriptGrammar;

// This is the top level rule, which allows imports always, followed by a component, or an interface, or just a bunch of statements.
script:
importStatement* (
boxClass
| interface
| functionOrStatement*
)
| EOF;

// BoxLang-only rule (intentionally not part of the shared base grammar). import java:foo.bar.Baz as myAlias;
importStatement:
IMPORT (prefix = identifier COLON)? fqn (DOT STAR)? (
AS alias = identifier
)? eos?;
4 changes: 3 additions & 1 deletion src/main/antlr/BoxScriptLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,6 @@ options {
caseInsensitive = true;
}

import BaseScriptLexer;
import BaseScriptLexer;

CLASS_NAME: 'CLASS';
23 changes: 22 additions & 1 deletion src/main/antlr/BoxTemplateLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,25 @@ options {
caseInsensitive = true;
}

import BaseTemplateLexer;
import BaseTemplateLexer;

SCRIPT_OPEN: '<bx:script' .*? '>' -> pushMode(XFSCRIPT);

OUTPUT_START:
'<bx:output' -> pushMode(POSSIBLE_COMPONENT), pushMode(COMPONENT_MODE), pushMode(OUTPUT_MODE);

// *********************************************************************************************************************
mode XFSCRIPT;

fragment COMPONENT_WHITESPACE2: [ \t\r\n]*;
SCRIPT_END_BODY:
'</' COMPONENT_WHITESPACE2 'bx:script' COMPONENT_WHITESPACE2 '>' -> popMode;

SCRIPT_BODY: .+?;

// *********************************************************************************************************************
mode POSSIBLE_COMPONENT;

PREFIX: 'bx:' -> pushMode(COMPONENT_MODE);
SLASH_PREFIX: '/bx:' -> pushMode(END_COMPONENT);
ANY: . -> type(CONTENT_TEXT), popMode;
Loading

0 comments on commit 0cd0123

Please sign in to comment.