Skip to content

Commit

Permalink
Fix C++ syntax collision (#150)
Browse files Browse the repository at this point in the history
* Fix C++ syntax collision

* Add tests for C++ syntax, fix failing test

* Add more tests for the possible regex cases

* Make JSON Attribute Tests pass

* Change Node version in CI

* Make node version match mine
  • Loading branch information
dacharyc authored May 15, 2024
1 parent 4a28cda commit 38e6ca6
Show file tree
Hide file tree
Showing 7 changed files with 113 additions and 23 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/node.js.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:

strategy:
matrix:
node-version: [20.x]
node-version: [21.x]
# See supported Node.js release schedule at https://nodejs.org/en/about/releases/

steps:
Expand Down
1 change: 1 addition & 0 deletions integrationTests/snip/expected/sample.snippet.cpp-test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
auto something = SomeClass::someProperty;
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
auto something = SomeClass::state::something;
7 changes: 7 additions & 0 deletions integrationTests/snip/input/sample.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// :snippet-start: cpp-test
auto something = SomeClass::someProperty;
// :snippet-end:

// :snippet-start: failing-cpp-test
auto something = SomeClass::state::something;
// :snippet-end:
81 changes: 78 additions & 3 deletions src/bluehawk/parser/lexer/lexer.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,77 @@ this is used to replace
"Newline",
]);
});

it("does not misinterpret C++ syntax as tokens", () => {
  const result = lexer.tokenize(`SomeClass::state::something;`);
  expect(result.errors.length).toBe(0);
  const tokenNames = result.tokens.map((token) => token.tokenType.name);
  // The bare `toBeNull` (matcher referenced but never invoked) asserted
  // nothing. Assert the actual intent: the C++ scope-resolution operator
  // must not lex as any of the tag token types.
  expect(tokenNames).not.toContain("Tag");
  expect(tokenNames).not.toContain("TagStart");
  expect(tokenNames).not.toContain("TagEnd");
});

it("does not make a token from content that starts with ::", () => {
  const result = lexer.tokenize(`::SomeClass::state::something;`);
  expect(result.errors.length).toBe(0);
  const tokenNames = result.tokens.map((token) => token.tokenType.name);
  // `toBeNull` without parentheses never ran; assert that the leading
  // scope-resolution operator produced no tag tokens.
  expect(tokenNames).not.toContain("Tag");
  expect(tokenNames).not.toContain("TagStart");
  expect(tokenNames).not.toContain("TagEnd");
});

it("does not make a token from content that ends with ::", () => {
  const result = lexer.tokenize(`SomeClass::state::something::`);
  expect(result.errors.length).toBe(0);
  const tokenNames = result.tokens.map((token) => token.tokenType.name);
  // `toBeNull` without parentheses never ran; assert that the trailing
  // scope-resolution operator produced no tag tokens.
  expect(tokenNames).not.toContain("Tag");
  expect(tokenNames).not.toContain("TagStart");
  expect(tokenNames).not.toContain("TagEnd");
});

it("does not make a token from content that starts and ends with ::", () => {
  const result = lexer.tokenize(`::SomeClass::state::something::`);
  expect(result.errors.length).toBe(0);
  const tokenNames = result.tokens.map((token) => token.tokenType.name);
  // `toBeNull` without parentheses never ran; assert that neither the
  // leading nor the trailing `::` produced any tag tokens.
  expect(tokenNames).not.toContain("Tag");
  expect(tokenNames).not.toContain("TagStart");
  expect(tokenNames).not.toContain("TagEnd");
});

it("does not make a token with a space in the state tag", () => {
  const result = lexer.tokenize(`
// :state -start: state-identifier
SomeClass::state::something;
// :state-end:
`);
  expect(result.errors.length).toBe(0);
  const tokenNames = result.tokens.map((token) => token.tokenType.name);
  // `toBeNull` without parentheses never ran. A malformed start tag
  // (space before "-start") must not lex as a TagStart token.
  // (The well-formed ":state-end:" line legitimately produces a TagEnd,
  // so only the start tag is asserted here.)
  expect(tokenNames).not.toContain("TagStart");
});

it("does not make a token with a space after the start colon", () => {
  const result = lexer.tokenize(`
// : state-start: state-identifier
SomeClass::state::something;
// :state-end:
`);
  expect(result.errors.length).toBe(0);
  const tokenNames = result.tokens.map((token) => token.tokenType.name);
  // `toBeNull` without parentheses never ran. A malformed start tag
  // (space after the opening colon) must not lex as a TagStart token.
  // (The well-formed ":state-end:" line legitimately produces a TagEnd,
  // so only the start tag is asserted here.)
  expect(tokenNames).not.toContain("TagStart");
});

it("Correctly tokenizes C++ syntax within a tag", () => {
const result = lexer.tokenize(`
// :state-start: state-identifier
SomeClass::state::something;
// :state-end:
`);
expect(result.errors.length).toBe(0);
const tokenNames = result.tokens.map((token) => token.tokenType.name);
expect(tokenNames).toStrictEqual([
"Newline",
"LineComment",
"TagStart",
"Identifier",
"Newline",
"Newline",
"LineComment",
"TagEnd",
"Newline",
]);
});
});

describe("custom comment lexer", () => {
Expand Down Expand Up @@ -116,9 +187,13 @@ describe("custom comment lexer", () => {

it("rejects comment patterns that conflict with other tokens", () => {
  // Assert via toThrowError so the test FAILS if makeLexer stops
  // throwing; the bare try/catch form passed vacuously whenever no
  // error was raised (the catch — and its expect — never executed).
  expect(() => {
    makeLexer([makeLineCommentToken(TAG_PATTERN)]);
  }).toThrowError(`Errors detected in definition of Lexer:
The same RegExp pattern ->/(?<!:):([A-z0-9-]+):(?!:)[^\\S\\r\\n]*/<-has been used in all of the following Token Types: Tag, LineComment <-`);
});
});
});

Expand Down
12 changes: 9 additions & 3 deletions src/bluehawk/parser/lexer/tokens.ts
Original file line number Diff line number Diff line change
Expand Up @@ -82,24 +82,30 @@ const Text = createToken({
// TODO: Allow any amount of non-newline white space (/[^\S\r\n]*/) to be
// included before or after the actual tag name to make stripping it out
// much easier.
const TAG_START_PATTERN /**/ = /:([A-z0-9-]+)-start:/;
const TAG_END_PATTERN /* */ = /:([A-z0-9-]+)-end:/;
const TAG_PATTERN /* */ = /:([A-z0-9-]+):[^\S\r\n]*/;
// The (?<!:) lookbehind and (?!:) lookahead guard against C++
// scope-resolution syntax such as `SomeClass::state::something`: a tag's
// opening colon must not be preceded by another colon, and its closing
// colon must not be followed by one, so `::name::` never matches.
// NOTE(review): the class [A-z] also matches '[', '\', ']', '^', '_' and
// '`' (ASCII 91–96) — presumably [A-Za-z] was intended. Confirm before
// tightening: the lexer-conflict test asserts the exact pattern text.
const TAG_START_PATTERN /**/ = /(?<!:):([A-z0-9-]+)-start:(?!:)/;
const TAG_END_PATTERN /* */ = /(?<!:):([A-z0-9-]+)-end:(?!:)/;
const TAG_PATTERN /* */ = /(?<!:):([A-z0-9-]+):(?!:)[^\S\r\n]*/;

// Opens a tagged block (":name-start:") and switches the lexer into
// TagAttributesMode to consume the tag's identifier/attributes.
const TagStart = createToken({
  name: "TagStart",
  pattern: TAG_START_PATTERN,
  push_mode: "TagAttributesMode",
  line_breaks: false,
  // Optimization hint: lookbehind-prefixed patterns can't be analyzed
  // automatically, so tell chevrotain these tokens always begin at ':'.
  start_chars_hint: [":"],
});

// Closes a tagged block (":name-end:").
const TagEnd = createToken({
  name: "TagEnd",
  pattern: TAG_END_PATTERN,
  line_breaks: false,
  start_chars_hint: [":"],
});

// A standalone one-line tag (":name:"), plus any trailing same-line
// whitespace ([^\S\r\n]* excludes newlines from the match).
const Tag = createToken({
  name: "Tag",
  pattern: TAG_PATTERN,
  line_breaks: false,
  start_chars_hint: [":"],
});

const Identifier = createToken({
Expand Down
32 changes: 16 additions & 16 deletions src/bluehawk/parser/visitor/jsonAttributeList.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,12 @@ describe("JSON attribute lists", () => {
const result = visitor.visit(cst, source);
expect(result.tagNodes[0].attributes).toBeUndefined();
expect(result.errors[0].message).toBe(
"Expected double-quoted property name in JSON"
"Expected double-quoted property name in JSON at position 19 (line 5 column 3)"
);
expect(result.errors[0].location).toStrictEqual({
line: 5,
column: 3,
offset: 33,
line: 1,
column: 15,
offset: 14,
});
});

Expand Down Expand Up @@ -133,12 +133,12 @@ describe("JSON attribute lists", () => {
const visitor = makeCstVisitor(parser);
const result = visitor.visit(cst, source);
expect(result.errors[0].location).toStrictEqual({
line: 5,
column: 1,
offset: 23,
line: 1,
column: 15,
offset: 14,
});
expect(result.errors[0].message).toBe(
"Expected property name or '}' in JSON"
"Expected property name or '}' in JSON at position 9 (line 5 column 1)"
);
});

Expand All @@ -158,12 +158,12 @@ describe("JSON attribute lists", () => {
const visitor = makeCstVisitor(parser);
const result = visitor.visit(cst, source);
expect(result.errors[0].location).toStrictEqual({
line: 5,
column: 1,
offset: 21,
line: 1,
column: 15,
offset: 14,
});
expect(result.errors[0].message).toBe(
"Expected property name or '}' in JSON"
"Expected property name or '}' in JSON at position 7 (line 5 column 1)"
);
});

Expand Down Expand Up @@ -250,12 +250,12 @@ describe("JSON attribute lists", () => {
const visitor = makeCstVisitor(parser);
const result = visitor.visit(cst, source);
expect(result.errors[0].location).toStrictEqual({
line: 3,
column: 4,
offset: 39,
line: 1,
column: 18,
offset: 17,
});
expect(result.errors[0].message).toBe(
"Expected double-quoted property name in JSON"
"Expected double-quoted property name in JSON at position 22 (line 3 column 4)"
);
});
});

0 comments on commit 38e6ca6

Please sign in to comment.