Semantic token testing
aazuspan committed Aug 20, 2024
1 parent c89b292 commit 59ab6a9
Showing 3 changed files with 249 additions and 97 deletions.
79 changes: 79 additions & 0 deletions tests/server_tests/test_semantics.py
@@ -0,0 +1,79 @@
from __future__ import annotations

import itertools
import tempfile
from dataclasses import dataclass

import lsprotocol.types as lsp
import pytest
from pytest_lsp import LanguageClient

from ..conftest import TestCase, parametrize_cases


@dataclass
class SemanticTestCase(TestCase):
"""A dictionary to record prepare rename results for a symbol."""

source: str
encoding: list[int]


# fmt: off
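# Each expected `encoding` lists five integers per token, following the LSP
# semantic token wire format: delta line, delta start character, token length,
# token type index, and a bitmask of token modifiers.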
TEST_CASES: list[SemanticTestCase] = [
SemanticTestCase(
name="variable definition",
source="""Delay MEM REG0""",
encoding=[
0, 0, 5, 8, 0b10, # variable, definition
0, 6, 3, 21, 0b0, # operator
0, 4, 4, 8, 0b1000000100, # variable, constant readonly
],
),
SemanticTestCase(
name="label and opcode",
source="""start:\nsof 0,0""",
encoding=[
0, 0, 5, 0, 0b10, # namespace, definition
1, 0, 3, 12, 0b0, # function
0, 4, 1, 19, 0b0, # number
0, 1, 1, 21, 0b0, # argsep
0, 1, 1, 19, 0b0, # number
],
),
]
# fmt: on


@parametrize_cases(TEST_CASES)
@pytest.mark.asyncio()
async def test_semantic_tokens(
test_case: SemanticTestCase, client: LanguageClient
) -> None:
def batched(iterable, n):
"""
Partial backport of itertools.batched from Python 3.12.
https://docs.python.org/3/library/itertools.html#itertools.batched
"""
iterator = iter(iterable)
while batch := tuple(itertools.islice(iterator, n)):
yield batch

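# Write the test source to a temporary file so the server can open it by URI.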
tmp = tempfile.NamedTemporaryFile()
with open(tmp.name, "w") as dst:
dst.write(test_case.source)

response = await client.text_document_semantic_tokens_full_async(
params=lsp.SemanticTokensParams(
text_document=lsp.TextDocumentIdentifier(
uri=f"file:///{tmp.name}",
),
)
)

assert len(response.data) == len(test_case.encoding), "Unexpected encoding length"

# Compare encodings 1 token at a time to make it easier to diagnose issues
for got, expected in zip(batched(response.data, 5), batched(test_case.encoding, 5)):
assert got == expected
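
The expected encodings above follow the LSP semantic token wire format, in which a full-document response packs every token into a flat integer array, five values per token: delta line, delta start character, length, token type index, and modifier bitmask. A minimal decoding sketch, assuming a hypothetical `data` list and a token type legend inferred from the comments in the test cases above (an assumption, not the server's registered legend):

from itertools import islice

# Token type legend inferred from the comments in the test cases above (assumed).
TOKEN_TYPES = {0: "namespace", 8: "variable", 12: "function", 19: "number", 21: "operator"}


def decode(data: list[int]):
    """Group a flat semantic token array into readable five-value records."""
    values = iter(data)
    while group := tuple(islice(values, 5)):
        delta_line, delta_start, length, token_type, modifiers = group
        yield delta_line, delta_start, length, TOKEN_TYPES.get(token_type, "?"), bin(modifiers)

For example, decode([0, 0, 5, 8, 0b10]) would yield (0, 0, 5, 'variable', '0b10'), matching the first expected token of the "variable definition" case.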
100 changes: 3 additions & 97 deletions tests/test_parser.py
@@ -2,111 +2,17 @@

from __future__ import annotations

import lsprotocol.types as lsp
import pytest

from spinasm_lsp.parser import SPINAsmParser
from spinasm_lsp.tokens import ASFV1Token, EvaluatedToken, TokenLookup

from .conftest import PATCH_DIR, TEST_PATCHES
from .conftest import TEST_PATCHES


@pytest.mark.parametrize("patch", TEST_PATCHES, ids=lambda x: x.stem)
def test_example_patches(patch):
"""Test that the example patches from SPINAsm are parsable."""
with open(patch, encoding="utf-8") as f:
assert SPINAsmParser(f.read()).parse()
parser = SPINAsmParser(f.read()).parse()


@pytest.fixture()
def sentence_token_lookup() -> tuple[str, TokenLookup]:
"""A sentence with a token registry for each word."""
sentence = "This is a line with words."

# Build a list of word tokens, ignoring whitespace. We'll build the tokens
# consistently with asfv1 parsed tokens.
words = list(filter(lambda x: x, sentence.split(" ")))
token_vals = [ASFV1Token(type="LABEL", txt=w, stxt=w, val=None) for w in words]
tokens = []
col = 0

lookup = TokenLookup()
for t in token_vals:
start = sentence.index(t.txt, col)
parsed_token = t.at_position(lsp.Position(line=0, character=start))
eval_token = EvaluatedToken.from_parsed_token(parsed_token)

col = eval_token.range.end.character + 1

tokens.append(eval_token)
lookup.add_token(parsed_token)

return sentence, lookup


def test_get_token_from_registry(sentence_token_lookup: tuple[str, TokenLookup]):
"""Test that tokens are correctly retrieved by position from a registry."""
sentence, lookup = sentence_token_lookup

# Manually build a mapping of column indexes to expected token words. Note that
# each word includes the whitespace immediately after it, which is consistent with
# other LSPs, and that all other whitespace is None.
token_positions = {i: None for i in range(len(sentence))}
for i in range(0, 5):
token_positions[i] = "This"
for i in range(7, 10):
token_positions[i] = "is"
for i in range(10, 12):
token_positions[i] = "a"
for i in range(12, 17):
token_positions[i] = "line"
for i in range(20, 25):
token_positions[i] = "with"
for i in range(25, 32):
token_positions[i] = "words."

for i, word in token_positions.items():
found_tok = lookup.get(position=lsp.Position(line=0, character=i))
found_val = found_tok.stxt if found_tok is not None else found_tok
msg = f"Expected token `{word}` at col {i}, found `{found_val}`"
assert found_val == word, msg


def test_get_token_at_invalid_position_returns_none(sentence_token_lookup):
"""Test that retrieving tokens from out of bounds always returns None."""
_, lookup = sentence_token_lookup

assert lookup.get(position=lsp.Position(line=99, character=99)) is None


def test_get_token_positions():
"""Test getting all positions of a token from a registry."""
patch = PATCH_DIR / "Basic.spn"
with open(patch) as fp:
source = fp.read()

parser = SPINAsmParser(source).parse()

all_matches = parser.evaluated_tokens.get(name="apout")
assert len(all_matches) == 4
assert [t.range.start.line for t in all_matches] == [23, 57, 60, 70]


def test_concatenate_cho_rdal_tokens():
"""Test that CHO and RDAL tokens are concatenated correctly into CHO RDAL."""
cho = ASFV1Token(type="MNEMONIC", txt="CHO", stxt="CHO", val=None).at_position(
start=lsp.Position(line=0, character=0)
)

# Put whitespace between CHO and RDAL to test that range is calculated
rdal = ASFV1Token(type="LABEL", txt="RDAL", stxt="RDAL", val=None).at_position(
start=lsp.Position(line=0, character=10)
)

cho_rdal = cho.concatenate(rdal)

assert cho_rdal.stxt == "CHO RDAL"
assert cho_rdal.type == "MNEMONIC"
assert cho_rdal.range == lsp.Range(
start=lsp.Position(line=0, character=0), end=lsp.Position(line=0, character=14)
)
assert list(parser.evaluated_tokens)
167 changes: 167 additions & 0 deletions tests/test_tokens.py
@@ -0,0 +1,167 @@
from __future__ import annotations

from dataclasses import dataclass, field

import lsprotocol.types as lsp
import pytest

from spinasm_lsp.parser import SPINAsmParser
from spinasm_lsp.tokens import ASFV1Token, LSPToken, TokenLookup

from .conftest import PATCH_DIR, TestCase, parametrize_cases


@dataclass
class TokenSemanticsTestCase(TestCase):
"""A dictionary to record prepare rename results for a symbol."""

token: LSPToken
encoding: list[int]
type: lsp.SemanticTokenTypes
modifiers: list[lsp.SemanticTokenModifiers] = field(default_factory=list)
prev_token_start: lsp.Position = lsp.Position(line=0, character=0)


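# Expected encodings are relative to `prev_token_start`: the first value is the
# line delta from the previous token and, for tokens on the same line, the
# second value is the character delta from the previous token's start.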
TOKEN_SEMANTICS: list[TokenSemanticsTestCase] = [
TokenSemanticsTestCase(
name="skp at start",
token=LSPToken(
type="MNEMONIC",
stxt="SKP",
range=lsp.Range(lsp.Position(0, 0), lsp.Position(0, 2)),
),
encoding=[0, 0, 3, 12, 0b0],
type=lsp.SemanticTokenTypes.Function,
),
TokenSemanticsTestCase(
name="variable on newline",
token=LSPToken(
type="LABEL",
stxt="TMP",
range=lsp.Range(lsp.Position(10, 0), lsp.Position(10, 2)),
),
encoding=[9, 0, 3, 8, 0b0],
type=lsp.SemanticTokenTypes.Variable,
prev_token_start=lsp.Position(line=1, character=8),
),
TokenSemanticsTestCase(
name="constant after token",
token=LSPToken(
type="LABEL",
stxt="REG0",
range=lsp.Range(lsp.Position(3, 15), lsp.Position(3, 2)),
is_constant=True,
),
encoding=[0, 5, 4, 8, 0b1000000100],
type=lsp.SemanticTokenTypes.Variable,
modifiers=[
lsp.SemanticTokenModifiers.Readonly,
lsp.SemanticTokenModifiers.DefaultLibrary,
],
prev_token_start=lsp.Position(line=3, character=10),
),
]


@parametrize_cases(TOKEN_SEMANTICS)
def test_semantic_tokens(test_case: TokenSemanticsTestCase):
"""Test that the semantic tokens are correctly generated."""
encoding = test_case.token.semantic_encoding(test_case.prev_token_start)

assert test_case.token.semantic_type == test_case.type
assert test_case.token.semantic_modifiers == test_case.modifiers
assert encoding == test_case.encoding


@pytest.fixture()
def sentence_token_lookup() -> tuple[str, TokenLookup]:
"""A sentence with a token registry for each word."""
sentence = "This is a line with words."

# Build a list of word tokens, ignoring whitespace. We'll build the tokens
# consistently with asfv1 parsed tokens.
words = list(filter(lambda x: x, sentence.split(" ")))
token_vals = [ASFV1Token(type="LABEL", txt=w, stxt=w, val=None) for w in words]
tokens = []
col = 0

lookup = TokenLookup()
for t in token_vals:
start = sentence.index(t.txt, col)
parsed_token = t.at_position(lsp.Position(line=0, character=start))
eval_token = LSPToken.from_parsed_token(parsed_token)

col = eval_token.range.end.character + 1

tokens.append(eval_token)
lookup.add_token(parsed_token)

return sentence, lookup


def test_get_token_from_registry(sentence_token_lookup: tuple[str, TokenLookup]):
"""Test that tokens are correctly retrieved by position from a registry."""
sentence, lookup = sentence_token_lookup

# Manually build a mapping of column indexes to expected token words. Note that
# each word includes the whitespace immediately after it, which is consistent with
# other LSPs, and that all other whitespace is None.
token_positions = {i: None for i in range(len(sentence))}
for i in range(0, 5):
token_positions[i] = "This"
for i in range(7, 10):
token_positions[i] = "is"
for i in range(10, 12):
token_positions[i] = "a"
for i in range(12, 17):
token_positions[i] = "line"
for i in range(20, 25):
token_positions[i] = "with"
for i in range(25, 32):
token_positions[i] = "words."

for i, word in token_positions.items():
found_tok = lookup.get(position=lsp.Position(line=0, character=i))
found_val = found_tok.stxt if found_tok is not None else found_tok
msg = f"Expected token `{word}` at col {i}, found `{found_val}`"
assert found_val == word, msg


def test_get_token_at_invalid_position_returns_none(sentence_token_lookup):
"""Test that retrieving tokens from out of bounds always returns None."""
_, lookup = sentence_token_lookup

assert lookup.get(position=lsp.Position(line=99, character=99)) is None


def test_get_token_positions():
"""Test getting all positions of a token from a registry."""
patch = PATCH_DIR / "Basic.spn"
with open(patch) as fp:
source = fp.read()

parser = SPINAsmParser(source).parse()

all_matches = parser.evaluated_tokens.get(name="apout")
assert len(all_matches) == 4
assert [t.range.start.line for t in all_matches] == [23, 57, 60, 70]


def test_concatenate_cho_rdal_tokens():
"""Test that CHO and RDAL tokens are concatenated correctly into CHO RDAL."""
cho = ASFV1Token(type="MNEMONIC", txt="CHO", stxt="CHO", val=None).at_position(
start=lsp.Position(line=0, character=0)
)

# Put whitespace between CHO and RDAL to test that range is calculated
rdal = ASFV1Token(type="LABEL", txt="RDAL", stxt="RDAL", val=None).at_position(
start=lsp.Position(line=0, character=10)
)

cho_rdal = cho.concatenate(rdal)

assert cho_rdal.stxt == "CHO RDAL"
assert cho_rdal.type == "MNEMONIC"
assert cho_rdal.range == lsp.Range(
start=lsp.Position(line=0, character=0), end=lsp.Position(line=0, character=14)
)

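A closing note on the modifier bitmasks in the expected encodings: each mask is the bitwise OR of 1 << index over the token's modifiers. A minimal sketch, assuming Readonly and DefaultLibrary sit at indices 2 and 9 as the 0b1000000100 mask above implies:

def modifier_mask(*indices: int) -> int:
    """Build an LSP semantic token modifier bitmask from modifier indices."""
    mask = 0
    for index in indices:
        mask |= 1 << index
    return mask


# Assumed modifier indices, implied by the Readonly + DefaultLibrary case above.
READONLY, DEFAULT_LIBRARY = 2, 9
assert modifier_mask(READONLY, DEFAULT_LIBRARY) == 0b1000000100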