From 59ab6a9e02a6b5899d9512003342e990b01458fb Mon Sep 17 00:00:00 2001
From: Aaron Zuspan
Date: Mon, 19 Aug 2024 23:50:53 -0700
Subject: [PATCH] Semantic token testing

---
 tests/server_tests/test_semantics.py |  79 +++++++++++++
 tests/test_parser.py                 | 100 +---------------
 tests/test_tokens.py                 | 167 +++++++++++++++++++++++++++
 3 files changed, 249 insertions(+), 97 deletions(-)
 create mode 100644 tests/server_tests/test_semantics.py
 create mode 100644 tests/test_tokens.py

diff --git a/tests/server_tests/test_semantics.py b/tests/server_tests/test_semantics.py
new file mode 100644
index 0000000..80c27dd
--- /dev/null
+++ b/tests/server_tests/test_semantics.py
@@ -0,0 +1,79 @@
+from __future__ import annotations
+
+import itertools
+import tempfile
+from dataclasses import dataclass
+
+import lsprotocol.types as lsp
+import pytest
+from pytest_lsp import LanguageClient
+
+from ..conftest import TestCase, parametrize_cases
+
+
+@dataclass
+class SemanticTestCase(TestCase):
+    """A test case pairing SPINAsm source with its expected semantic token encoding."""
+
+    source: str
+    encoding: list[int]
+
+
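+# Each token is encoded as 5 integers, following the LSP semantic token
+# spec: line delta from the previous token, start character delta, token
+# length, token type index, and a bitmask of token modifiers. For example,
+# `0, 6, 3, 21, 0b0` below describes a 3-character token on the same line
+# as the previous token, starting 6 columns after it, with type index 21
+# and no modifiers. (The index-to-type legend used in these cases is
+# assumed to match the legend the server registers.)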
+# fmt: off
+TEST_CASES: list[SemanticTestCase] = [
+    SemanticTestCase(
+        name="variable definition",
+        source="""Delay MEM REG0""",
+        encoding=[
+            0, 0, 5, 8, 0b10,          # variable, definition
+            0, 6, 3, 21, 0b0,          # operator
+            0, 4, 4, 8, 0b1000000100,  # variable, constant readonly
+        ],
+    ),
+    SemanticTestCase(
+        name="label and opcode",
+        source="""start:\nsof 0,0""",
+        encoding=[
+            0, 0, 5, 0, 0b10,   # namespace, definition
+            1, 0, 3, 12, 0b0,   # function
+            0, 4, 1, 19, 0b0,   # number
+            0, 1, 1, 21, 0b0,   # argsep
+            0, 1, 1, 19, 0b0,   # number
+        ],
+    ),
+]
+# fmt: on
+
+
+@parametrize_cases(TEST_CASES)
+@pytest.mark.asyncio()
+async def test_semantic_tokens(
+    test_case: SemanticTestCase, client: LanguageClient
+) -> None:
+    def batched(iterable, n):
+        """
+        Partial backport of itertools.batched from Python 3.12.
+
+        https://docs.python.org/3/library/itertools.html#itertools.batched
+        """
+        iterator = iter(iterable)
+        while batch := tuple(itertools.islice(iterator, n)):
+            yield batch
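+
+    # e.g. batched([0, 0, 3, 12, 0b0, 0, 4, 1], 5) yields (0, 0, 3, 12, 0b0)
+    # followed by the leftover partial tuple (0, 4, 1).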
+
+    tmp = tempfile.NamedTemporaryFile()
+    with open(tmp.name, "w") as dst:
+        dst.write(test_case.source)
+
+    response = await client.text_document_semantic_tokens_full_async(
+        params=lsp.SemanticTokensParams(
+            text_document=lsp.TextDocumentIdentifier(
+                uri=f"file:///{tmp.name}",
+            ),
+        )
+    )
+
+    assert len(response.data) == len(test_case.encoding), "Unexpected encoding length"
+
+    # Compare encodings 1 token at a time to make it easier to diagnose issues
+    for got, expected in zip(batched(response.data, 5), batched(test_case.encoding, 5)):
+        assert got == expected
diff --git a/tests/test_parser.py b/tests/test_parser.py
index a1aff4d..b404fc9 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -2,111 +2,17 @@
 
 from __future__ import annotations
 
-import lsprotocol.types as lsp
 import pytest
 
 from spinasm_lsp.parser import SPINAsmParser
-from spinasm_lsp.tokens import ASFV1Token, EvaluatedToken, TokenLookup
 
-from .conftest import PATCH_DIR, TEST_PATCHES
+from .conftest import TEST_PATCHES
 
 
 @pytest.mark.parametrize("patch", TEST_PATCHES, ids=lambda x: x.stem)
 def test_example_patches(patch):
     """Test that the example patches from SPINAsm are parsable."""
     with open(patch, encoding="utf-8") as f:
-        assert SPINAsmParser(f.read()).parse()
+        parser = SPINAsmParser(f.read()).parse()
 
-
-@pytest.fixture()
-def sentence_token_lookup() -> tuple[str, TokenLookup]:
-    """A sentence with a token registry for each word."""
-    sentence = "This   is a line    with words."
-
-    # Build a list of word tokens, ignoring whitespace. We'll build the tokens
-    # consistently with asfv1 parsed tokens.
-    words = list(filter(lambda x: x, sentence.split(" ")))
-    token_vals = [ASFV1Token(type="LABEL", txt=w, stxt=w, val=None) for w in words]
-    tokens = []
-    col = 0
-
-    lookup = TokenLookup()
-    for t in token_vals:
-        start = sentence.index(t.txt, col)
-        parsed_token = t.at_position(lsp.Position(line=0, character=start))
-        eval_token = EvaluatedToken.from_parsed_token(parsed_token)
-
-        col = eval_token.range.end.character + 1
-
-        tokens.append(eval_token)
-        lookup.add_token(parsed_token)
-
-    return sentence, lookup
-
-
-def test_get_token_from_registry(sentence_token_lookup: tuple[str, TokenLookup]):
-    """Test that tokens are correctly retrieved by position from a registry."""
-    sentence, lookup = sentence_token_lookup
-
-    # Manually build a mapping of column indexes to expected token words. Note that
-    # each word includes the whitespace immediately after it, which is consistent with
-    # other LSPs, and that all other whitespace is None.
-    token_positions = {i: None for i in range(len(sentence))}
-    for i in range(0, 5):
-        token_positions[i] = "This"
-    for i in range(7, 10):
-        token_positions[i] = "is"
-    for i in range(10, 12):
-        token_positions[i] = "a"
-    for i in range(12, 17):
-        token_positions[i] = "line"
-    for i in range(20, 25):
-        token_positions[i] = "with"
-    for i in range(25, 32):
-        token_positions[i] = "words."
-
-    for i, word in token_positions.items():
-        found_tok = lookup.get(position=lsp.Position(line=0, character=i))
-        found_val = found_tok.stxt if found_tok is not None else found_tok
-        msg = f"Expected token `{word}` at col {i}, found `{found_val}`"
-        assert found_val == word, msg
-
-
-def test_get_token_at_invalid_position_returns_none(sentence_token_lookup):
-    """Test that retrieving tokens from out of bounds always returns None."""
-    _, lookup = sentence_token_lookup
-
-    assert lookup.get(position=lsp.Position(line=99, character=99)) is None
-
-
-def test_get_token_positions():
-    """Test getting all positions of a token from a registry."""
-    patch = PATCH_DIR / "Basic.spn"
-    with open(patch) as fp:
-        source = fp.read()
-
-    parser = SPINAsmParser(source).parse()
-
-    all_matches = parser.evaluated_tokens.get(name="apout")
-    assert len(all_matches) == 4
-    assert [t.range.start.line for t in all_matches] == [23, 57, 60, 70]
-
-
-def test_concatenate_cho_rdal_tokens():
-    """Test that CHO and RDAL tokens are concatenated correctly into CHO RDAL."""
-    cho = ASFV1Token(type="MNEMONIC", txt="CHO", stxt="CHO", val=None).at_position(
-        start=lsp.Position(line=0, character=0)
-    )
-
-    # Put whitespace between CHO and RDAL to test that range is calculated
-    rdal = ASFV1Token(type="LABEL", txt="RDAL", stxt="RDAL", val=None).at_position(
-        start=lsp.Position(line=0, character=10)
-    )
-
-    cho_rdal = cho.concatenate(rdal)
-
-    assert cho_rdal.stxt == "CHO RDAL"
-    assert cho_rdal.type == "MNEMONIC"
-    assert cho_rdal.range == lsp.Range(
-        start=lsp.Position(line=0, character=0), end=lsp.Position(line=0, character=14)
-    )
+    assert list(parser.evaluated_tokens)
diff --git a/tests/test_tokens.py b/tests/test_tokens.py
new file mode 100644
index 0000000..a2c652f
--- /dev/null
+++ b/tests/test_tokens.py
@@ -0,0 +1,167 @@
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+
+import lsprotocol.types as lsp
+import pytest
+
+from spinasm_lsp.parser import SPINAsmParser
+from spinasm_lsp.tokens import ASFV1Token, LSPToken, TokenLookup
+
+from .conftest import PATCH_DIR, TestCase, parametrize_cases
+
+
+@dataclass
+class TokenSemanticsTestCase(TestCase):
+    """A test case for the semantic type, modifiers, and encoding of a token."""
+
+    token: LSPToken
+    encoding: list[int]
+    type: lsp.SemanticTokenTypes
+    modifiers: list[lsp.SemanticTokenModifiers] = field(default_factory=list)
+    prev_token_start: lsp.Position = lsp.Position(line=0, character=0)
+
+
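+# The expected encodings below are hand-computed from the token positions:
+# e.g. for "variable on newline", the token starts at line 10 and the
+# previous token at line 1, so the line delta is 9; because the line
+# changed, the start character (0) is absolute rather than relative. The
+# modifier bitmask 0b1000000100 sets bits 2 and 9, which are assumed to map
+# to the Readonly and DefaultLibrary modifiers in the server's legend.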
+TOKEN_SEMANTICS: list[TokenSemanticsTestCase] = [
+    TokenSemanticsTestCase(
+        name="skp at start",
+        token=LSPToken(
+            type="MNEMONIC",
+            stxt="SKP",
+            range=lsp.Range(lsp.Position(0, 0), lsp.Position(0, 2)),
+        ),
+        encoding=[0, 0, 3, 12, 0b0],
+        type=lsp.SemanticTokenTypes.Function,
+    ),
+    TokenSemanticsTestCase(
+        name="variable on newline",
+        token=LSPToken(
+            type="LABEL",
+            stxt="TMP",
+            range=lsp.Range(lsp.Position(10, 0), lsp.Position(10, 2)),
+        ),
+        encoding=[9, 0, 3, 8, 0b0],
+        type=lsp.SemanticTokenTypes.Variable,
+        prev_token_start=lsp.Position(line=1, character=8),
+    ),
+    TokenSemanticsTestCase(
+        name="constant after token",
+        token=LSPToken(
+            type="LABEL",
+            stxt="REG0",
+            range=lsp.Range(lsp.Position(3, 15), lsp.Position(3, 18)),
+            is_constant=True,
+        ),
+        encoding=[0, 5, 4, 8, 0b1000000100],
+        type=lsp.SemanticTokenTypes.Variable,
+        modifiers=[
+            lsp.SemanticTokenModifiers.Readonly,
+            lsp.SemanticTokenModifiers.DefaultLibrary,
+        ],
+        prev_token_start=lsp.Position(line=3, character=10),
+    ),
+]
+
+
+@parametrize_cases(TOKEN_SEMANTICS)
+def test_semantic_tokens(test_case: TokenSemanticsTestCase):
+    """Test that the semantic tokens are correctly generated."""
+    encoding = test_case.token.semantic_encoding(test_case.prev_token_start)
+
+    assert test_case.token.semantic_type == test_case.type
+    assert test_case.token.semantic_modifiers == test_case.modifiers
+    assert encoding == test_case.encoding
+
+
+@pytest.fixture()
+def sentence_token_lookup() -> tuple[str, TokenLookup]:
+    """A sentence with a token registry for each word."""
+    sentence = "This   is a line    with words."
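+    # NOTE: the repeated internal spaces are deliberate; the column indexes
+    # asserted in test_get_token_from_registry below depend on them.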
+
+    # Build a list of word tokens, ignoring whitespace. We'll build the tokens
+    # consistently with asfv1 parsed tokens.
+    words = list(filter(lambda x: x, sentence.split(" ")))
+    token_vals = [ASFV1Token(type="LABEL", txt=w, stxt=w, val=None) for w in words]
+    tokens = []
+    col = 0
+
+    lookup = TokenLookup()
+    for t in token_vals:
+        start = sentence.index(t.txt, col)
+        parsed_token = t.at_position(lsp.Position(line=0, character=start))
+        eval_token = LSPToken.from_parsed_token(parsed_token)
+
+        col = eval_token.range.end.character + 1
+
+        tokens.append(eval_token)
+        lookup.add_token(parsed_token)
+
+    return sentence, lookup
+
+
+def test_get_token_from_registry(sentence_token_lookup: tuple[str, TokenLookup]):
+    """Test that tokens are correctly retrieved by position from a registry."""
+    sentence, lookup = sentence_token_lookup
+
+    # Manually build a mapping of column indexes to expected token words. Note that
+    # each word includes the whitespace immediately after it, which is consistent with
+    # other LSPs, and that all other whitespace is None.
+    token_positions = {i: None for i in range(len(sentence))}
+    for i in range(0, 5):
+        token_positions[i] = "This"
+    for i in range(7, 10):
+        token_positions[i] = "is"
+    for i in range(10, 12):
+        token_positions[i] = "a"
+    for i in range(12, 17):
+        token_positions[i] = "line"
+    for i in range(20, 25):
+        token_positions[i] = "with"
+    for i in range(25, 32):
+        token_positions[i] = "words."
+
+    for i, word in token_positions.items():
+        found_tok = lookup.get(position=lsp.Position(line=0, character=i))
+        found_val = found_tok.stxt if found_tok is not None else found_tok
+        msg = f"Expected token `{word}` at col {i}, found `{found_val}`"
+        assert found_val == word, msg
+
+
+def test_get_token_at_invalid_position_returns_none(sentence_token_lookup):
+    """Test that retrieving tokens from out of bounds always returns None."""
+    _, lookup = sentence_token_lookup
+
+    assert lookup.get(position=lsp.Position(line=99, character=99)) is None
+
+
+def test_get_token_positions():
+    """Test getting all positions of a token from a registry."""
+    patch = PATCH_DIR / "Basic.spn"
+    with open(patch) as fp:
+        source = fp.read()
+
+    parser = SPINAsmParser(source).parse()
+
+    all_matches = parser.evaluated_tokens.get(name="apout")
+    assert len(all_matches) == 4
+    assert [t.range.start.line for t in all_matches] == [23, 57, 60, 70]
+
+
+def test_concatenate_cho_rdal_tokens():
+    """Test that CHO and RDAL tokens are concatenated correctly into CHO RDAL."""
+    cho = ASFV1Token(type="MNEMONIC", txt="CHO", stxt="CHO", val=None).at_position(
+        start=lsp.Position(line=0, character=0)
+    )
+
+    # Put whitespace between CHO and RDAL to test that range is calculated
+    rdal = ASFV1Token(type="LABEL", txt="RDAL", stxt="RDAL", val=None).at_position(
+        start=lsp.Position(line=0, character=10)
+    )
+
+    cho_rdal = cho.concatenate(rdal)
+
+    assert cho_rdal.stxt == "CHO RDAL"
+    assert cho_rdal.type == "MNEMONIC"
+    assert cho_rdal.range == lsp.Range(
+        start=lsp.Position(line=0, character=0), end=lsp.Position(line=0, character=14)
+    )