Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add natural translation for DSL #574

Open
wants to merge 32 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
d1a544b
Made first version for translation using !natural_language
BrentBlanckaert Dec 11, 2024
d0cf7de
forgot to push actual file
BrentBlanckaert Dec 11, 2024
0334e1f
fixed linting
BrentBlanckaert Dec 11, 2024
c1114bc
fixed pyright issue
BrentBlanckaert Dec 11, 2024
c61b563
add test for unit-test
BrentBlanckaert Dec 11, 2024
145deae
Fixed some bugs and wrote another test for io
BrentBlanckaert Dec 12, 2024
e14c758
setup main
BrentBlanckaert Dec 12, 2024
65fb097
Made a small fix
BrentBlanckaert Dec 12, 2024
e941ef6
Tested an extra edge case
BrentBlanckaert Dec 13, 2024
eef397b
Cleaned up code and added extra cases.
BrentBlanckaert Dec 13, 2024
30bcdcc
Started on usage with translation table.
BrentBlanckaert Dec 13, 2024
4230003
Added support for translation-table in global scope, tab-scope and co…
BrentBlanckaert Dec 14, 2024
1ddef15
Cleaned up code and fixed pyright issue
BrentBlanckaert Dec 15, 2024
5dabc80
fixed tests and added more
BrentBlanckaert Dec 15, 2024
69a77d3
fixed some small issues
BrentBlanckaert Dec 15, 2024
eb62f92
made some small fixes
BrentBlanckaert Dec 17, 2024
6e258cf
wrote an extra test
BrentBlanckaert Dec 17, 2024
1db1db2
fix spelling mistake
BrentBlanckaert Dec 17, 2024
4dedd8c
fixed linting issue
BrentBlanckaert Dec 17, 2024
b9786c2
increasing test coverage
BrentBlanckaert Dec 17, 2024
c63e391
removed some redundant code
BrentBlanckaert Dec 19, 2024
a35c49a
Adding a few comments
BrentBlanckaert Dec 19, 2024
10b0eb3
Cleaned up code some more and added extra cases for input and output …
BrentBlanckaert Dec 28, 2024
3539227
Updated statement/expression case and added programmingLanguageMap fo…
BrentBlanckaert Dec 28, 2024
e1180f9
started added new json schema
BrentBlanckaert Dec 28, 2024
466ec16
Made some changes to schema
BrentBlanckaert Dec 28, 2024
49079a0
fixed some bugs in the schema
BrentBlanckaert Dec 29, 2024
4174beb
fixed some bugs and fixed the tests
BrentBlanckaert Dec 29, 2024
1a79a82
fixed an edge case and made an extra test for it.
BrentBlanckaert Dec 29, 2024
81ce161
added the actual writing to a file.
BrentBlanckaert Dec 29, 2024
64a00cd
changed formatter to jinja
BrentBlanckaert Jan 4, 2025
88a2ede
small cleanup
BrentBlanckaert Jan 4, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 23 additions & 1 deletion tested/dsl/translate_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,22 @@ class ReturnOracle(dict):
pass


class NaturalLanguageMap(dict):
    """
    Marker type for a mapping that was tagged with ``!natural_language``.

    It behaves exactly like a plain ``dict``; the dedicated type only exists
    so that such a mapping can be told apart with ``isinstance``.
    """


# Options given as a mapping from option name to an integer or boolean value.
OptionDict = dict[str, int | bool]
# Every type a parsed YAML value can have, including the custom tagged types
# (``ExpressionString``, ``ReturnOracle`` and ``NaturalLanguageMap``).
YamlObject = (
    YamlDict
    | list
    | bool
    | float
    | int
    | str
    | None
    | ExpressionString
    | ReturnOracle
    | NaturalLanguageMap
)


Expand Down Expand Up @@ -138,6 +151,14 @@ def _return_oracle(loader: yaml.Loader, node: yaml.Node) -> ReturnOracle:
return ReturnOracle(result)


def _natural_language_map(loader: yaml.Loader, node: yaml.Node) -> NaturalLanguageMap:
    """
    YAML constructor for nodes tagged with ``!natural_language``.

    :param loader: The YAML loader that is constructing the document.
    :param node: The tagged node to convert.
    :return: The parsed node wrapped in a ``NaturalLanguageMap``.
    :raises AssertionError: If the parsed node is not a mapping.
    """
    parsed = _parse_yaml_value(loader, node)
    # Only mappings can be wrapped: a scalar or sequence under this tag is an
    # authoring error.
    assert isinstance(
        parsed, dict
    ), f"A natural language map must be an object, got {parsed} which is a {type(parsed)}."
    return NaturalLanguageMap(parsed)


def _parse_yaml(yaml_stream: str) -> YamlObject:
"""
Parse a string or stream to YAML.
Expand All @@ -148,6 +169,7 @@ def _parse_yaml(yaml_stream: str) -> YamlObject:
yaml.add_constructor("!" + actual_type, _custom_type_constructors, loader)
yaml.add_constructor("!expression", _expression_string, loader)
yaml.add_constructor("!oracle", _return_oracle, loader)
yaml.add_constructor("!natural_language", _natural_language_map, loader)

try:
return yaml.load(yaml_stream, loader)
Expand Down
216 changes: 216 additions & 0 deletions tested/nat_translation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
import sys
Fixed Show fixed Hide fixed
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I must say I expected this file to be much simpler.

In principle the !natural_language should simply be replaced by the content of the specified language in the map.
So in my mind, this code should not know whether it is working within a tab, context, testcase,...

The fact that this preprocessing script is so tightly coupled to the precise TESTed DSL will make it harder to maintain in the future. Any change to the TESTed DSL will also have to be verified here.

Do you think it is possible to write a more abstract solution, or have I missed some potential issues?


import yaml

from tested.dsl.translate_parser import (
ExpressionString,
NaturalLanguageMap,
ReturnOracle,
YamlDict,
YamlObject,
_parse_yaml,
_validate_dsl,
_validate_testcase_combinations,
)
Fixed Show fixed Hide fixed


def _select_translation(translations: dict, language: str, field: str):
    """
    Pick the entry for ``language`` out of a translation map.

    :param translations: Mapping from language key to the translated value.
    :param language: The language key to look up.
    :param field: Name of the DSL field being translated; only used to build
        the error message.
    :return: The value stored under ``language``.
    :raises AssertionError: If ``language`` is not a key of ``translations``.
    """
    assert (
        language in translations
    ), f"No translation for language '{language}' found in '{field}'."
    return translations[language]


def _translate_field(
    testcase: dict, key: str, language: str, nested_key: str | None = None
) -> None:
    """
    Translate ``testcase[key]`` in place.

    If the value is a ``NaturalLanguageMap``, it is replaced as a whole by the
    entry for ``language``. Otherwise, if ``nested_key`` is given and the value
    is a plain dict, only ``value[nested_key]`` is replaced, and only when that
    nested value is itself a dict. Any other value is left untouched.

    :raises AssertionError: If a translation map has no entry for ``language``.
    :raises KeyError: If the value is a plain dict without ``nested_key``.
    """
    value = testcase.get(key)
    # NaturalLanguageMap subclasses dict, so this check must come first.
    if isinstance(value, NaturalLanguageMap):
        testcase[key] = _select_translation(value, language, key)
    elif nested_key is not None and isinstance(value, dict):
        nested = value[nested_key]
        if isinstance(nested, dict):
            value[nested_key] = _select_translation(
                nested, language, f"{key}.{nested_key}"
            )


def translate_testcase(testcase: YamlDict, language: str) -> YamlDict:
    """
    Replace the translation maps of one testcase by the entry for ``language``.

    The testcase is first passed to ``_validate_testcase_combinations`` and is
    then modified in place. The following keys are handled:

    - ``statement`` / ``expression``: only when given as ``NaturalLanguageMap``.
    - ``stdin`` / ``arguments``: when given as any dict.
    - ``stdout`` / ``stderr``: as ``NaturalLanguageMap``, or their ``data`` key.
    - ``file``: only when given as ``NaturalLanguageMap``.
    - ``exception``: as ``NaturalLanguageMap``, or its ``message`` key.
    - ``return``: as ``NaturalLanguageMap``, or, for a ``ReturnOracle``, its
      ``arguments`` (any dict) and ``value`` (``NaturalLanguageMap``).
    - ``description``: as ``NaturalLanguageMap``, or its ``description`` key.

    :param testcase: The testcase object to translate.
    :param language: Key of the natural language to select.
    :return: The same testcase object, after translation.
    :raises AssertionError: If a translation map has no entry for ``language``.
    :raises KeyError: If a plain-dict ``stdout``/``stderr``/``exception``/
        ``description`` lacks its ``data``/``message``/``description`` key.
    """
    _validate_testcase_combinations(testcase)

    key_to_set = "statement" if "statement" in testcase else "expression"
    expr_stmt = testcase.get(key_to_set)
    if expr_stmt is not None:
        # Statements and expressions must use !natural_language explicitly.
        if isinstance(expr_stmt, NaturalLanguageMap):
            testcase[key_to_set] = _select_translation(
                expr_stmt, language, key_to_set
            )
    else:
        # NOTE(review): this else-branch (stdin and arguments only looked at
        # when there is no statement/expression) was reconstructed from a
        # source whose indentation was lost -- confirm against the repository.
        stdin_stmt = testcase.get("stdin")
        if isinstance(stdin_stmt, dict):
            testcase["stdin"] = _select_translation(stdin_stmt, language, "stdin")

        arguments = testcase.get("arguments", [])
        if isinstance(arguments, dict):
            testcase["arguments"] = _select_translation(
                arguments, language, "arguments"
            )

    _translate_field(testcase, "stdout", language, nested_key="data")
    # NOTE(review): Codecov reported the translation of "file" as not covered
    # by tests.
    _translate_field(testcase, "file", language)
    _translate_field(testcase, "stderr", language, nested_key="data")
    _translate_field(testcase, "exception", language, nested_key="message")

    result = testcase.get("return")
    if isinstance(result, ReturnOracle):
        # An oracle can carry translatable arguments and a translatable value.
        oracle_arguments = result.get("arguments", [])
        if isinstance(oracle_arguments, dict):
            result["arguments"] = _select_translation(
                oracle_arguments, language, "return.arguments"
            )

        value = result.get("value")
        if isinstance(value, NaturalLanguageMap):
            result["value"] = _select_translation(value, language, "return.value")
    elif isinstance(result, NaturalLanguageMap):
        testcase["return"] = _select_translation(result, language, "return")

    _translate_field(testcase, "description", language, nested_key="description")

    return testcase


def translate_testcases(testcases: list, language: str) -> list:
    """
    Translate every testcase in ``testcases`` to ``language``.

    :param testcases: The testcase objects to translate.
    :param language: Key of the natural language to select.
    :return: A new list with the result of ``translate_testcase`` for each
        entry, in the original order.
    :raises AssertionError: If an entry is not a dict.
    """
    result = []
    for testcase in testcases:
        assert isinstance(
            testcase, dict
        ), f"A testcase must be an object, got {testcase} which is a {type(testcase)}."
        result.append(translate_testcase(testcase, language))

    return result


def translate_contexts(contexts: list, language: str) -> list:
    """
    Translate the testcases of every context in ``contexts`` to ``language``.

    A context keeps its testcases under the key ``script`` or ``testcases``;
    ``script`` is used when both are present. Contexts with neither key are
    passed through unchanged. The context objects are modified in place.

    :param contexts: The context objects to translate.
    :param language: Key of the natural language to select.
    :return: A new list holding the same context objects, in order.
    :raises AssertionError: If a context is not a dict, or if its testcases
        are not a list.
    """
    result = []
    for context in contexts:
        assert isinstance(
            context, dict
        ), f"A context must be an object, got {context} which is a {type(context)}."
        key_to_set = "script" if "script" in context else "testcases"
        if key_to_set in context:
            raw_testcases = context[key_to_set]
            assert isinstance(raw_testcases, list), (
                f"The '{key_to_set}' of a context must be a list, "
                f"got {raw_testcases} which is a {type(raw_testcases)}."
            )
            context[key_to_set] = translate_testcases(raw_testcases, language)
        result.append(context)

    return result


def translate_tab(tab: YamlDict, language: str) -> YamlDict:
    """
    Translate one tab (or unit) and everything it contains to ``language``.

    The name, stored under ``unit`` or ``tab``, is translated when it is a
    dict. The contents are then translated from the first key that is present
    out of ``contexts``, ``cases``, ``testcases`` and, as fallback, ``scripts``.
    The tab object is modified in place.

    :param tab: The tab object to translate.
    :param language: Key of the natural language to select.
    :return: The same tab object, after translation.
    :raises AssertionError: If the name has no entry for ``language``, if the
        content key does not match the name key, if the content is not a
        list, or if no content key is present at all.
    """
    key_to_set = "unit" if "unit" in tab else "tab"
    name = tab.get(key_to_set)

    if isinstance(name, dict):
        assert (
            language in name
        ), f"No translation for language '{language}' found in '{key_to_set}'."
        tab[key_to_set] = name[language]

    # The tab can have testcases or contexts.
    if "contexts" in tab:
        assert isinstance(
            tab["contexts"], list
        ), f"The 'contexts' of a tab must be a list, got {type(tab['contexts'])}."
        tab["contexts"] = translate_contexts(tab["contexts"], language)
    elif "cases" in tab:
        # NOTE(review): Codecov reported this branch as not covered by tests.
        assert "unit" in tab, "A tab with 'cases' must be declared with 'unit'."
        # "cases" holds what the original comment calls "testcases N.S. /
        # contexts O.S." (presumably new syntax / old syntax).
        assert isinstance(
            tab["cases"], list
        ), f"The 'cases' of a unit must be a list, got {type(tab['cases'])}."
        tab["cases"] = translate_contexts(tab["cases"], language)
    elif "testcases" in tab:
        # NOTE(review): Codecov reported this branch as not covered by tests.
        # "testcases" holds what the original comment calls "scripts N.S. /
        # testcases O.S." (presumably new syntax / old syntax).
        assert "tab" in tab, "A tab with 'testcases' must be declared with 'tab'."
        assert isinstance(
            tab["testcases"], list
        ), f"The 'testcases' of a tab must be a list, got {type(tab['testcases'])}."
        tab["testcases"] = translate_testcases(tab["testcases"], language)
    else:
        assert "scripts" in tab, (
            "A tab must contain one of 'contexts', 'cases', 'testcases' or "
            f"'scripts', got keys {list(tab)}."
        )
        assert isinstance(
            tab["scripts"], list
        ), f"The 'scripts' of a tab must be a list, got {type(tab['scripts'])}."
        tab["scripts"] = translate_testcases(tab["scripts"], language)
    return tab


def translate_tabs(dsl_list: list, language: str) -> list:
    """
    Translate every tab in ``dsl_list`` to ``language``.

    :param dsl_list: The tab objects to translate.
    :param language: Key of the natural language to select.
    :return: A new list with the result of ``translate_tab`` for each entry,
        in the original order.
    :raises AssertionError: If an entry is not a dict.
    """
    result = []
    for tab in dsl_list:
        assert isinstance(
            tab, dict
        ), f"A tab must be an object, got {tab} which is a {type(tab)}."
        result.append(translate_tab(tab, language))

    return result


def translate_dsl(dsl_object: YamlObject, language: str) -> YamlObject:
    """
    Translate a complete parsed test suite to ``language``.

    The suite is either a list of tabs, or an object that stores its list of
    tabs under ``units`` (preferred when present) or ``tabs``.

    :param dsl_object: The parsed test suite.
    :param language: Key of the natural language to select.
    :return: For a list, a new list of translated tabs; for an object, the
        same object with its tab list replaced by the translated one.
    :raises AssertionError: If the suite is neither a list nor a dict, or if
        its ``units``/``tabs`` entry is not a list.
    """
    if isinstance(dsl_object, list):
        return translate_tabs(dsl_object, language)

    assert isinstance(dsl_object, dict), (
        "A test suite must be a list or an object, "
        f"got {dsl_object} which is a {type(dsl_object)}."
    )
    key_to_set = "units" if "units" in dsl_object else "tabs"
    tab_list = dsl_object.get(key_to_set)
    assert isinstance(tab_list, list), (
        f"The '{key_to_set}' of a test suite must be a list, "
        f"got {tab_list} which is a {type(tab_list)}."
    )
    dsl_object[key_to_set] = translate_tabs(tab_list, language)
    return dsl_object


def parse_yaml(yaml_path: str) -> YamlObject:
    """
    Read the file at ``yaml_path`` and parse its contents with ``_parse_yaml``.

    :param yaml_path: Path of the YAML file to read.
    :return: The parsed YAML object.
    :raises OSError: If the file cannot be opened or read.
    """
    # NOTE(review): Codecov reported this function as not covered by tests.
    # Decode explicitly as UTF-8: without it the platform's locale encoding is
    # used, which can garble non-ASCII characters in the translations.
    with open(yaml_path, "r", encoding="utf-8") as stream:
        return _parse_yaml(stream.read())

Check warning on line 189 in tested/nat_translation.py

View check run for this annotation

Codecov / codecov/patch

tested/nat_translation.py#L189

Added line #L189 was not covered by tests


def convert_to_yaml(yaml_object: YamlObject) -> str:
    """
    Serialise ``yaml_object`` to a YAML string, keeping the original key order.

    ``ReturnOracle`` values are written as mappings tagged ``!oracle`` and
    ``ExpressionString`` values as scalars tagged ``!expression``. The
    representers are registered through ``yaml.add_representer`` on every call.

    :param yaml_object: The object to serialise.
    :return: The YAML document as a string.
    """

    def _dump_oracle(dumper, oracle):
        return dumper.represent_mapping("!oracle", oracle)

    def _dump_expression(dumper, expression):
        return dumper.represent_scalar("!expression", expression)

    # Teach the dumper how to write the custom tagged types.
    for custom_type, representer in (
        (ReturnOracle, _dump_oracle),
        (ExpressionString, _dump_expression),
    ):
        yaml.add_representer(custom_type, representer)
    return yaml.dump(yaml_object, sort_keys=False)


# if __name__ == "__main__":
# n = len(sys.argv)
# assert n > 2, "Expected at least two arguments (path to yaml file and language)."
#
# path = sys.argv[1]
# lang = sys.argv[2]
# new_yaml = parse_yaml(path)
# print(new_yaml)
# translated_dsl = translate_dsl(new_yaml, lang)
# yaml_string = convert_to_yaml(translated_dsl)
# print(yaml_string)
# _validate_dsl(_parse_yaml(yaml_string))
Fixed Show fixed Hide fixed
Loading
Loading