From 3747803a0fee7c9a3be538fc5315c8bf4d2bac6e Mon Sep 17 00:00:00 2001 From: pr0me Date: Fri, 9 Jun 2023 10:44:48 +0200 Subject: [PATCH 01/16] bump Joern version and cleanup --- Dockerfile | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/Dockerfile b/Dockerfile index d045f82..7c438ea 100644 --- a/Dockerfile +++ b/Dockerfile @@ -36,21 +36,16 @@ ARG REQUIREMENTS_FILE COPY ${REQUIREMENTS_FILE} ${TPF_HOME}/${REQUIREMENTS_FILE} RUN pip install -r ${TPF_HOME}/${REQUIREMENTS_FILE} -ARG JOERN_VERSION="v1.1.1538" +ARG JOERN_VERSION="v1.2.1" RUN echo ${JOERN_VERSION} COPY discovery ${DISCOVERY_HOME} RUN chmod +x ${DISCOVERY_HOME}/joern/joern-install.sh -RUN /bin/sh -c 'cd ${DISCOVERY_HOME}/joern/ && ./joern-install.sh --version=v1.1.1538 --install-dir=/opt/joern' - -# install js2cpg -# RUN /bin/sh -c 'cd ${DISCOVERY_HOME}/joern/js2cpg/; sbt stage' - +RUN /bin/sh -c 'cd ${DISCOVERY_HOME}/joern/ && ./joern-install.sh --version=v1.2.1 --install-dir=/opt/joern' # ADD HERE COMMANDS USEFUL FOR OTHER DOCKER-COMPOSE SERVICES -# ENV PYTHONPATH "${PYTHONPATH}:${TPF_HOME}/tp_framework" RUN python setup.py develop -ENTRYPOINT [ "bash" ] \ No newline at end of file +ENTRYPOINT [ "bash" ] From 28ba7dd9abb2db5db0707b78392ac6ba8db7504b Mon Sep 17 00:00:00 2001 From: pr0me Date: Fri, 9 Jun 2023 12:47:12 +0200 Subject: [PATCH 02/16] more graceful error handling for discovery mode --- tp_framework/core/discovery.py | 24 ++++++++++++++++++------ tp_framework/core/instance.py | 5 ++++- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/tp_framework/core/discovery.py b/tp_framework/core/discovery.py index 7f86912..62e7658 100644 --- a/tp_framework/core/discovery.py +++ b/tp_framework/core/discovery.py @@ -205,8 +205,11 @@ def run_and_process_discovery_rule(cpg: Path, discovery_rule: Path, f"No discovery method has been specified. 
Likely you need to modify the discovery->method property in the JSON file of the pattern instance related to the discovery rule {discovery_rule}. We will continue with the default discovery method for Scala discovery rules (aka '{default_discovery_method}').") discovery_method = default_discovery_method if discovery_method == "joern": - cpg_file_name, query_name, raw_findings = run_joern_discovery_rule(cpg, discovery_rule) - findings = process_joern_discovery_rule_findings(discovery_rule, query_name, raw_findings) + try: + cpg_file_name, query_name, raw_findings = run_joern_discovery_rule(cpg, discovery_rule) + findings = process_joern_discovery_rule_findings(discovery_rule, query_name, raw_findings) + except: + findings = None return findings else: e = DiscoveryMethodNotSupported(discovery_method=discovery_method) @@ -389,7 +392,12 @@ def discovery_ignore_measurement(cpg: Path, l_tp_id: list[int], tp_lib: Path, for tpi_id in d_tpi_id_path: tpi_json_path = d_tpi_id_path[tpi_id] tpi_json_rel = os.path.relpath(tpi_json_path, start=tp_lib) - tpi_instance = load_instance_from_metadata(tpi_json_rel, tp_lib, language) # get the instance + # get the instance + try: + tpi_instance = load_instance_from_metadata(tpi_json_rel, tp_lib, language) + except: + logger.exception(f"Failed to decode metadata `{tp_lib / tpi_json_rel}`") + continue d_tpi = {"instance": tpi_instance, "measurement": "ignored", "jsonpath": tpi_json_path, "discovery": discovery_for_tpi(tpi_instance, tpi_json_path, cpg, disc_output_dir, measurement_stop=False, already_executed=d_dr_executed)} @@ -412,6 +420,11 @@ def discovery_for_tpi(tpi_instance: Instance, tpi_json_path: Path, cpg: Path, di if not measurement_stop and tpi_instance.discovery_rule: # prepare and execute the discovery rule (if not done yet) dr = (tpi_json_path.parent / tpi_instance.discovery_rule).resolve() + if not dr.exists(): + d_tpi_discovery["rule_path"] = str(dr) + logger.exception("Scala rule for {} does not exist".format(dr)) + 
return d_tpi_discovery + logger.info( f"{msgpre}prepare discovery rule {dr}...") d_tpi_discovery["rule_path"] = str(dr) @@ -423,7 +436,6 @@ def discovery_for_tpi(tpi_instance: Instance, tpi_json_path: Path, cpg: Path, di f"{msgpre}running discovery rule...") # related to #42 pdr = patch_PHP_discovery_rule(dr, tpi_instance.language, output_dir=disc_output_dir) - # try: findings = run_and_process_discovery_rule(cpg, pdr, discovery_method=d_tpi_discovery["method"]) d_tpi_discovery["results"] = findings @@ -433,8 +445,8 @@ def discovery_for_tpi(tpi_instance: Instance, tpi_json_path: Path, cpg: Path, di already_executed[d_tpi_discovery["rule_hash"]] = None logger.error( f"{msgpre}Discovery rule failure for this instance: {e}") - ## JoernQueryError(e) - ## JoernQueryParsingResultError(e) + # JoernQueryError(e) + # JoernQueryParsingResultError(e) already_executed[d_tpi_discovery["rule_hash"]] = findings logger.info( f"{msgpre} discovery rule executed.") diff --git a/tp_framework/core/instance.py b/tp_framework/core/instance.py index a39fa68..81864e4 100644 --- a/tp_framework/core/instance.py +++ b/tp_framework/core/instance.py @@ -222,7 +222,10 @@ def load_instance_from_metadata(metadata: str, tp_lib: Path, language: str) -> I raise InstanceDoesNotExists(ref_metadata=metadata_path.name) with open(metadata_path) as file: - instance: Dict = json.load(file) + try: + instance: Dict = json.load(file) + except Exception as e: + raise e pattern_id = utils.get_id_from_name(metadata_path.parent.parent.name) pattern, p_dir = get_pattern_by_pattern_id(language, pattern_id, tp_lib) From 693578bef5b7c898f425465507264adb6eb0672b Mon Sep 17 00:00:00 2001 From: pr0me Date: Fri, 9 Jun 2023 13:51:31 +0200 Subject: [PATCH 03/16] generate json file with only the findings --- tp_framework/core/discovery.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tp_framework/core/discovery.py b/tp_framework/core/discovery.py index 62e7658..ce5d103 100644 --- a/tp_framework/core/discovery.py +++ 
b/tp_framework/core/discovery.py @@ -469,6 +469,7 @@ def post_process_and_export_results(d_res: dict, build_name: str, disc_output_di "method", "queryFile", "queryHash", "queryName", "queryAccuracy", "queryAlreadyExecuted", "discovery", "filename", "lineNumber", "methodFullName"] rows = [] + findings = [] for tp_id in d_res: if d_res[tp_id]["measurement_found"] is False: rows.append( @@ -538,6 +539,7 @@ def post_process_and_export_results(d_res: dict, build_name: str, disc_output_di row["discovery"] = f["discovery"] row["queryName"] = f["queryName"] if f["discovery"]: + findings.append(f) row["filename"] = f["filename"] row["lineNumber"] = f["lineNumber"] row["methodFullName"] = f["methodFullName"] @@ -547,6 +549,9 @@ def post_process_and_export_results(d_res: dict, build_name: str, disc_output_di pass ofile = disc_output_dir / f"discovery_{build_name}.csv" utils.write_csv_file(ofile, fields, rows) + findings_file = disc_output_dir / f"findings_{build_name}.json" + with open(findings_file, 'w+') as f: + json.dump(findings, f, sort_keys=True, indent=4) d_results = { "discovery_result_file": str(ofile), "results": d_res From fc0200fb2113065d72c24623275292077bf119ab Mon Sep 17 00:00:00 2001 From: pr0me Date: Mon, 12 Jun 2023 13:27:58 +0200 Subject: [PATCH 04/16] add option to pass pre-computed cpg instead of source directory --- .gitignore | 2 ++ tp_framework/cli/interface.py | 5 +++-- tp_framework/cli/tpf_commands.py | 9 ++++++++- tp_framework/core/discovery.py | 15 +++++++++++++-- 4 files changed, 26 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 6bd1c26..cc6ff62 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,8 @@ __pycache__/ .pytest_cache venv tp_framework.egg-info/ +tp_framework/.metals/ +tp_framework/.vscode/ coverage_html/ .coverage htmlcov diff --git a/tp_framework/cli/interface.py b/tp_framework/cli/interface.py index c0c1525..aa4fa06 100644 --- a/tp_framework/cli/interface.py +++ b/tp_framework/cli/interface.py @@ -63,7 +63,8 
@@ def add_pattern(pattern_dir: str, language: str, measure: bool, tools: list[Dict def run_discovery_for_pattern_list(src_dir: Path, pattern_id_list: list[int], language: str, itools: list[Dict], tp_lib_path: Path = Path(config.DEFAULT_TP_LIBRARY_ROOT_DIR).resolve(), output_dir: Path = Path(config.RESULT_DIR).resolve(), - ignore: bool = False): + ignore: bool = False, + cpg: str = None): print("Discovery for patterns started...") # Set output directory and logger build_name, disc_output_dir = utils.get_operation_build_name_and_dir( @@ -72,7 +73,7 @@ def run_discovery_for_pattern_list(src_dir: Path, pattern_id_list: list[int], la # utils.check_tp_lib(tp_lib_path) d_res = discovery.discovery(Path(src_dir), pattern_id_list, tp_lib_path, itools, language, build_name, - disc_output_dir, ignore=ignore) + disc_output_dir, ignore=ignore, cpg=cpg) print("Discovery for patterns completed.") print(f"- results available here: {disc_output_dir}") print(f"- log file available here: {disc_output_dir / config.logfile}") diff --git a/tp_framework/cli/tpf_commands.py b/tp_framework/cli/tpf_commands.py index 9e8964b..36019a1 100644 --- a/tp_framework/cli/tpf_commands.py +++ b/tp_framework/cli/tpf_commands.py @@ -213,6 +213,12 @@ def add_command_subparser(self, subparser): required=True, help="Path to discovery target folder" ) + discovery_parser.add_argument( + "-c", "--cpg", + dest="cpg_existing", + type=str, + help="Specify an already existing CPG in TARGET_DIR instead of letting the framework generate a new one." 
+ ) discovery_parser.add_argument( "-i", "--ignore-measurements", action="store_true", @@ -254,6 +260,7 @@ def execute_command(self, args): tp_lib_path: str = parse_tp_lib(args.tp_lib) target_dir = Path(args.target_discovery) utils.check_target_dir(target_dir) + cpg_name: str = args.cpg_existing output_dir: str = parse_output_dir(args.output_dir) tool_parsed: list[Dict] = parse_tool_list(args.tools) l_pattern_id = parse_patterns(args.all_patterns, args.pattern_range, args.patterns, @@ -261,7 +268,7 @@ def execute_command(self, args): language) try: interface.run_discovery_for_pattern_list(target_dir, l_pattern_id, language, tool_parsed, tp_lib_path, - output_dir=output_dir, ignore=args.ignore) + output_dir=output_dir, ignore=args.ignore, cpg=cpg_name) except InvalidSastTools: print(invalidSastTools()) exit(1) diff --git a/tp_framework/core/discovery.py b/tp_framework/core/discovery.py index 62e7658..a1799c9 100644 --- a/tp_framework/core/discovery.py +++ b/tp_framework/core/discovery.py @@ -248,13 +248,19 @@ def discovery(src_dir: Path, l_tp_id: list[int], tp_lib_path: Path, itools: list build_name: str, disc_output_dir: Path, timeout_sec: int = 0, - ignore=False) -> Dict: + ignore=False, + cpg: str = None) -> Dict: logger.info("Discovery for patterns started...") # TODO: to support multiple discovery methods the following would need major refactoring. # - CPG is specific to Joern # - each discovery rule tells which method to use # - on the other hand you do not want to compute the CPG multiple times - cpg: Path = generate_cpg(src_dir, language, build_name, disc_output_dir, timeout_sec=timeout_sec) + + # if a CPG name is specified, expect it in TARGET_DIR. 
Else, generate new CPG from source + if cpg is not None: + cpg: Path = src_dir / cpg + else: + cpg: Path = generate_cpg(src_dir, language, build_name, disc_output_dir, timeout_sec=timeout_sec) if not ignore: return discovery_under_measurement(cpg, l_tp_id, tp_lib_path, itools, language, build_name, disc_output_dir, timeout_sec=timeout_sec) @@ -469,6 +475,7 @@ def post_process_and_export_results(d_res: dict, build_name: str, disc_output_di "method", "queryFile", "queryHash", "queryName", "queryAccuracy", "queryAlreadyExecuted", "discovery", "filename", "lineNumber", "methodFullName"] rows = [] + findings = [] for tp_id in d_res: if d_res[tp_id]["measurement_found"] is False: rows.append( @@ -538,6 +545,7 @@ def post_process_and_export_results(d_res: dict, build_name: str, disc_output_di row["discovery"] = f["discovery"] row["queryName"] = f["queryName"] if f["discovery"]: + findings.append(f) row["filename"] = f["filename"] row["lineNumber"] = f["lineNumber"] row["methodFullName"] = f["methodFullName"] @@ -547,6 +555,9 @@ def post_process_and_export_results(d_res: dict, build_name: str, disc_output_di pass ofile = disc_output_dir / f"discovery_{build_name}.csv" utils.write_csv_file(ofile, fields, rows) + findings_file = disc_output_dir / f"findings_{build_name}.json" + with open(findings_file, 'w+') as f: + json.dump(findings, f, sort_keys=True, indent=4) d_results = { "discovery_result_file": str(ofile), "results": d_res From 9f49771f1a8213632e9ca442f2eb526c6d047bfe Mon Sep 17 00:00:00 2001 From: felix-20 Date: Thu, 22 Jun 2023 11:33:58 +0200 Subject: [PATCH 05/16] started on refactoring patterns --- qualitytests/core/test_instanceR.py | 112 ++++ qualitytests/core/test_patternR.py | 105 +++ qualitytests/core/test_pattern_repair.py | 37 ++ qualitytests/core/test_utils.py | 16 +- ...> __P@TCHED__1_instance_3_global_array.sc} | 0 tp_framework/cli/interface.py | 48 +- tp_framework/cli/main.py | 4 + tp_framework/cli/tpf_commands.py | 122 +++- 
tp_framework/core/discovery.py | 31 +- tp_framework/core/errors.py | 23 +- tp_framework/core/exceptions.py | 42 +- tp_framework/core/instance.py | 624 +++++++++++------- tp_framework/core/measurement.py | 2 +- tp_framework/core/pattern.py | 313 ++++++--- tp_framework/core/pattern_operations.py | 81 +-- tp_framework/core/pattern_repair.py | 31 + tp_framework/core/utils.py | 95 ++- tp_framework/tmp.py | 17 + 18 files changed, 1263 insertions(+), 440 deletions(-) create mode 100644 qualitytests/core/test_instanceR.py create mode 100644 qualitytests/core/test_patternR.py create mode 100644 qualitytests/core/test_pattern_repair.py rename qualitytests/resources/sample_patlib/PHP/3_global_array/1_instance_3_global_array/{__P@TCHED___1_instance_3_global_array.sc => __P@TCHED__1_instance_3_global_array.sc} (100%) create mode 100644 tp_framework/core/pattern_repair.py create mode 100644 tp_framework/tmp.py diff --git a/qualitytests/core/test_instanceR.py b/qualitytests/core/test_instanceR.py new file mode 100644 index 0000000..2893030 --- /dev/null +++ b/qualitytests/core/test_instanceR.py @@ -0,0 +1,112 @@ +import pytest +from pathlib import Path +from unittest.mock import patch + +from core.instance import Instance +from core.exceptions import PatternDoesNotExists, InstanceInvalid +from qualitytests.qualitytests_utils import join_resources_path + +class mockPattern: + def __init__(self) -> None: + self.pattern_id = 1 + +class TestInstance: + sample_tp_lib: Path = join_resources_path("sample_patlib") + + example_instance_dict = { + "code": { + "path": "", + "injection_skeleton_broken": True + }, + "discovery": { + "rule": "", + "method": "joern", + "rule_accuracy": "Perfect", + "notes": "Some notes" + }, + "remediation": { + "notes": "./docs/remediation_notes.md", + "transformation": None, + "modeling_rule": None + }, + "compile": { + "binary": "", + "dependencies": None, + "instruction": None + }, + "expectation": { + "type": "xss", + "sink_file": "", + "sink_line": 5, + 
"source_file": "", + "source_line": 9, + "expectation": True + }, + "properties": { + "category": "S0", + "feature_vs_internal_api": "FEATURE", + "input_sanitizer": False, + "source_and_sink": False, + "negative_test_case": False + } + } + + invalid_instances = [ + (Path("./test_instance.json"), False, {}, "The provided instance path 'test_instance.json' does not exist."), + (Path("./1_instance_test_pattern.json"), True, {}, "Could not get id from ''."), + (Path("./1_instance_test_pattern/1_instance_test_pattern.json"), True, {}, "Pattern 1 - Instance 1 - Please check 1_instance_test_pattern/1_instance_test_pattern.json."), + (Path("./1_instance_test_pattern/1_instance_test_pattern.json"), True, {"name": "instance"}, "Pattern 1 - Instance 1 - 'code:path' must be contained in instance json.") + ] + + @pytest.mark.parametrize("json_file_path, is_file_return, read_json_return, expected_error", invalid_instances) + def test_init_invalid_instance_from_json_path(self, + json_file_path: Path, + is_file_return: bool, + read_json_return: dict, + expected_error: str): + with patch("pathlib.Path.is_file") as is_file_mock, \ + patch('core.utils.read_json') as read_json_mock, \ + pytest.raises(InstanceInvalid) as e_info: + is_file_mock.return_value = is_file_return + read_json_mock.return_value = read_json_return + Instance.init_from_json_path(json_file_path, mockPattern()) + is_file_mock.assert_called_once() + assert f"{expected_error} Instance is invalid." 
== str(e_info.value) + + def test_init_valid_instance_from_json_path(self): + with patch('core.utils.read_json') as read_json_mock, \ + patch('pathlib.Path.is_file') as is_file_mock: + + is_file_mock.return_value = True + read_json_mock.return_value = TestInstance.example_instance_dict + test_instance = Instance.init_from_json_path(Path("./1_instance_test_pattern/1_instance_test_pattern.json"), mockPattern()) + + read_json_mock.assert_called_once() + is_file_mock.assert_called_once() + assert Path("./1_instance_test_pattern/") == test_instance.instance_path + assert Path("./1_instance_test_pattern/1_instance_test_pattern.json") == test_instance.instance_json_path + assert 1 == test_instance.instance_id + assert "" == test_instance.code_path + assert test_instance.description is None + assert test_instance.code_injection_skeleton_broken + assert "xss" == test_instance.expectation_type + assert "" == test_instance.expectation_sink_file + assert 5 == test_instance.expectation_sink_line + assert "" == test_instance.expectation_source_file + assert 9 == test_instance.expectation_source_line + assert test_instance.expectation_expectation + assert "" == test_instance.compile_binary + assert test_instance.compile_instruction is None + assert test_instance.compile_dependencies is None + assert "" == test_instance.discovery_rule + assert "joern" == test_instance.discovery_method + assert "Perfect" == test_instance.discovery_rule_accuracy + assert "Some notes" == test_instance.discovery_notes + assert "S0" == test_instance.properties_category + assert "FEATURE" == test_instance.properties_feature_vs_internal_api + assert not test_instance.properties_input_sanitizer + assert not test_instance.properties_source_and_sink + assert not test_instance.properties_negative_test_case + assert "./docs/remediation_notes.md" == test_instance.remediation_notes + assert test_instance.remediation_transformation is None + assert test_instance.remediation_modeling_rule is None \ No newline at 
end of file diff --git a/qualitytests/core/test_patternR.py b/qualitytests/core/test_patternR.py new file mode 100644 index 0000000..60a52f1 --- /dev/null +++ b/qualitytests/core/test_patternR.py @@ -0,0 +1,105 @@ +import pytest +from pathlib import Path +from unittest.mock import patch + +from core.pattern import Pattern +from core.exceptions import PatternDoesNotExists, PatternInvalid +from qualitytests.qualitytests_utils import join_resources_path + +class TestPatternR: + sample_tp_lib: Path = join_resources_path("sample_patlib") + + example_pattern_dict = { + "name": "Test Pattern", + "description": "./docs/description.md", + "family": "test_pattern", + "tags": ["sast", "language"], + "instances": [ + "./1_instance_1_test_pattern/1_instance_1_test_pattern.json" + ] + } + + not_existing_patterns = [(1000, "php"), (1000, "js"), (1000, "java")] + invalid_patterns = [ + (3, "php", {}, "The pattern needs a valid JSON file."), + (3, "php", {"name": "test_instances_key_in_json_missing"}, "Pattern 3 (PHP) - Pattern JSON file needs an 'instances' key with valid relative links."), + (3, "php", {"instances": ["test_instances_invalid_relative_path"]}, "Pattern 3 (PHP) - The instance path 'test_instances_invalid_relative_path' is not valid.") + ] + valid_patterns = [ + (1, "php", example_pattern_dict), + (1, "js", example_pattern_dict) + ] + + valid_patterns_without_id = [ + (Path("path_to_json_file"), "php", Path("pattern_path"), 5), + (Path("path_to_json_file"), "js", Path("pattern_path"), 3) + ] + + @pytest.mark.parametrize("pattern_id, language", not_existing_patterns) + def test_not_exising_pattern_init_from_id_and_language(self, pattern_id: int, language: str): + with pytest.raises(PatternDoesNotExists) as e_info: + Pattern.init_from_id_and_language(pattern_id, language, TestPatternR.sample_tp_lib) + assert f"Specified Pattern `{pattern_id}` does not exists." 
== str(e_info.value) + + @pytest.mark.parametrize("pattern_id, language, read_json_return, expected_assertion_error", invalid_patterns) + def test_init_invalid_pattern_from_id_and_language(self, + pattern_id: int, language: str, + read_json_return: dict, + expected_assertion_error: str): + with patch('core.utils.read_json') as read_json_mock, \ + pytest.raises(PatternInvalid) as e_info: + + read_json_mock.return_value = read_json_return + Pattern.init_from_id_and_language(pattern_id, language, TestPatternR.sample_tp_lib) + + read_json_mock.assert_called_once() + assert f"{expected_assertion_error} Pattern is invalid." == str(e_info.value) + + @pytest.mark.parametrize("path_to_json, language, pattern_path, expected_id", valid_patterns_without_id) + def test_init_from_json_file_without_pattern_id(self, path_to_json: Path, language: str, pattern_path: Path, expected_id: int): + with patch('core.utils.read_json') as read_json_mock, \ + patch('pathlib.Path.is_file') as is_file_mock, \ + patch("pathlib.Path.is_dir") as is_dir_mock, \ + patch("core.pattern.isinstance") as isinstance_mock, \ + patch('core.instance.Instance.init_from_json_path') as instance_init_mock: + + is_dir_mock.return_value = True + is_file_mock.return_value = True + isinstance_mock.return_value = True + read_json_mock.return_value = TestPatternR.example_pattern_dict + pattern = Pattern.init_from_json_file_without_pattern_id(path_to_json, language, pattern_path, TestPatternR.sample_tp_lib) + read_json_mock.assert_called_once() + is_file_mock.assert_called() + is_dir_mock.assert_called() + isinstance_mock.assert_called() + instance_init_mock.assert_called_once() + assert expected_id == pattern.pattern_id + assert path_to_json == pattern.pattern_json_path + assert pattern_path == pattern.pattern_path + assert language.upper() == pattern.language + + + @pytest.mark.parametrize("pattern_id, language, read_json_return", valid_patterns) + def test_init_valid_pattern_from_id_and_language(self, pattern_id: 
int, language: str, + read_json_return: dict): + with patch('core.utils.read_json') as read_json_mock, \ + patch('pathlib.Path.is_file') as is_file_mock, \ + patch("pathlib.Path.is_dir") as is_dir_mock, \ + patch("core.pattern.isinstance") as isinstance_mock, \ + patch('core.instance.Instance.init_from_json_path') as instance_init_mock: + + is_dir_mock.return_value = True + is_file_mock.return_value = True + isinstance_mock.return_value = True + read_json_mock.return_value = read_json_return + test_pattern = Pattern.init_from_id_and_language(pattern_id, language, TestPatternR.sample_tp_lib) + + read_json_mock.assert_called_once() + is_file_mock.assert_called() + is_dir_mock.assert_called() + isinstance_mock.assert_called() + instance_init_mock.assert_called_once() + assert "Test Pattern" == test_pattern.name + assert "./docs/description.md" == test_pattern.description + assert "test_pattern" == test_pattern.family + assert ["sast", "language"] == test_pattern.tags \ No newline at end of file diff --git a/qualitytests/core/test_pattern_repair.py b/qualitytests/core/test_pattern_repair.py new file mode 100644 index 0000000..4a45cc1 --- /dev/null +++ b/qualitytests/core/test_pattern_repair.py @@ -0,0 +1,37 @@ +import pytest +from pathlib import Path +from unittest.mock import patch + +from core.pattern import Pattern +from core.pattern_repair import PatternRepair +from qualitytests.qualitytests_utils import join_resources_path + + +class MockedPattern: + def __init__(self) -> None: + self.tp_lib_path: Path = join_resources_path("sample_patlib") + + +class TestPatternRepair: + mocked_pattern = MockedPattern() + + def test_repair_pattern_json(self): + with patch("pathlib.Path.is_file") as is_file_mock_init: + is_file_mock_init.return_value = True + pattern_repair = PatternRepair(TestPatternRepair.mocked_pattern) + + is_file_mock_init.assert_called_once() + + with patch("pathlib.Path.is_file") as is_file_mock, \ + patch("core.utils.get_pattern_json") as 
get_pattern_json_mock, \ + patch("shutil.copy") as copy_mock: + is_file_mock.return_value = False + get_pattern_json_mock.return_value = None + + pattern_repair.repair_pattern_json() + + is_file_mock.assert_called_once() + get_pattern_json_mock.assert_called_once() + copy_mock.assert_called_once() + + \ No newline at end of file diff --git a/qualitytests/core/test_utils.py b/qualitytests/core/test_utils.py index b86ccd1..417da83 100644 --- a/qualitytests/core/test_utils.py +++ b/qualitytests/core/test_utils.py @@ -5,6 +5,7 @@ import config from core import utils from core.exceptions import PatternDoesNotExists, TPLibDoesNotExist, LanguageTPLibDoesNotExist, DiscoveryMethodNotSupported +from unittest.mock import patch import qualitytests.qualitytests_utils as qualitytests_utils def setup_three_pattern(tmp_path: Path): @@ -209,4 +210,17 @@ def test_get_tpi_id_from_jsonpath(self): assert utils.get_tpi_id_from_jsonpath(jp) == 1 jp = qualitytests_utils.join_resources_path( "sample_patlib") / "PHP" / "3_global_array" / "2_instance_3_global_array" / "111_instance_3_global_array.json" - assert utils.get_tpi_id_from_jsonpath(jp) == 2 \ No newline at end of file + assert utils.get_tpi_id_from_jsonpath(jp) == 2 + + next_free_pattern_id_test_cases = [ + ([Path('1_instance_test_pattern'), Path('2_instance_test_pattern')], 3, 1), + ([Path('1_instance_test_pattern'), Path('3_instance_test_pattern')], 2, 1), + ([Path('1_instance_test_pattern'), Path('3_instance_test_pattern')], 2, 2), + ] + + @pytest.mark.parametrize("list_dir_ret_value, expected_value, proposed_id", next_free_pattern_id_test_cases) + def test_get_next_free_pattern_id_for_language(self, list_dir_ret_value: list, expected_value: int, proposed_id: int): + tp_lib_path = qualitytests_utils.join_resources_path("sample_patlib") + with patch("core.utils.list_dirs_only") as list_dir_mock: + list_dir_mock.return_value = list_dir_ret_value + assert expected_value == utils.get_next_free_pattern_id_for_language("PHP", 
tp_lib_path) \ No newline at end of file diff --git a/qualitytests/resources/sample_patlib/PHP/3_global_array/1_instance_3_global_array/__P@TCHED___1_instance_3_global_array.sc b/qualitytests/resources/sample_patlib/PHP/3_global_array/1_instance_3_global_array/__P@TCHED__1_instance_3_global_array.sc similarity index 100% rename from qualitytests/resources/sample_patlib/PHP/3_global_array/1_instance_3_global_array/__P@TCHED___1_instance_3_global_array.sc rename to qualitytests/resources/sample_patlib/PHP/3_global_array/1_instance_3_global_array/__P@TCHED__1_instance_3_global_array.sc diff --git a/tp_framework/cli/interface.py b/tp_framework/cli/interface.py index aa4fa06..f269961 100644 --- a/tp_framework/cli/interface.py +++ b/tp_framework/cli/interface.py @@ -10,6 +10,7 @@ import config from core import utils, pattern_operations, measurement, discovery, measure, errors from core.exceptions import PatternValueError +from core.pattern import Pattern # CRUD patterns @@ -23,17 +24,11 @@ def add_pattern(pattern_dir: str, language: str, measure: bool, tools: list[Dict print(errors.patternFolderNotFound(pattern_dir_path)) return - if not pattern_json: - # TODO - add_pattern: we could automatically find the json file - default_pattern_json = f"{pattern_dir_path.name}.json" - pattern_json_path: Path = pattern_dir_path / default_pattern_json - if not pattern_json_path.exists(): - print(errors.patternDefaultJSONNotFound(default_pattern_json)) - return - else: - # TODO - add_pattern: handle for both branches the case in which the json file does not exist? 
- pattern_json_path: Path = Path(pattern_json).resolve() - + pattern_json_path = Path(pattern_json) if pattern_json else utils.get_pattern_json(pattern_dir_path) + if not pattern_json_path: + print(errors.patternDefaultJSONNotFound(pattern_dir)) + return + tp_lib_path.mkdir(exist_ok=True, parents=True) try: @@ -208,4 +203,33 @@ def check_discovery_rules(language: str, pattern_ids: list[int], print(f" - num errors: {d_res['counters']['errors']}") if export_file: print(f"- csv file available here: {output_dir / export_file}") - print(f"- log file available here: {output_dir / config.logfile}") \ No newline at end of file + print(f"- log file available here: {output_dir / config.logfile}") + + +def repair_patterns(language: str, pattern_ids: list, + masking_file: Path, include_README: bool, + measurement_results: Path, checkdiscoveryrule_results: Path, + output_dir: Path, tp_lib_path: Path): + print("Pattern Repair started...") + should_include_readme = not include_README + utils.check_tp_lib(tp_lib_path) + if should_include_readme: + utils.check_file_exist(checkdiscoveryrule_results) + utils.check_file_exist(masking_file, ".json") if masking_file else None + utils.check_measurement_results_exist(measurement_results) + output_dir.mkdir(exist_ok=True, parents=True) + utils.add_loggers(output_dir) + + # for pattern_id in pattern_ids: + # pattern = Pattern.init_from_id_and_language(pat) # (pattern_id, language, tp_lib_path) + # print(pattern) + # # pattern_path = get_pattern_path_by_pattern_id(language, pattern_id, tp_lib_path) + # # PatternRepair( + # # pattern_path, + # # language, + # # tp_lib_path, + # # checkdiscoveryrule_results, + # # masking_file, + # # measurement_results, + # # ).repair(should_include_readme) + # pass \ No newline at end of file diff --git a/tp_framework/cli/main.py b/tp_framework/cli/main.py index 1eb7941..4fb71aa 100644 --- a/tp_framework/cli/main.py +++ b/tp_framework/cli/main.py @@ -20,6 +20,7 @@ def main(args=None): manual_discovery_cmd = 
tpf_commands.ManualDiscovery() sastreport_cmd = tpf_commands.Report() check_discovery_rules_cmd = tpf_commands.CheckDiscoveryRules() + pattern_repair_cmd = tpf_commands.PatternRepair() # Sub-parsers subparser = parser.add_subparsers(title="Commands", dest="command", metavar="") add_pattern_cmd.add_command_subparser(subparser) @@ -29,6 +30,7 @@ def main(args=None): manual_discovery_cmd.add_command_subparser(subparser) sastreport_cmd.add_command_subparser(subparser) check_discovery_rules_cmd.add_command_subparser(subparser) # TODO: in-progress, not tested + pattern_repair_cmd.add_command_subparser(subparser) # Parsing args: Namespace = parser.parse_args(args) @@ -50,6 +52,8 @@ def main(args=None): sastreport_cmd.execute_command(args) case "checkdiscoveryrules": check_discovery_rules_cmd.execute_command(args) + case "patternrepair": + pattern_repair_cmd.execute_command(args) case other: print("Command not valid...") exit(1) diff --git a/tp_framework/cli/tpf_commands.py b/tp_framework/cli/tpf_commands.py index 36019a1..102ffe3 100644 --- a/tp_framework/cli/tpf_commands.py +++ b/tp_framework/cli/tpf_commands.py @@ -9,6 +9,7 @@ from core.exceptions import InvalidSastTools from core.errors import invalidSastTools +from core.pattern import Pattern class Command(ABC): @@ -514,6 +515,93 @@ def execute_command(self, args): export_file=args.export, output_dir=output_dir) +class PatternRepair(Command): + + # overriding abstract method + def add_command_subparser(self, subparser): + pattern_repair_parser = subparser.add_parser("patternrepair", + help="Repair patterns in your catalogue, helps you keeping the structure of all patterns the same") + pattern_repair_parser_pattern_selection_mode = pattern_repair_parser.add_mutually_exclusive_group(required=True) + pattern_repair_parser.add_argument( + "-l", "--language", + metavar="LANGUAGE", + dest="language", + required=True, + help="Programming language targeted" + ) + pattern_repair_parser_pattern_selection_mode.add_argument( + 
"-p", "--patterns", + metavar="PATTERN_ID", + dest="patterns", + nargs="+", + type=int, + help="Specify pattern(s) ID(s) to test for discovery" + ) + pattern_repair_parser_pattern_selection_mode.add_argument( + "--pattern-range", + metavar="RANGE_START-RANGE_END", + dest="pattern_range", + type=str, + help="Specify pattern ID range separated by`-` (ex. 10-50)" + ) + pattern_repair_parser_pattern_selection_mode.add_argument( + "-a", "--all-patterns", + dest="all_patterns", + action="store_true", + help="Test discovery for all available patterns" + ) + pattern_repair_parser.add_argument( + "--tp-lib", + metavar="TP_LIB_DIR", + dest="tp_lib", + help=f"Absolute path to alternative pattern library, default resolves to `./{config.TP_LIB_REL_DIR}`" + ) + pattern_repair_parser.add_argument( + "--output-dir", + metavar="OUTPUT_DIR", + dest="output_dir", + help=f"Absolute path to the folder where outcomes (e.g., log file, export file if any) will be stored, default resolves to `./{config.RESULT_REL_DIR}`" + ) + pattern_repair_parser.add_argument( + "--masking-file", + metavar="MASKING_FILE", + dest="masking_file", + help=f"Absolute path to a json file, that contains a mapping, if the name for some measurement tools should be kept secret, default is None" + ) + pattern_repair_parser.add_argument( + "--measurement-results", + metavar="MEASUREMENT_DIR", + dest="measurement_dir", + help=f"Absolute path to the folder where measurement results are stored, default resolves to `./{config.MEASUREMENT_REL_DIR}`" + ) + pattern_repair_parser.add_argument( + "--checkdiscoveryrules-results", + metavar="CHECKDISCOVERYRULES_FILE", + dest="checkdiscoveryrules_file", + help=f"Absolute path to the csv file, where the results of the `checkdiscoveryrules` command are stored, default resolves to `./checkdiscoveryrules.csv`" + ) + pattern_repair_parser.add_argument( + "--skip-readme", + dest="skip_readme", + action="store_true", + help="If set, the README generation is skipped." 
+ ) + # overriding abstract method + def execute_command(self, args): + language: str = args.language.upper() + tp_lib_path: str = parse_tp_lib(args.tp_lib) + l_pattern_id = sorted(parse_patterns(args.all_patterns, args.pattern_range, args.patterns, + tp_lib_path, + language)) + output_dir: Path = parse_dir_or_file(args.output_dir) + measurement_results: Path = parse_dir_or_file(args.measurement_dir, config.MEASUREMENT_REL_DIR, "Measurement directory") + checkdiscoveryrules_results: Path = parse_dir_or_file(args.checkdiscoveryrules_file, "checkdiscoveryrules.csv", "Checkdiscoveryrules csv file") + masking_file: Path | None = parse_dir_or_file(args.masking_file) if args.masking_file else None + interface.repair_patterns(language=language, pattern_ids=l_pattern_id, + masking_file=masking_file, include_README=not args.skip_readme, + measurement_results=measurement_results, checkdiscoveryrule_results=checkdiscoveryrules_results, + output_dir=output_dir, tp_lib_path=tp_lib_path) + # class Template(Command): # # # overriding abstract method @@ -561,28 +649,48 @@ def parse_tool_list(tools: list[str]): def parse_patterns(all_patterns: bool, pattern_range: str, patterns, tp_lib_path: Path, language: str): + # is this necessary?
Should be ensured by `.add_mutually_exclusive_group(required=True)` in the parser try: assert sum(bool(e) for e in [all_patterns, pattern_range, patterns]) == 1 # these elements are in mutual exclusion except Exception as e: print("The following parameters are in mutual exclusion: `--all-patterns`, `--pattern-range`, and `--patterns`") exit(1) + id_list: list[int] = [] if all_patterns: lang_tp_lib_path: Path = tp_lib_path / language utils.check_lang_tp_lib_path(lang_tp_lib_path) try: id_list: list[int] = list(map(lambda d: utils.get_id_from_name(d.name), utils.list_dirs_only(lang_tp_lib_path))) - return id_list except Exception as e: print("Some patterns could not be properly fetched from the pattern library.") + print(e) exit(1) - if pattern_range: + elif pattern_range: try: spattern_range: str = pattern_range.split("-") - pattern_id_list: list[int] = list(range(int(spattern_range[0]), int(spattern_range[1]) + 1)) - return pattern_id_list + id_list: list[int] = list(range(int(spattern_range[0]), int(spattern_range[1]) + 1)) except Exception as e: - print("Pattern range could not be properly parsed. ") + print("Pattern range could not be properly parsed.") + print(e) exit(1) - if patterns and len(patterns) > 0: - return patterns \ No newline at end of file + elif patterns and len(patterns) > 0: + id_list = patterns + # init a Pattern to make sure, all the patterns that should be used for the task are valid. 
+ # return only the pattern_id, to be compatible with current implementation + # TODO: refactor to use the Pattern instances instead of the ids + return sorted([Pattern.init_from_id_and_language(idx, language, tp_lib_path).pattern_id \ + for idx in id_list]) + + +def parse_dir_or_file(path_to_file_or_dir: str, + default_path: str = config.RESULT_DIR, + name: str = "Output directory") -> Path: + if not path_to_file_or_dir: + path_to_file_or_dir: str = str(default_path) + try: + path_to_file_or_dir_as_path: Path = Path(path_to_file_or_dir).resolve() + return path_to_file_or_dir_as_path + except Exception as e: + print(f"{name} is wrong: {path_to_file_or_dir}") + exit(1) diff --git a/tp_framework/core/discovery.py b/tp_framework/core/discovery.py index a1799c9..656c59e 100644 --- a/tp_framework/core/discovery.py +++ b/tp_framework/core/discovery.py @@ -17,7 +17,7 @@ CPGLanguageNotSupported, DiscoveryRuleError, DiscoveryRuleParsingResultError, InvalidSastTools from core.measurement import Measurement -from core.instance import Instance, instance_from_dict, load_instance_from_metadata +from core.instance import Instance #, instance_from_dict, load_instance_from_metadata from core.pattern import get_pattern_by_pattern_id # mand_finding_joern_keys = ["filename", "methodFullName", "lineNumber"] @@ -738,7 +738,7 @@ def check_discovery_rules(language: str, l_tp_id: list[int], (i + 1, len(l_tp_id), tp_id) # tp_info )) try: - target_tp, p_dir = get_pattern_by_pattern_id(language, tp_id, tp_lib_path) + target_tp, _ = get_pattern_by_pattern_id(language, tp_id, tp_lib_path) l_tpi_dir: list[Path] = utils.list_tpi_paths_by_tp_id( language, tp_id, tp_lib_path ) @@ -749,21 +749,16 @@ def check_discovery_rules(language: str, l_tp_id: list[int], results.append(res) err += 1 continue - for j, path in enumerate(l_tpi_dir): + for j, instance_path in enumerate(l_tpi_dir): try: - target_src = path.parent - # TODO: use a function to load an instance, in general it looks to me we are going a 
bit back and forth - # from json and file system. - # Also: this loading seems to be used in many other places (e.g., start_add_measurement_for_pattern)... - with open(path) as instance_json_file: - instance_json: Dict = json.load(instance_json_file) - - tpi_id = utils.get_id_from_name(path.name) + target_src = instance_path.parent + target_instance: Instance = Instance.init_from_json_path(instance_path, target_tp) + + tpi_id = utils.get_id_from_name(instance_path.name) logger.info(utils.get_tpi_op_status_string( (i + 1, len(l_tp_id), tp_id), t_tpi_info=(j + 1, len(l_tpi_dir), tpi_id) )) - target_instance: Instance = instance_from_dict(instance_json, target_tp, language, tpi_id) if target_instance.discovery_rule: dr_path = target_src / target_instance.discovery_rule @@ -771,7 +766,7 @@ def check_discovery_rules(language: str, l_tp_id: list[int], logger.warning( f"Instance {tpi_id} of pattern {tp_id}: the discovery rule {dr_path} does not exist") res = get_check_discovery_rule_result(tp_id, language, instance_id=tpi_id, - instance_path=path, discovery_rule=dr_path) + instance_path=instance_path, discovery_rule=dr_path) results.append(res) err += 1 continue @@ -784,12 +779,12 @@ def check_discovery_rules(language: str, l_tp_id: list[int], if d_results["findings"] and any( f["result"] == discovery_result_strings["discovery"] for f in d_results["findings"]): res = get_check_discovery_rule_result(tp_id, language, instance_id=tpi_id, - instance_path=path, pattern_name=target_tp.name, + instance_path=instance_path, pattern_name=target_tp.name, discovery_rule=dr_path, successful="yes") success += 1 else: res = get_check_discovery_rule_result(tp_id, language, instance_id=tpi_id, - instance_path=path, pattern_name=target_tp.name, + instance_path=instance_path, pattern_name=target_tp.name, discovery_rule=dr_path, successful="no") unsuccess += 1 results.append(res) @@ -797,7 +792,7 @@ def check_discovery_rules(language: str, l_tp_id: list[int], logger.info( f"Instance {tpi_id} 
of pattern {tp_id}: the discovery rule is not provided for the pattern") res = get_check_discovery_rule_result(tp_id, language, instance_id=tpi_id, - instance_path=path, successful="missing") + instance_path=instance_path, successful="missing") results.append(res) missing += 1 logger.info(utils.get_tpi_op_status_string( @@ -807,8 +802,8 @@ def check_discovery_rules(language: str, l_tp_id: list[int], )) except Exception as e: logger.warning( - f"Something went wrong for the instance at {path} of the pattern id {tp_id}. Exception raised: {utils.get_exception_message(e)}") - res = get_check_discovery_rule_result(tp_id, language, pattern_name=target_tp.name, instance_path=path) + f"Something went wrong for the instance at {instance_path} of the pattern id {tp_id}. Exception raised: {utils.get_exception_message(e)}") + res = get_check_discovery_rule_result(tp_id, language, pattern_name=target_tp.name, instance_path=instance_path) results.append(res) err += 1 continue diff --git a/tp_framework/core/errors.py b/tp_framework/core/errors.py index 8d0e99b..24c32da 100644 --- a/tp_framework/core/errors.py +++ b/tp_framework/core/errors.py @@ -2,10 +2,13 @@ def patternDoesNotExists(pattern_id): return f"Specified Pattern `{pattern_id}` does not exists." -def patternValueError(): +def patternValueError(): # TODO: can we get rid of that? return f"Error during Pattern initialization." +def patternInvalidError(e): + return f"{e} Pattern is invalid." + def patternKeyError(e): return f"Key {e} was not found in pattern metadata." @@ -22,6 +25,8 @@ def instanceDoesNotExists(instance_id=None, ref_metadata=None): message = f"Specified Pattern Instance at `{ref_metadata}` does not exists." return message +def instanceInvalidError(e): + return f"{e} Instance is invalid." def patternFolderNotFound(pattern_dir_path): return f"`Pattern source folder {pattern_dir_path}` not found or is not a folder." 
@@ -84,4 +89,18 @@ def discoveryRuleParsingResultError(): def unexpectedException(e): - return f"Unexpected exception triggered: {e}." \ No newline at end of file + return f"Unexpected exception triggered: {e}." + + +def measurementResultsDirDoesNotExist(): + return "The directory with the measurements does not exist." + + +def fileDoesNotExist(): + return "The file you provided does not exist or is the wrong file type." + + +def templateDirDoesNotExist(not_existing_dir_or_file): + return f"Your tplib does not have {not_existing_dir_or_file}." + + diff --git a/tp_framework/core/exceptions.py b/tp_framework/core/exceptions.py index 281221e..24a2ec1 100644 --- a/tp_framework/core/exceptions.py +++ b/tp_framework/core/exceptions.py @@ -8,6 +8,18 @@ def __init__(self, pattern_id): super().__init__(self.message) + +class PatternInvalid(Exception): + def __init__(self, message: str) -> None: + self.message = errors.patternInvalidError(message) + super().__init__(self.message) + + +class PatternRepairError(Exception): + def __init__(self, message: str) -> None: + self.message = message + super().__init__(self.message) + + class InstanceDoesNotExists(Exception): def __init__(self, instance_id: int = None, ref_metadata: str = None): self.instance_id = instance_id @@ -16,6 +28,12 @@ def __init__(self, instance_id: int = None, ref_metadata: str = None): super().__init__(self.message) + +class InstanceInvalid(Exception): + def __init__(self, message: str) -> None: + self.message = errors.instanceInvalidError(message) + super().__init__(self.message) + + class PatternFolderNotFound(Exception): def __init__(self): self.message = errors.patternFolderNotFound() @@ -85,7 +103,7 @@ def __init__(self, message=None, discovery_method=None): super().__init__(self.message) -class PatternValueError(Exception): +class PatternValueError(Exception): # TODO: can we get rid of this?
def __init__(self, message=None): if message: self.message = message @@ -121,4 +139,24 @@ def __init__(self, stderr=None): self.message = stderr else: self.message = errors.discoveryRuleParsingResultError() - super().__init__(self.message) \ No newline at end of file + super().__init__(self.message) + +# Pattern Repair + +class MeasurementResultsDoNotExist(Exception): + def __init__(self, message=errors.measurementResultsDirDoesNotExist()): + self.message = message + super().__init__(self.message) + + +class FileDoesNotExist(Exception): + def __init__(self, message=errors.fileDoesNotExist()): + self.message = message + super().__init__(self.message) + + +class TemplateDoesNotExist(Exception): + def __init__(self, message=errors.templateDirDoesNotExist('template')) -> None: + self.message = message + super().__init__(self.message) + diff --git a/tp_framework/core/instance.py b/tp_framework/core/instance.py index 81864e4..f8e6d07 100644 --- a/tp_framework/core/instance.py +++ b/tp_framework/core/instance.py @@ -1,247 +1,377 @@ -import json -from enum import Enum -from pathlib import Path -from typing import Dict - -from core import utils -from core.exceptions import PatternDoesNotExists, InstanceDoesNotExists -from core.pattern import Pattern, get_pattern_path_by_pattern_id, get_pattern_by_pattern_id - -import logging -from core import loggermgr - -logger = logging.getLogger(loggermgr.logger_name(__name__)) - -class PatternCategory(str, Enum): - S0 = "S0" - D1 = "D1" - D2 = "D2" - D3 = "D3" - D4 = "D4" - - -class FeatureVsInternalApi(str, Enum): - FEATURE = "FEATURE" - INTERNAL_API = "INTERNAL_API" - - -class Instance(Pattern): - # TODO - pattern instance: update to current structure 09/2022 - ''' - ''' - - def __init__( - self, - code_path: Path, - code_injection_skeleton_broken: bool, - compile_dependencies: Path, # added 092022 - compile_binary: Path, - compile_instruction: str, # added 092022 - remediation_transformation: str, # added 092022 - 
remediation_modeling_rule: Path, # added 092022 - remediation_notes: str, # added 092022 - properties_category: PatternCategory, - properties_negative_test_case: bool, - properties_source_and_sink: bool, - properties_input_sanitizer: bool, - properties_feature_vs_internal_api: FeatureVsInternalApi, - discovery_rule: Path, - discovery_method: str, - discovery_rule_accuracy: str, - discovery_notes: str, - expectation: bool, - expectation_type: str, - expectation_sink_file: Path, - expectation_sink_line: int, - expectation_source_file: Path, - expectation_source_line: int, - name: str, - description: str, - family: str, - tags: list[str], - instances: list[Path], - language: str, - pattern_id: int = None, - instance_id: int = None, - pattern_dir: Path = None, - ) -> None: - if pattern_id is None: - super().__init__(name, language, instances, family, description, tags, pattern_dir=pattern_dir) - else: - super().__init__(name, language, instances, family, description, tags, pattern_id) - - self.code_injection_skeleton_broken = code_injection_skeleton_broken - self.compile_dependencies = compile_dependencies # added 092022 - self.compile_binary = compile_binary - self.compile_instruction = compile_instruction # added 092022 - self.remediation_transformation = remediation_transformation # added 092022 - self.remediation_modeling_rule = remediation_modeling_rule # added 092022 - self.remediation_notes = remediation_notes # added 092022 - self.properties_category = properties_category - self.properties_negative_test_case = properties_negative_test_case - self.properties_source_and_sink = properties_source_and_sink - self.properties_input_sanitizer = properties_input_sanitizer - self.properties_feature_vs_internal_api = properties_feature_vs_internal_api - self.expectation = expectation - self.discovery_rule = discovery_rule - self.discovery_method = discovery_method - self.discovery_rule_accuracy = discovery_rule_accuracy - self.discovery_notes = discovery_notes - 
self.expectation_type = expectation_type - self.expectation_sink_file = expectation_sink_file - self.expectation_sink_line = expectation_sink_line - self.expectation_source_file = expectation_source_file - self.expectation_source_line = expectation_source_line - self.instance_id = instance_id or self.define_instance_id(pattern_dir) - if code_path is None: - logger.warning("Instance without code snippet cannot even be measured by SAST tools: pattern {0}, instance {1}".format(name, instance_id)) - self.code_path = "" - else: - self.code_path = code_path - - - def define_instance_id(self, pattern_dir: Path) -> int: - try: - inst_list: list[Path] = utils.list_tpi_paths_by_tp_id( - self.language, self.pattern_id, pattern_dir) - id_list: list[int] = sorted(list(map(lambda x: int(str(x.name).split("_")[0]), inst_list))) - return id_list[-1] + 1 if len(id_list) > 0 else 1 - except PatternDoesNotExists: - return 1 - - def add_instance_to_pattern_id(self, language: str, pattern_src_dir: Path, pattern_dir: Path) -> None: - instance_dir_name: str = utils.get_instance_dir_name_from_pattern(pattern_src_dir.name, self.pattern_id, - self.instance_id) - pattern_dir_name: str = utils.get_pattern_dir_name_from_name(pattern_src_dir.name, self.pattern_id) - instance_dir: Path = pattern_dir / language / pattern_dir_name / instance_dir_name - instance_dir.mkdir(exist_ok=True, parents=True) - instance_json_file: Path = instance_dir / f"{instance_dir_name}.json" - - with open(instance_json_file, "w") as json_file: - instance_dict: Dict = { - "code": { - "path": utils.get_relative_path_str_or_none(self.code_path), - "injection_skeleton_broken": self.code_injection_skeleton_broken - }, - "remediation": { - "notes": self.remediation_notes, - "transformation": self.remediation_transformation, - "modeling_rule": utils.get_relative_path_str_or_none(self.remediation_modeling_rule) - }, - "discovery": { - "rule": utils.get_relative_path_str_or_none(self.discovery_rule), - "method": 
self.discovery_method, - "rule_accuracy": self.discovery_rule_accuracy, - "notes": self.discovery_notes - }, - "compile": { - "binary": utils.get_relative_path_str_or_none(self.compile_binary), - "dependencies": utils.get_relative_path_str_or_none(self.compile_dependencies), - "instruction": self.compile_instruction - }, - "expectation": { - "type": self.expectation_type, - "sink_file": utils.get_relative_path_str_or_none(self.expectation_sink_file), - "sink_line": self.expectation_sink_line, - "source_file": utils.get_relative_path_str_or_none(self.expectation_source_file), - "source_line": self.expectation_source_line, - "expectation": self.expectation - }, - "properties": { - "category": utils.get_enum_value_or_none(self.properties_category), - "feature_vs_internal_api": utils.get_enum_value_or_none(self.properties_feature_vs_internal_api), - "input_sanitizer": self.properties_input_sanitizer, - "source_and_sink": self.properties_source_and_sink, - "negative_test_case": self.properties_negative_test_case - } - } - json.dump(instance_dict, json_file, indent=4) - - -# TODO (old): Test this -def get_instance_by_instance_id(language: str, instance_id: int, pattern_id, pattern_dir: Path) -> Instance: - instance_dir: Path = get_instance_path_from_instance_id(language, pattern_id, instance_id, pattern_dir) - instance_json: Path = instance_dir / f"{instance_dir.name}.json" - with open(instance_json) as json_file: - pattern_from_json: Dict = json.load(json_file) - - pattern, p_dir = get_pattern_by_pattern_id(language, pattern_id, pattern_dir) - return instance_from_dict(pattern_from_json, pattern, language, pattern_id) - - -def get_instance_path_from_instance_id(language: str, pattern_id: int, instance_id: int, pattern_dir: Path) -> Path: - pattern_path: Path = get_pattern_path_by_pattern_id(language, pattern_id, pattern_dir) - filtered_res: list[str] = list(filter( - lambda x: int(x.split("_")[0]) == instance_id, - map(lambda y: y.name, 
utils.list_dirs_only(pattern_path)) - )) - if not filtered_res: - raise InstanceDoesNotExists(instance_id) - return pattern_path / filtered_res[0] - - -def instance_from_dict(instance_dict: Dict, pattern: Pattern, language: str, instance_id: int) -> Instance: - return Instance( - utils.get_path_or_none(utils.get_from_dict(instance_dict, "code", "path")), # code_path: Path, - utils.get_from_dict(instance_dict, "code", "injection_skeleton_broken"), # code_injection_skeleton_broken: bool, - utils.get_path_or_none(utils.get_from_dict(instance_dict, "compile", "dependencies")), # compile_dependencies: Path, # added 092022 - utils.get_path_or_none(utils.get_from_dict(instance_dict, "compile", "binary")), # compile_binary: Path, - utils.get_from_dict(instance_dict, "compile", "instruction"), # compile_instruction: str, # added 092022 - utils.get_from_dict(instance_dict, "remediation", "transformation"), # remediation_transformation: str, # added 092022 - utils.get_path_or_none(utils.get_from_dict(instance_dict, "remediation", "modeling_rule")), # remediation_modeling_rule: Path, # added 092022 - utils.get_from_dict(instance_dict, "remediation", "notes"), # remediation_notes: str, # added 092022 - get_pattern_category_or_none(utils.get_from_dict(instance_dict, "properties", "category")), - utils.get_from_dict(instance_dict, "properties", "negative_test_case"), - utils.get_from_dict(instance_dict, "properties", "source_and_sink"), - utils.get_from_dict(instance_dict, "properties", "input_sanitizer"), - get_feature_vs_internal_api_or_none(utils.get_from_dict(instance_dict, "properties", "feature_vs_internal_api")), - utils.get_path_or_none(utils.get_from_dict(instance_dict, "discovery", "rule")), - utils.get_from_dict(instance_dict, "discovery", "method"), - utils.get_from_dict(instance_dict, "discovery", "rule_accuracy"), - utils.get_from_dict(instance_dict, "discovery", "notes"), - utils.get_from_dict(instance_dict, "expectation", "expectation"), - 
utils.get_from_dict(instance_dict, "expectation", "type"), - utils.get_path_or_none(utils.get_from_dict(instance_dict, "expectation", "sink_file")), - utils.get_from_dict(instance_dict, "expectation", "sink_line"), - utils.get_path_or_none(utils.get_from_dict(instance_dict, "expectation", "source_file")), - utils.get_from_dict(instance_dict, "expectation", "source_line"), - pattern.name, - pattern.description, - pattern.family, - pattern.tags, - pattern.instances, - language, - pattern_id=pattern.pattern_id, - instance_id=instance_id - ) - - -def load_instance_from_metadata(metadata: str, tp_lib: Path, language: str) -> Instance: - metadata_path: Path = tp_lib / metadata - if not metadata_path.exists(): - raise InstanceDoesNotExists(ref_metadata=metadata_path.name) - - with open(metadata_path) as file: - try: - instance: Dict = json.load(file) - except Exception as e: - raise e - - pattern_id = utils.get_id_from_name(metadata_path.parent.parent.name) - pattern, p_dir = get_pattern_by_pattern_id(language, pattern_id, tp_lib) - instance_id = utils.get_id_from_name(metadata_path.parent.name) - return instance_from_dict(instance, pattern, language, instance_id) - - -def get_pattern_category_or_none(el) -> PatternCategory | None: - try: - return PatternCategory(el) - except ValueError: - return None - - -def get_feature_vs_internal_api_or_none(el) -> FeatureVsInternalApi | None: - try: - return FeatureVsInternalApi(el) - except ValueError: - return None \ No newline at end of file +import json +import shutil +from pathlib import Path +from os import listdir + +from core import utils +from core.exceptions import InstanceInvalid + +class Instance: + @classmethod + def init_from_json_path(cls, path_to_instance_json: Path, pattern_id=None): + if not path_to_instance_json.is_file(): + raise InstanceInvalid(f"The provided instance path '{path_to_instance_json}' does not exist.") + return cls._init_from_json(cls(), path_to_instance_json, pattern_id) + + def __init__(self) -> 
None: + self.instance_path = None + self.instance_json_path = None + self.instance_id = None + self.pattern_id = None + + # JSON fields + self.description = None + self.code_path = None + self.code_injection_skeleton_broken = None + self.expectation_type = None + self.expectation_sink_file = None + self.expectation_sink_line = None + self.expectation_source_file = None + self.expectation_source_line = None + self.expectation_expectation = None + self.compile_binary = None + self.compile_instruction = None + self.compile_dependencies = None + self.discovery_rule = None + self.discovery_method = None + self.discovery_rule_accuracy = None + self.discovery_notes = None + self.properties_category = None + self.properties_feature_vs_internal_api = None + self.properties_input_sanitizer = None + self.properties_source_and_sink = None + self.properties_negative_test_case = None + self.remediation_notes = None + self.remediation_transformation = None + self.remediation_modeling_rule = None + + self.attributes_with_type_path = ['instance_path', 'instance_json_path'] + + def _assert_instance(self): + try: + int(self.instance_id) + int(self.pattern_id) + assert self.instance_path.is_dir() + assert self.instance_json_path.is_file() + assert self.code_path.is_file() + except Exception as e: + raise InstanceInvalid(f"{self._log_prefix()}Instance Variables are not properly set. 
'{e}'") + + def _init_from_json(self, path_to_instance_json: Path, pattern_id): + self.instance_path = path_to_instance_json.parent + self.instance_json_path = Path(path_to_instance_json.name) + try: + self.instance_id = utils.get_id_from_name(self.instance_path.name) + except Exception as e: + raise InstanceInvalid(f"Could not get id from '{self.instance_path.name}'.") + + self.pattern_id = pattern_id + + # enforced values + instance_properties = utils.read_json(self.instance_json_path) + if not instance_properties: + raise InstanceInvalid(f"{self._log_prefix()}Please check {self.instance_json_path}.") + + self.description = instance_properties.get("description", None) + self.code_path = Path(instance_properties.get("code", {}).get("path", None)) + self.code_injection_skeleton_broken = instance_properties.get("code", {}).get("injection_skeleton_broken", None) + self.expectation_type = instance_properties.get("expectation", {}).get("type", None) + self.expectation_sink_file = Path(instance_properties.get("expectation", {}).get("sink_file", None)) + self.expectation_sink_line = instance_properties.get("expectation", {}).get("sink_line", None) + self.expectation_source_file = Path(instance_properties.get("expectation", {}).get("source_file", None)) + self.expectation_source_line = instance_properties.get("expectation", {}).get("source_line", None) + self.expectation_expectation = instance_properties.get("expectation", {}).get("expectation", None) + self.compile_binary = instance_properties.get("compile", {}).get("binary", None) + self.compile_instruction = instance_properties.get("compile", {}).get("instruction", None) + self.compile_dependencies = instance_properties.get("compile", {}).get("dependencies", None) + self.discovery_rule = Path(instance_properties.get("discovery", {}).get("rule", None)) + self.discovery_method = instance_properties.get("discovery", {}).get("method", None) + self.discovery_rule_accuracy = instance_properties.get("discovery", 
{}).get("rule_accuracy", None) + self.discovery_notes = instance_properties.get("discovery", {}).get("notes", None) + self.properties_category = instance_properties.get("properties", {}).get("category", None) + self.properties_feature_vs_internal_api = instance_properties.get("properties", {}).get("feature_vs_internal_api", None) + self.properties_input_sanitizer = instance_properties.get("properties", {}).get("input_sanitizer", None) + self.properties_source_and_sink = instance_properties.get("properties", {}).get("source_and_sink", None) + self.properties_negative_test_case = instance_properties.get("properties", {}).get("negative_test_case", None) + self.remediation_notes = instance_properties.get("remediation", {}).get("notes", None) + self.remediation_transformation = instance_properties.get("remediation", {}).get("transformation", None) + self.remediation_modeling_rule = instance_properties.get("remediation", {}).get("modeling_rule", None) + return self + + def __getattribute__(self, name): + base_path = super().__getattribute__("instance_path") + attr = super().__getattribute__(name) + if isinstance(attr, Path) and attr != base_path: + attr = base_path / attr + return attr + + def _log_prefix(self): + return f"Pattern {self.pattern_id} - Instance {self.instance_id} - " + + def _make_path(self, path_name: str): + return Path(self.instance_path / path_name).resolve() if path_name else None + + def __str__(self) -> str: + return f"Instance {self.instance_id}" + + def copy_to_tplib(self, pattern_path: Path): + new_instance_path = pattern_path / self.instance_path.name + new_instance_path.mkdir(parents=True, exist_ok=True) + utils.copy_dir_content(self.instance_path, new_instance_path) + self.instance_path = new_instance_path + + +if __name__ == "__main__": + p = Path(__file__).parent.parent.parent / 'testability_patterns' / 'PHP' / '1_static_variables' / '1_instance_1_static_variables' / '1_instance_1_static_variables.json' + i = Instance.init_from_json_path(p, 
1) + print('\033[92m', i.code_path, '\033[0m') + i.instance_path = "/tmp" + print('\033[92m', i.code_path, '\033[0m') + +# import json +# from enum import Enum +# from pathlib import Path +# from typing import Dict + +# from core import utils +# from core.exceptions import PatternDoesNotExists, InstanceDoesNotExists +# from core.pattern import Pattern, get_pattern_path_by_pattern_id, get_pattern_by_pattern_id + +# import logging +# from core import loggermgr + +# logger = logging.getLogger(loggermgr.logger_name(__name__)) + +# class PatternCategory(str, Enum): +# S0 = "S0" +# D1 = "D1" +# D2 = "D2" +# D3 = "D3" +# D4 = "D4" + + +# class FeatureVsInternalApi(str, Enum): +# FEATURE = "FEATURE" +# INTERNAL_API = "INTERNAL_API" + + +# class Instance(Pattern): +# # TODO - pattern instance: update to current structure 09/2022 +# ''' +# ''' + +# def __init__( +# self, +# code_path: Path, +# code_injection_skeleton_broken: bool, +# compile_dependencies: Path, # added 092022 +# compile_binary: Path, +# compile_instruction: str, # added 092022 +# remediation_transformation: str, # added 092022 +# remediation_modeling_rule: Path, # added 092022 +# remediation_notes: str, # added 092022 +# properties_category: PatternCategory, +# properties_negative_test_case: bool, +# properties_source_and_sink: bool, +# properties_input_sanitizer: bool, +# properties_feature_vs_internal_api: FeatureVsInternalApi, +# discovery_rule: Path, +# discovery_method: str, +# discovery_rule_accuracy: str, +# discovery_notes: str, +# expectation: bool, +# expectation_type: str, +# expectation_sink_file: Path, +# expectation_sink_line: int, +# expectation_source_file: Path, +# expectation_source_line: int, +# name: str, +# description: str, +# family: str, +# tags: list[str], +# instances: list[Path], +# language: str, +# pattern_id: int = None, +# instance_id: int = None, +# pattern_dir: Path = None, +# ) -> None: +# if pattern_id is None: +# super().__init__(name, language, instances, family, 
description, tags, pattern_dir=pattern_dir) +# else: +# super().__init__(name, language, instances, family, description, tags, pattern_id) + +# self.code_injection_skeleton_broken = code_injection_skeleton_broken +# self.compile_dependencies = compile_dependencies # added 092022 +# self.compile_binary = compile_binary +# self.compile_instruction = compile_instruction # added 092022 +# self.remediation_transformation = remediation_transformation # added 092022 +# self.remediation_modeling_rule = remediation_modeling_rule # added 092022 +# self.remediation_notes = remediation_notes # added 092022 +# self.properties_category = properties_category +# self.properties_negative_test_case = properties_negative_test_case +# self.properties_source_and_sink = properties_source_and_sink +# self.properties_input_sanitizer = properties_input_sanitizer +# self.properties_feature_vs_internal_api = properties_feature_vs_internal_api +# self.expectation = expectation +# self.discovery_rule = discovery_rule +# self.discovery_method = discovery_method +# self.discovery_rule_accuracy = discovery_rule_accuracy +# self.discovery_notes = discovery_notes +# self.expectation_type = expectation_type +# self.expectation_sink_file = expectation_sink_file +# self.expectation_sink_line = expectation_sink_line +# self.expectation_source_file = expectation_source_file +# self.expectation_source_line = expectation_source_line +# self.instance_id = instance_id or self.define_instance_id(pattern_dir) +# if code_path is None: +# logger.warning("Instance without code snippet cannot even be measured by SAST tools: pattern {0}, instance {1}".format(name, instance_id)) +# self.code_path = "" +# else: +# self.code_path = code_path + + +# def define_instance_id(self, pattern_dir: Path) -> int: +# try: +# inst_list: list[Path] = utils.list_tpi_paths_by_tp_id( +# self.language, self.pattern_id, pattern_dir) +# id_list: list[int] = sorted(list(map(lambda x: int(str(x.name).split("_")[0]), inst_list))) +# 
return id_list[-1] + 1 if len(id_list) > 0 else 1 +# except PatternDoesNotExists: +# return 1 + +# def add_instance_to_pattern_id(self, language: str, pattern_src_dir: Path, pattern_dir: Path) -> None: +# instance_dir_name: str = utils.get_instance_dir_name_from_pattern(pattern_src_dir.name, self.pattern_id, +# self.instance_id) +# pattern_dir_name: str = utils.get_pattern_dir_name_from_name(pattern_src_dir.name, self.pattern_id) +# instance_dir: Path = pattern_dir / language / pattern_dir_name / instance_dir_name +# instance_dir.mkdir(exist_ok=True, parents=True) +# instance_json_file: Path = instance_dir / f"{instance_dir_name}.json" + +# with open(instance_json_file, "w") as json_file: +# instance_dict: Dict = { +# "code": { +# "path": utils.get_relative_path_str_or_none(self.code_path), +# "injection_skeleton_broken": self.code_injection_skeleton_broken +# }, +# "remediation": { +# "notes": self.remediation_notes, +# "transformation": self.remediation_transformation, +# "modeling_rule": utils.get_relative_path_str_or_none(self.remediation_modeling_rule) +# }, +# "discovery": { +# "rule": utils.get_relative_path_str_or_none(self.discovery_rule), +# "method": self.discovery_method, +# "rule_accuracy": self.discovery_rule_accuracy, +# "notes": self.discovery_notes +# }, +# "compile": { +# "binary": utils.get_relative_path_str_or_none(self.compile_binary), +# "dependencies": utils.get_relative_path_str_or_none(self.compile_dependencies), +# "instruction": self.compile_instruction +# }, +# "expectation": { +# "type": self.expectation_type, +# "sink_file": utils.get_relative_path_str_or_none(self.expectation_sink_file), +# "sink_line": self.expectation_sink_line, +# "source_file": utils.get_relative_path_str_or_none(self.expectation_source_file), +# "source_line": self.expectation_source_line, +# "expectation": self.expectation +# }, +# "properties": { +# "category": utils.get_enum_value_or_none(self.properties_category), +# "feature_vs_internal_api": 
utils.get_enum_value_or_none(self.properties_feature_vs_internal_api), +# "input_sanitizer": self.properties_input_sanitizer, +# "source_and_sink": self.properties_source_and_sink, +# "negative_test_case": self.properties_negative_test_case +# } +# } +# json.dump(instance_dict, json_file, indent=4) + + +# # TODO (old): Test this +# def get_instance_by_instance_id(language: str, instance_id: int, pattern_id, pattern_dir: Path) -> Instance: +# instance_dir: Path = get_instance_path_from_instance_id(language, pattern_id, instance_id, pattern_dir) +# instance_json: Path = instance_dir / f"{instance_dir.name}.json" +# with open(instance_json) as json_file: +# pattern_from_json: Dict = json.load(json_file) + +# pattern, p_dir = get_pattern_by_pattern_id(language, pattern_id, pattern_dir) +# return instance_from_dict(pattern_from_json, pattern, language, pattern_id) + + +# def get_instance_path_from_instance_id(language: str, pattern_id: int, instance_id: int, pattern_dir: Path) -> Path: +# pattern_path: Path = get_pattern_path_by_pattern_id(language, pattern_id, pattern_dir) +# filtered_res: list[str] = list(filter( +# lambda x: int(x.split("_")[0]) == instance_id, +# map(lambda y: y.name, utils.list_dirs_only(pattern_path)) +# )) +# if not filtered_res: +# raise InstanceDoesNotExists(instance_id) +# return pattern_path / filtered_res[0] + + +# def instance_from_dict(instance_dict: Dict, pattern: Pattern, language: str, instance_id: int) -> Instance: +# return Instance( +# utils.get_path_or_none(utils.get_from_dict(instance_dict, "code", "path")), # code_path: Path, +# utils.get_from_dict(instance_dict, "code", "injection_skeleton_broken"), # code_injection_skeleton_broken: bool, +# utils.get_path_or_none(utils.get_from_dict(instance_dict, "compile", "dependencies")), # compile_dependencies: Path, # added 092022 +# utils.get_path_or_none(utils.get_from_dict(instance_dict, "compile", "binary")), # compile_binary: Path, +# utils.get_from_dict(instance_dict, "compile", 
"instruction"), # compile_instruction: str, # added 092022 +# utils.get_from_dict(instance_dict, "remediation", "transformation"), # remediation_transformation: str, # added 092022 +# utils.get_path_or_none(utils.get_from_dict(instance_dict, "remediation", "modeling_rule")), # remediation_modeling_rule: Path, # added 092022 +# utils.get_from_dict(instance_dict, "remediation", "notes"), # remediation_notes: str, # added 092022 +# get_pattern_category_or_none(utils.get_from_dict(instance_dict, "properties", "category")), +# utils.get_from_dict(instance_dict, "properties", "negative_test_case"), +# utils.get_from_dict(instance_dict, "properties", "source_and_sink"), +# utils.get_from_dict(instance_dict, "properties", "input_sanitizer"), +# get_feature_vs_internal_api_or_none(utils.get_from_dict(instance_dict, "properties", "feature_vs_internal_api")), +# utils.get_path_or_none(utils.get_from_dict(instance_dict, "discovery", "rule")), +# utils.get_from_dict(instance_dict, "discovery", "method"), +# utils.get_from_dict(instance_dict, "discovery", "rule_accuracy"), +# utils.get_from_dict(instance_dict, "discovery", "notes"), +# utils.get_from_dict(instance_dict, "expectation", "expectation"), +# utils.get_from_dict(instance_dict, "expectation", "type"), +# utils.get_path_or_none(utils.get_from_dict(instance_dict, "expectation", "sink_file")), +# utils.get_from_dict(instance_dict, "expectation", "sink_line"), +# utils.get_path_or_none(utils.get_from_dict(instance_dict, "expectation", "source_file")), +# utils.get_from_dict(instance_dict, "expectation", "source_line"), +# pattern.name, +# pattern.description, +# pattern.family, +# pattern.tags, +# pattern.instances, +# language, +# pattern_id=pattern.pattern_id, +# instance_id=instance_id +# ) + + +# def load_instance_from_metadata(metadata: str, tp_lib: Path, language: str) -> Instance: +# metadata_path: Path = tp_lib / metadata +# if not metadata_path.exists(): +# raise 
InstanceDoesNotExists(ref_metadata=metadata_path.name) + +# with open(metadata_path) as file: +# try: +# instance: Dict = json.load(file) +# except Exception as e: +# raise e + +# pattern_id = utils.get_id_from_name(metadata_path.parent.parent.name) +# pattern, p_dir = get_pattern_by_pattern_id(language, pattern_id, tp_lib) +# instance_id = utils.get_id_from_name(metadata_path.parent.name) +# return instance_from_dict(instance, pattern, language, instance_id) + + +# def get_pattern_category_or_none(el) -> PatternCategory | None: +# try: +# return PatternCategory(el) +# except ValueError: +# return None + + +# def get_feature_vs_internal_api_or_none(el) -> FeatureVsInternalApi | None: +# try: +# return FeatureVsInternalApi(el) +# except ValueError: +# return None \ No newline at end of file diff --git a/tp_framework/core/measurement.py b/tp_framework/core/measurement.py index e405650..408f245 100644 --- a/tp_framework/core/measurement.py +++ b/tp_framework/core/measurement.py @@ -12,7 +12,7 @@ import config from core import utils from core.exceptions import InstanceDoesNotExists, MeasurementNotFound -from core.instance import Instance, load_instance_from_metadata +from core.instance import Instance #, load_instance_from_metadata class Measurement: diff --git a/tp_framework/core/pattern.py b/tp_framework/core/pattern.py index d9d0280..0ef4661 100644 --- a/tp_framework/core/pattern.py +++ b/tp_framework/core/pattern.py @@ -1,82 +1,231 @@ -import json -from pathlib import Path -from core import utils -from core.exceptions import LanguageTPLibDoesNotExist, PatternDoesNotExists, PatternValueError -from typing import Dict, Tuple - - -class Pattern: - def __init__(self, name: str, language: str, instances: list[Path], family: str = None, description: str = "", - tags: list[str] = [], pattern_id: int = None, pattern_dir: Path = None) -> None: - self.name = name - self.description = description - self.family = family - self.tags = tags - self.instances = instances - 
self.language = language - self.pattern_id = pattern_id or self.define_pattern_id(pattern_dir) - - def define_pattern_id(self, pattern_dir) -> int: - try: - dir_list: list[Path] = utils.list_pattern_paths_for_language(self.language, pattern_dir) - except LanguageTPLibDoesNotExist: - return 1 - id_list: list[int] = sorted(list(map(lambda x: int(str(x.name).split("_")[0]), dir_list))) - return id_list[-1] + 1 if len(id_list) > 0 else 1 - - def add_pattern_to_tp_library(self, language: str, pattern_src_dir: Path, pattern_dir: Path) -> None: - pattern_dir_name: str = utils.get_pattern_dir_name_from_name(pattern_src_dir.name, self.pattern_id) - new_tp_dir: Path = pattern_dir / language / pattern_dir_name - new_tp_dir.mkdir(exist_ok=True, parents=True) - pattern_json_file: Path = new_tp_dir / f"{pattern_dir_name}.json" - - with open(pattern_json_file, "w") as json_file: - pattern_dict: Dict = { - "name": self.name, - "description": self.description, - "family": self.family, - "tags": self.tags, - "instances": self.instances, - } - json.dump(pattern_dict, json_file, indent=4) - - def add_new_instance_reference(self, language: str, pattern_dir: Path, new_instance_ref: str) -> None: - tp_dir: Path = get_pattern_path_by_pattern_id(language, self.pattern_id, pattern_dir) - with open(tp_dir / f"{tp_dir.name}.json") as json_file: - pattern_dict: Dict = json.load(json_file) - - pattern_dict["instances"].append(new_instance_ref) - - with open(tp_dir / f"{tp_dir.name}.json", "w") as json_file: - json.dump(pattern_dict, json_file, indent=4) - - -# TODO (old): Test this -def get_pattern_by_pattern_id(language: str, pattern_id: int, tp_lib_dir: Path) -> Tuple[Pattern, Path]: - tp_dir: Path = get_pattern_path_by_pattern_id(language, pattern_id, tp_lib_dir) - tp_json: Path = tp_dir / f"{tp_dir.name}.json" - with open(tp_json) as json_file: - pattern_from_json: Dict = json.load(json_file) - return pattern_from_dict(pattern_from_json, language, pattern_id), tp_dir - - -def 
get_pattern_path_by_pattern_id(language: str, pattern_id: int, tp_lib_dir: Path) -> Path: - tp_dir_for_language: Path = tp_lib_dir / language - filtered_res: list[str] = list(filter( - lambda x: x.split("_")[0] == str(pattern_id), - map(lambda y: y.name, utils.list_dirs_only(tp_dir_for_language)) - )) - if not filtered_res: - raise PatternDoesNotExists(pattern_id) - return tp_dir_for_language / filtered_res[0] - - -def pattern_from_dict(pattern_dict: Dict, language: str, pattern_id: int) -> Pattern: - try: - return Pattern(pattern_dict["name"], language, pattern_dict["instances"], - family=pattern_dict.get("family", None), - description=pattern_dict.get("description", ""), - tags=pattern_dict.get("tags", []), - pattern_id=pattern_id) - except KeyError as e: - raise PatternValueError(message=f"Key {e} was not found in pattern metadata") +# import json +import shutil +from os import listdir +from pathlib import Path + +from core.exceptions import PatternInvalid +from core.instance import Instance +from core.pattern_repair import PatternRepair +from core import utils +# from core.exceptions import LanguageTPLibDoesNotExist, PatternDoesNotExists, PatternValueError +from typing import Tuple + + + +class Pattern: + @classmethod + def init_from_id_and_language(cls, id: int, language: str, tp_lib_path: Path): + return cls._init_from_id_and_language(cls(), id, language.upper(), tp_lib_path) + + @classmethod + def init_from_json_file_without_pattern_id(cls, json_file_path: Path, language: str, pattern_path: Path, tp_lib_path: Path): + return cls._init_from_json_without_id(cls(), json_file_path, language, pattern_path, tp_lib_path) + + def __init__(self) -> None: + # metadata + self.pattern_id = None + self.language = None # TODO: needed? + self.tp_lib_path = None # TODO needed? 
+        # NOTE(review): duplicate 'self.language = None' removed; it is already set above
+        self.pattern_path = None
+        self.pattern_json_path = None
+
+        # json fields
+        self.name = None
+        self.description = None
+        self.family = None
+        self.tags = None
+        self.version = None
+        self.instances = []
+
+        # repairing tools
+        self.pattern_repair = None
+
+    def _assert_pattern(self):
+        try:
+            assert int(self.pattern_id)
+            assert self.language
+            assert self.tp_lib_path.is_dir()
+            assert self.pattern_path.is_dir()
+            assert self.pattern_json_path.is_file()
+            assert self.instances and all([isinstance(instance, Instance) for instance in self.instances])
+        except Exception as e:
+            raise PatternInvalid(f"{self._log_prefix()}Instance Variables are not properly set. '{e}'")
+
+    def _init_from_id_and_language(self, id: int, language: str, tp_lib_path: Path):
+        self.pattern_id = id
+        self.language = language.upper()
+        self.tp_lib_path = tp_lib_path
+        self.pattern_path = utils.get_pattern_dir_from_id(id, language, tp_lib_path)
+        self._init_from_json_file(utils.get_pattern_json(self.pattern_path))
+        self._assert_pattern()
+        return self
+
+    def _init_instances(self, instance_paths_from_json: list):
+        instances = []
+        for instance_json in instance_paths_from_json:
+            print('\033[93m', instance_json, '\033[0m')
+            abs_path = Path(self.pattern_path / Path(instance_json))
+            if not abs_path.is_file():
+                raise PatternInvalid(f"{self._log_prefix()}The instance path '{instance_json}' is not valid.")
+            instances += [Instance.init_from_json_path(abs_path, self.pattern_id)]
+        instances = sorted(instances, key=lambda instance: instance.instance_id)
+        return instances
+
+    def _init_from_json_file(self, json_file_path: Path):
+        self.pattern_json_path = json_file_path
+        pattern_properties = utils.read_json(self.pattern_json_path)
+        if not pattern_properties:
+            raise PatternInvalid("The pattern needs a valid JSON file.")
+        self.name = pattern_properties["name"] if "name" in pattern_properties.keys() else None
+        self.description = pattern_properties["description"] if 
"description" in pattern_properties.keys() else None + self.family = pattern_properties["family"] if "family" in pattern_properties.keys() else None + self.tags = pattern_properties["tags"] if "tags" in pattern_properties.keys() else None + self.version = pattern_properties["version"] if "version" in pattern_properties.keys() else None + if "instances" in pattern_properties.keys() and pattern_properties["instances"]: + self.instances = self._init_instances(pattern_properties["instances"]) + else: + # Raise exception + raise PatternInvalid(f"{self._log_prefix()}Pattern JSON file needs an 'instances' key with valid relative links.") + return self + + def _init_from_json_without_id(self, json_file_path: Path, language: str, pattern_path: Path, tp_lib_path: Path): + self.language = language.upper() + self.pattern_path = pattern_path + self.tp_lib_path = tp_lib_path + self._init_from_json_file(json_file_path) + try: + given_id = utils.get_id_from_name(self.pattern_path.name) + except Exception: + given_id = None + free_id = utils.get_next_free_pattern_id_for_language(self.language, self.tp_lib_path, given_id) + self.pattern_id = free_id + self._assert_pattern() + return self + + def _log_prefix(self): + return f"Pattern {self.pattern_id} ({self.language}) - " + + def __str__(self) -> str: + return str(vars(self)) + + def copy_to_tplib(self) -> Path: + # copies the pattern and all its instances into the tp_lib + new_pattern_path = self.tp_lib_path / self.language / f'{self.pattern_id}_{self.pattern_path.name}' + print(new_pattern_path) + for instance in self.instances: + instance.copy_to_tplib(new_pattern_path) + utils.copy_dir_content(self.pattern_path, new_pattern_path) + self.repair(soft=True) + + def repair(self, soft: bool = False): + # soft repair enforces the instances structure (and names) and updates relative links in pattern JSON + self.pattern_repair = PatternRepair(self) + self.pattern_repair + pass + + +def get_pattern_by_pattern_id(language: str, 
pattern_id: int, tp_lib_dir: Path) -> Tuple[Pattern, Path]: + pattern = Pattern.init_from_id_and_language(pattern_id, language, tp_lib_dir) + return pattern, pattern.pattern_path + + +# def pattern_from_dict(pattern_dict: Dict, language: str, pattern_id: int) -> Pattern: +# try: +# return Pattern(pattern_dict["name"], language, pattern_dict["instances"], +# family=pattern_dict.get("family", None), +# description=pattern_dict.get("description", ""), +# tags=pattern_dict.get("tags", []), +# pattern_id=pattern_id) +# except KeyError as e: +# raise PatternValueError(message=f"Key {e} was not found in pattern metadata") + +# def get_pattern_path_by_pattern_id(language: str, pattern_id: int, tp_lib_dir: Path) -> Path: +# tp_dir_for_language: Path = tp_lib_dir / language +# filtered_res: list[str] = list(filter( +# lambda x: x.split("_")[0] == str(pattern_id), +# map(lambda y: y.name, utils.list_dirs_only(tp_dir_for_language)) +# )) +# if not filtered_res: +# raise PatternDoesNotExists(pattern_id) +# return tp_dir_for_language / filtered_res[0] + +# class Pattern: +# def __init__(self, name: str, language: str, instances: list[Path], family: str = None, description: str = "", +# tags: list[str] = [], pattern_id: int = None, pattern_dir: Path = None) -> None: +# self.name = name +# self.description = description +# self.family = family +# self.tags = tags +# self.instances = instances +# self.language = language +# self.pattern_id = pattern_id or self.define_pattern_id(pattern_dir) + +# def define_pattern_id(self, pattern_dir) -> int: +# try: +# dir_list: list[Path] = utils.list_pattern_paths_for_language(self.language, pattern_dir) +# except LanguageTPLibDoesNotExist: +# return 1 +# id_list: list[int] = sorted(list(map(lambda x: int(str(x.name).split("_")[0]), dir_list))) +# return id_list[-1] + 1 if len(id_list) > 0 else 1 + +# def add_pattern_to_tp_library(self, language: str, pattern_src_dir: Path, pattern_dir: Path) -> None: +# pattern_dir_name: str = 
utils.get_pattern_dir_name_from_name(pattern_src_dir.name, self.pattern_id) +# new_tp_dir: Path = pattern_dir / language / pattern_dir_name +# new_tp_dir.mkdir(exist_ok=True, parents=True) +# pattern_json_file: Path = new_tp_dir / f"{pattern_dir_name}.json" + +# with open(pattern_json_file, "w") as json_file: +# pattern_dict: Dict = { +# "name": self.name, +# "description": self.description, +# "family": self.family, +# "tags": self.tags, +# "instances": self.instances, +# } +# json.dump(pattern_dict, json_file, indent=4) + +# def add_new_instance_reference(self, language: str, pattern_dir: Path, new_instance_ref: str) -> None: +# tp_dir: Path = get_pattern_path_by_pattern_id(language, self.pattern_id, pattern_dir) +# with open(tp_dir / f"{tp_dir.name}.json") as json_file: +# pattern_dict: Dict = json.load(json_file) + +# pattern_dict["instances"].append(new_instance_ref) + +# with open(tp_dir / f"{tp_dir.name}.json", "w") as json_file: +# json.dump(pattern_dict, json_file, indent=4) + + +# # TODO (old): Test this +# def get_pattern_by_pattern_id(language: str, pattern_id: int, tp_lib_dir: Path) -> Tuple[Pattern, Path]: +# tp_dir: Path = get_pattern_path_by_pattern_id(language, pattern_id, tp_lib_dir) +# tp_json: Path = tp_dir / f"{tp_dir.name}.json" +# with open(tp_json) as json_file: +# pattern_from_json: Dict = json.load(json_file) +# return pattern_from_dict(pattern_from_json, language, pattern_id), tp_dir + + +# def get_pattern_path_by_pattern_id(language: str, pattern_id: int, tp_lib_dir: Path) -> Path: +# tp_dir_for_language: Path = tp_lib_dir / language +# filtered_res: list[str] = list(filter( +# lambda x: x.split("_")[0] == str(pattern_id), +# map(lambda y: y.name, utils.list_dirs_only(tp_dir_for_language)) +# )) +# if not filtered_res: +# raise PatternDoesNotExists(pattern_id) +# return tp_dir_for_language / filtered_res[0] + + +# def pattern_from_dict(pattern_dict: Dict, language: str, pattern_id: int) -> Pattern: +# try: +# return 
Pattern(pattern_dict["name"], language, pattern_dict["instances"],
+#             family=pattern_dict.get("family", None),
+#             description=pattern_dict.get("description", ""),
+#             tags=pattern_dict.get("tags", []),
+#             pattern_id=pattern_id)
+#     except KeyError as e:
+#         raise PatternValueError(message=f"Key {e} was not found in pattern metadata")
+
+
+if __name__ == '__main__':
+    print(Pattern.init_from_id_and_language(1, 'php', Path('./testability_patterns')))
diff --git a/tp_framework/core/pattern_operations.py b/tp_framework/core/pattern_operations.py
index 4cef8cf..c7a5ee1 100644
--- a/tp_framework/core/pattern_operations.py
+++ b/tp_framework/core/pattern_operations.py
@@ -10,26 +10,19 @@ from core import loggermgr
 logger = logging.getLogger(loggermgr.logger_name(__name__))
 
-import core.instance
+# import core.instance
 from core import errors
 from core import utils, analysis
 from core.exceptions import PatternValueError
-from core.instance import Instance, PatternCategory, FeatureVsInternalApi, instance_from_dict
+from core.instance import Instance #, PatternCategory, FeatureVsInternalApi # , instance_from_dict
 from core.pattern import Pattern, get_pattern_by_pattern_id
 from core.sast_job_runner import SASTjob, job_list_to_dict
 from core.measurement import meas_list_to_tp_dict
 
 
-def add_testability_pattern_to_lib(language: str, pattern_dict: Dict, pattern_src_dir: Path | None,
+def add_testability_pattern_to_lib(language: str, pattern: Pattern, pattern_src_dir: Path | None,
                                    pattern_lib_dest: Path) -> Path:
-    try:
-        pattern: Pattern = Pattern(pattern_dict["name"], language,
-                                   [pattern_src_dir / instance_relative_path for instance_relative_path in
-                                    pattern_dict["instances"] if
-                                    pattern_src_dir], pattern_dict["family"], pattern_dict["description"],
-                                   pattern_dict["tags"], pattern_dir=pattern_lib_dest)
-    except KeyError as e:
-        raise PatternValueError(message=errors.patternKeyError(e))
-
+    print(pattern)
+    exit(0)
    pattern_instances_json_refs = pattern.instances 
pattern.instances = [] pattern.add_pattern_to_tp_library(language, pattern_src_dir, pattern_lib_dest) @@ -42,49 +35,17 @@ def add_testability_pattern_to_lib(language: str, pattern_dict: Dict, pattern_sr return pattern_lib_dest / language / utils.get_pattern_dir_name_from_name(pattern_src_dir.name, pattern.pattern_id) -def add_tp_instance_to_lib(language: str, pattern: Pattern, instance_dict: Dict, inst_old_name: str, +def add_tp_instance_to_lib(language: str, pattern: Pattern, instance: Instance, inst_old_name: str, pattern_src_dir: Path, pattern_lib_dst: Path) -> Path: - instance: Instance = Instance( - utils.get_path_or_none(utils.get_from_dict(instance_dict, "code", "path")), # code_path: Path, - utils.get_from_dict(instance_dict, "code", "injection_skeleton_broken"), # code_injection_skeleton_broken: bool, - utils.get_path_or_none(utils.get_from_dict(instance_dict, "compile", "dependencies")), # compile_dependencies: Path, # added 092022 - utils.get_path_or_none(utils.get_from_dict(instance_dict, "compile", "binary")), # compile_binary: Path, - utils.get_from_dict(instance_dict, "compile", "instruction"), # compile_instruction: str, # added 092022 - utils.get_from_dict(instance_dict, "remediation", "transformation"), # remediation_transformation: str, # added 092022 - utils.get_path_or_none(utils.get_from_dict(instance_dict, "remediation", "modeling_rule")), # remediation_modeling_rule: Path, # added 092022 - utils.get_from_dict(instance_dict, "remediation", "notes"), # remediation_notes: str, # added 092022 - core.instance.get_pattern_category_or_none(utils.get_from_dict(instance_dict, "properties", "category")), - utils.get_from_dict(instance_dict, "properties", "negative_test_case"), - utils.get_from_dict(instance_dict, "properties", "source_and_sink"), - utils.get_from_dict(instance_dict, "properties", "input_sanitizer"), - core.instance.get_feature_vs_internal_api_or_none(utils.get_from_dict(instance_dict, "properties", "feature_vs_internal_api")), - 
utils.get_path_or_none(utils.get_from_dict(instance_dict, "discovery", "rule")), - utils.get_from_dict(instance_dict, "discovery", "method"), - utils.get_from_dict(instance_dict, "discovery", "rule_accuracy"), - utils.get_from_dict(instance_dict, "discovery", "notes"), - utils.get_from_dict(instance_dict, "expectation", "expectation"), - utils.get_from_dict(instance_dict, "expectation", "type"), - utils.get_path_or_none(utils.get_from_dict(instance_dict, "expectation", "sink_file")), - utils.get_from_dict(instance_dict, "expectation", "sink_line"), - utils.get_path_or_none(utils.get_from_dict(instance_dict, "expectation", "source_file")), - utils.get_from_dict(instance_dict, "expectation", "source_line"), - pattern.name, - pattern.description, - pattern.family, - pattern.tags, - pattern.instances, - language, - pattern.pattern_id, - pattern_dir=pattern_lib_dst - ) - inst_name = utils.get_instance_dir_name_from_pattern(pattern_src_dir.name, pattern.pattern_id, instance.instance_id) pattern_name = utils.get_pattern_dir_name_from_name(pattern_src_dir.name, pattern.pattern_id) instance_src_dir: Path = pattern_src_dir / inst_old_name instance_dst_dir: Path = pattern_lib_dst / language / pattern_name / inst_name - instance.add_instance_to_pattern_id(language, pattern_src_dir, pattern_lib_dst) + # TODO: refactoring here + # instance.add_instance_to_pattern_id(language, pattern_src_dir, pattern_lib_dst) + pattern.add_new_instance(instance) pattern.add_new_instance_reference(language, pattern_lib_dst, f"./{inst_name}/{inst_name}.json") for path in list(instance_src_dir.iterdir()): @@ -98,26 +59,18 @@ def add_tp_instance_to_lib(language: str, pattern: Pattern, instance_dict: Dict, def add_testability_pattern_to_lib_from_json(language: str, pattern_json: Path, pattern_src_dir: Path, pattern_lib_dest: Path) -> Path: - with open(pattern_json) as json_file: - try: - pattern: Dict = json.load(json_file) - except JSONDecodeError as e: - raise e - try: - return 
add_testability_pattern_to_lib(language, pattern, pattern_src_dir, pattern_lib_dest) - except PatternValueError as e: - raise e + # The pattern objects automatically initializes the instances as well + pattern = Pattern.init_from_json_file_without_pattern_id(pattern_json, language, pattern_src_dir, pattern_lib_dest) + print(pattern) + # dump the pattern to the tplib + return pattern.copy_to_tplib() + # return add_testability_pattern_to_lib(language, pattern, pattern_src_dir, pattern_lib_dest) def add_tp_instance_to_lib_from_json(language: str, pattern_id: int, instance_json: Path, pattern_src_dir: Path, pattern_dest_dir: Path): - pattern, p_dir = get_pattern_by_pattern_id(language, pattern_id, pattern_dest_dir) - - with open(instance_json) as json_file: - try: - instance: Dict = json.load(json_file) - except JSONDecodeError as e: - raise e + pattern = Pattern.init_from_id_and_language(pattern_id, language, pattern_dest_dir) + instance = Instance.init_from_json_path(instance_json, pattern) return add_tp_instance_to_lib( language, pattern, instance, instance_json.parent.name, pattern_src_dir, pattern_dest_dir ) diff --git a/tp_framework/core/pattern_repair.py b/tp_framework/core/pattern_repair.py new file mode 100644 index 0000000..d52b664 --- /dev/null +++ b/tp_framework/core/pattern_repair.py @@ -0,0 +1,31 @@ +import shutil +from core.exceptions import PatternRepairError +from core import utils + + +import logging +from core import loggermgr +logger = logging.getLogger(loggermgr.logger_name(__name__)) + +class PatternRepair: + def __init__(self, pattern) -> None: + self.pattern_to_repair = pattern + self.pattern_json_template = pattern.tp_lib_path / "pattern_template" / "ID_pattern_name" / "ID_pattern_name.json" + if not self.pattern_json_template.is_file(): + logger.warn(f"{self._log_prefix()}Expects a template JSON file in {self.pattern_json_template}") + raise PatternRepairError(f"No template JSON found in {self.pattern_json_template}") + + def 
_log_prefix(self): + return f"PatternRepair ({self.pattern_to_repair.pattern_id} - {self.pattern_to_repair.language}) " + + def repair_pattern_json(self): + # make sure there is a pattern JSON file + if not self.pattern_to_repair.pattern_json_path.is_file(): + self.pattern_json_path = utils.get_pattern_json() + if not self.pattern_json_path: + logger.info("Copying template JSON.") + expected_json_path = self.pattern_to_repair.pattern_path / f'{self.pattern_to_repair.name}.json' + shutil.copy(self.pattern_json_template, expected_json_path) + # make sure the instances are correct + for instance in self.pattern_to_repair.instances: + instance.repair \ No newline at end of file diff --git a/tp_framework/core/utils.py b/tp_framework/core/utils.py index 5725b3f..6eb0087 100644 --- a/tp_framework/core/utils.py +++ b/tp_framework/core/utils.py @@ -1,7 +1,9 @@ import csv import os +import json from datetime import datetime from platform import system +import shutil from importlib import import_module from pathlib import Path @@ -17,7 +19,8 @@ import config from core import pattern, instance from core.exceptions import PatternDoesNotExists, LanguageTPLibDoesNotExist, TPLibDoesNotExist, InvalidSastTools, \ - DiscoveryMethodNotSupported, TargetDirDoesNotExist, InvalidSastTool, PatternFolderNotFound, InstanceDoesNotExists + DiscoveryMethodNotSupported, TargetDirDoesNotExist, InvalidSastTool, PatternFolderNotFound, InstanceDoesNotExists, \ + FileDoesNotExist, TemplateDoesNotExist, MeasurementResultsDoNotExist from core import errors @@ -46,17 +49,32 @@ def list_tpi_paths_by_tp_id(language: str, pattern_id: int, tp_lib_dir: Path) -> def get_tpi_id_from_jsonpath(jp: Path) -> int: return get_id_from_name(jp.parent.name) -def get_pattern_dir_from_id(pattern_id: int, language: str, tp_lib_dir: Path) -> Path: + +def get_pattern_dir_from_id(pattern_id: int, language: str, tp_lib_dir: Path) -> Path: # needed tp_lib_dir_lang_dir: Path = tp_lib_dir / language if 
tp_lib_dir_lang_dir.is_dir(): pattern_with_id = list(filter(lambda p: get_id_from_name(p.name) == pattern_id, list_dirs_only(tp_lib_dir_lang_dir))) if pattern_with_id: - return pattern_with_id[0] + return Path(pattern_with_id[0]) raise PatternDoesNotExists(pattern_id) else: raise PatternDoesNotExists(pattern_id) +def get_next_free_pattern_id_for_language(language: str, tp_lib_dir: Path, proposed_id = None): + lang_tp_lib_path = tp_lib_dir / language + check_lang_tp_lib_path(lang_tp_lib_path) + all_patterns = list_dirs_only(lang_tp_lib_path) + taken_ids = [] + for pattern in all_patterns: + taken_ids += [get_id_from_name(pattern.name)] + id_range = list(range(1, max(taken_ids)+1)) + free_ids = sorted(list(set(id_range) - set(taken_ids))) + if proposed_id in free_ids: + return proposed_id + return free_ids[0] if free_ids else max(taken_ids) + 1 + + def get_instance_dir_from_id(instance_id: int, pattern_dir: Path) -> Path: if pattern_dir.is_dir(): return get_instance_dir_from_list(instance_id, list_dirs_only(pattern_dir)) @@ -200,6 +218,20 @@ def get_discovery_rules(discovery_rule_list: list[str], discovery_rule_ext: str) # Others # +def check_measurement_results_exist(measurement_dir: Path): + if not measurement_dir.is_dir(): + e = MeasurementResultsDoNotExist() + logger.error(get_exception_message(e)) + raise e + + +def check_file_exist(file_path: Path, file_suffix = ".csv"): + if not file_path.is_file() or not file_path.suffix == file_suffix: + e = FileDoesNotExist(file_path) + logger.error(get_exception_message(e)) + raise e + + def build_timestamp_language_name(name: Path | None, language: str, now: datetime, extra: str = None) -> str: res = language if name: @@ -351,4 +383,59 @@ def get_file_hash(fpath, bigfile=False): else: while chunk := f.read(8192): hash.update(chunk) - return hash.hexdigest() \ No newline at end of file + return hash.hexdigest() + + + +########################### New utils + +def list_files(path_to_parent_dir: Path, suffix: str): + assert 
suffix[0] == ".", "Suffix has to start with '.'" + return list(filter(lambda file_name: file_name.suffix == suffix, [path_to_parent_dir / f for f in os.listdir(path_to_parent_dir)])) + + +def get_pattern_json(path_to_pattern: Path) -> Path: + json_files_in_pattern_dir = list_files(path_to_pattern, ".json") + if len(json_files_in_pattern_dir) == 1: + return json_files_in_pattern_dir[0] + elif not json_files_in_pattern_dir: + logger.warning(f"Could not find a pattern JSON file in {path_to_pattern.name}") + return None + else: + logger.warning(f"Found multiple '.json' files for {path_to_pattern.name}") + if path_to_pattern / f"{path_to_pattern.name}.json" in json_files_in_pattern_dir: + return path_to_pattern / f"{path_to_pattern.name}.json" + logger.warning("Could not determine the right pattern JSON file. Please name it _.json") + return None + + +def read_json(path_to_json_file: Path): + if not path_to_json_file.is_file(): + return {} + result = {} + + try: + with open(path_to_json_file, "r") as json_file: + result = json.load(json_file) + except json.JSONDecodeError as err: + raise Exception(f"JSON is corrupt, please check {path_to_json_file}") from err + + if not result: + logger.error(f"JSON file is empty") + return result + + +def copy_dir_content(path_to_src_dir: Path, path_to_dst_dir: Path): + for element in os.listdir(path_to_src_dir): + src_path = path_to_src_dir / element + dest_path = path_to_dst_dir / element + if dest_path.exists(): + continue + if src_path.is_file(): + shutil.copy2(src_path, dest_path) + else: + shutil.copytree(src_path, dest_path) + + +if __name__ == "__main__": + print(get_pattern_json(Path('./testability_patterns/PHP/85_test_pattern'))) \ No newline at end of file diff --git a/tp_framework/tmp.py b/tp_framework/tmp.py new file mode 100644 index 0000000..c3c2e0f --- /dev/null +++ b/tp_framework/tmp.py @@ -0,0 +1,17 @@ +from pathlib import Path + +class A: + def __init__(self) -> None: + self.path = Path(".") + self.my_path = 
self.make_path('abc') + + def make_path(self, arg): + yield self.path / arg + + +a = A() +print(a.path) +print(a.my_path) +a.path = Path('/') +print(a.path) +print(a.my_path.is_file()) \ No newline at end of file From a0574c966e609a25d5d821309cce6ca0a18f21cb Mon Sep 17 00:00:00 2001 From: felix-20 Date: Tue, 27 Jun 2023 09:20:25 +0200 Subject: [PATCH 06/16] before implementing pattern repair --- tp_framework/core/discovery.py | 116 ++++++++++-------------- tp_framework/core/instance.py | 18 ++-- tp_framework/core/measurement.py | 34 +++---- tp_framework/core/pattern.py | 11 ++- tp_framework/core/pattern_operations.py | 12 +-- tp_framework/core/report_for_sast.py | 23 +++-- tp_framework/core/sast_job_runner.py | 3 + 7 files changed, 106 insertions(+), 111 deletions(-) diff --git a/tp_framework/core/discovery.py b/tp_framework/core/discovery.py index 0a49c80..4fb5bcd 100644 --- a/tp_framework/core/discovery.py +++ b/tp_framework/core/discovery.py @@ -314,25 +314,20 @@ def discovery_under_measurement(cpg: Path, l_tp_id: list[int], tp_lib: Path, ito # l_not_measured_tp_id.append(tp_id) continue target_pattern = Pattern.init_from_id_and_language(tp_id, language, tp_lib) - l_tpi_jsonpath = [instance.instance_json_path for instance in target_pattern.instances] - d_tpi_id_path = {} - for instance in target_pattern.instances: - d_tpi_id_path[instance.instance_id] = instance.instance_json_path l_meas_tpi_path = utils.list_dirs_only(meas_tp_path) # computing not supported tp instances (tpi) to be discovered d_res_tpi = {} d_dr_executed = {} - for tpi_id in d_tpi_id_path: - msgpre = f"pattern {tp_id} instance {tpi_id} - " - tpi_json_path = d_tpi_id_path[tpi_id] + for tpi in target_pattern.instances: + msgpre = f"pattern {tp_id} instance {tpi.instance_id} - " try: - meas_tpi_path = utils.get_instance_dir_from_list(tpi_id, l_meas_tpi_path) + meas_tpi_path = utils.get_instance_dir_from_list(tpi.instance_id, l_meas_tpi_path) except: logger.warning( f"{msgpre}No measurements for 
this instance. {msgpost}") - d_res_tpi[tpi_id] = { + d_res_tpi[tpi.instance_id] = { "measurement": "not_found", - "jsonpath": tpi_json_path + "jsonpath": tpi.instance_json_path } continue l_last_meas = measurement.load_measurements(utils.get_last_measurement_for_pattern_instance(meas_tpi_path), @@ -342,16 +337,16 @@ def discovery_under_measurement(cpg: Path, l_tp_id: list[int], tp_lib: Path, ito if not meas_tpi_by_tools: logger.warning( f"{msgpre}No measurements of the tools specified ({[t['name'] + ':' + t['version'] for t in tools]}) for the instance. {msgpost}") - d_res_tpi[tpi_id] = { + d_res_tpi[tpi.instance_id] = { "measurement": "not_found", - "jsonpath": tpi_json_path + "jsonpath": tpi.instance_json_path } continue tpi_instance = meas_tpi_by_tools[0].instance d_tpi = { "instance": tpi_instance, "measurement": "supported", - "jsonpath": tpi_json_path, + "jsonpath": tpi.instance_json_path, "discovery": {} } # discovery continue iff at least one tool not supporting the tpi @@ -368,9 +363,9 @@ def discovery_under_measurement(cpg: Path, l_tp_id: list[int], tp_lib: Path, ito d_tpi["measurement"] = "not_supported" # discovery per tpi measurement_stop: bool = d_tpi["measurement"] not in ["ignore", "not_supported"] - d_tpi["discovery"] = discovery_for_tpi(tpi_instance, tpi_json_path, cpg, disc_output_dir, + d_tpi["discovery"] = discovery_for_tpi(tpi_instance, tpi.instance_json_path, cpg, disc_output_dir, measurement_stop=measurement_stop, already_executed=d_dr_executed) - d_res_tpi[tpi_id] = d_tpi + d_res_tpi[tpi.instance_id] = d_tpi d_res[tp_id]["instances"] = d_res_tpi # post-process results and export them @@ -390,21 +385,16 @@ def discovery_ignore_measurement(cpg: Path, l_tp_id: list[int], tp_lib: Path, # loop over testability patterns (tp) to be discovered for tp_id in l_tp_id: d_res[tp_id] = {"measurement_found": None} - l_tpi_jsonpath = utils.list_tpi_paths_by_tp_id(language, tp_id, tp_lib) # TODO: do we need this later? 
- d_tpi_id_path = {} - for jp in l_tpi_jsonpath: - d_tpi_id_path[utils.get_tpi_id_from_jsonpath(jp)] = jp + target_pattern = Pattern.init_from_id_and_language(tp_id, language, tp_lib) # loop over tp instances (tpi) to be discovered d_res_tpi = {} d_dr_executed = {} - for tpi_id in d_tpi_id_path: - tpi_json_path = d_tpi_id_path[tpi_id] - tpi_json_rel = os.path.relpath(tpi_json_path, start=tp_lib) - tpi_instance = load_instance_from_metadata(tpi_json_rel, tp_lib, language) # get the instance - d_tpi = {"instance": tpi_instance, "measurement": "ignored", "jsonpath": tpi_json_path, - "discovery": discovery_for_tpi(tpi_instance, tpi_json_path, cpg, disc_output_dir, + for instance in target_pattern.instances: + tpi_json_path = instance.instance_json_path + d_tpi = {"instance": instance, "measurement": "ignored", "jsonpath": tpi_json_path, + "discovery": discovery_for_tpi(instance, tpi_json_path, cpg, disc_output_dir, measurement_stop=False, already_executed=d_dr_executed)} - d_res_tpi[tpi_id] = d_tpi + d_res_tpi[instance.instance_id] = d_tpi d_res[tp_id]["instances"] = d_res_tpi # post-process results and export them @@ -705,16 +695,14 @@ def get_check_discovery_rule_result_header(): ] -def get_check_discovery_rule_result(pattern_id, language, - instance_id=None, instance_path=None, pattern_name=None, - discovery_rule=None, successful="error") -> Dict: +def get_check_discovery_rule_result(pattern: Pattern, instance: Instance | None= None, successful="error") -> Dict: return { - "pattern_id": pattern_id, - "instance_id": instance_id, - "instance_path": instance_path, - "pattern_name": pattern_name, - "language": language, - "discovery_rule": discovery_rule, + "pattern_id": pattern.pattern_id, + "instance_id": instance.instance_id if instance else None, + "instance_path": instance.instance_path if instance else None, + "pattern_name": pattern.name, + "language": pattern.language, + "discovery_rule": instance.discovery_rule if instance else None, "successful": successful } 
@@ -730,85 +718,79 @@ def check_discovery_rules(language: str, l_tp_id: list[int], unsuccess = 0 missing = 0 err = 0 + num_patterns = len(l_tp_id) for i, tp_id in enumerate(l_tp_id): logger.info(utils.get_tp_op_status_string( - (i + 1, len(l_tp_id), tp_id) # tp_info + (i + 1, num_patterns, tp_id) # tp_info )) try: - target_tp, _ = get_pattern_by_pattern_id(language, tp_id, tp_lib_path) - l_tpi_dir: list[Path] = utils.list_tpi_paths_by_tp_id( - language, tp_id, tp_lib_path - ) + target_pattern = Pattern.init_from_id_and_language(tp_id, language, tp_lib_path) + num_instances = len(target_pattern.instances) except Exception as e: + # should not happen at all! And should be removed and a list of patterns should be parsed to that function logger.warning( f"Either pattern id {tp_id} does not exist, or its file system structure is not valid, or its instances cannot be fetched. Exception raised: {utils.get_exception_message(e)}") - res = get_check_discovery_rule_result(tp_id, language) + res = get_check_discovery_rule_result(pattern=target_pattern) results.append(res) err += 1 continue - for j, instance_path in enumerate(l_tpi_dir): + instance: Instance + for j, instance in enumerate(target_pattern.instances): try: - target_src = instance_path.parent - target_instance: Instance = Instance.init_from_json_path(instance_path, target_tp) - - tpi_id = utils.get_id_from_name(instance_path.name) + tpi_id = instance.instance_id logger.info(utils.get_tpi_op_status_string( - (i + 1, len(l_tp_id), tp_id), - t_tpi_info=(j + 1, len(l_tpi_dir), tpi_id) + (i + 1, num_patterns, tp_id), + t_tpi_info=(j + 1, num_instances, tpi_id) )) - if target_instance.discovery_rule: - dr_path = target_src / target_instance.discovery_rule + if instance.discovery_rule: + dr_path = instance.discovery_rule if not dr_path.is_file(): logger.warning( f"Instance {tpi_id} of pattern {tp_id}: the discovery rule {dr_path} does not exist") - res = get_check_discovery_rule_result(tp_id, language, instance_id=tpi_id, 
- instance_path=instance_path, discovery_rule=dr_path) + res = get_check_discovery_rule_result(pattern=target_pattern, instance=instance) results.append(res) err += 1 continue + target_src = instance.instance_path + build_name, disc_output_dir = utils.get_operation_build_name_and_dir( "check_discovery_rules", target_src, language, output_dir) - d_results = manual_discovery(target_src, target_instance.discovery_method, [dr_path], language, + d_results = manual_discovery(target_src, instance.discovery_method, [dr_path], language, build_name, disc_output_dir, timeout_sec=timeout_sec) # Inspect the d_results if d_results["findings"] and any( f["result"] == discovery_result_strings["discovery"] for f in d_results["findings"]): - res = get_check_discovery_rule_result(tp_id, language, instance_id=tpi_id, - instance_path=instance_path, pattern_name=target_tp.name, - discovery_rule=dr_path, successful="yes") + res = get_check_discovery_rule_result(pattern=target_pattern, instance=instance, successful="yes") success += 1 else: - res = get_check_discovery_rule_result(tp_id, language, instance_id=tpi_id, - instance_path=instance_path, pattern_name=target_tp.name, - discovery_rule=dr_path, successful="no") + res = get_check_discovery_rule_result(pattern=target_pattern, instance=instance, successful="no") unsuccess += 1 results.append(res) else: logger.info( f"Instance {tpi_id} of pattern {tp_id}: the discovery rule is not provided for the pattern") - res = get_check_discovery_rule_result(tp_id, language, instance_id=tpi_id, - instance_path=instance_path, successful="missing") + res = get_check_discovery_rule_result(pattern=target_pattern, instance=instance, successful="missing") results.append(res) missing += 1 logger.info(utils.get_tpi_op_status_string( - (i + 1, len(l_tp_id), tp_id), - t_tpi_info=(j + 1, len(l_tpi_dir), tpi_id), + (i + 1, num_patterns, tp_id), + t_tpi_info=(j + 1, num_instances, tpi_id), status="done." 
)) except Exception as e: logger.warning( - f"Something went wrong for the instance at {instance_path} of the pattern id {tp_id}. Exception raised: {utils.get_exception_message(e)}") - res = get_check_discovery_rule_result(tp_id, language, pattern_name=target_tp.name, instance_path=instance_path) + f"Something went wrong for the instance at {instance.instance_path} of the pattern id {tp_id}. Exception raised: {utils.get_exception_message(e)}") + res = get_check_discovery_rule_result(pattern=target_pattern, instance=instance) results.append(res) err += 1 continue logger.info(utils.get_tp_op_status_string( - (i + 1, len(l_tp_id), tp_id), # tp_info + (i + 1, num_patterns, tp_id), # tp_info status="done." )) - logger.info(f"Check/Test discovery rules for {len(l_tp_id)} patterns: done") + logger.info(f"Check/Test discovery rules for {num_patterns} patterns: done") d_res = { "results": results, "counters": { diff --git a/tp_framework/core/instance.py b/tp_framework/core/instance.py index a58b584..b53ca6d 100644 --- a/tp_framework/core/instance.py +++ b/tp_framework/core/instance.py @@ -5,16 +5,19 @@ class Instance: @classmethod - def init_from_json_path(cls, path_to_instance_json: Path, pattern_id=None): + def init_from_json_path(cls, path_to_instance_json: Path, pattern_id: int, language: str): if not path_to_instance_json.is_file(): raise InstanceInvalid(f"The provided instance path '{path_to_instance_json}' does not exist.") - return cls._init_from_json(cls(), path_to_instance_json, pattern_id) + return cls._init_from_json(cls(), path_to_instance_json, pattern_id, language) def __init__(self) -> None: self.instance_path = None self.instance_json_path = None self.instance_id = None self.pattern_id = None + self.language = None + self.name = None + self.pattern = None # JSON fields self.description = None @@ -42,21 +45,21 @@ def __init__(self) -> None: self.remediation_transformation = None self.remediation_modeling_rule = None - self.attributes_with_type_path = 
['instance_path', 'instance_json_path'] - def _assert_instance(self): try: int(self.instance_id) - int(self.pattern_id) + assert self.language.isupper() assert self.instance_path.is_dir() assert self.instance_json_path.is_file() assert self.code_path.is_file() except Exception as e: raise InstanceInvalid(f"{self._log_prefix()}Instance Variables are not properly set. '{e}'") - def _init_from_json(self, path_to_instance_json: Path, pattern_id): + def _init_from_json(self, path_to_instance_json: Path, pattern_id: int, language: str): self.instance_path = path_to_instance_json.parent + self.name = self.instance_path.name self.instance_json_path = Path(path_to_instance_json.name) + self.language = language.upper() try: self.instance_id = utils.get_id_from_name(self.instance_path.name) except Exception as e: @@ -107,10 +110,11 @@ def _make_path(self, path_name: str): return Path(self.instance_path / path_name).resolve() if path_name else None def __str__(self) -> str: - return f"Instance {self.instance_id}" + return f"Instance {self.instance_id} {self.name}" def copy_to_tplib(self, pattern_path: Path): new_instance_path = pattern_path / self.instance_path.name new_instance_path.mkdir(parents=True, exist_ok=True) utils.copy_dir_content(self.instance_path, new_instance_path) self.instance_path = new_instance_path + self.name = self.instance_path.name diff --git a/tp_framework/core/measurement.py b/tp_framework/core/measurement.py index 250953e..368cc81 100644 --- a/tp_framework/core/measurement.py +++ b/tp_framework/core/measurement.py @@ -13,6 +13,7 @@ from core import utils from core.exceptions import InstanceDoesNotExists, MeasurementNotFound from core.instance import Instance #, load_instance_from_metadata +from core.pattern import Pattern class Measurement: @@ -61,8 +62,8 @@ def define_verdict(self, date: datetime, instance: Instance, findings: list[Dict # found = instance.expectation_sink_file.name == finding["file"] if found: break # we found a matching finding - 
self.result = (found == instance.expectation) - self.expected_result = instance.expectation + self.result = (found == instance.expectation_expectation) + self.expected_result = instance.expectation_expectation self.tool = tool self.version = version self.instance = instance @@ -90,9 +91,10 @@ def load_measurements(meas_file: Path, tp_lib: Path, language: str) -> list[Meas return [] parsed_meas: list[Measurement] = [] for m in meas: - instance = load_instance_from_json(m["instance"], tp_lib, language) + instance_json_path = tp_lib / Path(m["instance"]) + instance = Instance.init_from_json_path(instance_json_path, None, language) # NOTE 06/2023: if not expectation in measurement, then we take it from instance (backword compatibility though it could introduce mistakes if the instance expectation was changed after the measurement) - expected_result = m["expected_result"] if "expected_result" in m.keys() else instance.expectation + expected_result = m["expected_result"] if "expected_result" in m.keys() else instance.expectation_expectation parsed_meas.append(Measurement( m["date"], m["result"], @@ -104,23 +106,23 @@ def load_measurements(meas_file: Path, tp_lib: Path, language: str) -> list[Meas return parsed_meas -def load_last_measurement_for_tool(tool: Dict, language: str, tp_lib: Path, p_id: int, - pi_id: int) -> Measurement: +def load_last_measurement_for_tool(tool: Dict, language: str, tp_lib: Path, pattern: Pattern, + instance: Instance) -> Measurement: # TODO - load last measurement: the code hereafter strongly depends on the folder notation in place for # patterns and pattern instances. 
Make sure to factorize in function what needs to # and to generalize the approach as much as we can to rely the least possible on # the strict notation - pattern_dir: Path = utils.get_pattern_dir_from_id(p_id, language, tp_lib) - pattern_dir_name: str = pattern_dir.name - instance_dir_name: str = f"{pi_id}_instance_{pattern_dir_name}" - instance_dir: Path = pattern_dir / instance_dir_name - if not instance_dir.is_dir(): - ee = InstanceDoesNotExists(instance_id=pi_id) - logger.exception(ee) - raise ee + pattern_dir_name: str = pattern.pattern_path.name + instance_dir_name: str = instance.instance_path.name + # TODO: continue here + # instance_dir: Path = pattern_dir / instance_dir_name + # if not instance_dir.is_dir(): + # ee = InstanceDoesNotExists(instance_id=pi_id) + # logger.exception(ee) + # raise ee measurement_dir_for_pattern_instance: Path = utils.get_measurement_dir_for_language(tp_lib, language) / pattern_dir_name / instance_dir_name if not measurement_dir_for_pattern_instance.is_dir(): - ee = MeasurementNotFound(p_id) + ee = MeasurementNotFound(pattern.pattern_id) logger.exception(ee) raise ee meas_file_list = list( @@ -137,7 +139,7 @@ def load_last_measurement_for_tool(tool: Dict, language: str, tp_lib: Path, p_id measurements) ) if not measurements_for_tool: - logger.warning(f'No measurement has been found for tool {tool["name"]}:{tool["version"]} on pattern {p_id} instance {pi_id}') + logger.warning(f'No measurement has been found for tool {tool["name"]}:{tool["version"]} on pattern {pattern.pattern_id} instance {instance.instance_id}') return None return sorted(measurements_for_tool, reverse=True)[0] diff --git a/tp_framework/core/pattern.py b/tp_framework/core/pattern.py index a7923c3..66e22c1 100644 --- a/tp_framework/core/pattern.py +++ b/tp_framework/core/pattern.py @@ -3,7 +3,7 @@ from os import listdir from pathlib import Path -from core.exceptions import PatternInvalid, PatternDoesNotExists +from core.exceptions import PatternInvalid, 
PatternDoesNotExists, InstanceDoesNotExists from core.instance import Instance from core.pattern_repair import PatternRepair from core import utils @@ -68,11 +68,10 @@ def _init_from_id_and_language(self, id: int, language: str, tp_lib_path: Path): def _init_instances(self, instance_paths_from_json: list): instances = [] for instance_json in instance_paths_from_json: - print('\033[93m', instance_json, '\033[0m') abs_path = Path(self.pattern_path / Path(instance_json)) if not abs_path.is_file(): raise PatternInvalid(f"{self._log_prefix()}The instance path '{instance_json}' is not valid.") - instances += [Instance.init_from_json_path(abs_path, self.pattern_id)] + instances += [Instance.init_from_json_path(abs_path, self.pattern_id, self.language)] instances = sorted(instances, key=lambda instance: instance.instance_id) return instances @@ -120,6 +119,12 @@ def copy_to_tplib(self): instance.copy_to_tplib(new_pattern_path) utils.copy_dir_content(self.pattern_path, new_pattern_path) + def get_instance_by_id(self, tpi_id: int) -> Instance: + try: + return list(filter(lambda tpi: tpi.instance_id == tpi_id, self.instances))[0] + except KeyError: + raise InstanceDoesNotExists(tpi_id, ) + def validate_for_measurement(self): pass diff --git a/tp_framework/core/pattern_operations.py b/tp_framework/core/pattern_operations.py index a9494e7..4bc17d3 100644 --- a/tp_framework/core/pattern_operations.py +++ b/tp_framework/core/pattern_operations.py @@ -39,10 +39,10 @@ async def start_add_measurement_for_pattern(language: str, sast_tools: list[Dict logger.warning( f"SAST measurement - failed in fetching instances for pattern {tp_id}. Pattern will be ignored. 
Exception raised: {utils.get_exception_message(e)}") return d_status_tp - + for instance in target_pattern.instances: try: - d_status_tp[target_pattern.pattern_id]: list[SASTjob] = await analysis.analyze_pattern_instance( + d_status_tp[instance.instance_id]: list[SASTjob] = await analysis.analyze_pattern_instance( instance, sast_tools, language, now, output_dir ) except Exception as e: @@ -62,12 +62,12 @@ async def save_measurement_for_patterns(language: str, now: datetime, d_tp_meas = meas_list_to_tp_dict(l_meas) for tp_id in d_tp_meas: + target_pattern = Pattern.init_from_id_and_language(tp_id, language, tp_lib_dir) for tpi_id in d_tp_meas[tp_id]: l_tpi_meas = [] for meas in d_tp_meas[tp_id][tpi_id]: - # meas.instance - tp_rel_dir = utils.get_pattern_dir_name_from_name(meas.instance.name, meas.instance.pattern_id) - tpi_rel_dir = utils.get_instance_dir_name_from_pattern(meas.instance.name, meas.instance.pattern_id, meas.instance.instance_id) + tp_rel_dir = target_pattern.pattern_path.name + tpi_rel_dir = meas.instance.instance_path.name meas_dir = utils.get_measurement_dir_for_language(tp_lib_dir, language) / tp_rel_dir / tpi_rel_dir meas_dir.mkdir(parents=True, exist_ok=True) d_tpi_meas_ext: Dict = meas.__dict__ @@ -79,4 +79,4 @@ async def save_measurement_for_patterns(language: str, now: datetime, l_tpi_meas.append(d_tpi_meas_ext) with open(meas_dir / utils.get_measurement_file(now), "w") as f_meas: - json.dump(l_tpi_meas, f_meas, indent=4) \ No newline at end of file + json.dump(l_tpi_meas, f_meas, indent=4) diff --git a/tp_framework/core/report_for_sast.py b/tp_framework/core/report_for_sast.py index d38bc5a..f2ec373 100644 --- a/tp_framework/core/report_for_sast.py +++ b/tp_framework/core/report_for_sast.py @@ -6,8 +6,10 @@ logger = logging.getLogger(loggermgr.logger_name(__name__)) import config -from core import utils, measurement, instance +from core import utils, measurement from core.exceptions import InstanceDoesNotExists, MeasurementNotFound +from 
core.pattern import Pattern +from core.instance import Instance def report_sast_measurement_for_pattern_list(tools: list[Dict], language: str, l_tp_id: list[int], @@ -19,11 +21,10 @@ def report_sast_measurement_for_pattern_list(tools: list[Dict], language: str, l utils.add_loggers(output_dir) results = [] for tp_id in l_tp_id: - l_tpi_path: list[Path] = utils.list_tpi_paths_by_tp_id( - language, tp_id, tp_lib_path - ) - for tpi_path in l_tpi_path: - tpi_id = utils.get_id_from_name(tpi_path.name) + target_pattern = Pattern.init_from_id_and_language(tp_id, language, tp_lib_path) + instance: Instance + for instance in target_pattern.instances: + tpi_id = instance.instance_id msgpre = f"{language} pattern {tp_id} instance {tpi_id} - " logger.info(f"{msgpre}Fetching last measurements...") for tool in tools: @@ -40,17 +41,15 @@ def report_sast_measurement_for_pattern_list(tools: list[Dict], language: str, l } try: meas: measurement.Measurement = measurement.load_last_measurement_for_tool( - tool, language, tp_lib_path, tp_id, tpi_id + tool, language, tp_lib_path, target_pattern, instance ) except InstanceDoesNotExists: row["results"] = "PATTERN_INSTANCE_DOES_NOT_EXIST" except MeasurementNotFound: - tpi: instance.Instance = instance.load_instance_from_json(tpi_path, tp_lib_path, language) - row["pattern_name"] = tpi.name + row["pattern_name"] = instance.name row["results"] = "NOT_FOUND" - row["negative_test_case"] = "YES" if tpi.properties_negative_test_case else "NO" - row["expectation"] = tpi.expectation - pass + row["negative_test_case"] = "YES" if instance.properties_negative_test_case else "NO" + row["expectation"] = instance.expectation_expectation if meas: row["pattern_name"] = meas.instance.name row["tool"] = f"{meas.tool}:{meas.version}" # rewrite `saas` occurrences with precise versions diff --git a/tp_framework/core/sast_job_runner.py b/tp_framework/core/sast_job_runner.py index 50fa758..24d6fb6 100644 --- a/tp_framework/core/sast_job_runner.py +++ 
b/tp_framework/core/sast_job_runner.py @@ -63,6 +63,9 @@ def set_extracted(self, value: bool=True): def set_measurement(self, meas: Measurement): self.measurement = meas + + def __str__(self) -> str: + return f'{self.tp_id} - {self.tpi_id}' def job_list_to_dict(l: list[SASTjob]) -> Dict: From a3125e14f4fd27b93d65a726e148795519fb2a5f Mon Sep 17 00:00:00 2001 From: felix-20 Date: Thu, 29 Jun 2023 14:20:43 +0200 Subject: [PATCH 07/16] tests for repair scripts --- qualitytests/core/test_instance.py | 135 +++++++ qualitytests/core/test_instanceR.py | 112 ------ qualitytests/core/test_instance_repair.py | 181 ++++++++++ qualitytests/core/test_instance_repair_php.py | 108 ++++++ qualitytests/core/test_pattern.py | 335 +++++++++--------- qualitytests/core/test_patternR.py | 105 ------ qualitytests/core/test_pattern_repair.py | 155 ++++++-- qualitytests/core/test_repair_tool.py | 198 +++++++++++ qualitytests/core/test_utils.py | 60 ---- qualitytests/qualitytests_utils.py | 124 +++++++ tp_framework/cli/interface.py | 12 +- tp_framework/cli/tpf_commands.py | 7 +- tp_framework/core/analysis.py | 3 +- tp_framework/core/discovery.py | 18 +- tp_framework/core/instance.py | 158 ++++++--- tp_framework/core/instance_repair.py | 196 ++++++++++ tp_framework/core/measurement.py | 6 +- tp_framework/core/pattern.py | 105 +++--- tp_framework/core/pattern_operations.py | 10 +- tp_framework/core/pattern_repair.py | 113 ++++-- tp_framework/core/repair_tool.py | 87 +++++ tp_framework/core/utils.py | 283 +++++++-------- 22 files changed, 1736 insertions(+), 775 deletions(-) create mode 100644 qualitytests/core/test_instance.py delete mode 100644 qualitytests/core/test_instanceR.py create mode 100644 qualitytests/core/test_instance_repair.py create mode 100644 qualitytests/core/test_instance_repair_php.py delete mode 100644 qualitytests/core/test_patternR.py create mode 100644 qualitytests/core/test_repair_tool.py create mode 100644 tp_framework/core/instance_repair.py create mode 100644 
tp_framework/core/repair_tool.py diff --git a/qualitytests/core/test_instance.py b/qualitytests/core/test_instance.py new file mode 100644 index 0000000..e8c6bfc --- /dev/null +++ b/qualitytests/core/test_instance.py @@ -0,0 +1,135 @@ +import json +import pytest +from pathlib import Path +from unittest.mock import patch, mock_open + +from core.instance import Instance +from core.exceptions import InstanceInvalid +from qualitytests.qualitytests_utils import join_resources_path, create_instance, example_instance_dict + + +class TestInstance: + sample_tp_lib: Path = join_resources_path("sample_patlib") + + invalid_instances = [ + # (Path("./test_instance.json"), False, {}, "The provided instance path 'test_instance.json' does not exist."), + (Path("./1_instance_test_pattern.json"), True, {}, "Could not get id from ''."), + # (Path("./1_instance_test_pattern/1_instance_test_pattern.json"), True, {}, "Pattern 1 - Instance 1 - Please check ") + ] + + @pytest.mark.parametrize("json_file_path, is_file_return, read_json_return, expected_error", invalid_instances) + def test_init_invalid_instance_from_json_path(self, + json_file_path: Path, + is_file_return: bool, + read_json_return: dict, + expected_error: str): + with patch("pathlib.Path.is_file") as is_file_mock, \ + patch('core.utils.read_json') as read_json_mock, \ + pytest.raises(InstanceInvalid) as e_info: + is_file_mock.return_value = is_file_return + read_json_mock.return_value = read_json_return + Instance.init_from_json_path(json_file_path, 1, "js", TestInstance.sample_tp_lib) + is_file_mock.assert_called_once() + assert expected_error in str(e_info.value) + + def test_init_valid_instance_from_json_path(self): + with patch('core.utils.read_json') as read_json_mock, \ + patch('pathlib.Path.is_file') as is_file_mock, \ + patch("pathlib.Path.is_dir") as is_dir_mock: + + is_file_mock.return_value = True + read_json_mock.return_value = example_instance_dict + test_instance = 
Instance.init_from_json_path(Path("/1_instance_test_pattern/1_instance_test_pattern.json"), 1, "js", TestInstance.sample_tp_lib) + + read_json_mock.assert_called_once() + is_file_mock.assert_called() + is_dir_mock.assert_called() + assert Path("/1_instance_test_pattern/") == test_instance.path + assert Path("/1_instance_test_pattern/1_instance_test_pattern.json") == test_instance.json_path + assert 1 == test_instance.instance_id + assert Path("/1_instance_test_pattern/", "") == test_instance.code_path + assert "Some description" == test_instance.description + assert test_instance.code_injection_skeleton_broken + assert "xss" == test_instance.expectation_type + assert Path("/1_instance_test_pattern/", "") == test_instance.expectation_sink_file + assert 5 == test_instance.expectation_sink_line + assert Path("/1_instance_test_pattern/", "") == test_instance.expectation_source_file + assert 9 == test_instance.expectation_source_line + assert test_instance.expectation_expectation + assert None == test_instance.compile_binary + assert test_instance.compile_instruction is None + assert test_instance.compile_dependencies is None + assert Path("/1_instance_test_pattern/", "") == test_instance.discovery_rule + assert "joern" == test_instance.discovery_method + assert "Perfect" == test_instance.discovery_rule_accuracy + assert "Some notes" == test_instance.discovery_notes + assert "S0" == test_instance.properties_category + assert "FEATURE" == test_instance.properties_feature_vs_internal_api + assert not test_instance.properties_input_sanitizer + assert not test_instance.properties_source_and_sink + assert not test_instance.properties_negative_test_case + assert "./docs/remediation_notes.md" == test_instance.remediation_notes + assert test_instance.remediation_transformation is None + assert test_instance.remediation_modeling_rule is None + + def test_copy_to_tp_lib(self): + test_instance = create_instance() + with patch("pathlib.Path.mkdir") as mkdir_mock, \ + 
patch("core.utils.copy_dir_content") as copy_mock: + + new_tp_lib_path = Path("/test_path") + old_path = test_instance.path + test_instance.copy_to_tplib(new_tp_lib_path) + + mkdir_mock.assert_called_once() + expected_new_instance_path = new_tp_lib_path / old_path.name + copy_mock.assert_called_once_with(old_path, expected_new_instance_path) + assert expected_new_instance_path == test_instance.path + + def test_set_new_instance_path(self): + test_instance = create_instance() + new_path = Path("/test_path") + with patch("shutil.move") as move_mock: + test_instance.set_new_instance_path(new_path) + move_mock.assert_called_once() + assert new_path == test_instance.path + + def test_to_dict(self): + test_instance = create_instance() + with patch("core.utils.get_relative_paths") as rel_path_mock: + rel_path_mock.return_value = None + actual = test_instance.to_dict() + path_to_instance_json = test_instance.json_path + with open(path_to_instance_json, "r") as jfile: + expected = json.load(jfile) + expected["code"]["path"] = None + expected["discovery"]["rule"] = None + expected["compile"]["binary"] = None + expected["expectation"]["sink_file"] = None + expected["description"] = None + expected["expectation"]["source_file"] = None + assert expected == actual + + def test_get_description_from_file(self): + test_pattern = create_instance() + test_pattern.description = "not None" + expected_description = "Some description in a file\nTest description.\n\n" + with patch("builtins.open", mock_open(read_data=expected_description), create=True), \ + patch("pathlib.Path.is_file") as isfile_mock: + + isfile_mock.return_value = True + + is_file, actual = test_pattern.get_description() + assert is_file + assert expected_description.strip() == actual + + def test_get_description_(self): + test_pattern = create_instance() + expected_description = "Some description in a file\nTest description." 
+ test_pattern.description = expected_description + with patch("pathlib.Path.is_file") as isfile_mock: + isfile_mock.return_value = False + + is_file, actual = test_pattern.get_description() + assert not is_file + assert expected_description.strip() == actual \ No newline at end of file diff --git a/qualitytests/core/test_instanceR.py b/qualitytests/core/test_instanceR.py deleted file mode 100644 index 2893030..0000000 --- a/qualitytests/core/test_instanceR.py +++ /dev/null @@ -1,112 +0,0 @@ -import pytest -from pathlib import Path -from unittest.mock import patch - -from core.instance import Instance -from core.exceptions import PatternDoesNotExists, InstanceInvalid -from qualitytests.qualitytests_utils import join_resources_path - -class mockPattern: - def __init__(self) -> None: - self.pattern_id = 1 - -class TestInstance: - sample_tp_lib: Path = join_resources_path("sample_patlib") - - example_instance_dict = { - "code": { - "path": "", - "injection_skeleton_broken": True - }, - "discovery": { - "rule": "", - "method": "joern", - "rule_accuracy": "Perfect", - "notes": "Some notes" - }, - "remediation": { - "notes": "./docs/remediation_notes.md", - "transformation": None, - "modeling_rule": None - }, - "compile": { - "binary": "", - "dependencies": None, - "instruction": None - }, - "expectation": { - "type": "xss", - "sink_file": "", - "sink_line": 5, - "source_file": "", - "source_line": 9, - "expectation": True - }, - "properties": { - "category": "S0", - "feature_vs_internal_api": "FEATURE", - "input_sanitizer": False, - "source_and_sink": False, - "negative_test_case": False - } - } - - invalid_instances = [ - (Path("./test_instance.json"), False, {}, "The provided instance path 'test_instance.json' does not exist."), - (Path("./1_instance_test_pattern.json"), True, {}, "Could not get id from ''."), - (Path("./1_instance_test_pattern/1_instance_test_pattern.json"), True, {}, "Pattern 1 - Instance 1 - Please check 
1_instance_test_pattern/1_instance_test_pattern.json."), - (Path("./1_instance_test_pattern/1_instance_test_pattern.json"), True, {"name": "instance"}, "Pattern 1 - Instance 1 - 'code:path' must be contained in instance json.") - ] - - @pytest.mark.parametrize("json_file_path, is_file_return, read_json_return, expected_error", invalid_instances) - def test_init_invalid_instance_from_json_path(self, - json_file_path: Path, - is_file_return: bool, - read_json_return: dict, - expected_error: str): - with patch("pathlib.Path.is_file") as is_file_mock, \ - patch('core.utils.read_json') as read_json_mock, \ - pytest.raises(InstanceInvalid) as e_info: - is_file_mock.return_value = is_file_return - read_json_mock.return_value = read_json_return - Instance.init_from_json_path(json_file_path, mockPattern()) - is_file_mock.assert_called_once() - assert f"{expected_error} Instance is invalid." == str(e_info.value) - - def test_init_valid_instance_from_json_path(self): - with patch('core.utils.read_json') as read_json_mock, \ - patch('pathlib.Path.is_file') as is_file_mock: - - is_file_mock.return_value = True - read_json_mock.return_value = TestInstance.example_instance_dict - test_instance = Instance.init_from_json_path(Path("./1_instance_test_pattern/1_instance_test_pattern.json"), mockPattern()) - - read_json_mock.assert_called_once() - is_file_mock.assert_called_once() - assert Path("./1_instance_test_pattern/") == test_instance.instance_path - assert Path("./1_instance_test_pattern/1_instance_test_pattern.json") == test_instance.instance_json_path - assert 1 == test_instance.instance_id - assert "" == test_instance.code_path - assert test_instance.description is None - assert test_instance.code_injection_skeleton_broken - assert "xss" == test_instance.expectation_type - assert "" == test_instance.expectation_sink_file - assert 5 == test_instance.expectation_sink_line - assert "" == test_instance.expectation_source_file - assert 9 == test_instance.expectation_source_line - 
assert test_instance.expectation_expectation - assert "" == test_instance.compile_binary - assert test_instance.compile_instruction is None - assert test_instance.compile_dependencies is None - assert "" == test_instance.discovery_rule - assert "joern" == test_instance.discovery_method - assert "Perfect" == test_instance.discovery_rule_accuracy - assert "Some notes" == test_instance.discovery_notes - assert "S0" == test_instance.properties_category - assert "FEATURE" == test_instance.properties_feature_vs_internal_api - assert not test_instance.properties_input_sanitizer - assert not test_instance.properties_source_and_sink - assert not test_instance.properties_negative_test_case - assert "./docs/remediation_notes.md" == test_instance.remediation_notes - assert test_instance.remediation_transformation is None - assert test_instance.remediation_modeling_rule is None \ No newline at end of file diff --git a/qualitytests/core/test_instance_repair.py b/qualitytests/core/test_instance_repair.py new file mode 100644 index 0000000..85507be --- /dev/null +++ b/qualitytests/core/test_instance_repair.py @@ -0,0 +1,181 @@ +import pytest +from pathlib import Path +from unittest.mock import patch, mock_open + +from core.instance_repair import InstanceRepair +from core.exceptions import PatternRepairError +from qualitytests.qualitytests_utils import create_instance, create_pattern + +class TestInstanceRepair: + template_json_dict = { + "description": "", + "code": { + "path": "./pattern_src_code.js|php|java", + "injection_skeleton_broken": False + }, + "expectation": { + "type": "xss", + "sink_file": "./pattern_src_code.js|php|java", + "sink_line": 0, + "source_file": "./pattern_src_code.js|php|java", + "source_line": 0, + "expectation": True + }, + "compile": { + "binary": None, + "instruction": None, + "dependencies": None + }, + "discovery": { + "rule": "./pattern_discovery_rule.sc", + "method": "joern", + "rule_accuracy": "FN|FP|FPFN|Perfect", + "notes": None + }, + 
"properties": { + "category": "S0|D1|D2|D3", + "feature_vs_internal_api": "FEATURE", + "input_sanitizer": False, + "source_and_sink": False, + "negative_test_case": False + }, + "remediation": { + "notes": "", + "transformation": None, + "modeling_rule": None + } + } + def _get_instance_repair(self) -> InstanceRepair: + test_instance = create_instance() + test_pattern = create_pattern() + with patch("pathlib.Path.is_file") as is_file_mock, \ + patch("core.utils.read_json") as read_json_mock, \ + patch("core.instance_repair.globals") as global_mock: + is_file_mock.return_value = True + read_json_mock.return_value = TestInstanceRepair.template_json_dict + + repair_tool = InstanceRepair(test_instance, test_pattern) + + global_mock.assert_called_once() + read_json_mock.assert_called_once() + is_file_mock.assert_called_once() + return repair_tool + + def test_init_instance_repair_with_wrong_language(self): + test_instance = create_instance() + test_instance.language = "TEST" + test_pattern = create_pattern() + with patch("pathlib.Path.is_file") as is_file_mock, \ + patch("core.utils.read_json") as read_json_mock, \ + patch("core.instance_repair.logger.error") as logger_error, \ + pytest.raises(PatternRepairError) as e_info: + + is_file_mock.return_value = True + InstanceRepair(test_instance, test_pattern) + is_file_mock.assert_called_once() + read_json_mock.assert_called_once() + logger_error.assert_called_once_with("InstanceRepairTEST could not be found, maybe it is not imported?") + assert "Could not instantiate language specific instance repair" in str(e_info) + + example_rule = """@main def main(name : String): Unit = { + importCpg(name) + // TODO: replace line below with your detection query + val x2 = (name, "ID_pattern_name_i1", cpg.method.l); + println(x2) + delete; + }\n\n + """ + + discovery_rule_test_cases = [ + # one instance, remove "delete;" from scala rule and test if the right warn log message is exposed + ([None], "", None, "delete;", 'Could not find 
"delete;" in'), + # one instance, remove 2 and test if the right warn log message is provided + ([None], "", None, "2", 'Could not find the pattern id in'), + # one instance, dr is not in instance directory (as in samplepatlib) + ([None], 'Changed lines in Scala rule for instance JS - p1:1:\n[\'val x1 = (name, "1_unset_element_array_iall", cpg.method.l);\', \'println(x1)\']', None, "", ""), + # two instances, dr is not in instance directory (as in samplepatlib) + ([None, None], 'Changed lines in Scala rule for instance JS - p1:1:\n[\'val x1 = (name, "1_unset_element_array_iall", cpg.method.l);\', \'println(x1)\']', None, "", ""), + # two instance, dr is in instance directory + ([None, None], 'Changed lines in Scala rule for instance JS - p1:1:\n[\'val x1 = (name, "1_unset_element_array_i1", cpg.method.l);\', \'println(x1)\']', Path("dr_rule.sc"), "", "") + ] + + @pytest.mark.parametrize("instances, expected_info, dr_rule_path, dr_rule_replace, warn_logger_msg", discovery_rule_test_cases) + def test_adjust_variable_number_in_discovery_works(self, instances, expected_info, dr_rule_path, dr_rule_replace, warn_logger_msg): + test_instance_repair = self._get_instance_repair() + + test_instance_repair.pattern.instances = instances + test_instance_repair.to_repair.path = Path("/1_unset_element_array/1_instance_1_unset_element_array") + test_instance_repair.pattern.path = Path("/1_unset_element_array") + if dr_rule_path: + test_instance_repair.to_repair.discovery_rule = dr_rule_path + dr_rule = TestInstanceRepair.example_rule.replace(dr_rule_replace, "") + with patch("builtins.open", mock_open(read_data=dr_rule), create=True), \ + patch("core.instance_repair.logger.info") as info_logger, \ + patch("core.instance_repair.logger.warning") as warn_logger: + test_instance_repair._adjust_variable_number_in_discovery_rule() + + if dr_rule_replace: + warn_logger.assert_called_once_with(f"{warn_logger_msg} {test_instance_repair.to_repair.discovery_rule}") + 
info_logger.assert_not_called() + else: + info_logger.assert_called_once_with(expected_info) + + def test_check_rule_accuracy_given(self): + test_instance_repair = self._get_instance_repair() + + test_instance_repair.to_repair.discovery_rule_accuracy = "FP" + with patch("core.instance_repair.logger.warning") as warn_logger: + test_instance_repair._check_rule_accuracy() + warn_logger.assert_not_called() + + test_instance_repair.to_repair.discovery_rule_accuracy = "" + with patch("core.instance_repair.logger.warning") as warn_logger: + test_instance_repair._check_rule_accuracy() + warn_logger.assert_called_once_with("PatternRepair (JS - p1:1) Discovery rule given, but no rule accuracy.") + + repair_scala_rules_testcases = [ + # no discovery rule given + (None, True, "PatternRepair (JS - p1:1) Could not find rule for JS - p1:1, skipping...", None), + # discovery rule, but it is not a file + (Path("discovery_rule.sc"), False, "PatternRepair (JS - p1:1) Could not find rule for JS - p1:1, skipping...", None), + # discovery_rule, but has wrong suffix + (Path("discovery_rule.py"), True, None, "PatternRepair (JS - p1:1) Found a rule, but it is no scala rule, don't know how to repair this, skipping..."), + # everything is alright + (Path("discovery_rule.sc"), True, None, None), + ] + + @pytest.mark.parametrize("dr_rule, is_file_return, warn, info", repair_scala_rules_testcases) + def test_repair_scala_rule(self, dr_rule, is_file_return, warn, info): + test_instance_repair = self._get_instance_repair() + test_instance_repair.to_repair.discovery_rule = dr_rule + with patch("pathlib.Path.is_file") as is_file_mock, \ + patch("core.instance_repair.InstanceRepair._adjust_variable_number_in_discovery_rule") as adjust_mock, \ + patch("core.instance_repair.InstanceRepair._check_rule_accuracy") as check_rule_mock, \ + patch("core.instance_repair.logger.warning") as logger_warn_mock, \ + patch("core.instance_repair.logger.info") as logger_info_mock: + is_file_mock.return_value = 
is_file_return + + test_instance_repair._repair_scala_rule() + + if warn: + logger_warn_mock.assert_called_once_with(warn) + logger_info_mock.assert_not_called() + if info: + logger_info_mock.assert_called_once_with(info) + logger_warn_mock.assert_not_called() + if not warn and not info: + logger_info_mock.assert_not_called() + logger_warn_mock.assert_not_called() + + check_rule_mock.assert_called_once() + adjust_mock.assert_called_once() + + def test_repair(self): + test_instance_repair = self._get_instance_repair() + with patch("core.instance_repair.InstanceRepair._ensure_json_file_exists") as func1_mock, \ + patch("core.instance_repair.InstanceRepair._repair_scala_rule") as func2_mock, \ + patch("core.instance_repair.RepairTool.to_json") as func3_mock: + test_instance_repair.repair() + func1_mock.assert_called_once() + func2_mock.assert_called_once() + func3_mock.assert_called_once() diff --git a/qualitytests/core/test_instance_repair_php.py b/qualitytests/core/test_instance_repair_php.py new file mode 100644 index 0000000..403f4a4 --- /dev/null +++ b/qualitytests/core/test_instance_repair_php.py @@ -0,0 +1,108 @@ +import pytest +from pathlib import Path +from unittest.mock import patch, mock_open + +from core.instance_repair import InstanceRepairPHP +from qualitytests.qualitytests_utils import create_instance, create_pattern, create_instance_php, join_resources_path + +class TestInstanceRepairPHP: + + def _get_instance_repair(self): + test_instance = create_instance_php() + return InstanceRepairPHP(test_instance) + + def test_get_source_and_sink_for_file(self): + test_instance_php_repair = self._get_instance_repair() + code = """ {expected} 1>/dev/null") + mask_line_mock.assert_called_once() + + def test_repair_opcode(self): + test_instance_php_repair = self._get_instance_repair() + with patch("core.instance_repair.InstanceRepairPHP._remove_bash_files") as bash_file_remove_mock, \ + patch("core.instance_repair.InstanceRepairPHP._make_opcode_from_php_file") as 
make_opcode_mock, \ + patch("core.utils.list_files") as list_files_mock: + + list_files_mock.return_value = ["file1"] + + test_instance_php_repair._repair_opcode() + bash_file_remove_mock.assert_called_once() + make_opcode_mock.assert_called_once() + list_files_mock.assert_called() + + repair_source_sink_testcases = [ + ((None, None), True, 99, 99), + ((1, None), True, 1, 99), + ((None, 1), True, 99, 1), + ((42, 24), False, 42, 24) + ] + + @pytest.mark.parametrize("source_sink_ret, warning, exp_source, exp_sink", repair_source_sink_testcases) + def test_repair_source_line_sink_line(self, source_sink_ret, warning, exp_source, exp_sink): + test_instance_php_repair = self._get_instance_repair() + expected_file = test_instance_php_repair.instance.expectation_sink_file + test_instance_php_repair.instance.expectation_sink_line = 99 + test_instance_php_repair.instance.expectation_source_line = 99 + with patch("core.instance_repair.InstanceRepairPHP._get_source_and_sink_for_file") as source_sink_mock, \ + patch("core.instance_repair.logger.warning") as warn_logger: + + source_sink_mock.return_value = source_sink_ret + test_instance_php_repair._repair_source_line_sink_line() + + source_sink_mock.assert_called_with(expected_file) + if warning: + warn_logger.assert_called() + + assert exp_source == test_instance_php_repair.instance.expectation_source_line + assert exp_sink == test_instance_php_repair.instance.expectation_sink_line + + diff --git a/qualitytests/core/test_pattern.py b/qualitytests/core/test_pattern.py index f7da7f4..0c093bd 100644 --- a/qualitytests/core/test_pattern.py +++ b/qualitytests/core/test_pattern.py @@ -1,174 +1,167 @@ -from typing import Dict - import pytest -import json +from copy import deepcopy from pathlib import Path +from unittest.mock import patch, mock_open + +from core.pattern import Pattern +from core.exceptions import PatternDoesNotExists, PatternInvalid, InstanceDoesNotExists +from qualitytests.qualitytests_utils import 
join_resources_path, create_pattern, example_pattern_dict + +class TestPatternR: + sample_tp_lib: Path = join_resources_path("sample_patlib") + + example_pattern_dict = { + "name": "Test Pattern", + "description": "./docs/description.md", + "family": "test_pattern", + "tags": ["sast", "language"], + "instances": [ + "./1_instance_1_test_pattern/1_instance_1_test_pattern.json" + ] + } + + not_existing_patterns = [(1000, "php"), (1000, "js"), (1000, "java")] + invalid_patterns = [ + (3, "php", {}, "The pattern needs a valid JSON file."), + (3, "php", {"name": "test_instances_key_in_json_missing"}, "Pattern 3 (PHP) - Pattern JSON file needs an 'instances' key with valid relative links."), + (3, "php", {"instances": ["test_instances_invalid_relative_path"]}, "Pattern 3 (PHP) - The instance path 'test_instances_invalid_relative_path' is not valid.") + ] + valid_patterns = [ + (1, "php", example_pattern_dict), + (1, "js", example_pattern_dict) + ] + + valid_patterns_without_id = [ + (Path("path_to_json_file"), "php", Path("pattern_path"), 5), + (Path("path_to_json_file"), "js", Path("pattern_path"), 3) + ] + + @pytest.mark.parametrize("pattern_id, language", not_existing_patterns) + def test_not_exising_pattern_init_from_id_and_language(self, pattern_id: int, language: str): + with pytest.raises(PatternDoesNotExists) as e_info: + Pattern.init_from_id_and_language(pattern_id, language, TestPatternR.sample_tp_lib) + assert f"Specified Pattern `{pattern_id}` does not exists." 
== str(e_info.value) + + @pytest.mark.parametrize("pattern_id, language, read_json_return, expected_assertion_error", invalid_patterns) + def test_init_invalid_pattern_from_id_and_language(self, + pattern_id: int, language: str, + read_json_return: dict, + expected_assertion_error: str): + with patch('core.utils.read_json') as read_json_mock, \ + pytest.raises(PatternInvalid) as e_info: + + read_json_mock.return_value = read_json_return + Pattern.init_from_id_and_language(pattern_id, language, TestPatternR.sample_tp_lib) + + read_json_mock.assert_called_once() + assert f"{expected_assertion_error} Pattern is invalid." == str(e_info.value) + + @pytest.mark.parametrize("path_to_json, language, pattern_path, expected_id", valid_patterns_without_id) + def test_init_from_json_file_without_pattern_id(self, path_to_json: Path, language: str, pattern_path: Path, expected_id: int): + with patch('core.utils.read_json') as read_json_mock, \ + patch('pathlib.Path.is_file') as is_file_mock, \ + patch("pathlib.Path.is_dir") as is_dir_mock, \ + patch("core.pattern.isinstance") as isinstance_mock, \ + patch('core.instance.Instance.init_from_json_path') as instance_init_mock: + + is_dir_mock.return_value = True + is_file_mock.return_value = True + isinstance_mock.return_value = True + read_json_mock.return_value = TestPatternR.example_pattern_dict + pattern = Pattern.init_from_json_file_without_pattern_id(path_to_json, language, pattern_path, TestPatternR.sample_tp_lib) + read_json_mock.assert_called_once() + is_file_mock.assert_called() + is_dir_mock.assert_called() + isinstance_mock.assert_called() + instance_init_mock.assert_called_once() + assert expected_id == pattern.pattern_id + assert path_to_json == pattern.json_path + assert pattern_path == pattern.path + assert language.upper() == pattern.language + + + @pytest.mark.parametrize("pattern_id, language, read_json_return", valid_patterns) + def test_init_valid_pattern_from_id_and_language(self, pattern_id: int, language: 
str, + read_json_return: dict): + with patch('core.utils.read_json') as read_json_mock, \ + patch('pathlib.Path.is_file') as is_file_mock, \ + patch("pathlib.Path.is_dir") as is_dir_mock, \ + patch("core.pattern.isinstance") as isinstance_mock, \ + patch('core.instance.Instance.init_from_json_path') as instance_init_mock: + + is_dir_mock.return_value = True + is_file_mock.return_value = True + isinstance_mock.return_value = True + read_json_mock.return_value = read_json_return + test_pattern = Pattern.init_from_id_and_language(pattern_id, language, TestPatternR.sample_tp_lib) + + read_json_mock.assert_called_once() + is_file_mock.assert_called() + is_dir_mock.assert_called() + isinstance_mock.assert_called() + instance_init_mock.assert_called_once() + assert "Test Pattern" == test_pattern.name + assert "./docs/description.md" == test_pattern.description + assert "test_pattern" == test_pattern.family + assert ["sast", "language"] == test_pattern.tags + + copy_to_tp_lib_testcases = [(1, "1_unset_element_array"), (None, "1_1_unset_element_array")] + + @pytest.mark.parametrize("ret_pattern_id, expected_name", copy_to_tp_lib_testcases) + def test_copy_to_tp_lib(self, ret_pattern_id, expected_name): + test_pattern = create_pattern() + new_tplib_path = Path("/tp_lib") + with patch("core.instance.Instance.copy_to_tplib") as copy_instance_mock, \ + patch("core.utils.copy_dir_content") as copy_dir_mock, \ + patch("core.utils.get_id_from_name") as get_id_mock: + get_id_mock.return_value = ret_pattern_id + test_pattern.tp_lib_path = new_tplib_path + test_pattern.copy_to_tplib() + copy_instance_mock.assert_called_once() + copy_dir_mock.assert_called_once() + expected_pattern_path = new_tplib_path / "JS" / expected_name + assert expected_pattern_path == test_pattern.path + + def test_to_dict(self): + test_pattern = create_pattern() + with patch("core.utils.get_relative_paths") as rel_path_mock: + rel_path_mock.return_value = None + + actual = test_pattern.to_dict() + expected = 
deepcopy(example_pattern_dict) + expected["instances"] = [None] + assert expected == actual + + def test_get_instance_by_id(self): + test_pattern = create_pattern() + instance = test_pattern.get_instance_by_id(1) + assert test_pattern.instances[0] == instance + + with pytest.raises(InstanceDoesNotExists) as e_info: + test_pattern.get_instance_by_id(2) + assert "Specified Pattern Instance `2` does not exists." in str(e_info) + + def test_get_description_from_file(self): + test_pattern = create_pattern() + expected_description = "Some description in a file\nTest description.\n\n" + with patch("builtins.open", mock_open(read_data=expected_description), create=True), \ + patch("pathlib.Path.is_file") as isfile_mock: + + isfile_mock.return_value = True + + is_file, actual = test_pattern.get_description() + assert is_file + assert expected_description.strip() == actual + + def test_get_description_(self): + test_pattern = create_pattern() + expected_description = "Some description in a file\nTest description." 
+ test_pattern.description = expected_description + with patch("pathlib.Path.is_file") as isfile_mock: + isfile_mock.return_value = False + + is_file, actual = test_pattern.get_description() + assert not is_file + assert expected_description.strip() == actual + -from core.exceptions import PatternDoesNotExists, PatternValueError -from core.pattern import Pattern, pattern_from_dict, get_pattern_path_by_pattern_id - - -def setup_three_pattern(tmp_path: Path): - language: str = "PHP" - tmp_tp_path: Path = tmp_path / language - tmp_tp_path.mkdir() - p1 = tmp_tp_path / "1_pattern_one" - p2 = tmp_tp_path / "2_pattern_two" - p3 = tmp_tp_path / "3_pattern_three" - p1.mkdir() - p2.mkdir() - p3.mkdir() - - return language, tmp_tp_path, p1, p2, p3 - - -class TestPattern: - - def test_pattern_init_with_id(self): - pattern = Pattern("TestName", "PHP", [], "FAMILY", "TestDesc", [], 1) - assert pattern.pattern_id == 1 - assert pattern.name == "TestName" - assert pattern.description == "TestDesc" - assert pattern.family == "FAMILY" - assert len(pattern.tags) == 0 - assert len(pattern.instances) == 0 - - def test_pattern_init_without_id(self, tmp_path): - language, tmp_tp_path, p1, p2, p3 = setup_three_pattern(tmp_path) - pattern = Pattern("TestName", language, [], "FAMILY", "TestDesc", [], pattern_dir=tmp_path) - assert pattern.pattern_id == 4 - assert pattern.name == "TestName" - assert pattern.description == "TestDesc" - assert pattern.family == "FAMILY" - assert len(pattern.tags) == 0 - assert len(pattern.instances) == 0 - - def test_pattern_init_without_id_and_empty_tp_library(self, tmp_path): - language: str = "PHP" - tmp_tp_path: Path = tmp_path / language - tmp_tp_path.mkdir() - pattern = Pattern("TestName", language, [], "FAMILY", "TestDesc", [], pattern_dir=tmp_path) - assert pattern.pattern_id == 1 - assert pattern.name == "TestName" - assert pattern.description == "TestDesc" - assert pattern.family == "FAMILY" - assert len(pattern.tags) == 0 - assert 
len(pattern.instances) == 0 - - def test_pattern_non_existing_language(self, tmp_path): - pattern: Pattern = Pattern("TestName", "JS", [], "FAMILY", "TestDesc", [], pattern_dir=tmp_path) - assert pattern.pattern_id == 1 - - def test_get_pattern_path_by_pattern_id(self, tmp_path): - language, tmp_tp_path, p1, p2, p3 = setup_three_pattern(tmp_path) - assert p3 == get_pattern_path_by_pattern_id(language, 3, tmp_path) - - def test_get_pattern_path_by_pattern_id_non_exist(self, tmp_path): - language, tmp_tp_path, p1, p2, p3 = setup_three_pattern(tmp_path) - with pytest.raises(PatternDoesNotExists): - get_pattern_path_by_pattern_id(language, 5, tmp_path) - - # TODO: to be fixed - @pytest.mark.skip() - def test_add_pattern_to_tp_library(self, tmp_path): - language: str = "PHP" - tmp_tp_path: Path = tmp_path / language - tmp_tp_path.mkdir() - p1 = tmp_tp_path / "1_pattern_one" - p1.mkdir() - - pattern: Pattern = Pattern("Pattern Two", language, [], "FAMILY", "TestDesc", [], pattern_dir=tmp_path) - pattern.add_pattern_to_tp_library(language, tmp_path, tmp_path) - - expected_new_pattern_path: Path = tmp_tp_path / "2_pattern_two" - expected_new_pattern_json_path: Path = expected_new_pattern_path / "2_pattern_two.json" - with open(expected_new_pattern_json_path) as json_file: - pattern_from_tp_lib = json.load(json_file) - - assert pattern.name == pattern_from_tp_lib["name"] - assert pattern.description == pattern_from_tp_lib["definition"] - assert len(pattern.instances) == len(pattern_from_tp_lib["instances"]) - - # TODO: to be fixed - @pytest.mark.skip() - def test_add_pattern_to_tp_library_new_language(self, tmp_path): - language: str = "JS" - tmp_tp_path: Path = tmp_path / language - - pattern: Pattern = Pattern("Pattern One JS", language, [], "FAMILY", "TestDesc", [], pattern_dir=tmp_path) - pattern.add_pattern_to_tp_library(language, tmp_path) - - expected_new_pattern_path: Path = tmp_tp_path / "1_pattern_one_js" - expected_new_pattern_json_path: Path = 
expected_new_pattern_path / "1_pattern_one_js.json" - with open(expected_new_pattern_json_path) as json_file: - pattern_from_tp_lib = json.load(json_file) - - assert pattern.name == pattern_from_tp_lib["name"] - assert pattern.description == pattern_from_tp_lib["definition"] - assert len(pattern.instances) == len(pattern_from_tp_lib["instances"]) - - # TODO: to be fixed - @pytest.mark.skip() - def test_add_new_instance_reference(self, tmp_path): - language: str = "JS" - tmp_tp_path: Path = tmp_path / language - - pattern: Pattern = Pattern("Pattern One JS", language, [], "FAMILY", "TestDesc", [], pattern_dir=tmp_path) - pattern.add_pattern_to_tp_library(language, tmp_path) - - pattern.add_new_instance_reference(language, tmp_path, "./new_instance_test") - - expected_new_pattern_path: Path = tmp_tp_path / "1_pattern_one_js" - expected_new_pattern_json_path: Path = expected_new_pattern_path / "1_pattern_one_js.json" - with open(expected_new_pattern_json_path) as json_file: - pattern_from_tp_lib = json.load(json_file) - - assert ["./new_instance_test"] == pattern_from_tp_lib["instances"] - - - def test_pattern_from_dict(self): - pattern_dict: Dict = { - "name": "Try Catch Finally", - "description": "", - "family": "None", - "tags": [], - "instances": [ - "./1_instance_52_try_catch_finally/1_instance_52_try_catch_finally.json", - "./2_instance_52_try_catch_finally/2_instance_52_try_catch_finally.json" - ] - } - pattern = pattern_from_dict(pattern_dict, "PHP", 1) - assert pattern.name == pattern_dict["name"] - assert pattern.pattern_id == 1 - assert pattern.language == "PHP" - assert pattern.instances == pattern_dict["instances"] - - - def test_pattern_from_dict_missing_non_mand_field(self): - pattern_dict: Dict = { - "name": "Try Catch Finally", - "instances": [ - "./1_instance_52_try_catch_finally/1_instance_52_try_catch_finally.json", - "./2_instance_52_try_catch_finally/2_instance_52_try_catch_finally.json" - ] - } - pattern = pattern_from_dict(pattern_dict, "PHP", 
1) - assert pattern.name == pattern_dict["name"] - assert pattern.pattern_id == 1 - assert pattern.language == "PHP" - assert pattern.instances == pattern_dict["instances"] - - - def test_pattern_from_dict_missing_mandatory_field(self): - # name is a mandatory field - pattern_dict: Dict = { - # "name": "Try Catch Finally", - "description": "", - "tags": [], - "instances": [ - "./1_instance_52_try_catch_finally/1_instance_52_try_catch_finally.json", - "./2_instance_52_try_catch_finally/2_instance_52_try_catch_finally.json" - ] - } - with pytest.raises(PatternValueError): - pattern_from_dict(pattern_dict, "PHP", 1) diff --git a/qualitytests/core/test_patternR.py b/qualitytests/core/test_patternR.py deleted file mode 100644 index 60a52f1..0000000 --- a/qualitytests/core/test_patternR.py +++ /dev/null @@ -1,105 +0,0 @@ -import pytest -from pathlib import Path -from unittest.mock import patch - -from core.pattern import Pattern -from core.exceptions import PatternDoesNotExists, PatternInvalid -from qualitytests.qualitytests_utils import join_resources_path - -class TestPatternR: - sample_tp_lib: Path = join_resources_path("sample_patlib") - - example_pattern_dict = { - "name": "Test Pattern", - "description": "./docs/description.md", - "family": "test_pattern", - "tags": ["sast", "language"], - "instances": [ - "./1_instance_1_test_pattern/1_instance_1_test_pattern.json" - ] - } - - not_existing_patterns = [(1000, "php"), (1000, "js"), (1000, "java")] - invalid_patterns = [ - (3, "php", {}, "The pattern needs a valid JSON file."), - (3, "php", {"name": "test_instances_key_in_json_missing"}, "Pattern 3 (PHP) - Pattern JSON file needs an 'instances' key with valid relative links."), - (3, "php", {"instances": ["test_instances_invalid_relative_path"]}, "Pattern 3 (PHP) - The instance path 'test_instances_invalid_relative_path' is not valid.") - ] - valid_patterns = [ - (1, "php", example_pattern_dict), - (1, "js", example_pattern_dict) - ] - - valid_patterns_without_id = [ 
- (Path("path_to_json_file"), "php", Path("pattern_path"), 5), - (Path("path_to_json_file"), "js", Path("pattern_path"), 3) - ] - - @pytest.mark.parametrize("pattern_id, language", not_existing_patterns) - def test_not_exising_pattern_init_from_id_and_language(self, pattern_id: int, language: str): - with pytest.raises(PatternDoesNotExists) as e_info: - Pattern.init_from_id_and_language(pattern_id, language, TestPatternR.sample_tp_lib) - assert f"Specified Pattern `{pattern_id}` does not exists." == str(e_info.value) - - @pytest.mark.parametrize("pattern_id, language, read_json_return, expected_assertion_error", invalid_patterns) - def test_init_invalid_pattern_from_id_and_language(self, - pattern_id: int, language: str, - read_json_return: dict, - expected_assertion_error: str): - with patch('core.utils.read_json') as read_json_mock, \ - pytest.raises(PatternInvalid) as e_info: - - read_json_mock.return_value = read_json_return - Pattern.init_from_id_and_language(pattern_id, language, TestPatternR.sample_tp_lib) - - read_json_mock.assert_called_once() - assert f"{expected_assertion_error} Pattern is invalid." 
== str(e_info.value) - - @pytest.mark.parametrize("path_to_json, language, pattern_path, expected_id", valid_patterns_without_id) - def test_init_from_json_file_without_pattern_id(self, path_to_json: Path, language: str, pattern_path: Path, expected_id: int): - with patch('core.utils.read_json') as read_json_mock, \ - patch('pathlib.Path.is_file') as is_file_mock, \ - patch("pathlib.Path.is_dir") as is_dir_mock, \ - patch("core.pattern.isinstance") as isinstance_mock, \ - patch('core.instance.Instance.init_from_json_path') as instance_init_mock: - - is_dir_mock.return_value = True - is_file_mock.return_value = True - isinstance_mock.return_value = True - read_json_mock.return_value = TestPatternR.example_pattern_dict - pattern = Pattern.init_from_json_file_without_pattern_id(path_to_json, language, pattern_path, TestPatternR.sample_tp_lib) - read_json_mock.assert_called_once() - is_file_mock.assert_called() - is_dir_mock.assert_called() - isinstance_mock.assert_called() - instance_init_mock.assert_called_once() - assert expected_id == pattern.pattern_id - assert path_to_json == pattern.pattern_json_path - assert pattern_path == pattern.pattern_path - assert language.upper() == pattern.language - - - @pytest.mark.parametrize("pattern_id, language, read_json_return", valid_patterns) - def test_init_valid_pattern_from_id_and_language(self, pattern_id: int, language: str, - read_json_return: dict): - with patch('core.utils.read_json') as read_json_mock, \ - patch('pathlib.Path.is_file') as is_file_mock, \ - patch("pathlib.Path.is_dir") as is_dir_mock, \ - patch("core.pattern.isinstance") as isinstance_mock, \ - patch('core.instance.Instance.init_from_json_path') as instance_init_mock: - - is_dir_mock.return_value = True - is_file_mock.return_value = True - isinstance_mock.return_value = True - read_json_mock.return_value = read_json_return - test_pattern = Pattern.init_from_id_and_language(pattern_id, language, TestPatternR.sample_tp_lib) - - 
read_json_mock.assert_called_once() - is_file_mock.assert_called() - is_dir_mock.assert_called() - isinstance_mock.assert_called() - instance_init_mock.assert_called_once() - assert "Test Pattern" == test_pattern.name - assert "./docs/description.md" == test_pattern.description - assert "test_pattern" == test_pattern.family - assert ["sast", "language"] == test_pattern.tags \ No newline at end of file diff --git a/qualitytests/core/test_pattern_repair.py b/qualitytests/core/test_pattern_repair.py index 4a45cc1..9894ff0 100644 --- a/qualitytests/core/test_pattern_repair.py +++ b/qualitytests/core/test_pattern_repair.py @@ -1,37 +1,144 @@ import pytest -from pathlib import Path from unittest.mock import patch -from core.pattern import Pattern from core.pattern_repair import PatternRepair -from qualitytests.qualitytests_utils import join_resources_path +from qualitytests.qualitytests_utils import join_resources_path, create_pattern, create_instance2 +class TestPatternRepair: + template_json_dict = { + "name": "Pattern Name", + "description": "", + "family": "code_pattern_LANG", + "tags": ["sast", "LANG"], + "instances": [ + "./IID_instance_ID_pattern_name/IID_instance_ID_pattern_name.json" + ], + "version": "v0.draft" + } + def _get_pattern_repair(self) -> PatternRepair: + test_pattern = create_pattern() + with patch("pathlib.Path.is_file") as is_file_mock, \ + patch("core.utils.read_json") as read_json_mock: + is_file_mock.return_value = True + read_json_mock.return_value = TestPatternRepair.template_json_dict -class MockedPattern: - def __init__(self) -> None: - self.tp_lib_path: Path = join_resources_path("sample_patlib") + repair_tool = PatternRepair(test_pattern) + read_json_mock.assert_called_once() + is_file_mock.assert_called_once() + return repair_tool -class TestPatternRepair: - mocked_pattern = MockedPattern() + def test_complete_instances_no_new_instance0(self): + test_repair_tool = self._get_pattern_repair() + base_path = test_repair_tool.to_repair.path + 
instance_path = test_repair_tool.to_repair.instances[0].path + with patch("core.utils.list_directories") as listdir_mock, \ + patch("core.utils.get_json_file") as get_json_file_mock, \ + patch("core.instance.Instance.init_from_json_path") as instance_mock, \ + patch("core.instance.Instance.set_new_instance_path") as i_set_instance_path_mock: - def test_repair_pattern_json(self): - with patch("pathlib.Path.is_file") as is_file_mock_init: - is_file_mock_init.return_value = True - pattern_repair = PatternRepair(TestPatternRepair.mocked_pattern) - - is_file_mock_init.assert_called_once() + listdir_mock.return_value = [instance_path] + test_repair_tool._complete_instances() + listdir_mock.assert_called_once_with(base_path) + get_json_file_mock.assert_not_called() + instance_mock.assert_not_called() + i_set_instance_path_mock.assert_not_called() - with patch("pathlib.Path.is_file") as is_file_mock, \ - patch("core.utils.get_pattern_json") as get_pattern_json_mock, \ - patch("shutil.copy") as copy_mock: - is_file_mock.return_value = False - get_pattern_json_mock.return_value = None + def test_complete_instances_no_new_instance1(self): + test_repair_tool = self._get_pattern_repair() + base_path = test_repair_tool.to_repair.path + instance_path = test_repair_tool.to_repair.instances[0].path + with patch("core.utils.list_directories") as listdir_mock, \ + patch("core.utils.get_json_file") as get_json_file_mock, \ + patch("core.instance.Instance.init_from_json_path") as instance_mock, \ + patch("core.instance.Instance.set_new_instance_path") as i_set_instance_path_mock: + + listdir_mock.return_value = [instance_path, base_path / "docs"] + get_json_file_mock.return_value = None + + test_repair_tool._complete_instances() + listdir_mock.assert_called_once_with(base_path) + get_json_file_mock.assert_called_once() + instance_mock.assert_not_called() + i_set_instance_path_mock.assert_not_called() + + def test_complete_instances_one_new_instance1(self): + sample_tp_lib = 
join_resources_path("sample_patlib") + test_repair_tool = self._get_pattern_repair() + test_instance = create_instance2() + base_path = test_repair_tool.to_repair.path + instance_path = test_repair_tool.to_repair.instances[0].path + with patch("core.utils.list_directories") as listdir_mock, \ + patch("core.utils.get_json_file") as get_json_file_mock, \ + patch("core.instance.Instance.init_from_json_path") as instance_mock, \ + patch("core.instance.Instance.set_new_instance_path") as i_set_instance_path_mock: + + listdir_mock.return_value = [instance_path, base_path / "2_instance_test_instance"] + get_json_file_mock.return_value = "some_path" + instance_mock.return_value = test_instance + + test_repair_tool._complete_instances() + listdir_mock.assert_called_once_with(base_path) + get_json_file_mock.assert_called_once() + instance_mock.assert_called_once_with("some_path", 1, "JS", sample_tp_lib) + i_set_instance_path_mock.assert_called_once_with(sample_tp_lib / "JS" / "2_uri" / "1_instance_1_unset_element_array") - pattern_repair.repair_pattern_json() + def test_repair_name(self): + test_repair_tool = self._get_pattern_repair() + test_repair_tool.to_repair.name = "Test" + test_repair_tool._repair_name() + assert "Unset Element Array" == test_repair_tool.to_repair.name + + repair_description_testcases = [ + ((True, ""), (True, ""), True, False, False), + ((True, "Some description in file"), (True, ""), False, False, False), + ((False, "Short description in JSON"), (False, ""), False, False, False), + ((False, "A"*141), (False, ""), False, True, True), + ((False, "A"*140), (False, ""), False, False, False), + ((False, "Same description"), (False, "Same description"), False, True, False) + ] + + @pytest.mark.parametrize("pattern_description_ret, instance_description_ret, should_warn, should_info, should_open", repair_description_testcases) + def test_repair_description(self, pattern_description_ret, instance_description_ret, should_warn, should_info, should_open): + 
test_repair_tool = self._get_pattern_repair() + + with patch("core.pattern.Pattern.get_description") as get_pattern_description_mock, \ + patch("core.instance.Instance.get_description") as get_instance_description_mock, \ + patch("core.pattern_repair.logger.warn") as warn_logger, \ + patch("core.pattern_repair.logger.info") as info_logger, \ + patch("pathlib.Path.mkdir") as mkdir_mock, \ + patch("builtins.open") as open_mock: + + get_pattern_description_mock.return_value = pattern_description_ret + get_instance_description_mock.return_value = instance_description_ret + + test_repair_tool._repair_description() + get_pattern_description_mock.assert_called_once() + get_instance_description_mock.assert_called() if not should_warn else get_instance_description_mock.assert_not_called() + open_mock.assert_called_once() if should_open else open_mock.assert_not_called() + mkdir_mock.assert_called_once() if should_open else mkdir_mock.assert_not_called() + warn_logger.assert_called_once() if should_warn else warn_logger.assert_not_called() + info_logger.assert_called() if should_info else info_logger.assert_not_called() + + def test_repair_tags(self): + test_repair_tool = self._get_pattern_repair() - is_file_mock.assert_called_once() - get_pattern_json_mock.assert_called_once() - copy_mock.assert_called_once() + test_repair_tool.to_repair.tags = [] + test_repair_tool._repair_tags() + assert ["JS", "sast"] == test_repair_tool.to_repair.tags + + test_repair_tool.to_repair.tags = ["sast", "LANG"] + test_repair_tool._repair_tags() + assert ["JS", "sast"] == test_repair_tool.to_repair.tags + + test_repair_tool.to_repair.tags = ["sast", "js"] + test_repair_tool._repair_tags() + assert ["JS", "sast"] == test_repair_tool.to_repair.tags + + test_repair_tool.to_repair.tags = ["sast", "JS"] + test_repair_tool._repair_tags() + assert ["JS", "sast"] == test_repair_tool.to_repair.tags - \ No newline at end of file + test_repair_tool.to_repair.tags = ["sast", "Js"] + 
test_repair_tool._repair_tags() + assert ["JS", "sast"] == test_repair_tool.to_repair.tags diff --git a/qualitytests/core/test_repair_tool.py b/qualitytests/core/test_repair_tool.py new file mode 100644 index 0000000..8eb11fc --- /dev/null +++ b/qualitytests/core/test_repair_tool.py @@ -0,0 +1,198 @@ +import pytest +from pathlib import Path +from unittest.mock import patch + +from core.pattern import Pattern +from core.repair_tool import RepairTool +from core.exceptions import PatternRepairError +from qualitytests.qualitytests_utils import join_resources_path, create_pattern, create_instance + + +class TestRepairTool: + pattern = create_pattern() + tp_lib: Path = join_resources_path("sample_patlib") + template_json_dict = { + "name": "Pattern Name", + "description": "", + "family": "code_pattern_LANG", + "tags": ["sast", "LANG"], + "instances": [ + "./IID_instance_ID_pattern_name/IID_instance_ID_pattern_name.json" + ], + "version": "v0.draft" + } + + def test_init_pattern_repair1(self): + with patch("pathlib.Path.is_file") as is_file_mock, \ + pytest.raises(PatternRepairError) as e_info: + is_file_mock.return_value = False + + RepairTool(TestRepairTool.pattern, Path(".")) + is_file_mock.assert_called_once() + # logger.assert_called_once() + assert "PatternRepair (JS - p1) No template JSON found in" in str(e_info) + + def test_init_pattern_repair2(self): + with patch("pathlib.Path.is_file") as is_file_mock, \ + patch("core.utils.read_json") as read_json_mock, \ + pytest.raises(PatternRepairError) as e_info: + is_file_mock.return_value = True + read_json_mock.return_value = {} + + RepairTool(TestRepairTool.pattern, Path(".")) + is_file_mock.assert_called_once() + read_json_mock.assert_called_once() + assert "PatternRepair (JS - p1) The template JSON" in str(e_info) and " is empty" in str(e_info) + + def test_copy_template(self): + with patch("pathlib.Path.is_file") as is_file_mock, \ + patch("core.utils.read_json") as read_json_mock, \ + 
patch("core.repair_tool.logger.info") as logger, \ + patch("shutil.copy") as copy_file_mock: + + is_file_mock.return_value = True + read_json_mock.return_value = TestRepairTool.template_json_dict + + RepairTool(TestRepairTool.pattern, Path("."))._copy_template() + + logger.assert_called_once_with("PatternRepair (JS - p1) Copying template JSON.") + copy_file_mock.assert_called_once() + + ensure_json_file_exist_testcases = [ + (False, "test_pattern_path", {"name": "test"}, False, False), + (False, None, {"name": "test"}, True, False), + (True, "", {"name": "test"}, False, False), + (True, "", {"name": "test"}, False, True), + ] + + @pytest.mark.parametrize("is_file_mock_ret, get_pattern_json_ret, read_json_ret, should_call_copy, should_rename_json", ensure_json_file_exist_testcases) + def test_ensure_json_file_exists(self, is_file_mock_ret: bool, + get_pattern_json_ret: Path | None, + read_json_ret: dict | None, + should_call_copy: bool, + should_rename_json: bool): + with patch("pathlib.Path.is_file") as is_file_mock, \ + patch("core.utils.read_json") as read_json_mock, \ + patch("core.repair_tool.logger.info"), \ + patch("core.utils.get_json_file") as get_pattern_json_mock, \ + patch("core.utils.read_json") as read_json_mock, \ + patch("core.utils.write_json") as write_json_mock, \ + patch("shutil.copy") as copy_template_mock, \ + patch("shutil.move") as move_mock: + + is_file_mock.return_value = True + read_json_mock.return_value = TestRepairTool.template_json_dict + + repair_tool = RepairTool(TestRepairTool.pattern, Path(".")) + json_path = get_pattern_json_ret if get_pattern_json_ret else repair_tool.to_repair.json_path + is_file_mock.reset_mock() + is_file_mock.return_value = is_file_mock_ret + get_pattern_json_mock.return_value = get_pattern_json_ret + read_json_mock.return_value = read_json_ret + + if should_rename_json: + repair_tool.to_repair.json_path = repair_tool.to_repair.json_path.parent / "test_json.json" + json_path = 
repair_tool.to_repair.json_path.parent / "test_json.json" + + repair_tool._ensure_json_file_exists() + if should_call_copy: + copy_template_mock.assert_called_once() + if should_rename_json: + move_mock.assert_called_once() + else: + move_mock.assert_not_called() + is_file_mock.assert_called_once() + read_json_mock.assert_called_with(json_path) + write_json_mock.assert_called_once() + expected_dict = TestRepairTool.template_json_dict + expected_dict["name"] = "test" + assert expected_dict == write_json_mock.call_args.args[1] + + def test_to_json1(self): + with patch("pathlib.Path.is_file") as is_file_mock, \ + patch("core.utils.read_json") as read_json_mock, \ + patch("core.pattern.Pattern.to_dict") as to_dict_mock, \ + patch("core.utils.write_json") as write_json_mock: + + is_file_mock.return_value = True + read_json_mock.return_value = TestRepairTool.template_json_dict + + repair_tool = RepairTool(TestRepairTool.pattern, Path(".")) + + read_json_mock.reset_mock() + read_json_mock.return_value = {} + to_dict_mock.return_value = {} + repair_tool.to_json() + read_json_mock.assert_called_once() + to_dict_mock.assert_called_once() + write_json_mock.assert_not_called() + + def test_to_json2(self): + with patch("pathlib.Path.is_file") as is_file_mock, \ + patch("core.utils.read_json") as read_json_mock, \ + patch("core.pattern.Pattern.to_dict") as to_dict_mock, \ + patch("core.utils.write_json") as write_json_mock: + + is_file_mock.return_value = True + read_json_mock.return_value = TestRepairTool.template_json_dict + + repair_tool = RepairTool(TestRepairTool.pattern, Path(".")) + + read_json_mock.reset_mock() + read_json_mock.return_value = {"name": "test"} + to_dict_mock.return_value = {} + repair_tool.to_json() + read_json_mock.assert_called_once() + to_dict_mock.assert_called_once() + write_json_mock.assert_called_once() + + def test_check_paths_pattern_exist_all_correct(self): + with patch("pathlib.Path.is_file") as is_file_mock, \ + patch("core.utils.read_json") 
as read_json_mock, \ + patch("core.repair_tool.logger.warning") as warn_logger_mock: + + is_file_mock.return_value = True + read_json_mock.return_value = TestRepairTool.template_json_dict + + repair_tool_pattern = RepairTool(TestRepairTool.pattern, Path(".")) + + repair_tool_pattern._check_paths_exists() + warn_logger_mock.assert_not_called() + + def check_path_instance_exist_all_correct(self): + test_instance = create_instance() + with patch("pathlib.Path.is_file") as is_file_mock, \ + patch("core.utils.read_json") as read_json_mock, \ + patch("core.repair_tool.logger.warning") as warn_logger_mock: + + is_file_mock.return_value = True + read_json_mock.return_value = TestRepairTool.template_json_dict + + repair_tool_instance = RepairTool(test_instance, Path(".")) + + repair_tool_instance._check_paths_exists() + warn_logger_mock.assert_not_called() + + def check_path_instance_exist_non_correct(self): + test_instance = create_instance() + with patch("pathlib.Path.is_file") as is_file_mock, \ + patch("pathlib.Path.exists") as exist_mock, \ + patch("core.utils.read_json") as read_json_mock, \ + patch("core.repair_tool.logger.warning") as warn_logger_mock: + + is_file_mock.return_value = True + read_json_mock.return_value = TestRepairTool.template_json_dict + exist_mock.return_value = False + + repair_tool_instance = RepairTool(test_instance, Path(".")) + + repair_tool_instance._check_paths_exists() + warn_logger_mock.assert_called() + assert test_instance.code_path is None + assert test_instance.expectation_sink_file is None + assert test_instance.expectation_source_file is None + assert test_instance.compile_binary is None + assert test_instance.discovery_rule is None + + + diff --git a/qualitytests/core/test_utils.py b/qualitytests/core/test_utils.py index 417da83..b790ee5 100644 --- a/qualitytests/core/test_utils.py +++ b/qualitytests/core/test_utils.py @@ -33,31 +33,6 @@ def test_check_tp_lib_2(self, tmp_path): utils.check_tp_lib(tmp_path) - def 
test_list_pattern_paths_for_language(self, tmp_path): - language, tmp_tp_path, p1, p2, p3 = setup_three_pattern(tmp_path) - path_list_expected = [p1, p2, p3] - - path_list = utils.list_pattern_paths_for_language(language, tmp_path) - assert sorted(path_list) == sorted(path_list_expected) - - - def test_list_pattern_paths_for_language_void_dir(self, tmp_path): - language: str = "PHP" - tmp_tp_path: Path = tmp_path / language - tmp_tp_path.mkdir() - - path_list_expected = [] - - path_list = utils.list_pattern_paths_for_language(language, tmp_path) - assert sorted(path_list) == sorted(path_list_expected) - - - def test_list_pattern_paths_for_non_existing_language(self, tmp_path): - language: str = "PHP" - with pytest.raises(LanguageTPLibDoesNotExist): - utils.list_pattern_paths_for_language(language, tmp_path) - - # TODO: to be fixed, misses the json file @pytest.mark.skip() def test_list_pattern_instances_by_pattern_id(self, tmp_path): @@ -93,22 +68,6 @@ def test_get_or_create_tp_lib_for_lang_existing_folder(self, tmp_path): path_tp_language_act = utils.get_or_create_language_dir(language, tmp_path) assert path_tp_language_exp.is_dir() == path_tp_language_act.is_dir() - - def test_get_or_create_pattern_dir_existing_lang_dir(self, tmp_path): - language, tmp_tp_path, p1, p2, p3 = setup_three_pattern(tmp_path) - path_pattern_exp = tmp_tp_path / "4_pattern_four" - path_pattern_act = utils.get_or_create_pattern_dir(language, 4, "Pattern Four", tmp_path) - assert path_pattern_exp.is_dir() == path_pattern_act.is_dir() - - - def test_get_or_create_pattern_dir_non_existing_lang_dir(self, tmp_path): - language: str = "PHP" - tmp_tp_path: Path = tmp_path / language - path_pattern_exp = tmp_tp_path / "1_pattern_one" - path_pattern_act = utils.get_or_create_pattern_dir(language, 1, "Pattern One", tmp_path) - assert path_pattern_exp.is_dir() == path_pattern_act.is_dir() - - def test_get_last_measurement_for_pattern_instance(self, tmp_path): m1: Path = tmp_path / 
"measurement-2022-03-24_10-28-00.json" m2: Path = tmp_path / "measurement-2022-04-10_12-25-00.json" @@ -193,25 +152,6 @@ def test_get_pattern_dir_from_id(self): utils.get_pattern_dir_from_id(99, "PHP", tp_lib) - def test_get_instance_dir_from_id(self): - tp_path = qualitytests_utils.join_resources_path("sample_patlib") / "PHP" / "3_global_array" - assert utils.get_instance_dir_from_id(1, tp_path).name == "1_instance_3_global_array" - assert utils.get_instance_dir_from_id(2, tp_path).name == "2_instance_3_global_array" - with pytest.raises(Exception): - utils.get_instance_dir_from_id(3, tp_path) - - - def test_get_tpi_id_from_jsonpath(self): - jp = qualitytests_utils.join_resources_path( - "sample_patlib") / "PHP" / "3_global_array" / "1_instance_3_global_array" / "1_instance_3_global_array.json" - assert utils.get_tpi_id_from_jsonpath(jp) == 1 - jp = qualitytests_utils.join_resources_path( - "sample_patlib") / "PHP" / "3_global_array" / "1_instance_3_global_array" / "111_instance_3_global_array.json" - assert utils.get_tpi_id_from_jsonpath(jp) == 1 - jp = qualitytests_utils.join_resources_path( - "sample_patlib") / "PHP" / "3_global_array" / "2_instance_3_global_array" / "111_instance_3_global_array.json" - assert utils.get_tpi_id_from_jsonpath(jp) == 2 - next_free_pattern_id_test_cases = [ ([Path('1_instance_test_pattern'), Path('2_instance_test_pattern')], 3, 1), ([Path('1_instance_test_pattern'), Path('3_instance_test_pattern')], 2, 1), diff --git a/qualitytests/qualitytests_utils.py b/qualitytests/qualitytests_utils.py index e850519..65fccb0 100644 --- a/qualitytests/qualitytests_utils.py +++ b/qualitytests/qualitytests_utils.py @@ -2,6 +2,7 @@ import subprocess from pathlib import Path from typing import Dict +from unittest.mock import patch import shutil pyexe = sys.executable @@ -13,6 +14,55 @@ resource_path = "resources" cpg_binary_rel_path = "sample_joern/cpg_binary.bin" +example_instance_dict = { + "description": "Some description", + "code": { + "path": 
"", + "injection_skeleton_broken": True + }, + "discovery": { + "rule": "", + "method": "joern", + "rule_accuracy": "Perfect", + "notes": "Some notes" + }, + "remediation": { + "notes": "./docs/remediation_notes.md", + "transformation": None, + "modeling_rule": None + }, + "compile": { + "binary": "", + "dependencies": None, + "instruction": None + }, + "expectation": { + "type": "xss", + "sink_file": "", + "sink_line": 5, + "source_file": "", + "source_line": 9, + "expectation": True + }, + "properties": { + "category": "S0", + "feature_vs_internal_api": "FEATURE", + "input_sanitizer": False, + "source_and_sink": False, + "negative_test_case": False + } + } + +example_pattern_dict = { + "name": "Test Pattern", + "description": "./docs/description.md", + "family": "test_pattern", + "tags": ["sast", "language"], + "instances": [ + "./1_instance_1_test_pattern/1_instance_1_test_pattern.json" + ], + "version": "v0.draft" + } def join_resources_path(relativepath): dirname = Path(__file__).parent.resolve() @@ -100,3 +150,77 @@ def init_sastreport_test(init, mocker): # "tool_interface": "qualitytests.core.sast_test.SastTest" # } # mocker.patch("core.utils.load_sast_specific_config", return_value=mocked_tool_interface) + +def create_instance(): + from core.instance import Instance + sample_tp_lib = join_resources_path("sample_patlib") + with patch('pathlib.Path.is_file') as is_file_mock, \ + patch("pathlib.Path.is_dir") as is_dir_mock: + + is_file_mock.return_value = True + # read_json_mock.return_value = example_instance_dict + json_path = sample_tp_lib / "JS" / "1_unset_element_array" / "1_instance_1_unset_element_array" / "1_instance_1_unset_element_array.json" + test_instance = Instance.init_from_json_path(json_path, 1, "js", sample_tp_lib) + + # read_json_mock.assert_called_once() + is_file_mock.assert_called() + is_dir_mock.assert_called() + return test_instance + + +def create_instance2(): + from core.instance import Instance + sample_tp_lib = 
join_resources_path("sample_patlib") + with patch('pathlib.Path.is_file') as is_file_mock, \ + patch("pathlib.Path.is_dir") as is_dir_mock: + + is_file_mock.return_value = True + # read_json_mock.return_value = example_instance_dict + json_path = sample_tp_lib / "JS" / "2_uri" / "1_instance_2_uri" / "1_instance_2_uri.json" + test_instance = Instance.init_from_json_path(json_path, 1, "js", sample_tp_lib) + + # read_json_mock.assert_called_once() + is_file_mock.assert_called() + is_dir_mock.assert_called() + return test_instance + + +def create_instance_php(): + from core.instance import Instance + sample_tp_lib = join_resources_path("sample_patlib") + with patch('pathlib.Path.is_file') as is_file_mock, \ + patch("pathlib.Path.is_dir") as is_dir_mock: + + is_file_mock.return_value = True + # read_json_mock.return_value = example_instance_dict + json_path = sample_tp_lib / "PHP" / "1_static_variables" / "1_instance_1_static_variables" / "1_instance_1_static_variables.json" + test_instance = Instance.init_from_json_path(json_path, 1, "php", sample_tp_lib) + + # read_json_mock.assert_called_once() + is_file_mock.assert_called() + is_dir_mock.assert_called() + return test_instance + +def create_pattern(): + from core.pattern import Pattern + sample_tp_lib = join_resources_path("sample_patlib") + test_instance = create_instance() + with patch('core.utils.read_json') as read_json_mock, \ + patch('pathlib.Path.is_file') as is_file_mock, \ + patch("pathlib.Path.is_dir") as is_dir_mock, \ + patch("core.pattern.isinstance") as isinstance_mock, \ + patch('core.instance.Instance.init_from_json_path') as instance_init_mock: + + is_dir_mock.return_value = True + is_file_mock.return_value = True + isinstance_mock.return_value = True + read_json_mock.return_value = example_pattern_dict + instance_init_mock.return_value = test_instance + test_pattern = Pattern.init_from_id_and_language(1, "JS", sample_tp_lib) + + read_json_mock.assert_called_once() + is_file_mock.assert_called() + 
is_dir_mock.assert_called() + isinstance_mock.assert_called() + instance_init_mock.assert_called_once() + return test_pattern diff --git a/tp_framework/cli/interface.py b/tp_framework/cli/interface.py index 305ef8f..14e4371 100644 --- a/tp_framework/cli/interface.py +++ b/tp_framework/cli/interface.py @@ -24,7 +24,7 @@ def add_pattern(pattern_dir: str, language: str, measure: bool, tools: list[Dict print(errors.patternFolderNotFound(pattern_dir_path)) return - pattern_json_path = Path(pattern_json) if pattern_json else utils.get_pattern_json(pattern_dir_path) + pattern_json_path = Path(pattern_json) if pattern_json else utils.get_json_file(pattern_dir_path) if not pattern_json_path: print(errors.patternDefaultJSONNotFound(pattern_dir)) return @@ -191,9 +191,15 @@ def repair_patterns(language: str, pattern_ids: list, output_dir.mkdir(exist_ok=True, parents=True) utils.add_loggers(output_dir) - # for pattern_id in pattern_ids: - # pattern = Pattern.init_from_id_and_language(pat) # (pattern_id, language, tp_lib_path) + for tp_id in pattern_ids: + try: + pattern = Pattern.init_from_id_and_language(tp_id, language, tp_lib_path) + except PatternInvalid as e: + print(f"Failed to init pattern: {tp_id} due to {e}") + continue + pattern.repair() # print(pattern) + # # pattern_path = get_pattern_path_by_pattern_id(language, pattern_id, tp_lib_path) # # PatternRepair( # # pattern_path, diff --git a/tp_framework/cli/tpf_commands.py b/tp_framework/cli/tpf_commands.py index bf3b4e6..1d8f741 100644 --- a/tp_framework/cli/tpf_commands.py +++ b/tp_framework/cli/tpf_commands.py @@ -591,8 +591,7 @@ def execute_command(self, args): language: str = args.language.upper() tp_lib_path: str = parse_tp_lib(args.tp_lib) l_pattern_id = sorted(parse_patterns(args.all_patterns, args.pattern_range, args.patterns, - tp_lib_path, - language)) + tp_lib_path, language, init_patterns=False)) output_dir: Path = parse_dir_or_file(args.output_dir) measurement_results: Path = 
parse_dir_or_file(args.measurement_dir, config.MEASUREMENT_REL_DIR, "Measurement directory") checkdiscoveryrules_results: Path = parse_dir_or_file(args.checkdiscoveryrules_file, "checkdiscoveryrules.csv", "Checkdiscoveryrules csv file") @@ -648,7 +647,7 @@ def parse_tool_list(tools: list[str]): exit(1) -def parse_patterns(all_patterns: bool, pattern_range: str, patterns, tp_lib_path: Path, language: str): +def parse_patterns(all_patterns: bool, pattern_range: str, patterns, tp_lib_path: Path, language: str, init_patterns: bool = True): # is this necessary? Should be ensured by `.add_mutually_exclusive_group(required=True)` in the parser try: assert sum(bool(e) for e in [all_patterns, pattern_range, patterns]) == 1 # these elements are in mutual exclusion @@ -680,7 +679,7 @@ def parse_patterns(all_patterns: bool, pattern_range: str, patterns, tp_lib_path # return only the pattern_id, to be compatible with current implementation # TODO: refactor to use the Pattern instances instead of the ids return sorted([Pattern.init_from_id_and_language(idx, language, tp_lib_path).pattern_id \ - for idx in id_list]) + for idx in id_list]) if init_patterns else id_list def parse_dir_or_file(path_to_file_or_dir: str, diff --git a/tp_framework/core/analysis.py b/tp_framework/core/analysis.py index f0a079e..bea4965 100644 --- a/tp_framework/core/analysis.py +++ b/tp_framework/core/analysis.py @@ -7,7 +7,6 @@ from core import loggermgr logger = logging.getLogger(loggermgr.logger_name(__name__)) -import core.utils from core import utils from core.instance import Instance from core.measurement import Measurement @@ -45,7 +44,7 @@ async def analyze_pattern_instance(instance: Instance, # TODO: what about using the sast_job object in the queue? 
InQueue().put_nowait((job_id, tool_name, tool_version, instance, date, - sast.launcher(instance.instance_path, language, output_dir, lib_dir=lib_dir, measurement=True))) + sast.launcher(instance.path, language, output_dir, lib_dir=lib_dir, measurement=True))) l_status_tpi.append(sast_job) except Exception as e: logger.warning(f"SAST measurement - failed for pattern {instance.pattern_id} instance {instance.instance_id} with tool {tool}. Instance will be ignored. Exception raised: {utils.get_exception_message(e)}") diff --git a/tp_framework/core/discovery.py b/tp_framework/core/discovery.py index 4fb5bcd..9e90fbb 100644 --- a/tp_framework/core/discovery.py +++ b/tp_framework/core/discovery.py @@ -19,7 +19,7 @@ from core.measurement import Measurement from core.instance import Instance #, instance_from_dict, load_instance_from_metadata -from core.pattern import get_pattern_by_pattern_id, Pattern +from core.pattern import Pattern # mand_finding_joern_keys = ["filename", "methodFullName", "lineNumber"] mand_finding_joern_keys = ["filename", "lineNumber"] @@ -327,7 +327,7 @@ def discovery_under_measurement(cpg: Path, l_tp_id: list[int], tp_lib: Path, ito f"{msgpre}No measurements for this instance. {msgpost}") d_res_tpi[tpi.instance_id] = { "measurement": "not_found", - "jsonpath": tpi.instance_json_path + "jsonpath": tpi.json_path } continue l_last_meas = measurement.load_measurements(utils.get_last_measurement_for_pattern_instance(meas_tpi_path), @@ -339,14 +339,14 @@ def discovery_under_measurement(cpg: Path, l_tp_id: list[int], tp_lib: Path, ito f"{msgpre}No measurements of the tools specified ({[t['name'] + ':' + t['version'] for t in tools]}) for the instance. 
{msgpost}") d_res_tpi[tpi.instance_id] = { "measurement": "not_found", - "jsonpath": tpi.instance_json_path + "jsonpath": tpi.json_path } continue tpi_instance = meas_tpi_by_tools[0].instance d_tpi = { "instance": tpi_instance, "measurement": "supported", - "jsonpath": tpi.instance_json_path, + "jsonpath": tpi.json_path, "discovery": {} } # discovery continue iff at least one tool not supporting the tpi @@ -363,7 +363,7 @@ def discovery_under_measurement(cpg: Path, l_tp_id: list[int], tp_lib: Path, ito d_tpi["measurement"] = "not_supported" # discovery per tpi measurement_stop: bool = d_tpi["measurement"] not in ["ignore", "not_supported"] - d_tpi["discovery"] = discovery_for_tpi(tpi_instance, tpi.instance_json_path, cpg, disc_output_dir, + d_tpi["discovery"] = discovery_for_tpi(tpi_instance, tpi.json_path, cpg, disc_output_dir, measurement_stop=measurement_stop, already_executed=d_dr_executed) d_res_tpi[tpi.instance_id] = d_tpi d_res[tp_id]["instances"] = d_res_tpi @@ -390,7 +390,7 @@ def discovery_ignore_measurement(cpg: Path, l_tp_id: list[int], tp_lib: Path, d_res_tpi = {} d_dr_executed = {} for instance in target_pattern.instances: - tpi_json_path = instance.instance_json_path + tpi_json_path = instance.json_path d_tpi = {"instance": instance, "measurement": "ignored", "jsonpath": tpi_json_path, "discovery": discovery_for_tpi(instance, tpi_json_path, cpg, disc_output_dir, measurement_stop=False, already_executed=d_dr_executed)} @@ -699,7 +699,7 @@ def get_check_discovery_rule_result(pattern: Pattern, instance: Instance | None= return { "pattern_id": pattern.pattern_id, "instance_id": instance.instance_id if instance else None, - "instance_path": instance.instance_path if instance else None, + "instance_path": instance.path if instance else None, "pattern_name": pattern.name, "language": pattern.language, "discovery_rule": instance.discovery_rule if instance else None, @@ -753,7 +753,7 @@ def check_discovery_rules(language: str, l_tp_id: list[int], err += 1 
continue - target_src = instance.instance_path + target_src = instance.path build_name, disc_output_dir = utils.get_operation_build_name_and_dir( "check_discovery_rules", target_src, language, output_dir) @@ -781,7 +781,7 @@ def check_discovery_rules(language: str, l_tp_id: list[int], )) except Exception as e: logger.warning( - f"Something went wrong for the instance at {instance.instance_path} of the pattern id {tp_id}. Exception raised: {utils.get_exception_message(e)}") + f"Something went wrong for the instance at {instance.path} of the pattern id {tp_id}. Exception raised: {utils.get_exception_message(e)}") res = get_check_discovery_rule_result(pattern=target_pattern, instance=instance) results.append(res) err += 1 diff --git a/tp_framework/core/instance.py b/tp_framework/core/instance.py index b53ca6d..ffe261e 100644 --- a/tp_framework/core/instance.py +++ b/tp_framework/core/instance.py @@ -1,23 +1,28 @@ +import shutil from pathlib import Path +from typing import Tuple from core import utils from core.exceptions import InstanceInvalid +from core.instance_repair import InstanceRepair class Instance: @classmethod - def init_from_json_path(cls, path_to_instance_json: Path, pattern_id: int, language: str): + def init_from_json_path(cls, path_to_instance_json: Path, + pattern_id: int, language: str, tp_lib_path: Path): if not path_to_instance_json.is_file(): raise InstanceInvalid(f"The provided instance path '{path_to_instance_json}' does not exist.") - return cls._init_from_json(cls(), path_to_instance_json, pattern_id, language) + return cls._init_from_json(cls(), path_to_instance_json, pattern_id, language, tp_lib_path) def __init__(self) -> None: - self.instance_path = None - self.instance_json_path = None + self.path = None + self.json_path = None self.instance_id = None self.pattern_id = None self.language = None self.name = None self.pattern = None + self.tp_lib_path = None # JSON fields self.description = None @@ -49,72 +54,125 @@ def 
_assert_instance(self): try: int(self.instance_id) assert self.language.isupper() - assert self.instance_path.is_dir() - assert self.instance_json_path.is_file() - assert self.code_path.is_file() + assert self.path.is_dir() + assert self.json_path.is_file() except Exception as e: raise InstanceInvalid(f"{self._log_prefix()}Instance Variables are not properly set. '{e}'") - def _init_from_json(self, path_to_instance_json: Path, pattern_id: int, language: str): - self.instance_path = path_to_instance_json.parent - self.name = self.instance_path.name - self.instance_json_path = Path(path_to_instance_json.name) + def _init_from_json(self, path_to_instance_json: Path, pattern_id: int, language: str, tp_lib_path: Path): + self.path = path_to_instance_json.parent + self.name = self.path.name + self.json_path = Path(path_to_instance_json.name) self.language = language.upper() + self.tp_lib_path = tp_lib_path try: - self.instance_id = utils.get_id_from_name(self.instance_path.name) + self.instance_id = utils.get_id_from_name(self.path.name) except Exception as e: - raise InstanceInvalid(f"Could not get id from '{self.instance_path.name}'.") - + raise InstanceInvalid(f"Could not get id from '{self.path.name}'.") self.pattern_id = pattern_id - instance_properties = utils.read_json(self.instance_json_path) + instance_properties = utils.read_json(self.json_path) if not instance_properties: - raise InstanceInvalid(f"{self._log_prefix()}Please check {self.instance_json_path}.") + raise InstanceInvalid(f"{self._log_prefix()}Please check {self.json_path}.") self.description = instance_properties.get("description", None) - self.code_path = utils.get_path_or_none(instance_properties.get("code", {}).get("path", None)) - self.code_injection_skeleton_broken = instance_properties.get("code", {}).get("injection_skeleton_broken", None) - self.expectation_type = instance_properties.get("expectation", {}).get("type", None) - self.expectation_sink_file = 
utils.get_path_or_none(instance_properties.get("expectation", {}).get("sink_file", None)) - self.expectation_sink_line = instance_properties.get("expectation", {}).get("sink_line", None) - self.expectation_source_file = utils.get_path_or_none(instance_properties.get("expectation", {}).get("source_file", None)) - self.expectation_source_line = instance_properties.get("expectation", {}).get("source_line", None) - self.expectation_expectation = instance_properties.get("expectation", {}).get("expectation", None) - self.compile_binary = utils.get_path_or_none(instance_properties.get("compile", {}).get("binary", None)) - self.compile_instruction = instance_properties.get("compile", {}).get("instruction", None) - self.compile_dependencies = instance_properties.get("compile", {}).get("dependencies", None) - self.discovery_rule = utils.get_path_or_none(instance_properties.get("discovery", {}).get("rule", None)) - self.discovery_method = instance_properties.get("discovery", {}).get("method", None) - self.discovery_rule_accuracy = instance_properties.get("discovery", {}).get("rule_accuracy", None) - self.discovery_notes = instance_properties.get("discovery", {}).get("notes", None) - self.properties_category = instance_properties.get("properties", {}).get("category", None) - self.properties_feature_vs_internal_api = instance_properties.get("properties", {}).get("feature_vs_internal_api", None) - self.properties_input_sanitizer = instance_properties.get("properties", {}).get("input_sanitizer", None) - self.properties_source_and_sink = instance_properties.get("properties", {}).get("source_and_sink", None) - self.properties_negative_test_case = instance_properties.get("properties", {}).get("negative_test_case", None) - self.remediation_notes = instance_properties.get("remediation", {}).get("notes", None) - self.remediation_transformation = instance_properties.get("remediation", {}).get("transformation", None) - self.remediation_modeling_rule = 
instance_properties.get("remediation", {}).get("modeling_rule", None) + self.code_path = utils.get_path_or_none(utils.get_from_dict(instance_properties, "code", "path")) + self.code_injection_skeleton_broken = utils.get_from_dict(instance_properties, "code", "injection_skeleton_broken") + self.expectation_type = utils.get_from_dict(instance_properties, "expectation", "type") + self.expectation_sink_file = utils.get_path_or_none(utils.get_from_dict(instance_properties, "expectation", "sink_file")) + self.expectation_sink_line = utils.get_from_dict(instance_properties, "expectation", "sink_line") + self.expectation_source_file = utils.get_path_or_none(utils.get_from_dict(instance_properties, "expectation", "source_file")) + self.expectation_source_line = utils.get_from_dict(instance_properties, "expectation", "source_line") + self.expectation_expectation = utils.get_from_dict(instance_properties, "expectation", "expectation") + self.compile_binary = utils.get_path_or_none(utils.get_from_dict(instance_properties, "compile", "binary")) + self.compile_instruction = utils.get_from_dict(instance_properties, "compile", "instruction") + self.compile_dependencies = utils.get_from_dict(instance_properties, "compile", "dependencies") + self.discovery_rule = utils.get_path_or_none(utils.get_from_dict(instance_properties, "discovery", "rule")) + self.discovery_method = utils.get_from_dict(instance_properties, "discovery", "method") + self.discovery_rule_accuracy = utils.get_from_dict(instance_properties, "discovery", "rule_accuracy") + self.discovery_notes = utils.get_from_dict(instance_properties, "discovery", "notes") + self.properties_category = utils.get_from_dict(instance_properties, "properties", "category") + self.properties_feature_vs_internal_api = utils.get_from_dict(instance_properties, "properties", "feature_vs_internal_api") + self.properties_input_sanitizer = utils.get_from_dict(instance_properties, "properties", "input_sanitizer") + self.properties_source_and_sink 
= utils.get_from_dict(instance_properties, "properties", "source_and_sink") + self.properties_negative_test_case = utils.get_from_dict(instance_properties, "properties", "negative_test_case") + self.remediation_notes = utils.get_from_dict(instance_properties, "remediation", "notes") + self.remediation_transformation = utils.get_from_dict(instance_properties, "remediation", "transformation") + self.remediation_modeling_rule = utils.get_from_dict(instance_properties, "remediation", "modeling_rule") + self._assert_instance() return self def __getattribute__(self, name): - base_path = super().__getattribute__("instance_path") + base_path = super().__getattribute__("path") attr = super().__getattribute__(name) if isinstance(attr, Path) and attr != base_path: attr = Path(base_path / attr).resolve() return attr + + def __str__(self) -> str: + return f"{self.language} - p{self.pattern_id}:{self.instance_id}" def _log_prefix(self): return f"Pattern {self.pattern_id} - Instance {self.instance_id} - " - def _make_path(self, path_name: str): - return Path(self.instance_path / path_name).resolve() if path_name else None - - def __str__(self) -> str: - return f"Instance {self.instance_id} {self.name}" - def copy_to_tplib(self, pattern_path: Path): - new_instance_path = pattern_path / self.instance_path.name + new_instance_path = pattern_path / self.path.name new_instance_path.mkdir(parents=True, exist_ok=True) - utils.copy_dir_content(self.instance_path, new_instance_path) - self.instance_path = new_instance_path - self.name = self.instance_path.name + utils.copy_dir_content(self.path, new_instance_path) + self.path = new_instance_path + self.name = self.path.name + + # same function as in Pattern, could use some interface for that, or move to utils? 
+ def get_description(self) -> Tuple[bool, str]: + if self.description and Path(self.path / self.description).resolve().is_file(): + with open(Path(self.path / self.description).resolve(), "r") as desc_file: + return True, "".join(desc_file.readlines()).strip() + else: + return False, self.description.strip() + + def set_new_instance_path(self, new_path): + old_path = self.path + self.path = new_path + shutil.move(old_path, self.path) + + def repair(self, pattern): + InstanceRepair(self, pattern).repair() + + def to_dict(self): + return { + "description": self.description, + "code": { + "path": utils.get_relative_paths(self.code_path, self.path), + "injection_skeleton_broken": self.code_injection_skeleton_broken + }, + "discovery": { + "rule": utils.get_relative_paths(self.discovery_rule, self.path), + "method": self.discovery_method, + "rule_accuracy": self.discovery_rule_accuracy, + "notes": self.discovery_notes + }, + "compile": { + "binary": utils.get_relative_paths(self.compile_binary, self.path), + "instruction": self.compile_instruction, + "dependencies": self.compile_dependencies + }, + "expectation": { + "type": self.expectation_type, + "sink_file": utils.get_relative_paths(self.expectation_sink_file, self.path), + "sink_line": self.expectation_sink_line, + "source_file": utils.get_relative_paths(self.expectation_source_file, self.path), + "source_line": self.expectation_source_line, + "expectation": self.expectation_expectation + }, + "properties": { + "category": self.properties_category, + "feature_vs_internal_api": self.properties_feature_vs_internal_api, + "input_sanitizer": self.properties_input_sanitizer, + "source_and_sink": self.properties_source_and_sink, + "negative_test_case": self.properties_negative_test_case + }, + "remediation": { + "notes": self.remediation_notes, + "transformation": self.remediation_transformation, + "modeling_rule": self.remediation_modeling_rule + } + } diff --git a/tp_framework/core/instance_repair.py 
b/tp_framework/core/instance_repair.py new file mode 100644 index 0000000..b01d110 --- /dev/null +++ b/tp_framework/core/instance_repair.py @@ -0,0 +1,196 @@ +import time +import os +import re +from pathlib import Path + +from core import utils +from core.exceptions import PatternRepairError + +import logging +from core import loggermgr +logger = logging.getLogger(loggermgr.logger_name(__name__)) + +from core.repair_tool import RepairTool + +class InstanceRepair(RepairTool): + def __init__(self, instance, pattern: Path) -> None: + self.pattern = pattern + template = instance.tp_lib_path / "pattern_template" / "ID_pattern_name" / "IID_instance_ID_pattern_name" / "IID_instance_ID_pattern_name.json" + super().__init__(instance, template) + try: + self.instance_repair_class = globals()[f"InstanceRepair{self.to_repair.language}"] + except KeyError: + logger.error( + f"InstanceRepair{self.to_repair.language} could not be found, maybe it is not imported?" + ) + raise PatternRepairError("Could not instantiate language specific instance repair") + + def _adjust_variable_number_in_discovery_rule(self) -> None: + dr_path = self.to_repair.discovery_rule + with open(dr_path, "r") as fp: + result = fp.readlines() + + # assume, that a scala files end with + # println() + # delete; + try: + println_line = result[result.index(list(filter(lambda line: "delete;" in line, result))[0]) - 1] + except IndexError: + logger.warning(f'Could not find "delete;" in {dr_path}') + return + try: + real_number = re.search(r"println\(x(\d+)\)", println_line).group(1) + except AttributeError: + logger.warning(f"Could not find the pattern id in {dr_path}") + return + # determine the name for the rule in scala file + # if there is more than one instance, it should be _i + # if this rule is for multiple patterns, it should be _iall + #assert False, f"{len(self.pattern.instances)}\n{dr_path.parent}\n{self.pattern.path}" + rule_name = ( + f'{self.pattern.path.name}_i{self.to_repair.instance_id}' + if 
len(self.pattern.instances) > 1 and dr_path.parent != self.pattern.path + else f"{self.pattern.path.name}_iall" + ) + # make sure the number and the pattern name + new_rule = [] + for line in result: + new_line = line.replace(f"x{real_number}", f"x{self.pattern.pattern_id}") + new_rule += [ + re.sub( + f"({self.pattern.path.name}_i(\d+|all)|ID_pattern_name_i1)", + rule_name, + new_line, + ) + ] + + diff = [line for line in new_rule if line not in result] + # assert False, f"{new_rule}\n\n{result}\n\n{diff}" + if diff: + logger.info( + f"Changed lines in Scala rule for instance {self.to_repair}:\n{[line.strip() for line in diff]}" + ) + with open(dr_path, "w") as fp: + fp.writelines(new_rule) + + def _check_rule_accuracy(self): + if not self.to_repair.discovery_rule_accuracy: + logger.warning(f"{self._log_prefix()}Discovery rule given, but no rule accuracy.") + + def _repair_scala_rule(self): + if not self.to_repair.discovery_rule or not self.to_repair.discovery_rule.is_file(): + logger.warning(f"{self._log_prefix()}Could not find rule for {self.to_repair}, skipping...") + return + if not self.to_repair.discovery_rule.suffix == ".sc": + logger.info(f"{self._log_prefix()}Found a rule, but it is no scala rule, don't know how to repair this, skipping...") + return + self._adjust_variable_number_in_discovery_rule() + self._check_rule_accuracy() + + def repair(self): + # ensure JSON file exists + self._ensure_json_file_exists() + self._check_paths_exists() + # language specific repair instructions + self.instance_repair_class(self.to_repair).repair() + # repair scala rule if exists + self._repair_scala_rule() + # check description + if not self.to_repair.description: + logger.warning(f"{self._log_prefix()}No description provided for {self.to_repair}") + # check properties_negative_test_case vs expectation_expectation + if self.to_repair.expectation_expectation == self.to_repair.properties_negative_test_case: + logger.warning(f"{self._log_prefix()}Changing 
properites_negative_test_case, it has to be `not` expectation_expectation") + self.to_repair.properties_negative_test_case = not self.to_repair.expectation_expectation + + self.to_json() + + +class InstanceRepairPHP: + def __init__(self, instance_to_repair) -> None: + self.instance = instance_to_repair + + def _log_prefix(self): + return f"PatternRepair - PHPInstanceRepair {self.instance} " + + def _get_source_and_sink_for_file(self, path_to_file: Path) -> tuple: + if not path_to_file: + return (None, None) + with open(path_to_file, "r") as fp: + file_lines = fp.readlines() + sink = None + source = None + for idx, line in enumerate(file_lines): + if "// sink" in line: + sink = idx + 1 + if "// source" in line: + source = idx + 1 + return (source, sink) + + def _remove_bash_files(self): + all_bash_files = utils.list_files(self.instance.path, ".bash") + for file in all_bash_files: + file.unlink() + + def _mask_line(self, input_line: str, php_file: str) -> str: + if not php_file in input_line: + return input_line + line_prefix = input_line.split(os.sep)[0] + line_suffix = input_line[input_line.rfind(".php") + 4 :] + actual_filepath = Path(input_line.replace(line_prefix, "").replace(line_suffix, "")) + new_path = f"{os.sep}...{os.sep}{actual_filepath.relative_to(self.instance.path.parent.parent.parent)}" + return line_prefix + new_path + line_suffix + + def _make_opcode_from_php_file(self, php_file_path: Path) -> Path: + # define necessary paths + bash_file_path = php_file_path.parent / f"{php_file_path.stem}.bash" + + # opcache will only compile and cache files older than the script execution start (https://www.php.net/manual/en/function.opcache-compile-file.php) + # therefor we have to modify the time the php file was created + one_minute_ago = time.time() - 60 + os.utime(php_file_path, (one_minute_ago, one_minute_ago)) + + # Generate the bash file + os.system( + f"php -d zend_extension=opcache -d opcache.enable_cli=1 -d opcache.opt_debug_level=0x10000 --syntax-check 
{php_file_path} 2> {bash_file_path} 1>/dev/null" + ) + + # Sanitize the opcode: on some systems, there is an error included in the bash file + with open(bash_file_path, "r") as file: + result = file.readlines() + for idx, line in enumerate(result): + if line.startswith("$_main"): + result = result[max(idx - 1, 0) :] + break + # mask the path to file + final_lines = [self._mask_line(line, str(php_file_path)) for line in result] + with open(bash_file_path, "w") as file: + file.writelines(final_lines) + return Path(bash_file_path) + + def _repair_opcode(self): + # we are radical, remove all '.bash' file and generate new ones for the '.php' files + self._remove_bash_files() + all_php_files = utils.list_files(self.instance.path, ".php", True) + for file in all_php_files: + bash_file_path = self._make_opcode_from_php_file(file) + if not self.instance.compile_binary or not self.instance.compile_binary.is_file(): + self.instance.compile_binary = bash_file_path.relative_to(self.instance.path) + + all_bash_files = utils.list_files(self.instance.path, ".bash", recursive=True) + if len(all_bash_files) != len(all_php_files): + logger.warning(f"{self._log_prefix()}The number of php files and bash files missmatches.") + + def _repair_source_line_sink_line(self): + _, sink_line = self._get_source_and_sink_for_file(self.instance.expectation_sink_file) + source_line, _ = self._get_source_and_sink_for_file(self.instance.expectation_source_file) + if not sink_line: + logger.warning(f"{self._log_prefix()}Could not find '// sink' in sink file '{self.instance.expectation_sink_file}'") + if not source_line: + logger.warning(f"{self._log_prefix()}Could not find '// source' in source file '{self.instance.expectation_source_file}'") + self.instance.expectation_sink_line = sink_line if sink_line else self.instance.expectation_sink_line + self.instance.expectation_source_line = source_line if source_line else self.instance.expectation_source_line + + def repair(self): + self._repair_opcode() + 
self._repair_source_line_sink_line() \ No newline at end of file diff --git a/tp_framework/core/measurement.py b/tp_framework/core/measurement.py index 368cc81..df8de5e 100644 --- a/tp_framework/core/measurement.py +++ b/tp_framework/core/measurement.py @@ -92,7 +92,7 @@ def load_measurements(meas_file: Path, tp_lib: Path, language: str) -> list[Meas parsed_meas: list[Measurement] = [] for m in meas: instance_json_path = tp_lib / Path(m["instance"]) - instance = Instance.init_from_json_path(instance_json_path, None, language) + instance = Instance.init_from_json_path(instance_json_path, None, language, tp_lib) # NOTE 06/2023: if not expectation in measurement, then we take it from instance (backword compatibility though it could introduce mistakes if the instance expectation was changed after the measurement) expected_result = m["expected_result"] if "expected_result" in m.keys() else instance.expectation_expectation parsed_meas.append(Measurement( @@ -112,8 +112,8 @@ def load_last_measurement_for_tool(tool: Dict, language: str, tp_lib: Path, patt # patterns and pattern instances. Make sure to factorize in function what needs to # and to generalize the approach as much as we can to rely the least possible on # the strict notation - pattern_dir_name: str = pattern.pattern_path.name - instance_dir_name: str = instance.instance_path.name + pattern_dir_name: str = pattern.path.name + instance_dir_name: str = instance.path.name # TODO: continue here # instance_dir: Path = pattern_dir / instance_dir_name # if not instance_dir.is_dir(): diff --git a/tp_framework/core/pattern.py b/tp_framework/core/pattern.py index 66e22c1..29bfd96 100644 --- a/tp_framework/core/pattern.py +++ b/tp_framework/core/pattern.py @@ -31,8 +31,8 @@ def __init__(self) -> None: self.language = None # TODO: needed? self.tp_lib_path = None # TODO needed? 
self.language = None - self.pattern_path = None - self.pattern_json_path = None + self.path = None + self.json_path = None # json fields self.name = None @@ -41,17 +41,17 @@ def __init__(self) -> None: self.tags = None self.version = None self.instances = [] - - # repairing tools - self.pattern_repair = None + + def __str__(self) -> str: + return f"{self.language} - p{self.pattern_id}" def _assert_pattern(self): try: assert int(self.pattern_id) assert self.language assert self.tp_lib_path.is_dir() - assert self.pattern_path.is_dir() - assert self.pattern_json_path.is_file() + assert self.path.is_dir() + assert self.json_path.is_file() assert self.instances and all([isinstance(instance, Instance) for instance in self.instances]) except Exception as e: raise PatternInvalid(f"{self._log_prefix()}Instance Variables are not properly set. '{e}'") @@ -60,24 +60,28 @@ def _init_from_id_and_language(self, id: int, language: str, tp_lib_path: Path): self.pattern_id = id self.language = language.upper() self.tp_lib_path = tp_lib_path - self.pattern_path = utils.get_pattern_dir_from_id(id, language, tp_lib_path) - self._init_from_json_file(utils.get_pattern_json(self.pattern_path)) + self.path = utils.get_pattern_dir_from_id(id, language, tp_lib_path) + self._init_from_json_file(utils.get_json_file(self.path)) self._assert_pattern() return self def _init_instances(self, instance_paths_from_json: list): instances = [] for instance_json in instance_paths_from_json: - abs_path = Path(self.pattern_path / Path(instance_json)) + abs_path = Path(self.path / Path(instance_json)) if not abs_path.is_file(): raise PatternInvalid(f"{self._log_prefix()}The instance path '{instance_json}' is not valid.") - instances += [Instance.init_from_json_path(abs_path, self.pattern_id, self.language)] - instances = sorted(instances, key=lambda instance: instance.instance_id) + try: + instances += [Instance.init_from_json_path(abs_path, self.pattern_id, self.language, self.tp_lib_path)] + except 
Exception as e: + raise PatternInvalid(f"{self._log_prefix()}Could not instantiate instance, due to '{e}'") return instances def _init_from_json_file(self, json_file_path: Path): - self.pattern_json_path = json_file_path - pattern_properties = utils.read_json(self.pattern_json_path) + if not json_file_path: + raise PatternInvalid(f"The provided JSON Path is not valid '{json_file_path}'") + self.json_path = json_file_path + pattern_properties = utils.read_json(self.json_path) if not pattern_properties: raise PatternInvalid("The pattern needs a valid JSON file.") self.name = pattern_properties["name"] if "name" in pattern_properties.keys() else None @@ -87,6 +91,7 @@ def _init_from_json_file(self, json_file_path: Path): self.version = pattern_properties["version"] if "version" in pattern_properties.keys() else None if "instances" in pattern_properties.keys() and pattern_properties["instances"]: self.instances = self._init_instances(pattern_properties["instances"]) + self._sort_instances() else: # Raise exception raise PatternInvalid(f"{self._log_prefix()}Pattern JSON file needs an 'instances' key with valid relative links.") @@ -94,11 +99,11 @@ def _init_from_json_file(self, json_file_path: Path): def _init_from_json_without_id(self, json_file_path: Path, language: str, pattern_path: Path, tp_lib_path: Path): self.language = language.upper() - self.pattern_path = pattern_path + self.path = pattern_path self.tp_lib_path = tp_lib_path self._init_from_json_file(json_file_path) try: - given_id = utils.get_id_from_name(self.pattern_path.name) + given_id = utils.get_id_from_name(self.path.name) except Exception: given_id = None free_id = utils.get_next_free_pattern_id_for_language(self.language, self.tp_lib_path, given_id) @@ -109,49 +114,47 @@ def _init_from_json_without_id(self, json_file_path: Path, language: str, patter def _log_prefix(self): return f"Pattern {self.pattern_id} ({self.language}) - " - def __str__(self) -> str: - return str(vars(self)) + def 
_sort_instances(self): + self.instances = sorted(self.instances, key=lambda instance: instance.instance_id) def copy_to_tplib(self): # copies the pattern and all its instances into the tp_lib - new_pattern_path = self.tp_lib_path / self.language / f'{self.pattern_id}_{self.pattern_path.name}' + # try to get the id from the name: + given_id = None + try: + given_id = utils.get_id_from_name(self.path.name) + except (KeyError, ValueError): + pass + # if the given id is not the id, the algorithm identified, give it a new id + pattern_name = f'{self.pattern_id}_{self.path.name}' if given_id != self.pattern_id else self.path.name + new_pattern_path = self.tp_lib_path / self.language / pattern_name for instance in self.instances: instance.copy_to_tplib(new_pattern_path) - utils.copy_dir_content(self.pattern_path, new_pattern_path) + utils.copy_dir_content(self.path, new_pattern_path) + self.path = new_pattern_path def get_instance_by_id(self, tpi_id: int) -> Instance: try: return list(filter(lambda tpi: tpi.instance_id == tpi_id, self.instances))[0] - except KeyError: - raise InstanceDoesNotExists(tpi_id, ) - - def validate_for_measurement(self): - pass - - def repair(self, soft: bool = False): - # soft repair enforces the instances structure (and names) and updates relative instance links in pattern JSON - self.pattern_repair = PatternRepair(self) - self.pattern_repair.repair_pattern_json() - if not soft: - pass - - -# TODO: These functions could be obsolete, if Pattern will be used in measure, discover etc. 
-def get_pattern_by_pattern_id(language: str, pattern_id: int, tp_lib_dir: Path) -> Tuple[Pattern, Path]: - pattern = Pattern.init_from_id_and_language(pattern_id, language, tp_lib_dir) - return pattern, pattern.pattern_path + except IndexError: + raise InstanceDoesNotExists(tpi_id, "") + def get_description(self) -> Tuple[bool, str]: + if self.description and Path(self.path / self.description).resolve().is_file(): + with open(Path(self.path / self.description).resolve(), "r") as desc_file: + return True, "".join(desc_file.readlines()).strip() + else: + return False, self.description.strip() -def list_tpi_paths_by_tp_id(language: str, pattern_id: int, tp_lib_dir: Path) -> list[Path]: - try: - pattern = Pattern.init_from_id_and_language(pattern_id, language, tp_lib_dir) - return [instance.instance_json_path for instance in pattern.instances] - except Exception as e: - logger.exception(e) - raise e -# try: -# pattern = Pattern. -# p, p_dir = pattern.get_pattern_by_pattern_id(language, pattern_id, tp_lib_dir) -# return list(map(lambda i: (tp_lib_dir / language / p_dir / i).resolve(), p.instances)) -# except: -# + def repair(self): + PatternRepair(self).repair(self) + + def to_dict(self): + return { + "name": self.name, + "description": self.description, + "family": self.family, + "tags": self.tags, + "instances": [utils.get_relative_paths(i.json_path, self.path) for i in self.instances], + "version": self.version + } diff --git a/tp_framework/core/pattern_operations.py b/tp_framework/core/pattern_operations.py index 4bc17d3..f9df85b 100644 --- a/tp_framework/core/pattern_operations.py +++ b/tp_framework/core/pattern_operations.py @@ -15,7 +15,7 @@ from core import utils, analysis from core.exceptions import PatternValueError from core.instance import Instance #, PatternCategory, FeatureVsInternalApi # , instance_from_dict -from core.pattern import Pattern, list_tpi_paths_by_tp_id, get_pattern_by_pattern_id +from core.pattern import Pattern from core.sast_job_runner 
import SASTjob, job_list_to_dict from core.measurement import meas_list_to_tp_dict @@ -26,7 +26,7 @@ def add_testability_pattern_to_lib_from_json(language: str, pattern_json: Path, pattern = Pattern.init_from_json_file_without_pattern_id(pattern_json, language, pattern_src_dir, pattern_lib_dest) # dump the pattern to the tplib pattern.copy_to_tplib() - logger.info(f"The pattern has been copied to {pattern.pattern_path}, You might need to adjust relative path links.") + logger.info(f"The pattern has been copied to {pattern.path}, You might need to adjust relative path links.") return pattern @@ -48,7 +48,7 @@ async def start_add_measurement_for_pattern(language: str, sast_tools: list[Dict except Exception as e: d_status_tp[target_pattern.pattern_id] = [] logger.warning( - f"SAST measurement - failed in preparing SAST jobs for instance at {instance.instance_path} of the pattern {tp_id}. Instance will be ignored. Exception raised: {utils.get_exception_message(e)}") + f"SAST measurement - failed in preparing SAST jobs for instance at {instance.path} of the pattern {tp_id}. Instance will be ignored. 
Exception raised: {utils.get_exception_message(e)}") continue return d_status_tp @@ -66,8 +66,8 @@ async def save_measurement_for_patterns(language: str, now: datetime, for tpi_id in d_tp_meas[tp_id]: l_tpi_meas = [] for meas in d_tp_meas[tp_id][tpi_id]: - tp_rel_dir = target_pattern.pattern_path.name - tpi_rel_dir = meas.instance.instance_path.name + tp_rel_dir = target_pattern.path.name + tpi_rel_dir = meas.instance.path.name meas_dir = utils.get_measurement_dir_for_language(tp_lib_dir, language) / tp_rel_dir / tpi_rel_dir meas_dir.mkdir(parents=True, exist_ok=True) d_tpi_meas_ext: Dict = meas.__dict__ diff --git a/tp_framework/core/pattern_repair.py b/tp_framework/core/pattern_repair.py index 0fb4057..a792570 100644 --- a/tp_framework/core/pattern_repair.py +++ b/tp_framework/core/pattern_repair.py @@ -1,31 +1,94 @@ -import shutil -from core.exceptions import PatternRepairError -from core import utils - +from pathlib import Path +from copy import deepcopy import logging from core import loggermgr logger = logging.getLogger(loggermgr.logger_name(__name__)) -class PatternRepair: + +from core.exceptions import InstanceInvalid +from core.instance import Instance +from core.repair_tool import RepairTool +from core import utils + +class PatternRepair(RepairTool): def __init__(self, pattern) -> None: - self.pattern_to_repair = pattern - self.pattern_json_template = pattern.tp_lib_path / "pattern_template" / "ID_pattern_name" / "ID_pattern_name.json" - if not self.pattern_json_template.is_file(): - logger.warn(f"{self._log_prefix()}Expects a template JSON file in {self.pattern_json_template}") - raise PatternRepairError(f"No template JSON found in {self.pattern_json_template}") - - def _log_prefix(self): - return f"PatternRepair ({self.pattern_to_repair.pattern_id} - {self.pattern_to_repair.language}) " - - def repair_pattern_json_instances_paths(self): - # make sure there is a pattern JSON file - if not self.pattern_to_repair.pattern_json_path.is_file(): - 
self.pattern_json_path = utils.get_pattern_json() - if not self.pattern_json_path: - logger.info("Copying template JSON.") - expected_json_path = self.pattern_to_repair.pattern_path / f'{self.pattern_to_repair.name}.json' - shutil.copy(self.pattern_json_template, expected_json_path) - # make sure the instances are correct - for instance in self.pattern_to_repair.instances: - instance.repair() \ No newline at end of file + json_template = pattern.tp_lib_path / "pattern_template" / "ID_pattern_name" / "ID_pattern_name.json" + super().__init__(pattern, json_template) + + def _complete_instances(self): + # list pattern directory and try to find all instances + potential_instances = utils.list_directories(self.to_repair.path) + actual_instances = [i.path for i in self.to_repair.instances] + + # potentially all dirs, that are in the symmetric_difference of potential_instances and actual_instances could be missing instances + missing_instances = set(potential_instances) ^ set(actual_instances) + for m_instance in missing_instances: + instance_json = utils.get_json_file(m_instance) + if instance_json: + # if there is a JSON file, try to instantiate an Instance from it + try: + new_instance = Instance.init_from_json_path(instance_json, self.to_repair.pattern_id, self.to_repair.language, self.to_repair.tp_lib_path) + except Exception: + logger.warn(f"Found potential instance JSON at {instance_json}, but cannot initialize instance.") + continue + self.to_repair.instances += [new_instance] + self.to_repair._sort_instances() + # check if instances are named after naming scheme {instance_id}_instance_{pattern_name} + for instance in self.to_repair.instances: + expected_name = f"{instance.instance_id}_instance_{self.to_repair.path.name}" + actual_name = instance.name + if expected_name != actual_name: + new_path = instance.path.parent / expected_name + instance.set_new_instance_path(new_path) + + def _repair_name(self): + self.to_repair.name = " ".join([w.title() for w in 
self.to_repair.path.name.split("_")[1:]]) + if not self.to_repair.name: + logger.warn(f"{self._log_prefix()}The name of this pattern is weird.") + + def _repair_description(self): + is_file, description = self.to_repair.get_description() + if not description: + logger.warn(f"{self._log_prefix()}Could not find description.") + return + + # check if description is in JSON and is longer than 140 symbols + if not is_file and len(description) > 140: + # description is a bit to long, put it into file + path_to_new_description_file = self.to_repair.path / "docs" / "description.md" + path_to_new_description_file.parent.mkdir(parents=True, exist_ok=True) + with open(path_to_new_description_file, "w") as desc_file: + desc_file.write(description) + logger.info(f"{self._log_prefix()}Moving description into ./docs/description.md") + self.to_repair.description = utils.get_relative_paths(path_to_new_description_file, self.to_repair.path) + + # check if instances have the same description + for instance in self.to_repair.instances: + if description == instance.get_description()[1].strip(): + logger.info(f"{self._log_prefix()}Instance description is the same as pattern description, removing instance description.") + instance.description = "" + + def _repair_tags(self): + if not self.to_repair.tags or set(self.to_repair.tags) == set(self.template_dict["tags"]): + # default tags have not been changed, or there are no tags, set default tags. 
+ self.to_repair.tags = ["sast", self.to_repair.language] + self.to_repair.tags = [t.upper() if t.upper() == self.to_repair.language else t for t in self.to_repair.tags] + self.to_repair.tags = sorted(self.to_repair.tags, key=lambda x: x.lower()) + + def repair(self, pattern): + # make sure, that the JSON file exist + self._ensure_json_file_exists() + self._check_paths_exists() + # get all instances + self._complete_instances() + # repair instances + for instance in self.to_repair.instances: + instance.repair(pattern) + # fix name + self._repair_name() + self._repair_description() + self._repair_tags() + + # write to json + self.to_json() diff --git a/tp_framework/core/repair_tool.py b/tp_framework/core/repair_tool.py new file mode 100644 index 0000000..fed2263 --- /dev/null +++ b/tp_framework/core/repair_tool.py @@ -0,0 +1,87 @@ +import shutil +from pathlib import Path +from copy import deepcopy +from core.exceptions import PatternRepairError +from core import utils + + +import logging +from core import loggermgr +logger = logging.getLogger(loggermgr.logger_name(__name__)) + +class RepairTool: + def __init__(self, to_repair, template_json_file: Path) -> None: + self.to_repair = to_repair + self.json_template = template_json_file# to_repair.tp_lib_path / "pattern_template" / "ID_pattern_name" / "ID_pattern_name.json" + if not self.json_template.is_file(): + raise PatternRepairError(f"{self._log_prefix()} No template JSON found in {self.json_template}") + try: + self.template_dict = utils.read_json(self.json_template) + except Exception: + raise PatternRepairError(f"{self._log_prefix()} The template JSON file {self.json_template} is corrupt, please check") + if not self.template_dict: + raise PatternRepairError(f"{self._log_prefix()} The template JSON {self.json_template} is empty") + + def _log_prefix(self): + return f"PatternRepair ({self.to_repair}) " + + def _copy_template(self): + logger.info(f"{self._log_prefix()}Copying template JSON.") + expected_json_path = 
self.to_repair.path / f'{self.to_repair.path.name}.json' + shutil.copy(self.json_template, expected_json_path) + self.to_repair.json_path = expected_json_path + return expected_json_path + + def _ensure_json_file_exists(self): + to_repair_json_path = self.to_repair.json_path + # check if json path is a file + if not to_repair_json_path.is_file(): + # try to get the file, if not possible copy the template + to_repair_json_path = utils.get_json_file() + if not to_repair_json_path: + to_repair_json_path = self._copy_template() + # read the given file to check if there are errors or keys missing + pattern_dict = {} + try: + org_pattern_dict = utils.read_json(to_repair_json_path) + except Exception: + self._copy_template() + org_pattern_dict = utils.read_json(self.to_repair.json_path) + + pattern_dict = deepcopy(org_pattern_dict) + # check for missing keys + missing_keys_in_pattern_dict = set(self.template_dict.keys()) - set(pattern_dict.keys()) + for key in missing_keys_in_pattern_dict: + pattern_dict[key] = self.template_dict[key] + + if pattern_dict != org_pattern_dict: + utils.write_json(self.to_repair.json_path, pattern_dict) + + # rename the JSON file to the expected format + expected_json_name = f"{self.to_repair.path.name}.json" + actual_name = self.to_repair.json_path.name + + logger.error(f"{expected_json_name}, {actual_name}") + if expected_json_name != actual_name: + new_path = self.to_repair.path / expected_json_name + shutil.move(self.to_repair.json_path, new_path) + self.to_repair.json_path = new_path + + def _check_paths_exists(self): + for k, v in vars(self.to_repair).items(): + if isinstance(v, Path): + attr = getattr(self.to_repair, k) + if not attr.exists(): + logger.warning(f"{self._log_prefix()}Could not find path {v}") + setattr(self.to_repair, k, None) + + + + def repair(self): + raise NotImplementedError() + + def to_json(self): + repaired_dict = self.to_repair.to_dict() + original_dict = utils.read_json(self.to_repair.json_path) + if 
repaired_dict != original_dict: + utils.write_json(self.to_repair.json_path, repaired_dict) \ No newline at end of file diff --git a/tp_framework/core/utils.py b/tp_framework/core/utils.py index 103510e..312b49c 100644 --- a/tp_framework/core/utils.py +++ b/tp_framework/core/utils.py @@ -24,31 +24,9 @@ from core import errors -def is_windows(): - return system() == "Windows" - - -def list_pattern_paths_for_language(language: str, tp_lib_dir: Path) -> list[Path]: - all_pattern_dirs_by_lang: Path = tp_lib_dir / language - if not all_pattern_dirs_by_lang.is_dir(): - raise LanguageTPLibDoesNotExist - return list_dirs_only(all_pattern_dirs_by_lang) - - -# TODO: reimplement -# def list_tpi_paths_by_tp_id(language: str, pattern_id: int, tp_lib_dir: Path) -> list[Path]: -# try: -# pattern = Pattern. -# p, p_dir = pattern.get_pattern_by_pattern_id(language, pattern_id, tp_lib_dir) -# return list(map(lambda i: (tp_lib_dir / language / p_dir / i).resolve(), p.instances)) -# except: -# ee = PatternDoesNotExists(pattern_id) -# logger.exception(ee) -# raise ee - - -def get_tpi_id_from_jsonpath(jp: Path) -> int: - return get_id_from_name(jp.parent.name) +################################################################################ +# PATTERNS +# def get_pattern_dir_from_id(pattern_id: int, language: str, tp_lib_dir: Path) -> Path: # needed @@ -76,60 +54,30 @@ def get_next_free_pattern_id_for_language(language: str, tp_lib_dir: Path, propo return free_ids[0] if free_ids else max(taken_ids) + 1 -def get_instance_dir_from_id(instance_id: int, pattern_dir: Path) -> Path: - if pattern_dir.is_dir(): - return get_instance_dir_from_list(instance_id, list_dirs_only(pattern_dir)) - else: - raise PatternFolderNotFound() +################################################################################ +# INSTANCES +# +# TODO: TESTING def get_instance_dir_from_list(instance_id: int, l_pattern_dir: list[Path]): instance_with_id = list(filter(lambda tpi_dir: get_id_from_name(tpi_dir.name) == 
instance_id, l_pattern_dir)) if not instance_with_id: raise InstanceDoesNotExists() return instance_with_id[0] -# def get_or_create_language_dir(language: str, tp_lib_dir: Path) -> Path: -# tp_lib_for_lang: Path = tp_lib_dir / language -# tp_lib_for_lang.mkdir(parents=True, exist_ok=True) -# return tp_lib_for_lang - - -def get_or_create_pattern_dir(language: str, pattern_id: int, pattern_name: str, tp_lib_dir: Path) -> Path: - pattern_dir = tp_lib_dir / language / get_pattern_dir_name_from_name(pattern_name, pattern_id) - pattern_dir.mkdir(parents=True, exist_ok=True) - return pattern_dir - - -def get_pattern_dir_name_from_name(name: str, pattern_id: int) -> str: - return f"{pattern_id}_{name.lower().replace(' ', '_')}" - - -def get_instance_dir_name_from_pattern(name: str, pattern_id: int, instance_id: int) -> str: - return f"{instance_id}_instance_{get_pattern_dir_name_from_name(name, pattern_id)}" - - -def get_id_from_name(name: str) -> int: - return int(name.split("_")[0]) - - -def get_class_from_str(class_str: str) -> object: - try: - module_path, class_name = class_str.rsplit('.', 1) - module = import_module(module_path) - return getattr(module, class_name) - except (ImportError, AttributeError) as e: - raise ImportError(class_str) -def get_tp_dir_for_language(tp_lib_dir: Path, language: str): - return Path(tp_lib_dir / language) +################################################################################ +# MEASUREMENT +# def get_measurement_dir_for_language(tp_lib_dir: Path, language: str): return Path(tp_lib_dir / config.MEASUREMENT_REL_DIR / language) +# TODO: TESTING def get_measurement_file(date: datetime): date_time_str = date.strftime("%Y-%m-%d_%H-%M-%S") return f"measurement-{date_time_str}.json" @@ -147,43 +95,15 @@ def get_last_measurement_for_pattern_instance(meas_inst_dir: Path) -> Path: return sorted_meas[-1][1] -# Useful for some SAST tools that accepts a zip file of the source code to scan -def zipdir(path, ziph): - for root, dirs, files 
in os.walk(path): - for file in files: - ziph.write(os.path.join(root, file), - os.path.relpath(os.path.join(root, file), - os.path.join(path, '..'))) - - -################################################################################ -# TODO (LC): are these related to pattern instance ? -# -def get_path_or_none(p: str) -> Path | None: - if p: - return Path(p) - return None - - -def get_enum_value_or_none(enum) -> str | None: - try: - return enum.value - except AttributeError: - return None - - -def get_relative_path_str_or_none(path) -> str | None: - if path: - return f"./{path}" - return None - - -def get_from_dict(d, k1, k2): - return d.get(k1, {}).get(k2, None) +def check_measurement_results_exist(measurement_dir: Path): + if not measurement_dir.is_dir(): + e = MeasurementResultsDoNotExist() + logger.error(get_exception_message(e)) + raise e ################################################################################ -# Discovery +# DISCOVERY # def get_discovery_rule_ext(discovery_method: str): @@ -214,16 +134,36 @@ def get_discovery_rules(discovery_rule_list: list[str], discovery_rule_ext: str) logger.warning(errors.wrongDiscoveryRule(discovery_rule)+ " The script will try to continue ignoring this discovery rule.") return list(discovery_rules_to_run) - ################################################################################ -# Others +# SAST # -def check_measurement_results_exist(measurement_dir: Path): - if not measurement_dir.is_dir(): - e = MeasurementResultsDoNotExist() - logger.error(get_exception_message(e)) +def sast_tool_version_match(v1, v2, nv_max=3, ignore_saas=True): + if ignore_saas and (v1 == "saas" or v2 == "saas"): + return True + sv1 = v1.split(".") + sv2 = v2.split(".") + nv = max(len(sv1), len(sv2)) + for i in range(0, min(nv, nv_max)): + try: + if sv1[i] != sv2[i]: + return False + except IndexError: + return False + return True + + +def load_sast_specific_config(tool_name: str, tool_version: str) -> Dict: + try: + 
tool_config_path: Path = config.ROOT_SAST_DIR / load_yaml(config.SAST_CONFIG_FILE)["tools"][tool_name]["version"][tool_version]["config"] + except KeyError: + e = InvalidSastTool(f"{tool_name}:{tool_version}") raise e + return load_yaml(tool_config_path) + +################################################################################ +# PATTERN REPAIR +# def check_file_exist(file_path: Path, file_suffix = ".csv"): @@ -232,7 +172,61 @@ def check_file_exist(file_path: Path, file_suffix = ".csv"): logger.error(get_exception_message(e)) raise e +# TODO: TESTGIN +def get_relative_paths(file_path: Path, base_path: Path): + if not file_path: + return None + try: + return f"./{file_path.relative_to(base_path)}" + except ValueError: + try: + return f"../{file_path.relative_to(base_path.parent)}" + except ValueError as e: + logger.warning(f"Could not parse filepath {file_path} to a relative path.") + return file_path + +################################################################################ +# OTHER +# TODO: Could be sorted alphabetically? + + +# Useful for some SAST tools that accepts a zip file of the source code to scan +# Where is it used in the code? +def zipdir(path, ziph): + for root, dirs, files in os.walk(path): + for file in files: + ziph.write(os.path.join(root, file), + os.path.relpath(os.path.join(root, file), + os.path.join(path, '..'))) + + +# TODO: TESTING +def get_id_from_name(name: str) -> int: + return int(name.split("_")[0]) + +# TODO: TESTING +def get_class_from_str(class_str: str) -> object: + try: + module_path, class_name = class_str.rsplit('.', 1) + module = import_module(module_path) + return getattr(module, class_name) + except (ImportError, AttributeError) as e: + raise ImportError(class_str) + + +# TODO (LC): are these related to pattern instance ? 
+# TODO: TESTING +def get_path_or_none(p: str) -> Path | None: + if p: + return Path(p) + return None + +# TODO: TESTING +def get_from_dict(d: dict, k1: str, k2: str): + return d.get(k1, {}).get(k2, None) + +# TODO: TESTING def build_timestamp_language_name(name: Path | None, language: str, now: datetime, extra: str = None) -> str: res = language if name: @@ -251,21 +245,21 @@ def check_tp_lib(tp_lib_path: Path): logger.error(get_exception_message(e)) raise e - +# TODO: TESTING def check_lang_tp_lib_path(lang_tp_lib_path: Path): if not lang_tp_lib_path.is_dir(): e = LanguageTPLibDoesNotExist() logger.error(get_exception_message(e)) raise e - +# TODO: TESTING def check_target_dir(target_dir: Path): if not target_dir.is_dir(): e = TargetDirDoesNotExist() logger.error(get_exception_message(e)) raise e - +# TODO: TESTING def filter_sast_tools(itools: list[Dict], language: str, exception_raised=True): for t in itools: t["supported_languages"] = load_sast_specific_config(t["name"], t["version"])["supported_languages"] @@ -277,36 +271,12 @@ def filter_sast_tools(itools: list[Dict], language: str, exception_raised=True): return tools -def sast_tool_version_match(v1, v2, nv_max=3, ignore_saas=True): - if ignore_saas and (v1 == "saas" or v2 == "saas"): - return True - sv1 = v1.split(".") - sv2 = v2.split(".") - nv = max(len(sv1), len(sv2)) - for i in range(0, min(nv, nv_max)): - try: - if sv1[i] != sv2[i]: - return False - except IndexError: - return False - return True - - def load_yaml(fpath): with open(fpath) as f: fdict: Dict = yaml.load(f, Loader=yaml.Loader) return fdict -def load_sast_specific_config(tool_name: str, tool_version: str) -> Dict: - try: - tool_config_path: Path = config.ROOT_SAST_DIR / load_yaml(config.SAST_CONFIG_FILE)["tools"][tool_name]["version"][tool_version]["config"] - except KeyError: - e = InvalidSastTool(f"{tool_name}:{tool_version}") - raise e - return load_yaml(tool_config_path) - - def write_csv_file(ofile: Path, header: list[str], data: 
list[dict]): with open(ofile, "w", newline='') as report: writer = csv.DictWriter(report, fieldnames=header) @@ -326,7 +296,6 @@ def add_loggers(output_dir_path: Path, filename: str=None, console=True): loggermgr.add_console_logger() - def get_operation_build_name_and_dir(op: str, src_dir: Path | None, language: str, output_dir: Path): now = datetime.now() if not src_dir: @@ -373,7 +342,7 @@ def get_tpi_op_status_string(t_tp_info, t_tpi_info=None, status="started...", op op_str = f"{op} - " return f"{i}/{tot} -{tpi_count_str} {op_str}pattern id {tp_id}{tpi_id_str}: {status}" - +# TODO: TESTING def list_dirs_only(dir: Path): return [e for e in dir.iterdir() if e.is_dir()] @@ -388,26 +357,36 @@ def get_file_hash(fpath, bigfile=False): hash.update(chunk) return hash.hexdigest() - - -########################### New utils - -def list_files(path_to_parent_dir: Path, suffix: str): +# TODO: TESTING +def list_files(path_to_parent_dir: Path, suffix: str, recursive: bool = False): assert suffix[0] == ".", "Suffix has to start with '.'" - return list(filter(lambda file_name: file_name.suffix == suffix, [path_to_parent_dir / f for f in os.listdir(path_to_parent_dir)])) + if recursive: + matches = [] + for root, _, filenames in os.walk(path_to_parent_dir): + for filename in filter(lambda f: Path(f).suffix == suffix, filenames): + matches += [Path(root) / filename] + return matches + else: + return list(filter(lambda file_name: file_name.suffix == suffix, [path_to_parent_dir / f for f in path_to_parent_dir.iterdir()])) +# TODO: TESTING +def list_directories(parent_dir: Path): + return list(filter(lambda name: name.is_dir(), [parent_dir / d for d in parent_dir.iterdir()])) -def get_pattern_json(path_to_pattern: Path) -> Path: - json_files_in_pattern_dir = list_files(path_to_pattern, ".json") - if len(json_files_in_pattern_dir) == 1: - return json_files_in_pattern_dir[0] - elif not json_files_in_pattern_dir: - logger.warning(f"Could not find a pattern JSON file in 
{path_to_pattern.name}") +# TODO: TESTING +def get_json_file(path_to_pattern_or_instance: Path) -> Path: + if path_to_pattern_or_instance.name == 'docs': + return None + json_files_in_dir = list_files(path_to_pattern_or_instance, ".json") + if len(json_files_in_dir) == 1: + return json_files_in_dir[0] + elif not json_files_in_dir: + logger.warning(f"Could not find a JSON file in {path_to_pattern_or_instance.name}") return None else: - logger.warning(f"Found multiple '.json' files for {path_to_pattern.name}") - if path_to_pattern / f"{path_to_pattern.name}.json" in json_files_in_pattern_dir: - return path_to_pattern / f"{path_to_pattern.name}.json" + logger.warning(f"Found multiple '.json' files for {path_to_pattern_or_instance.name}") + if path_to_pattern_or_instance / f"{path_to_pattern_or_instance.name}.json" in json_files_in_dir: + return path_to_pattern_or_instance / f"{path_to_pattern_or_instance.name}.json" logger.warning("Could not determine the right pattern JSON file. Please name it _.json") return None @@ -428,6 +407,12 @@ def read_json(path_to_json_file: Path): return result +def write_json(path_to_json_file: Path, result_dict: dict): + path_to_json_file.parent.mkdir(exist_ok=True, parents=True) + with open(path_to_json_file, "w") as json_file: + json.dump(result_dict, json_file, indent=4) + +# TODO: TESTING def copy_dir_content(path_to_src_dir: Path, path_to_dst_dir: Path): for element in os.listdir(path_to_src_dir): src_path = path_to_src_dir / element @@ -438,7 +423,3 @@ def copy_dir_content(path_to_src_dir: Path, path_to_dst_dir: Path): shutil.copy2(src_path, dest_path) else: shutil.copytree(src_path, dest_path) - - -if __name__ == "__main__": - print(get_pattern_json(Path('./testability_patterns/PHP/85_test_pattern'))) \ No newline at end of file From 7690124f44f4dfb4aa9e4858ca0edcbbd19db144 Mon Sep 17 00:00:00 2001 From: felix-20 Date: Thu, 29 Jun 2023 15:57:21 +0200 Subject: [PATCH 08/16] fixed tests --- qualitytests/cli/test_interface.py | 1 - 
qualitytests/cli/test_main.py | 7 +- qualitytests/cli/test_tpf_commands.py | 4 +- qualitytests/core/test_discovery.py | 10 +-- qualitytests/core/test_utils.py | 92 +++++++++++++++---------- qualitytests/qualitytests_utils.py | 3 +- tp_framework/cli/interface.py | 14 +--- tp_framework/core/analysis.py | 4 +- tp_framework/core/pattern_operations.py | 1 + tp_framework/core/utils.py | 26 +++---- 10 files changed, 84 insertions(+), 78 deletions(-) diff --git a/qualitytests/cli/test_interface.py b/qualitytests/cli/test_interface.py index af6fa88..777693f 100644 --- a/qualitytests/cli/test_interface.py +++ b/qualitytests/cli/test_interface.py @@ -16,7 +16,6 @@ init_sastreport_test, init_test -@pytest.mark.asyncio class TestInterface: diff --git a/qualitytests/cli/test_main.py b/qualitytests/cli/test_main.py index 3c1a832..9829f52 100644 --- a/qualitytests/cli/test_main.py +++ b/qualitytests/cli/test_main.py @@ -11,6 +11,7 @@ class TestMain: testdir = Path(__file__).parent.parent.resolve() tpf = testdir.parent / "tp_framework/cli/main.py" + sample_tp_lib = str(join_resources_path("sample_patlib")) def test_cli_help_1(self): @@ -122,7 +123,7 @@ def test_cli_measure_4(self, tmp_path, mocker): main.main(['measure', '-p', self.tp1, self.tp2, '--tools', self.tool1, 'whatever', '-l', self.test_lang, - '--tp-lib', str(tmp_path)]) + '--tp-lib', TestMain.sample_tp_lib]) def test_cli_measure_5(self, tmp_path, mocker): @@ -131,7 +132,7 @@ def test_cli_measure_5(self, tmp_path, mocker): main.main(['measure', '-p', self.tp1, self.tp2, '--tools', self.tool1, self.tool2, '-l', self.test_lang, - '--tp-lib', str(tmp_path)]) + '--tp-lib', TestMain.sample_tp_lib]) def _init_cli_report(self, mocker): @@ -156,7 +157,7 @@ def test_cli_report_2(self, tmp_path, mocker): '--print', '-p', self.tp1, self.tp2, '--tools', self.tool1, self.tool2, '-l', self.test_lang, - '--tp-lib', str(tmp_path)]) + '--tp-lib', TestMain.sample_tp_lib]) def test_cli_report_3(self, tmp_path, mocker): diff --git 
a/qualitytests/cli/test_tpf_commands.py b/qualitytests/cli/test_tpf_commands.py index b34ca12..128e955 100644 --- a/qualitytests/cli/test_tpf_commands.py +++ b/qualitytests/cli/test_tpf_commands.py @@ -51,9 +51,9 @@ def test_parse_patterns(self): tp_ids = tpf_commands.parse_patterns(False, tp_range, [], test_tp_lib_path, test_lang) assert tp_ids == [2, 3] # one and only one mutual exclusion params: pattern ids - itp_ids = [1,2,5,10] + itp_ids = [1,3] tp_ids = tpf_commands.parse_patterns(False, "", itp_ids, test_tp_lib_path, test_lang) assert tp_ids == itp_ids # one and only one mutual exclusion params: all tp_ids = tpf_commands.parse_patterns(True, "", [], test_tp_lib_path, test_lang) - assert tp_ids == [1,2,3] + assert tp_ids == [1,2,3,4] diff --git a/qualitytests/core/test_discovery.py b/qualitytests/core/test_discovery.py index 6f532be..270d160 100644 --- a/qualitytests/core/test_discovery.py +++ b/qualitytests/core/test_discovery.py @@ -9,7 +9,7 @@ import config from core import utils, discovery, instance, pattern from core.exceptions import MeasurementNotFound, CPGGenerationError -from qualitytests.qualitytests_utils import join_resources_path, get_result_output_dir +from qualitytests.qualitytests_utils import join_resources_path, create_instance class TestDiscovery: @@ -232,12 +232,8 @@ def test_patch_PHP_discovery_rule_2(self, tmp_path): assert str(tmp_path) in str(pdr) def test_dicovery_with_empty_rule(self): - with open(join_resources_path("sample_patlib/PHP/4_empty_pattern/4_empty_pattern.json"), "r") as json_file: - pattern_dict = json.load(json_file) - test_pattern = pattern.pattern_from_dict(pattern_dict, "PHP", 4) - with open(join_resources_path("sample_patlib/PHP/4_empty_pattern/1_instance_4_empty_pattern/1_instance_4_empty_pattern.json"), "r") as json_file: - instance_dict = json.load(json_file) - tpi_instance = instance.instance_from_dict(instance_dict, test_pattern, "PHP", 1) + tpi_instance = create_instance() + tpi_instance.discovery_rule = None 
assert not tpi_instance.discovery_rule, "The test case is broken, instance 1 of PHP pattern 4 is not supposed to have a discovery rule" expected = dict.fromkeys(["rule_path", "method", "rule_name", "rule_accuracy", "rule_hash", "rule_name", "results", "rule_already_executed"], None) actual = discovery.discovery_for_tpi(tpi_instance, None, None, None) diff --git a/qualitytests/core/test_utils.py b/qualitytests/core/test_utils.py index b790ee5..049461d 100644 --- a/qualitytests/core/test_utils.py +++ b/qualitytests/core/test_utils.py @@ -33,41 +33,6 @@ def test_check_tp_lib_2(self, tmp_path): utils.check_tp_lib(tmp_path) - # TODO: to be fixed, misses the json file - @pytest.mark.skip() - def test_list_pattern_instances_by_pattern_id(self, tmp_path): - language, tmp_tp_path, p1, p2, p3 = setup_three_pattern(tmp_path) - - pi1 = p2 / ("1_instance_" + p2.name) - pi2 = p2 / ("2_instance_" + p2.name) - pi3 = p2 / ("3_instance_" + p2.name) - pi1.mkdir() - pi2.mkdir() - pi3.mkdir() - - path_list_expected = [pi1, pi2, pi3] - path_list = utils.list_tpi_paths_by_tp_id(language, 2, tmp_path) - assert sorted(path_list) == sorted(path_list_expected) - - - def test_list_pattern_instances_by_pattern_id_with_non_existing_pattern(self, tmp_path): - language: str = "PHP" - tmp_tp_path: Path = tmp_path / language - tmp_tp_path.mkdir() - - with pytest.raises(PatternDoesNotExists): - utils.list_tpi_paths_by_tp_id(language, 5, tmp_path) - - - # TODO: to be fixed - @pytest.mark.skip() - def test_get_or_create_tp_lib_for_lang_existing_folder(self, tmp_path): - language: str = "PHP" - path_tp_language_exp = tmp_path / language - path_tp_language_exp.mkdir() - path_tp_language_act = utils.get_or_create_language_dir(language, tmp_path) - assert path_tp_language_exp.is_dir() == path_tp_language_act.is_dir() - def test_get_last_measurement_for_pattern_instance(self, tmp_path): m1: Path = tmp_path / "measurement-2022-03-24_10-28-00.json" m2: Path = tmp_path / "measurement-2022-04-10_12-25-00.json" 
@@ -163,4 +128,59 @@ def test_get_next_free_pattern_id_for_language(self, list_dir_ret_value: list, e tp_lib_path = qualitytests_utils.join_resources_path("sample_patlib") with patch("core.utils.list_dirs_only") as list_dir_mock: list_dir_mock.return_value = list_dir_ret_value - assert expected_value == utils.get_next_free_pattern_id_for_language("PHP", tp_lib_path) \ No newline at end of file + assert expected_value == utils.get_next_free_pattern_id_for_language("PHP", tp_lib_path) + + get_relative_paths_testcases = [ + (Path("/tp_framework/file.sc"), Path("/tp_framework"), "./file.sc"), + (Path("/tp_framework/file.sc"), Path("/tp_framework"), "./file.sc"), + (Path("/file.sc"), Path("/tp_framework/PHP"), Path("/file.sc")), + ] + + @pytest.mark.parametrize("file_path, base_path, expected", get_relative_paths_testcases) + def test_get_relative_paths_testcases(self, file_path, base_path, expected): + assert expected == utils.get_relative_paths(file_path, base_path) + + def test_get_id_from_name_error(self): + with pytest.raises(ValueError): + utils.get_id_from_name("name") + + assert 1 == utils.get_id_from_name("1_instance_85_test_pattern") + assert 42 == utils.get_id_from_name("42_test_pattern") + + def test_get_path_or_none(self): + assert utils.get_path_or_none("") is None + assert utils.get_path_or_none(None) is None + assert Path("file") == utils.get_path_or_none("file") + + def test_get_from_dict(self): + assert utils.get_from_dict({}, "key1", "key2") is None + assert utils.get_from_dict({"key1": 3}, "key1", "key2") is None + assert utils.get_from_dict({"key1": {"key3": 3}}, "key1", "key2") is None + assert 3 == utils.get_from_dict({"key1": {"key2": 3}}, "key1", "key2") + + get_json_file_testcases = [ + # special shortcut case to avoid warnings + (Path("./docs"), None, None, None), + # works as expected, only one possible JSON file + (Path("./1_instance"), Path("instance.json"), [Path("instance.json")], None), + # No JSON file at all + (Path("./1_instance"), 
None, [], "Could not find a JSON file in 1_instance"), + # multiple JSON files, none of them named as wanted + (Path("./1_instance"), None, ["instance.json", "insteresting.json"], "Could not determine the right pattern JSON file. Please name it _.json"), + # multiple JSON files, but one is named correctly + (Path("./1_instance"), Path("./1_instance/1_instance.json"), [Path("./1_instance/1_instance.json"), Path("./1_instance/interesting.json")], "Found multiple '.json' files for 1_instance"), + ] + + @pytest.mark.parametrize("path, expected, list_file_return, warn", get_json_file_testcases) + def test_get_json_file(self, path, expected, list_file_return, warn): + with patch("core.utils.logger.warning") as warn_logger, \ + patch("core.utils.list_files") as list_file_mock: + list_file_mock.return_value = list_file_return + + actual = utils.get_json_file(path) + + assert expected == actual + if warn: + warn_logger.assert_called_with(warn) + else: + warn_logger.assert_not_called() diff --git a/qualitytests/qualitytests_utils.py b/qualitytests/qualitytests_utils.py index 65fccb0..8bea2b1 100644 --- a/qualitytests/qualitytests_utils.py +++ b/qualitytests/qualitytests_utils.py @@ -113,8 +113,9 @@ def init_test(init, language="PHP"): init["tp_lib_path"] = join_resources_path(temp_meas).resolve() try: shutil.copytree(join_resources_path("sample_patlib"), init["tp_lib_path"]) - except: + except Exception as e: pass + # assert False, f"stop your tests will fail {e}" init["patterns"] = [1,2,3] diff --git a/tp_framework/cli/interface.py b/tp_framework/cli/interface.py index 14e4371..170c472 100644 --- a/tp_framework/cli/interface.py +++ b/tp_framework/cli/interface.py @@ -197,16 +197,4 @@ def repair_patterns(language: str, pattern_ids: list, except PatternInvalid as e: print(f"Failed to init pattern: {tp_id} due to {e}") continue - pattern.repair() - # print(pattern) - - # # pattern_path = get_pattern_path_by_pattern_id(language, pattern_id, tp_lib_path) - # # PatternRepair( - # 
# pattern_path, - # # language, - # # tp_lib_path, - # # checkdiscoveryrule_results, - # # masking_file, - # # measurement_results, - # # ).repair(should_include_readme) - # pass \ No newline at end of file + pattern.repair() \ No newline at end of file diff --git a/tp_framework/core/analysis.py b/tp_framework/core/analysis.py index bea4965..e0f7faa 100644 --- a/tp_framework/core/analysis.py +++ b/tp_framework/core/analysis.py @@ -33,7 +33,7 @@ async def analyze_pattern_instance(instance: Instance, tool_name: str = tool["name"] tool_version: str = tool["version"] - sast_config: Dict = core.utils.load_sast_specific_config(tool_name, tool_version) + sast_config: Dict = utils.load_sast_specific_config(tool_name, tool_version) sast_interface_class: str = sast_config["tool_interface"] sast_class = utils.get_class_from_str(sast_interface_class) @@ -72,7 +72,7 @@ async def inspect_analysis_results(d_job: Dict, language) -> list[Measurement]: # if not csv_res, then the SAST job would have failed and no measurement in that case if csv_res: - sast_config: Dict = core.utils.load_sast_specific_config(tool_name, tool_version) + sast_config: Dict = utils.load_sast_specific_config(tool_name, tool_version) sast_interface_class: str = sast_config["tool_interface"] sast_class = utils.get_class_from_str(sast_interface_class) diff --git a/tp_framework/core/pattern_operations.py b/tp_framework/core/pattern_operations.py index f9df85b..304187d 100644 --- a/tp_framework/core/pattern_operations.py +++ b/tp_framework/core/pattern_operations.py @@ -32,6 +32,7 @@ def add_testability_pattern_to_lib_from_json(language: str, pattern_json: Path, async def start_add_measurement_for_pattern(language: str, sast_tools: list[Dict], tp_id: int, now, tp_lib_dir: Path, output_dir: Path) -> Dict: + d_status_tp = {} try: target_pattern = Pattern.init_from_id_and_language(tp_id, language, tp_lib_dir) diff --git a/tp_framework/core/utils.py b/tp_framework/core/utils.py index 312b49c..9e76105 100644 --- 
a/tp_framework/core/utils.py +++ b/tp_framework/core/utils.py @@ -172,7 +172,7 @@ def check_file_exist(file_path: Path, file_suffix = ".csv"): logger.error(get_exception_message(e)) raise e -# TODO: TESTGIN + def get_relative_paths(file_path: Path, base_path: Path): if not file_path: return None @@ -200,11 +200,10 @@ def zipdir(path, ziph): os.path.join(path, '..'))) -# TODO: TESTING def get_id_from_name(name: str) -> int: return int(name.split("_")[0]) -# TODO: TESTING + def get_class_from_str(class_str: str) -> object: try: module_path, class_name = class_str.rsplit('.', 1) @@ -215,18 +214,19 @@ def get_class_from_str(class_str: str) -> object: # TODO (LC): are these related to pattern instance ? -# TODO: TESTING def get_path_or_none(p: str) -> Path | None: if p: return Path(p) return None -# TODO: TESTING + def get_from_dict(d: dict, k1: str, k2: str): - return d.get(k1, {}).get(k2, None) + try: + return d.get(k1, {}).get(k2, None) + except AttributeError: + return None -# TODO: TESTING def build_timestamp_language_name(name: Path | None, language: str, now: datetime, extra: str = None) -> str: res = language if name: @@ -245,14 +245,14 @@ def check_tp_lib(tp_lib_path: Path): logger.error(get_exception_message(e)) raise e -# TODO: TESTING + def check_lang_tp_lib_path(lang_tp_lib_path: Path): if not lang_tp_lib_path.is_dir(): e = LanguageTPLibDoesNotExist() logger.error(get_exception_message(e)) raise e -# TODO: TESTING + def check_target_dir(target_dir: Path): if not target_dir.is_dir(): e = TargetDirDoesNotExist() @@ -342,7 +342,7 @@ def get_tpi_op_status_string(t_tp_info, t_tpi_info=None, status="started...", op op_str = f"{op} - " return f"{i}/{tot} -{tpi_count_str} {op_str}pattern id {tp_id}{tpi_id_str}: {status}" -# TODO: TESTING + def list_dirs_only(dir: Path): return [e for e in dir.iterdir() if e.is_dir()] @@ -357,7 +357,7 @@ def get_file_hash(fpath, bigfile=False): hash.update(chunk) return hash.hexdigest() -# TODO: TESTING + def 
list_files(path_to_parent_dir: Path, suffix: str, recursive: bool = False): assert suffix[0] == ".", "Suffix has to start with '.'" if recursive: @@ -369,7 +369,7 @@ def list_files(path_to_parent_dir: Path, suffix: str, recursive: bool = False): else: return list(filter(lambda file_name: file_name.suffix == suffix, [path_to_parent_dir / f for f in path_to_parent_dir.iterdir()])) -# TODO: TESTING + def list_directories(parent_dir: Path): return list(filter(lambda name: name.is_dir(), [parent_dir / d for d in parent_dir.iterdir()])) @@ -412,7 +412,7 @@ def write_json(path_to_json_file: Path, result_dict: dict): with open(path_to_json_file, "w") as json_file: json.dump(result_dict, json_file, indent=4) -# TODO: TESTING + def copy_dir_content(path_to_src_dir: Path, path_to_dst_dir: Path): for element in os.listdir(path_to_src_dir): src_path = path_to_src_dir / element From 296cc0416ba4fb7d22997527f2e20f70f0bc9bf3 Mon Sep 17 00:00:00 2001 From: felix-20 Date: Mon, 3 Jul 2023 15:09:38 +0200 Subject: [PATCH 09/16] added some more tests --- qualitytests/core/test_instance.py | 26 +- .../core/test_instance_readme_generation.py | 300 +++++++++++++ qualitytests/core/test_instance_repair_php.py | 2 - qualitytests/core/test_markdown_elements.py | 43 ++ qualitytests/core/test_pattern.py | 45 +- qualitytests/core/test_pattern_operations.py | 406 +++--------------- qualitytests/core/test_readme_generator.py | 181 ++++++++ qualitytests/core/test_utils.py | 28 +- .../1_instance_1_static_variables.json | 2 +- .../PHP/1_static_variables/README.md | 101 +++-- tp_framework/cli/interface.py | 6 +- tp_framework/cli/tpf_commands.py | 2 +- tp_framework/core/discovery.py | 1 + tp_framework/core/errors.py | 2 + tp_framework/core/exceptions.py | 11 + tp_framework/core/instance.py | 18 +- tp_framework/core/instance_repair.py | 3 +- tp_framework/core/measurement.py | 23 +- tp_framework/core/pattern.py | 21 +- tp_framework/core/pattern_operations.py | 12 +- tp_framework/core/readme_generator.py | 
381 ++++++++++++++++ tp_framework/core/readme_markdown_elements.py | 171 ++++++++ tp_framework/core/repair_tool.py | 1 - tp_framework/core/utils.py | 57 ++- tp_framework/tmp.py | 17 - 25 files changed, 1405 insertions(+), 455 deletions(-) create mode 100644 qualitytests/core/test_instance_readme_generation.py create mode 100644 qualitytests/core/test_markdown_elements.py create mode 100644 qualitytests/core/test_readme_generator.py create mode 100644 tp_framework/core/readme_generator.py create mode 100644 tp_framework/core/readme_markdown_elements.py delete mode 100644 tp_framework/tmp.py diff --git a/qualitytests/core/test_instance.py b/qualitytests/core/test_instance.py index e8c6bfc..2161759 100644 --- a/qualitytests/core/test_instance.py +++ b/qualitytests/core/test_instance.py @@ -112,7 +112,7 @@ def test_to_dict(self): def test_get_description_from_file(self): test_pattern = create_instance() - test_pattern.description = "not None" + test_pattern.description = "file.md" expected_description = "Some description in a file\nTest description.\n\n" with patch("builtins.open", mock_open(read_data=expected_description), create=True), \ patch("pathlib.Path.is_file") as isfile_mock: @@ -132,4 +132,26 @@ def test_get_description_(self): is_file, actual = test_pattern.get_description() assert not is_file - assert expected_description.strip() == actual \ No newline at end of file + assert expected_description.strip() == actual + + path_properties_testcases = [ + (Path("/test")), Path("../tplib"), Path("/tpframework/tplib") + ] + + @pytest.mark.parametrize("new_path", path_properties_testcases) + def test_path_properties_are_relative_and_resolve_to_path_when_called(self, new_path: Path): + test_instance = create_instance() + test_instance.json_path = Path("./my_awesome_json.json") + test_instance.code_path = Path("./awesome_js_code.js") + test_instance.expectation_sink_file = Path("./awesome_js_code.js") + test_instance.expectation_source_file = 
Path("./awesome_js_code.js") + test_instance.compile_binary = None + test_instance.discovery_rule = Path("../test_scala.sc") + + test_instance.path = new_path + assert Path(new_path / "my_awesome_json.json").resolve() == test_instance.json_path + assert Path(new_path / "awesome_js_code.js").resolve() == test_instance.code_path + assert Path(new_path / "awesome_js_code.js").resolve() == test_instance.expectation_sink_file + assert Path(new_path / "awesome_js_code.js").resolve() == test_instance.expectation_source_file + assert test_instance.compile_binary is None + assert Path(new_path / "../test_scala.sc").resolve() == test_instance.discovery_rule \ No newline at end of file diff --git a/qualitytests/core/test_instance_readme_generation.py b/qualitytests/core/test_instance_readme_generation.py new file mode 100644 index 0000000..d11e30c --- /dev/null +++ b/qualitytests/core/test_instance_readme_generation.py @@ -0,0 +1,300 @@ +import pytest +from copy import deepcopy +from pathlib import Path +from unittest.mock import patch, mock_open + +from core.readme_generator import InstanceREADMEGenerator +from core.readme_markdown_elements import * +from qualitytests.qualitytests_utils import create_pattern + +class TestInstanceREADMEGenerator: + def _get_instance_readme_generator(self): + test_pattern = create_pattern() + instance_readme_gen = InstanceREADMEGenerator(test_pattern, None) + instance_readme_gen.current_instance = instance_readme_gen.pattern.instances[0] + return instance_readme_gen + + def test_instance_name(self): + instance_readme_gen = self._get_instance_readme_generator() + actual = instance_readme_gen._instance_name() + assert isinstance(actual, list) + assert 1 == len(actual) + assert isinstance(actual[0], MarkdownHeading) + assert 2 == actual[0].level + assert "1 Instance" == actual[0].content + + def test_instance_description(self): + instance_readme_gen = self._get_instance_readme_generator() + instance_readme_gen.current_instance.description = None 
+ actual1 = instance_readme_gen._instance_description() + assert [] == actual1 + + instance_readme_gen.current_instance.description = "some description" + actual2 = instance_readme_gen._instance_description() + assert isinstance(actual2, list) + assert 1 == len(actual2) + assert isinstance(actual2[0], MarkdownString) + + def test_instance_code_same_source_and_sink(self): + instance_readme_gen = self._get_instance_readme_generator() + expected_code = instance_readme_gen.current_instance.code_path + instance_readme_gen.current_instance.expectation_source_file = "code_file" + instance_readme_gen.current_instance.expectation_sink_file = "code_file" + with patch("core.readme_generator.InstanceREADMEGenerator._get_file_content_if_exists") as file_content_mock: + file_content_mock.return_value = "x = 1" + + actual1 = instance_readme_gen._instance_code() + file_content_mock.assert_called_once_with(expected_code) + assert isinstance(actual1, list) + assert 2 == len(actual1) + assert isinstance(actual1[0], MarkdownHeading) + assert 3 == actual1[0].level + assert "Code" == actual1[0].content + assert isinstance(actual1[1], MarkdownCode) + + with patch("core.readme_generator.InstanceREADMEGenerator._get_file_content_if_exists") as file_content_mock: + file_content_mock.return_value = None + + actual2 = instance_readme_gen._instance_code() + assert [] == actual2 + + def test_instance_code_different_source_and_sink(self): + instance_readme_gen = self._get_instance_readme_generator() + expected_code = instance_readme_gen.current_instance.code_path + instance_readme_gen.current_instance.expectation_source_file = "code_file_source" + instance_readme_gen.current_instance.expectation_sink_file = "code_file_sink" + with patch("core.readme_generator.InstanceREADMEGenerator._get_file_content_if_exists") as file_content_mock: + file_content_mock.return_value = "x = 1" + + actual1 = instance_readme_gen._instance_code() + file_content_mock.assert_called() + assert isinstance(actual1, list) 
+ assert 5 == len(actual1) + assert isinstance(actual1[0], MarkdownHeading) + assert 3 == actual1[0].level + assert "Code" == actual1[0].content + assert isinstance(actual1[1], MarkdownHeading) + assert 4 == actual1[1].level + assert "Source File" == actual1[1].content + assert isinstance(actual1[2], MarkdownCode) + assert isinstance(actual1[3], MarkdownHeading) + assert 4 == actual1[3].level + assert "Sink File" == actual1[3].content + assert isinstance(actual1[4], MarkdownCode) + + with patch("core.readme_generator.InstanceREADMEGenerator._get_file_content_if_exists") as file_content_mock: + file_content_mock.reset_mock() + file_content_mock.return_value = None + + actual2 = instance_readme_gen._instance_code() + file_content_mock.assert_called() + assert [] == actual2 + + def test_instance_properties(self): + instance_readme_gen = self._get_instance_readme_generator() + actual = instance_readme_gen._instance_properties() + assert isinstance(actual, list) + assert 2 == len(actual) + assert isinstance(actual[0], MarkdownHeading) + assert 3 == actual[0].level + assert "Instance Properties" == actual[0].content + assert isinstance(actual[1], MarkdownTable) + + def test_instance_more(self): + instance_readme_gen = self._get_instance_readme_generator() + actual = instance_readme_gen._instance_more() + + assert isinstance(actual, list) + assert 1 == len(actual) + assert isinstance(actual[0], MarkdownCollapsible) + + def test_compile(self): + instance_readme_gen = self._get_instance_readme_generator() + with patch("core.readme_generator.InstanceREADMEGenerator._get_file_content_if_exists") as file_content_mock: + file_content_mock.return_value = "binary" + actual1 = instance_readme_gen._compile() + + file_content_mock.assert_called_once() + assert isinstance(actual1, list) + assert 1 == len(actual1) + assert isinstance(actual1[0], MarkdownCollapsible) + + with patch("core.readme_generator.InstanceREADMEGenerator._get_file_content_if_exists") as file_content_mock: + 
file_content_mock.return_value = "" + actual2 = instance_readme_gen._compile() + + file_content_mock.assert_called_once() + assert [] == actual2 + + discovery_rule_example1 = """@main def main(name : String): Unit = { + importCpg(name) + val x1 = (name, "1_static_variables_iall", cpg.call(".*BIND_STATIC.*").location.toJson); + println(x1) + delete; + } """ + discovery_rule_example2 = """@main def main(name : String): Unit = { + importCpg(name) + // TODO: replace line below with your detection query + val x2 = (name, "ID_pattern_name_i1", cpg.method.l)}; + println(x2) + delete; + } + + + """ + expected_discovery_rule_example1 = discovery_rule_example1.split("\n")[2].strip() + expected_discovery_rule_example2 = "\n".join([l.strip() for l in discovery_rule_example2.split("\n")[2:4]]) + + discovery_rule_testcases = [ + (discovery_rule_example1, expected_discovery_rule_example1, "./discovery_rule1.sc", "Here some description", "Here some description", MarkdownCode), + (discovery_rule_example2, expected_discovery_rule_example2, "./discovery_rule2.sc", "", "", MarkdownCode), + ("", "No discovery rule yet.", None, None, "", MarkdownString), + ("print('Hello World')", "print('Hello World')", "./discovery_rule.py", "This is a python rule\n", "This is a python rule", MarkdownCode) + ] + + @pytest.mark.parametrize("dr_return, expected_dr, rule_path, desc, expected_desc, code_or_str", discovery_rule_testcases) + def test_discovery_rule_exists(self, dr_return, expected_dr, rule_path, desc, expected_desc, code_or_str): + instance_readme_gen = self._get_instance_readme_generator() + instance_readme_gen.current_instance.discovery_rule = rule_path + instance_readme_gen.current_instance.discovery_notes = desc + with patch("core.readme_generator.InstanceREADMEGenerator._get_file_content_if_exists") as file_content_mock: + file_content_mock.side_effect = [desc, dr_return] + actual = instance_readme_gen._discovery() + file_content_mock.assert_called() + assert isinstance(actual, list) + 
assert 1 == len(actual) + assert isinstance(actual[0], MarkdownCollapsible) + assert isinstance(actual[0].content, list) + assert 3 == len(actual[0].content) + assert isinstance(actual[0].content[0], MarkdownString) + assert expected_desc == actual[0].content[0].content + assert isinstance(actual[0].content[1], code_or_str) + assert expected_dr == actual[0].content[1].content + assert isinstance(actual[0].content[2], MarkdownTable) + assert isinstance(actual[0].heading, MarkdownHeading) + assert "Discovery" == actual[0].heading.content + assert 3 == actual[0].heading.level + + measurement_dict = { + "date": "1970-01-01 00:00:01", + "result": False, + "tool": "tool1", + "version": "saas", + "instance": "./JS/1_unset_element_array/1_instance_1_unset_element_array/1_instance_1_unset_element_array.json", + "pattern_id": 1, + "instance_id": 1, + "language": "JS" + } + invalid_test_measurement = deepcopy(measurement_dict) + invalid_test_measurement.pop("result") + no_measurements_and_invalid_measurements_testcases = [ + (None, None), + (Path("/"), [invalid_test_measurement]) + ] + + @pytest.mark.parametrize("measurement_paths, measurement_res", no_measurements_and_invalid_measurements_testcases) + def test_measurement_no_measurements_and_invalid_measurements(self, measurement_paths, measurement_res): + instance_readme_gen = self._get_instance_readme_generator() + instance_readme_gen.measurements = measurement_paths + with patch("core.utils.list_files") as list_files_mock, \ + patch("core.utils.read_json") as read_json_mock: + list_files_mock.return_value = ["file.json"] + actual = instance_readme_gen._measurement() + read_json_mock.return_value = measurement_res + + assert [] == actual + + + measure_testcases = [ + ({"tool1": "maskedTool1"}, [measurement_dict]), + ({}, [measurement_dict]), + ({}, [measurement_dict] + [measurement_dict]) + ] + + @pytest.mark.parametrize("mask, meas_results", measure_testcases) + def test_measurement(self, mask, meas_results): + 
instance_readme_gen = self._get_instance_readme_generator() + instance_readme_gen.measurements = Path("/") + instance_readme_gen.mask_dict = mask + with patch("core.utils.list_files") as list_files_mock, \ + patch("core.utils.read_json") as read_json_mock: + list_files_mock.return_value = ["file1.json"] + read_json_mock.return_value = meas_results + + actual = instance_readme_gen._measurement() + + list_files_mock.assert_called_once() + read_json_mock.assert_called_once_with("file1.json") + + assert isinstance(actual, list) + assert 1 == len(actual) + assert isinstance(actual[0], MarkdownCollapsible) + assert actual[0].is_open + assert isinstance(actual[0].heading, MarkdownHeading) + assert "Measurement" == actual[0].heading.content + assert isinstance(actual[0].content, list) + assert 1 == len(actual[0].content) + assert isinstance(actual[0].content[0], MarkdownTable) + if mask: + assert "tool1" not in actual[0].content[0].to_markdown() + else: + assert "tool1" in actual[0].content[0].to_markdown() + + default_note = "Can you think of a transformation, that makes this tarpit less challenging for SAST tools?" 
+ remediation_testcases = [ + (["", "", ""], [], []), + (["", "", "rule"], [MarkdownString, MarkdownHeading, MarkdownString], [default_note, "Modeling Rule", "rule"]), + (["", "transformation", ""], [MarkdownString, MarkdownHeading, MarkdownString], [default_note, "Transformation", "transformation"]), + (["", "transformation", "rule"], [MarkdownString, MarkdownHeading, MarkdownString, MarkdownHeading, MarkdownString], [default_note, "Transformation", "transformation", "Modeling Rule", "rule"]), + (["note", "", ""], [MarkdownString], ["note"]), + (["note", "", "rule"], [MarkdownString, MarkdownHeading, MarkdownString], ["note", "Modeling Rule", "rule"]), + (["note", "transformation", ""], [MarkdownString, MarkdownHeading, MarkdownString], ["note", "Transformation", "transformation"]), + (["note", "transformation", "rule"], [MarkdownString, MarkdownHeading, MarkdownString, MarkdownHeading, MarkdownString], ["note", "Transformation", "transformation", "Modeling Rule", "rule"]) + ] + + @pytest.mark.parametrize("get_file_content_ret, expected_classes, expected_content", remediation_testcases) + def test_remediation(self, get_file_content_ret: list, expected_classes: list, expected_content: list): + instance_readme_gen = self._get_instance_readme_generator() + with patch("core.readme_generator.InstanceREADMEGenerator._get_file_content_if_exists") as file_content_mock: + file_content_mock.side_effect = get_file_content_ret + + actual = instance_readme_gen._remediation() + + file_content_mock.assert_called() + if not expected_classes: + assert [] == actual + return + assert isinstance(actual, list) + assert 1 == len(actual) + assert isinstance(actual[0], MarkdownCollapsible) + assert isinstance(actual[0].heading, MarkdownHeading) + assert "Remediation" == actual[0].heading.content + assert isinstance(actual[0].content, list) + assert len(expected_classes) == len(actual[0].content) + assert expected_classes == [type(c) for c in actual[0].content] + assert expected_content 
== [c.content for c in actual[0].content] + + get_file_content_if_exists_testcases = [ + ("description", "description", None, False), + ("", None, None, False), + ("", "", "", False), + ("description in file", "file.md", "description in file", True), + ("description in file", "file2.md", "description in file\n\n", True) + ] + + @pytest.mark.parametrize("expected, file_path, file_content, is_file_ret", get_file_content_if_exists_testcases) + def test_get_file_content_if_exists(self, expected: str, file_path: str, file_content: str, is_file_ret: bool): + instance_readme_gen = self._get_instance_readme_generator() + with patch("builtins.open", mock_open(read_data=file_content), create=True), \ + patch("pathlib.Path.is_file") as is_file_mock: + is_file_mock.return_value = is_file_ret + actual = instance_readme_gen._get_file_content_if_exists(file_path) + assert expected == actual + + def test_mask(self): + instance_readme_gen = self._get_instance_readme_generator() + instance_readme_gen.mask_dict = {"tool2": "masked_tool2"} + assert "tool1" == instance_readme_gen._mask("tool1") + assert "masked_tool2" == instance_readme_gen._mask("tool2") + instance_readme_gen.mask_dict = {} + assert "tool2" == instance_readme_gen._mask("tool2") diff --git a/qualitytests/core/test_instance_repair_php.py b/qualitytests/core/test_instance_repair_php.py index 403f4a4..2da3e08 100644 --- a/qualitytests/core/test_instance_repair_php.py +++ b/qualitytests/core/test_instance_repair_php.py @@ -104,5 +104,3 @@ def test_repair_source_line_sink_line(self, source_sink_ret, warning, exp_source assert exp_source == test_instance_php_repair.instance.expectation_source_line assert exp_sink == test_instance_php_repair.instance.expectation_sink_line - - diff --git a/qualitytests/core/test_markdown_elements.py b/qualitytests/core/test_markdown_elements.py new file mode 100644 index 0000000..3e478e0 --- /dev/null +++ b/qualitytests/core/test_markdown_elements.py @@ -0,0 +1,43 @@ +from 
core.readme_markdown_elements import * + + +class TestMarkdownElements: + + def test_markdown_code(self): + code = MarkdownCode('\n\nMore\n\n\nHello\n\n\n' == coll.to_markdown() + + def test_markdown_string(self): + s = MarkdownString("Test") + assert "\nTest\n" == s.to_markdown() + + def test_markdown_link(self): + link = MarkdownLink("Test", MarkdownHeading("Heading 1", 3)) + assert "[Test](#heading-1)" == link.to_markdown() + + def test_markdown_table(self): + test_content = {"0::column1": ["value1", "value1.1"], "column2": ["value2"]} + tab = MarkdownTable(test_content) + expected_tab = "\n| column1 | column2 |\n" + expected_tab += "|-----------|-----------|\n" + expected_tab += "| value1 | value2 |\n" + expected_tab += "| value1.1 | |\n" + assert expected_tab == tab.to_markdown() + + def test_markdown_document(self): + coll = MarkdownCollapsible([MarkdownString("Hello")], MarkdownString("More")) + doc = MarkdownDocument([coll]) + assert '
\n\nMore\n\nHello\n\n
\n' == doc.to_markdown() + diff --git a/qualitytests/core/test_pattern.py b/qualitytests/core/test_pattern.py index 0c093bd..0c5b55a 100644 --- a/qualitytests/core/test_pattern.py +++ b/qualitytests/core/test_pattern.py @@ -7,7 +7,7 @@ from core.exceptions import PatternDoesNotExists, PatternInvalid, InstanceDoesNotExists from qualitytests.qualitytests_utils import join_resources_path, create_pattern, example_pattern_dict -class TestPatternR: +class TestPattern: sample_tp_lib: Path = join_resources_path("sample_patlib") example_pattern_dict = { @@ -39,7 +39,7 @@ class TestPatternR: @pytest.mark.parametrize("pattern_id, language", not_existing_patterns) def test_not_exising_pattern_init_from_id_and_language(self, pattern_id: int, language: str): with pytest.raises(PatternDoesNotExists) as e_info: - Pattern.init_from_id_and_language(pattern_id, language, TestPatternR.sample_tp_lib) + Pattern.init_from_id_and_language(pattern_id, language, TestPattern.sample_tp_lib) assert f"Specified Pattern `{pattern_id}` does not exists." == str(e_info.value) @pytest.mark.parametrize("pattern_id, language, read_json_return, expected_assertion_error", invalid_patterns) @@ -51,7 +51,7 @@ def test_init_invalid_pattern_from_id_and_language(self, pytest.raises(PatternInvalid) as e_info: read_json_mock.return_value = read_json_return - Pattern.init_from_id_and_language(pattern_id, language, TestPatternR.sample_tp_lib) + Pattern.init_from_id_and_language(pattern_id, language, TestPattern.sample_tp_lib) read_json_mock.assert_called_once() assert f"{expected_assertion_error} Pattern is invalid." 
== str(e_info.value) @@ -67,8 +67,8 @@ def test_init_from_json_file_without_pattern_id(self, path_to_json: Path, langua is_dir_mock.return_value = True is_file_mock.return_value = True isinstance_mock.return_value = True - read_json_mock.return_value = TestPatternR.example_pattern_dict - pattern = Pattern.init_from_json_file_without_pattern_id(path_to_json, language, pattern_path, TestPatternR.sample_tp_lib) + read_json_mock.return_value = TestPattern.example_pattern_dict + pattern = Pattern.init_from_json_file_without_pattern_id(path_to_json, language, pattern_path, TestPattern.sample_tp_lib) read_json_mock.assert_called_once() is_file_mock.assert_called() is_dir_mock.assert_called() @@ -93,7 +93,7 @@ def test_init_valid_pattern_from_id_and_language(self, pattern_id: int, language is_file_mock.return_value = True isinstance_mock.return_value = True read_json_mock.return_value = read_json_return - test_pattern = Pattern.init_from_id_and_language(pattern_id, language, TestPatternR.sample_tp_lib) + test_pattern = Pattern.init_from_id_and_language(pattern_id, language, TestPattern.sample_tp_lib) read_json_mock.assert_called_once() is_file_mock.assert_called() @@ -141,27 +141,22 @@ def test_get_instance_by_id(self): test_pattern.get_instance_by_id(2) assert "Specified Pattern Instance `2` does not exists." in str(e_info) - def test_get_description_from_file(self): - test_pattern = create_pattern() - expected_description = "Some description in a file\nTest description.\n\n" - with patch("builtins.open", mock_open(read_data=expected_description), create=True), \ - patch("pathlib.Path.is_file") as isfile_mock: - - isfile_mock.return_value = True + get_description_testcases = [ + ("Some description\n", None, "Some description", False), + ("file.md", "Some description inside a file\nTest description. 
", "Some description inside a file\nTest description.", True), + (None, None, "", False) + ] - is_file, actual = test_pattern.get_description() - assert is_file - assert expected_description.strip() == actual - def test_get_description_(self): + @pytest.mark.parametrize("file_path, description, expected_desc, is_file", get_description_testcases) + def test_get_description_from_file(self, file_path, description, expected_desc, is_file): test_pattern = create_pattern() - expected_description = "Some description in a file\nTest description." - test_pattern.description = expected_description - with patch("pathlib.Path.is_file") as isfile_mock: - isfile_mock.return_value = False - - is_file, actual = test_pattern.get_description() - assert not is_file - assert expected_description.strip() == actual + test_pattern.description = file_path + with patch("builtins.open", mock_open(read_data=description), create=True), \ + patch("pathlib.Path.is_file") as isfile_mock: + isfile_mock.return_value = is_file + actual_is_file, actual = test_pattern.get_description() + assert is_file == actual_is_file + assert expected_desc == actual diff --git a/qualitytests/core/test_pattern_operations.py b/qualitytests/core/test_pattern_operations.py index d25997a..c0e68db 100644 --- a/qualitytests/core/test_pattern_operations.py +++ b/qualitytests/core/test_pattern_operations.py @@ -1,349 +1,77 @@ -import json from datetime import datetime -from json import JSONDecodeError -from pathlib import Path, WindowsPath -from typing import Dict +from pathlib import Path +from unittest.mock import patch, mock_open import pytest -from freezegun import freeze_time - from core import pattern_operations -from core.exceptions import PatternValueError -from core.instance import PatternCategory, FeatureVsInternalApi, Instance from core.measurement import Measurement -from core.pattern import Pattern - - -def setup_three_pattern(tmp_path: Path): - language: str = "PHP" - tmp_tp_path: Path = tmp_path / language - 
tmp_tp_path.mkdir() - p1 = tmp_tp_path / "1_pattern_one" - p2 = tmp_tp_path / "2_pattern_two" - p3 = tmp_tp_path / "3_pattern_three" - p1.mkdir() - p2.mkdir() - p3.mkdir() - - pattern1: Dict = { - "name": "Pattern One", - "description": "", - "family": "None", - "tags": [], - "instances": [ - "./1_instance_1_pattern_one/1_instance_1_pattern_one.json", - "./2_instance_1_pattern_one/2_instance_1_pattern_one.json" - ] - } - with open(p1 / (p1.name + ".json"), "w") as pattern_json_file: - json.dump(pattern1, pattern_json_file, indent=4) - - pattern2: Dict = { - "name": "Pattern Two", - "description": "", - "family": "None", - "tags": [], - "instances": [ - "./1_instance_2_pattern_two/1_instance_2_pattern_two.json", - "./2_instance_2_pattern_two/2_instance_2_pattern_two.json" - ] - } - with open(p2 / (p2.name + ".json"), "w") as pattern_json_file: - json.dump(pattern2, pattern_json_file, indent=4) - - pattern3: Dict = { - "name": "Pattern Three", - "description": "", - "family": "None", - "tags": [], - "instances": [ - "./1_instance_3_pattern_three/1_instance_3_pattern_three.json", - "./2_instance_3_pattern_three/2_instance_3_pattern_three.json" - ] - } - with open(p3 / (p3.name + ".json"), "w") as pattern_json_file: - json.dump(pattern3, pattern_json_file, indent=4) - - return language, tmp_tp_path, p1, p2, p3 - - -def setup_two_instances(p_path: Path): - pi1_path = p_path / ("1_instance_" + p_path.name) - pi2_path = p_path / ("2_instance_" + p_path.name) - pi1_path.mkdir() - pi2_path.mkdir() - - instance_dict: Dict = { - "code": "./instance_one.php", - "discovery": { - "rule": "./instance_one.sc", - "method": None, - "rule_accuracy": None - }, - "transformation": "", - "version": "1", - "compile": { - "binary": "./instance_one.bash", - "instruction": None - }, - "expectation": { - "type": "xss", - "sink_file": "./instance_one.php", - "sink_line": 18, - "source_file": "./instance_one.php", - "source_line": 17, - "expectation": True - }, - "properties": { - "category": 
"D2", - "feature_vs_internal_api": "FEATURE", - "input_sanitizer": False, - "source_and_sink": False, - "negative_test_case": False - }, - "measurements": [] - } - - with open(pi1_path / (pi1_path.name + ".json"), "w") as instance_json_file: - json.dump(instance_dict, instance_json_file, indent=4) - - with open(pi2_path / (pi2_path.name + ".json"), "w") as instance_json_file: - json.dump(instance_dict, instance_json_file, indent=4) - return pi1_path, pi2_path +from qualitytests.qualitytests_utils import create_pattern, join_resources_path class TestPatternOperations: - - # TODO: most of these tests need to be updated and do not work - - def test_add_testability_pattern_to_lib(self, tmp_path): - language, tmp_tp_path, p1, p2, p3 = setup_three_pattern(tmp_path) - pattern: Dict = { - "name": "Try Catch Finally", - "description": "", - "family": "None", - "tags": [], - "instances": [ - "./1_instance_52_try_catch_finally/1_instance_52_try_catch_finally.json", - "./2_instance_52_try_catch_finally/2_instance_52_try_catch_finally.json" - ] - } - - pattern_operations.add_testability_pattern_to_lib(language, pattern, None, tmp_path) - - expected_new_pattern_path: Path = tmp_tp_path / "4_try_catch_finally" - expected_new_pattern_json_path: Path = expected_new_pattern_path / "4_try_catch_finally.json" - with open(expected_new_pattern_json_path) as json_file: - pattern_from_tp_lib = json.load(json_file) - - assert pattern["name"] == pattern_from_tp_lib["name"] - assert pattern_from_tp_lib["instances"] == [] - - def test_add_testability_pattern_to_lib_with_value_error(self, tmp_path): - language, tmp_tp_path, p1, p2, p3 = setup_three_pattern(tmp_path) - pattern: Dict = { - "description": "", - "family": "None", - "tags": [], - "instances": [ - "./1_instance_52_try_catch_finally/1_instance_52_try_catch_finally.json", - "./2_instance_52_try_catch_finally/2_instance_52_try_catch_finally.json" - ] - } - - with pytest.raises(PatternValueError): - 
pattern_operations.add_testability_pattern_to_lib(language, pattern, None, tmp_path) - - def test_add_testability_pattern_to_lib_from_json(self, tmp_path): - language, tmp_tp_path, p1, p2, p3 = setup_three_pattern(tmp_path) - json_path: Path = ( - Path(__file__).resolve().parent / "testing_samples" / "sample_pattern" / "try_catch_finally.json" - ) - - pattern_operations.add_testability_pattern_to_lib_from_json(language, json_path, json_path.parent, tmp_path) - - actual_pattern_path: Path = tmp_tp_path / "4_try_catch_finally" - actual_pattern_json_path: Path = actual_pattern_path / "4_try_catch_finally.json" - with open(actual_pattern_json_path) as json_file: - actual_pattern = json.load(json_file) - - with open(json_path) as json_file: - expected_pattern = json.load(json_file) - - assert expected_pattern["name"] == actual_pattern["name"] - assert actual_pattern["instances"] == [ - './1_instance_4_try_catch_finally/1_instance_4_try_catch_finally.json', - './2_instance_4_try_catch_finally/2_instance_4_try_catch_finally.json' - ] - - def test_add_testability_pattern_to_lib_from_json_bad_encoding(self, tmp_path): - language, tmp_tp_path, p1, p2, p3 = setup_three_pattern(tmp_path) - json_path: Path = ( - Path(__file__).resolve().parent / "testing_samples" / "sample_broken_pattern" / "try_catch_finally_broken.json" - ) - - with pytest.raises(JSONDecodeError): - pattern_operations.add_testability_pattern_to_lib_from_json(language, json_path, json_path.parent, tmp_path) - - def test_add_testability_pattern_to_lib_from_json_with_missing_field(self, tmp_path): - language, tmp_tp_path, p1, p2, p3 = setup_three_pattern(tmp_path) - json_path: Path = ( - Path(__file__).resolve().parent / "testing_samples" / "sample_broken_pattern" / "try_catch_finally.json" - ) - - with pytest.raises(PatternValueError): - pattern_operations.add_testability_pattern_to_lib_from_json(language, json_path, json_path.parent, tmp_path) - - def test_add_tp_instance_to_lib(self, tmp_path): - language, 
tmp_tp_path, p1, p2, p3 = setup_three_pattern(tmp_path) - pi1 = p2 / ("1_instance" + p2.name) - pi2 = p2 / ("2_instance" + p2.name) - pi1.mkdir() - pi2.mkdir() - - pattern = Pattern("Try Catch Finally", language, [], "FAMILY", "TestDesc", [], pattern_id=4) - - source_path: Path = Path(__file__).resolve().parent / "testing_samples" / "sample_pattern" - - exp_instance: Dict = { - "code": "./instance_one.php", - "discovery": { - "rule": "./instance_one.sc", - "method": None, - "rule_accuracy": None - }, - "transformation": "", - "version": "1", - "compile": { - "binary": "./instance_one.bash", - "instruction": None - }, - "expectation": { - "type": "xss", - "sink_file": "./instance_one.php", - "sink_line": 18, - "source_file": "./instance_one.php", - "source_line": 17, - "expectation": True - }, - "properties": { - "category": "D2", - "feature_vs_internal_api": "FEATURE", - "input_sanitizer": False, - "source_and_sink": False, - "negative_test_case": False - }, - "measurements": [] - } - - pattern_operations.add_tp_instance_to_lib(language, pattern, exp_instance, "instance_one", source_path, - tmp_path) - - actual_instance_path: Path = tmp_tp_path / "4_try_catch_finally" / "1_instance_4_try_catch_finally" - actual_pattern_json_path: Path = actual_instance_path / "1_instance_4_try_catch_finally.json" - with open(actual_pattern_json_path) as act_json_file: - actual_instance = json.load(act_json_file) - - assert exp_instance["expectation"]["type"] == actual_instance["expectation"]["type"] - assert actual_instance["code"] == "./instance_one.php" - assert actual_instance["compile"]["binary"] == "./instance_one.bash" - assert actual_instance["expectation"]["sink_file"] == "./instance_one.php" - assert actual_instance["expectation"]["source_file"] == "./instance_one.php" - assert actual_instance["discovery"]["rule"] == "./instance_one.sc" - - @freeze_time(datetime.now()) - async def test_add_measurement_for_pattern(self, tmp_path, mocker): - language, tmp_tp_path, p1, p2, p3 
= setup_three_pattern(tmp_path) - pi1 = p2 / ("1_instance_" + p2.name) - pi1.mkdir() - instance_dict: Dict = { - "code": "./instance_one.php", - "discovery": { - "rule": "./instance_one.sc", - "method": None, - "rule_accuracy": None - }, - "transformation": "", - "version": "1", - "compile": { - "binary": "./instance_one.bash", - "instruction": None - }, - "expectation": { - "type": "xss", - "sink_file": "./instance_one.php", - "sink_line": 18, - "source_file": "./instance_one.php", - "source_line": 17, - "expectation": True - }, - "properties": { - "category": "D2", - "feature_vs_internal_api": "FEATURE", - "input_sanitizer": False, - "source_and_sink": False, - "negative_test_case": False - }, - "measurements": [] - } - - with open(pi1 / (pi1.name + ".json"), "w") as instance_json_file: - json.dump(instance_dict, instance_json_file, indent=4) - - pi1_meas: Path = tmp_path / "measurements" / language / "2_pattern_two/1_instance_2_pattern_two" - - current_time: datetime = datetime.now() - date_time_str_file = current_time.strftime("%Y-%m-%d_%H-%M-%S") - date_time_str = current_time.strftime("%Y-%m-%d %H:%M:%S") - exp_instance1: Instance = Instance( - name='Pattern Two', - definition='What happens for the variables inside the function when the function finish simply they die! and if we run the function again, we will have new variables. But if we want to keep the variable life, we have to use static. 
At the same time, static variables are challenges for the scanners, because the scanner has to record the last value for the variable with the last call for the function.', - family="", - tags=[], - instances=[Path('./1_instance_2_pattern_two/1_instance_2_pattern_two.json')], - language='PHP', - pattern_id=2, - code=Path('1_instance_2_pattern_two.php'), - compile_binary=Path('1_instance_2_pattern_two.bash'), - version='1', - properties_category=PatternCategory.S0, - properties_negative_test_case=False, - properties_source_and_sink=False, - properties_input_sanitizer=False, - properties_feature_vs_internal_api=FeatureVsInternalApi.FEATURE, - expectation=True, - discovery_rule=Path('1_instance_2_pattern_two.sc'), - discovery_method="", - discovery_rule_accuracy="", - expectation_type='xss', - expectation_sink_file=Path('1_instance_2_pattern_two.php'), - expectation_sink_line=5, - expectation_source_file=Path('1_instance_2_pattern_two.php'), - expectation_source_line=9, - instance_id=1, - ) - - exp_measurements: list[Measurement] = [ - Measurement( - date=date_time_str, - result=True, - expected_result=True, - tool="dummyTool", - version="1", - instance=exp_instance1, - - ) - ] - - mocker.patch("core.pattern_operations.analysis.analyze_pattern_instance", return_value=exp_measurements) - sast_tools: Dict = { - "name": "dummyTool", - "version": "1" + def test_add_testability_pattern_to_lib(self): + test_pattern = create_pattern() + json_path = test_pattern.json_path + pattern_dir = test_pattern.path + tp_lib_dest = Path("/tp_framework/tp_lib") + with patch("core.pattern.Pattern.init_from_json_file_without_pattern_id") as init_pattern_mock, \ + patch("core.pattern.Pattern.copy_to_tplib") as copy_mock, \ + patch("core.pattern_operations.logger.info") as logger_info_mock: + init_pattern_mock.return_value = test_pattern + + pattern_operations.add_testability_pattern_to_lib_from_json("js", json_path, pattern_dir, tp_lib_dest) + + 
init_pattern_mock.assert_called_once_with(json_path, "js", pattern_dir, tp_lib_dest) + copy_mock.assert_called_once() + logger_info_mock.assert_called_once_with(f"The pattern has been copied to {pattern_dir}, You might need to adjust relative path links.") + + @pytest.mark.asyncio + async def test_add_measurement_for_pattern(self): + sample_tp_lib: Path = join_resources_path("sample_patlib") + test_pattern = create_pattern() + now = datetime.now() + with patch("core.pattern.Pattern.init_from_id_and_language") as pattern_init_mock, \ + patch("core.pattern_operations.logger.warning") as warn_logger_mock, \ + patch("core.analysis.analyze_pattern_instance") as analyze_mock: + pattern_init_mock.return_value = test_pattern + await pattern_operations.start_add_measurement_for_pattern("js", [{"dummyTool": "saas"}], 1, now, sample_tp_lib, Path("non_existing_dir")) + + pattern_init_mock.assert_called_once_with(1, "js", sample_tp_lib) + warn_logger_mock.assert_not_called() + analyze_mock.assert_awaited_once_with(test_pattern.instances[0], [{"dummyTool": "saas"}], "js", now, Path("non_existing_dir")) + + @pytest.mark.asyncio + async def test_save_measurement_for_pattern(self): + test_pattern = create_pattern() + fake_measurement = Measurement(datetime.now(), False, True, "some_tool", "saas", test_pattern.instances[0]) + open_mock = mock_open() + with patch("core.pattern_operations.job_list_to_dict") as job_list_to_dict_mock, \ + patch("core.analysis.inspect_analysis_results") as inspect_analysis_results_mock, \ + patch("core.pattern_operations.meas_list_to_tp_dict") as meas_list_to_tp_dict_mock, \ + patch("core.pattern.Pattern.init_from_id_and_language") as pattern_init_mock, \ + patch("core.utils.get_measurement_dir_for_language") as measurement_dir_for_lang_mock, \ + patch("pathlib.Path.mkdir") as mkdir_mock, \ + patch("builtins.open", open_mock, create=True), \ + patch("json.dump") as json_dump_mock: + + meas_list_to_tp_dict_mock.return_value = {1: {1: [fake_measurement]}} 
+ measurement_dir_for_lang_mock.return_value = Path("/") + pattern_init_mock.return_value = test_pattern + await pattern_operations.save_measurement_for_patterns("js", datetime.now(), ["list_of_sast_jobs"], Path("samplelib")) + + job_list_to_dict_mock.assert_called_once_with(["list_of_sast_jobs"]) + inspect_analysis_results_mock.assert_called_once_with(job_list_to_dict_mock.return_value, "js") + meas_list_to_tp_dict_mock.assert_called_with(inspect_analysis_results_mock.return_value) + pattern_init_mock.assert_called_once_with(1, "js", Path("samplelib")) + measurement_dir_for_lang_mock.assert_called_once_with(Path("samplelib"), "js") + mkdir_mock.assert_called_once() + d_tpi_meas_expected = { + "pattern_id": 1, + "instance_id": 1, + "language": "JS", + "instance": "keks" } - - await pattern_operations.start_add_measurement_for_pattern(language, [sast_tools], 2, tmp_path, tmp_path) - assert list(pi1_meas.iterdir())[0].name == "measurement-{}.json".format(date_time_str_file) - - with open(list(pi1_meas.iterdir())[0]) as meas_json: - assert len(json.load(meas_json)) == 1 + d_tpi_meas_expected.update(vars(fake_measurement)) + l_tpi_meas_expected = [d_tpi_meas_expected] + json_dump_mock.assert_called_once_with(l_tpi_meas_expected, open_mock.return_value, indent=4) diff --git a/qualitytests/core/test_readme_generator.py b/qualitytests/core/test_readme_generator.py new file mode 100644 index 0000000..d25ce65 --- /dev/null +++ b/qualitytests/core/test_readme_generator.py @@ -0,0 +1,181 @@ +import pytest +from pathlib import Path +from unittest.mock import patch + +from core.readme_generator import READMEGenerator +from core.readme_markdown_elements import * +from qualitytests.qualitytests_utils import create_pattern, join_resources_path + +class TestREADMEGenerator: + + def _get_readme_generator(self): + test_pattern = create_pattern() + with patch("pathlib.Path.is_dir") as is_dir_mock, \ + patch("core.utils.read_json") as read_json_mock, \ + 
patch("core.utils.read_csv_to_dict") as csv_to_dict_mock: + is_dir_mock.return_value = True + read_json_mock.return_value = {} + csv_to_dict_mock.return_value = {"JS": {"1": {"1": "yes"}}} + + readme_generator = READMEGenerator(test_pattern, "discovery.csv", Path("dont_care"), "mask.json") + + is_dir_mock.assert_called_once() + read_json_mock.assert_called_once() + csv_to_dict_mock.assert_called_once() + return readme_generator + + + init_readme_generator_testcases = [ + # everyting alright + ("discovery.csv", {"JS": {"1": {"1": True}}}, True, "mask.json", None), + # Language "JS" not in discovery dict + ("discovery.csv", {"AWESOME": {"1": {"1": True}}}, True, "mask.json", "Generating README for JS - p1: Cannot find discovery rule results for language JS"), + # discovery dict of language is not of type dict + ("discovery.csv", {"JS": None}, True, "mask.json", "Generating README for JS - p1: Cannot find discovery rule results for language JS"), + # no measurement results + ("discovery.csv", {"JS": {"1": {"1": True}}}, False, "mask.json", "Generating README for JS - p1: Cannot locate `measurement_results` in 'dont_care'"), + ] + + @pytest.mark.parametrize("dr_file, dr_res, is_dir, mask_file, warn", init_readme_generator_testcases) + def test_init_readme_generator_discovery_results(self, dr_file, dr_res, is_dir, mask_file, warn): + test_pattern = create_pattern() + with patch("pathlib.Path.is_dir") as is_dir_mock, \ + patch("core.readme_generator.logger.warning") as warn_logger, \ + patch("core.utils.read_json") as read_json_mock, \ + patch("core.utils.read_csv_to_dict") as csv_to_dict_mock: + is_dir_mock.return_value = is_dir + csv_to_dict_mock.return_value = dr_res + + readme_generator = READMEGenerator(test_pattern, dr_file, Path("dont_care"), mask_file) + + is_dir_mock.assert_called_once() + csv_to_dict_mock.assert_called_once_with(dr_file) + if warn: + warn_logger.assert_called_once_with(warn) + else: + warn_logger.assert_not_called() + + 
read_json_mock.assert_called_once_with(mask_file) + assert read_json_mock.return_value == readme_generator.mask + + def test_comment(self): + test_readme_gen = self._get_readme_generator() + actual = test_readme_gen._comment() + assert isinstance(actual, list) + assert 1 == len(actual) + assert isinstance(actual[0], MarkdownComment) + + def test_heading(self): + test_readme_gen = self._get_readme_generator() + actual = test_readme_gen._heading() + assert isinstance(actual, list) + assert 1 == len(actual) + assert isinstance(actual[0], MarkdownHeading) + assert 1 == actual[0].level + assert "Test Pattern" == actual[0].content + + def test_description(self): + test_readme_gen = self._get_readme_generator() + with patch("core.pattern.Pattern.get_description") as get_description_mock: + get_description_mock.return_value = (True, "test") + actual = test_readme_gen._pattern_description() + get_description_mock.assert_called_once() + assert isinstance(actual, list) + assert 2 == len(actual) + assert isinstance(actual[0], MarkdownHeading) + assert 2 == actual[0].level + assert "Description" == actual[0].content + assert isinstance(actual[1], MarkdownString) + assert "test" == actual[1].content + + def test_tags(self): + test_readme_gen = self._get_readme_generator() + actual = test_readme_gen._tags() + assert isinstance(actual, list) + assert 2 == len(actual) + assert isinstance(actual[0], MarkdownString) + assert "Tags" in actual[0].content + assert isinstance(actual[1], MarkdownString) + assert "Version" in actual[1].content + + def test_pattern_metadata_including_discovery_rule_results(self): + test_readme_gen = self._get_readme_generator() + with patch("pathlib.Path.is_file") as is_file_mock, \ + patch("core.utils.translate_bool") as translate_bool_mock: + actual = test_readme_gen._pattern_metadata() + assert isinstance(actual, list) + assert 2 == len(actual) + assert isinstance(actual[0], MarkdownHeading) + assert 2 == actual[0].level + assert "Overview" == 
actual[0].content + assert isinstance(actual[1], MarkdownTable) + assert "rule successfull" in actual[1].to_markdown() + + def test_pattern_metadata_without_discovery_rule_results(self): + test_readme_gen = self._get_readme_generator() + test_readme_gen.discovery_rule_results = None + with patch("pathlib.Path.is_file") as is_file_mock, \ + patch("core.utils.translate_bool") as translate_bool_mock: + actual = test_readme_gen._pattern_metadata() + assert isinstance(actual, list) + assert 2 == len(actual) + assert isinstance(actual[0], MarkdownHeading) + assert 2 == actual[0].level + assert "Overview" == actual[0].content + assert isinstance(actual[1], MarkdownTable) + assert "rule successfull" not in actual[1].to_markdown() + + def test_instances(self): + test_readme_gen = self._get_readme_generator() + with patch("core.readme_generator.InstanceREADMEGenerator.generate_md") as generate_md_mock: + actual = test_readme_gen._instances() + generate_md_mock.assert_called_once() + assert generate_md_mock.return_value == actual + + def test_generate_readme(self): + # Could actually assert the complete readme. 
+ # at the moment only assert, that the function works in general + test_readme_gen = self._get_readme_generator() + test_readme_gen.measurement_results = None + test_readme_gen.generate_README() + + # integration test + def test_generate_complete_readme(self): + from core.pattern import Pattern + from core.measurement import Measurement + sample_tp_lib = join_resources_path("sample_patlib") + test_pattern = Pattern.init_from_id_and_language(1, "php", sample_tp_lib) + + with patch("pathlib.Path.is_dir") as is_dir_mock, \ + patch("core.utils.read_json") as mask_json_mock, \ + patch("core.utils.read_csv_to_dict") as discovery_rule_results: + is_dir_mock.return_value = True + mask_json_mock.return_value = {"tool1": "masked_tool"} + discovery_rule_results.return_value = {"PHP": {"1": {"1": "yes", "2": "no"}}} + + readme_generator = READMEGenerator(test_pattern, "discovery.csv", Path("dont_care"), "mask.json") + + is_dir_mock.assert_called_once() + mask_json_mock.assert_called_once() + discovery_rule_results.assert_called_once() + + measurement1 = Measurement("1970-01-01 00:00:01", False, False, "tool1", "saas") + measurement2 = Measurement("1970-01-01 00:00:01", False, True, "tool2", "v2") + measurement3 = Measurement("2023-01-01 00:00:01", True, False, "tool1", "saas") + measurement4 = Measurement("2023-01-01 00:00:01", True, True, "tool2", "v2") + + with patch("core.utils.list_files") as list_files_mock, \ + patch("core.utils.read_json") as read_json_mock, \ + patch("core.measurement.Measurement.init_from_measurement_dict") as measurement_mock: + list_files_mock.return_value = ["file1.md", "file2.md"] + measurement_mock.side_effect = [measurement1, measurement2, measurement3, measurement4] + read_json_mock.return_value = [{}, {}] + + actual = readme_generator.generate_README() + + path_to_expected_readme = sample_tp_lib / "PHP" / "1_static_variables" / "README.md" + with open(path_to_expected_readme, "r") as fp: + expected = fp.read() + + assert expected == actual + 
diff --git a/qualitytests/core/test_utils.py b/qualitytests/core/test_utils.py index 049461d..2b6b4fa 100644 --- a/qualitytests/core/test_utils.py +++ b/qualitytests/core/test_utils.py @@ -5,7 +5,7 @@ import config from core import utils from core.exceptions import PatternDoesNotExists, TPLibDoesNotExist, LanguageTPLibDoesNotExist, DiscoveryMethodNotSupported -from unittest.mock import patch +from unittest.mock import patch, mock_open import qualitytests.qualitytests_utils as qualitytests_utils def setup_three_pattern(tmp_path: Path): @@ -184,3 +184,29 @@ def test_get_json_file(self, path, expected, list_file_return, warn): warn_logger.assert_called_with(warn) else: warn_logger.assert_not_called() + + def test_read_csv_to_dict(self): + csv_data = """pattern_id,instance_id,instance_path,pattern_name,language,discovery_rule,successful + 1,1,,,JS,,no + 1,2,/some/path,Test Pattern,PHP,discovery_rule.sc,yes + """ + expected = { + "JS": { + "1": {"1": "no"} + }, + "PHP": { + "1": {"2": "yes"} + } + } + with patch("builtins.open", mock_open(read_data=csv_data), create=True): + actual = utils.read_csv_to_dict(Path("some_path")) + + assert expected == actual + with pytest.raises(Exception): + actual["NOT_EXISTING_LANG"] + actual["PHP"]["5"] + actual["PHP"]["1"]["3"] + + def test_translate_bool(self): + assert "YES" == utils.translate_bool(True) + assert "NO" == utils.translate_bool(False) diff --git a/qualitytests/resources/sample_patlib/PHP/1_static_variables/1_instance_1_static_variables/1_instance_1_static_variables.json b/qualitytests/resources/sample_patlib/PHP/1_static_variables/1_instance_1_static_variables/1_instance_1_static_variables.json index 71ac204..93a7a98 100644 --- a/qualitytests/resources/sample_patlib/PHP/1_static_variables/1_instance_1_static_variables/1_instance_1_static_variables.json +++ b/qualitytests/resources/sample_patlib/PHP/1_static_variables/1_instance_1_static_variables/1_instance_1_static_variables.json @@ -10,7 +10,7 @@ "notes": "The 
`BIND_STATIC` opcode is only for static variables that are normally used inside code blocks. The SAST tools may not able to keep the proper values for these static variables. As such the discovery rule should be accurate as it is" }, "remediation": { - "notes": "./docs/remediation_notes.md", + "notes": null, "transformation": null, "modeling_rule": null }, diff --git a/qualitytests/resources/sample_patlib/PHP/1_static_variables/README.md b/qualitytests/resources/sample_patlib/PHP/1_static_variables/README.md index 08f4d24..dfa467c 100644 --- a/qualitytests/resources/sample_patlib/PHP/1_static_variables/README.md +++ b/qualitytests/resources/sample_patlib/PHP/1_static_variables/README.md @@ -1,45 +1,55 @@ -# Pattern: Static Variables +[//]: # (This file is automatically generated. If you wish to make any changes, please use the JSON files and regenerate this file using the tpframework.) -## Category +# Static Variables -Variables +Tags: sast, php, php_v7.4.9 -## Definition -What happens for the variables inside the function when the function finish simply they die! and if we run the function again, we will have new variables. But if we want to keep the variable life, we have to use static. At the same time, static variables are challenges for the scanners, because the scanner has to record the last value for the variable with the last call for the function. +Version: None -## Instances +## Description -### Instance 1 +This pattern targets `static` variables. When a function terminates, its local variables are destroyed. When we run that function again, new local variables will be allocated. If we want to keep one of those variables alive, we can use the `static` keyword. Static variables may be challenging for SAST tools: is a SAST tool considering a static variable used in a function as alive? 
-- CATEGORY: S0 -- FEATURE vs INTERNAL API: FEATURE -- INPUT SANITIZERS: NO -- SOURCES AND SINKS: NO -- NEGATIVE TEST CASES: NO -- CODE: +## Overview + +| Instances | has discovery rule | discovery method | rule successfull | +|---------------------------|----------------------|--------------------|--------------------| +| [1 Instance](#1-instance) | YES | joern | yes | + +## 1 Instance + +### Code ```php + +More -| Tool | RIPS | phpSAFE | WAP | Progpilot | Comm_1 | Comm_2 | Correct | -| ------------- | ---- | ------- | ---- | --------- | ------- | --------- | ------- | -| Vulnerability | NO | NO | NO | NO | NO | NO | YES | -Measurements Date: 8 June 2021 +
+ -- OPCODE: +### Compile + + ```bash $_main: ; (lines=13, args=0, vars=1, tmps=5) @@ -74,16 +84,39 @@ L8 (7): EXT_STMT L9 (7): RETURN null ``` -- DISCOVERY: +
-In this pattern, I focus on the static variables in functions not the static properties in objects nor static methods. To discover the static variables in opcode, I search for the opcode BIND_STATIC. -```bash -cpg.call(".*BIND_STATIC.*").location.l -``` -For regex, I can search for the keyword but I cannot distinguish between static variables and static properties. -- PRECONDITIONS: - 1. -- TRANSFORMATION: +
+ + +### Discovery + + + +The `BIND_STATIC` opcode is only for static variables that are normally used inside code blocks. The SAST tools may not able to keep the proper values for these static variables. As such the discovery rule should be accurate as it is + +```scala +val start_line = (name, "1_static_variables_iall", cpg.call(".*BIND_STATIC.*").location.toJson); ``` -``` \ No newline at end of file +| discovery method | expected accuracy | +|--------------------|---------------------| +| joern | Perfect | + +
+ +
+ + +### Measurement + + + +| Tool | masked_tool | tool2 | Ground Truth | +|-------------|---------------|---------|----------------| +| 01 Jan 1970 | NO | NO | YES | +| 01 Jan 2023 | YES | YES | YES | + +
+ + diff --git a/tp_framework/cli/interface.py b/tp_framework/cli/interface.py index 170c472..923d20c 100644 --- a/tp_framework/cli/interface.py +++ b/tp_framework/cli/interface.py @@ -9,7 +9,7 @@ import config from core import utils, pattern_operations, discovery, measure, errors, report_for_sast -from core.exceptions import PatternValueError, PatternInvalid +from core.exceptions import PatternValueError, PatternInvalid, AddPatternError from core.pattern import Pattern @@ -38,7 +38,7 @@ def add_pattern(pattern_dir: str, language: str, measure: bool, tools: list[Dict pattern_dir_path, tp_lib_path ) - except PatternInvalid as e: + except (PatternInvalid, AddPatternError) as e: print(e) raise except Exception as e: @@ -197,4 +197,4 @@ def repair_patterns(language: str, pattern_ids: list, except PatternInvalid as e: print(f"Failed to init pattern: {tp_id} due to {e}") continue - pattern.repair() \ No newline at end of file + pattern.repair(should_include_readme) \ No newline at end of file diff --git a/tp_framework/cli/tpf_commands.py b/tp_framework/cli/tpf_commands.py index 1d8f741..584584d 100644 --- a/tp_framework/cli/tpf_commands.py +++ b/tp_framework/cli/tpf_commands.py @@ -7,7 +7,7 @@ from cli import interface from core import utils -from core.exceptions import InvalidSastTools +from core.exceptions import InvalidSastTools, PatternInvalid from core.errors import invalidSastTools from core.pattern import Pattern diff --git a/tp_framework/core/discovery.py b/tp_framework/core/discovery.py index 9e90fbb..aa6990a 100644 --- a/tp_framework/core/discovery.py +++ b/tp_framework/core/discovery.py @@ -429,6 +429,7 @@ def discovery_for_tpi(tpi_instance: Instance, tpi_json_path: Path, cpg: Path, di f"{msgpre}running discovery rule...") # related to #42 pdr = patch_PHP_discovery_rule(dr, tpi_instance.language, output_dir=disc_output_dir) + findings = [] try: findings = run_and_process_discovery_rule(cpg, pdr, discovery_method=d_tpi_discovery["method"]) 
d_tpi_discovery["results"] = findings diff --git a/tp_framework/core/errors.py b/tp_framework/core/errors.py index 24c32da..792930e 100644 --- a/tp_framework/core/errors.py +++ b/tp_framework/core/errors.py @@ -104,3 +104,5 @@ def templateDirDoesNotExist(not_exisitng_dir_or_file): return f"Your tplib does not have {not_exisitng_dir_or_file}." +def addPatternFailed(exception: str): + return f"Adding the pattern to the tplib failed {exception}" \ No newline at end of file diff --git a/tp_framework/core/exceptions.py b/tp_framework/core/exceptions.py index 24a2ec1..23011c4 100644 --- a/tp_framework/core/exceptions.py +++ b/tp_framework/core/exceptions.py @@ -1,6 +1,11 @@ from core import errors +class AddPatternError(Exception): + def __init__(self, message: str) -> None: + self.message = errors.addPatternFailed(message) + super().__init__() + class PatternDoesNotExists(Exception): def __init__(self, pattern_id): self.pattern_id = pattern_id @@ -149,6 +154,12 @@ def __init__(self, message=errors.measurementResultsDirDoesNotExist()): super().__init__(self.message) +class MeasurementInvalid(Exception): + def __init__(self, message) -> None: + self.message = message + super().__init__(self.message) + + class FileDoesNotExist(Exception): def __init__(self, message=errors.fileDoesNotExist()): self.message = message diff --git a/tp_framework/core/instance.py b/tp_framework/core/instance.py index ffe261e..9f5cc65 100644 --- a/tp_framework/core/instance.py +++ b/tp_framework/core/instance.py @@ -1,11 +1,23 @@ import shutil from pathlib import Path -from typing import Tuple +from typing import Tuple#, Enum from core import utils from core.exceptions import InstanceInvalid from core.instance_repair import InstanceRepair +# class PatternCategory(str, Enum): +# S0 = "S0" +# D1 = "D1" +# D2 = "D2" +# D3 = "D3" +# D4 = "D4" + + +# class FeatureVsInternalApi(str, Enum): +# FEATURE = "FEATURE" +# INTERNAL_API = "INTERNAL_API" + class Instance: @classmethod def 
init_from_json_path(cls, path_to_instance_json: Path, @@ -123,11 +135,11 @@ def copy_to_tplib(self, pattern_path: Path): # same function as in Pattern, could use some interface for that, or move to utils? def get_description(self) -> Tuple[bool, str]: - if self.description and Path(self.path / self.description).resolve().is_file(): + if self.description and " " not in self.description and Path(self.path / self.description).resolve().is_file(): with open(Path(self.path / self.description).resolve(), "r") as desc_file: return True, "".join(desc_file.readlines()).strip() else: - return False, self.description.strip() + return False, self.description.strip() if self.description else "" def set_new_instance_path(self, new_path): old_path = self.path diff --git a/tp_framework/core/instance_repair.py b/tp_framework/core/instance_repair.py index b01d110..79dc79b 100644 --- a/tp_framework/core/instance_repair.py +++ b/tp_framework/core/instance_repair.py @@ -102,7 +102,8 @@ def repair(self): if self.to_repair.expectation_expectation == self.to_repair.properties_negative_test_case: logger.warning(f"{self._log_prefix()}Changing properites_negative_test_case, it has to be `not` expectation_expectation") self.to_repair.properties_negative_test_case = not self.to_repair.expectation_expectation - + # check other JSON fields + # TODO: check if self.to_json() diff --git a/tp_framework/core/measurement.py b/tp_framework/core/measurement.py index df8de5e..b1fbaf2 100644 --- a/tp_framework/core/measurement.py +++ b/tp_framework/core/measurement.py @@ -11,9 +11,8 @@ import config from core import utils -from core.exceptions import InstanceDoesNotExists, MeasurementNotFound -from core.instance import Instance #, load_instance_from_metadata -from core.pattern import Pattern +from core.exceptions import MeasurementNotFound, MeasurementInvalid +from core.instance import Instance class Measurement: @@ -31,7 +30,21 @@ def __init__(self, self.tool = tool self.version = version self.instance = 
instance - + + #TODO: TESTING + @classmethod + def init_from_measurement_dict(cls, meas_dict): + return cls()._init_from_dict(meas_dict) + + def _init_from_dict(self, dict_to_init_from: dict): + try: + self.date = dict_to_init_from["date"] + self.result = dict_to_init_from["result"] + self.tool = dict_to_init_from["tool"] + self.version = dict_to_init_from["version"] + except KeyError as e: + raise MeasurementInvalid(e) + return self def define_verdict(self, date: datetime, instance: Instance, findings: list[Dict], tool: str, version: str, sink_line_strict : bool = False, @@ -106,7 +119,7 @@ def load_measurements(meas_file: Path, tp_lib: Path, language: str) -> list[Meas return parsed_meas -def load_last_measurement_for_tool(tool: Dict, language: str, tp_lib: Path, pattern: Pattern, +def load_last_measurement_for_tool(tool: Dict, language: str, tp_lib: Path, pattern, instance: Instance) -> Measurement: # TODO - load last measurement: the code hereafter strongly depends on the folder notation in place for # patterns and pattern instances. 
Make sure to factorize in function what needs to diff --git a/tp_framework/core/pattern.py b/tp_framework/core/pattern.py index 29bfd96..c4e0fd4 100644 --- a/tp_framework/core/pattern.py +++ b/tp_framework/core/pattern.py @@ -3,9 +3,10 @@ from os import listdir from pathlib import Path -from core.exceptions import PatternInvalid, PatternDoesNotExists, InstanceDoesNotExists +from core.exceptions import PatternInvalid, AddPatternError, InstanceDoesNotExists from core.instance import Instance from core.pattern_repair import PatternRepair +from tp_framework.core.readme_generator import READMEGenerator from core import utils # from core.exceptions import LanguageTPLibDoesNotExist, PatternDoesNotExists, PatternValueError from typing import Tuple @@ -124,13 +125,17 @@ def copy_to_tplib(self): try: given_id = utils.get_id_from_name(self.path.name) except (KeyError, ValueError): + # if we can't get an id from the name, we don't care, we just set a new id pass # if the given id is not the id, the algorithm identified, give it a new id pattern_name = f'{self.pattern_id}_{self.path.name}' if given_id != self.pattern_id else self.path.name new_pattern_path = self.tp_lib_path / self.language / pattern_name for instance in self.instances: instance.copy_to_tplib(new_pattern_path) - utils.copy_dir_content(self.path, new_pattern_path) + try: + utils.copy_dir_content(self.path, new_pattern_path) + except Exception as e: + raise AddPatternError(e) self.path = new_pattern_path def get_instance_by_id(self, tpi_id: int) -> Instance: @@ -140,14 +145,20 @@ def get_instance_by_id(self, tpi_id: int) -> Instance: raise InstanceDoesNotExists(tpi_id, "") def get_description(self) -> Tuple[bool, str]: - if self.description and Path(self.path / self.description).resolve().is_file(): + if self.description and " " not in self.description and Path(self.path / self.description).resolve().is_file(): with open(Path(self.path / self.description).resolve(), "r") as desc_file: return True, 
"".join(desc_file.readlines()).strip() else: - return False, self.description.strip() + return False, self.description.strip() if self.description else "" - def repair(self): + def repair(self, should_include_readme: bool, + discovery_rule_results: Path = None, + measurement_results: Path = None, + masking_file: Path = None,): PatternRepair(self).repair(self) + if should_include_readme: + # TODO: build README + READMEGenerator().generate_README() def to_dict(self): return { diff --git a/tp_framework/core/pattern_operations.py b/tp_framework/core/pattern_operations.py index 304187d..43512e7 100644 --- a/tp_framework/core/pattern_operations.py +++ b/tp_framework/core/pattern_operations.py @@ -1,20 +1,13 @@ import json -import shutil -import uuid from datetime import datetime -from json import JSONDecodeError from pathlib import Path -from typing import Dict, Tuple +from typing import Dict import logging from core import loggermgr logger = logging.getLogger(loggermgr.logger_name(__name__)) -# import core.instance -from core import errors from core import utils, analysis -from core.exceptions import PatternValueError -from core.instance import Instance #, PatternCategory, FeatureVsInternalApi # , instance_from_dict from core.pattern import Pattern from core.sast_job_runner import SASTjob, job_list_to_dict from core.measurement import meas_list_to_tp_dict @@ -57,7 +50,6 @@ async def start_add_measurement_for_pattern(language: str, sast_tools: list[Dict async def save_measurement_for_patterns(language: str, now: datetime, l_job: list[SASTjob], tp_lib_dir: Path): - d_job = job_list_to_dict(l_job) l_meas = await analysis.inspect_analysis_results(d_job, language) d_tp_meas = meas_list_to_tp_dict(l_meas) @@ -71,7 +63,7 @@ async def save_measurement_for_patterns(language: str, now: datetime, tpi_rel_dir = meas.instance.path.name meas_dir = utils.get_measurement_dir_for_language(tp_lib_dir, language) / tp_rel_dir / tpi_rel_dir meas_dir.mkdir(parents=True, exist_ok=True) - 
d_tpi_meas_ext: Dict = meas.__dict__ + d_tpi_meas_ext: Dict = meas.__dict__ # Could use vars(meas) here? # TODO: rather than extending here we should extend the Measurement class d_tpi_meas_ext["pattern_id"] = meas.instance.pattern_id d_tpi_meas_ext["instance_id"] = meas.instance.instance_id diff --git a/tp_framework/core/readme_generator.py b/tp_framework/core/readme_generator.py new file mode 100644 index 0000000..a70d5a8 --- /dev/null +++ b/tp_framework/core/readme_generator.py @@ -0,0 +1,381 @@ +import logging +import re + +from datetime import datetime +from pathlib import Path + +from core.exceptions import MeasurementInvalid +# from core.instance import Instance +from core.measurement import Measurement +# from core.pattern import Pattern +from core.readme_markdown_elements import * + +from core import utils +from core import loggermgr + +logger = logging.getLogger(loggermgr.logger_name(__name__)) + + +class READMEGenerator: + def __init__( + self, + pattern, + discovery_rule_results: Path | None, + measurement_results: Path | None, + masking_file: Path | None, + ) -> None: + self.pattern = pattern + self.discovery_rule_results = discovery_rule_results + self.measurement_results = measurement_results + self.mask = {} + + self.log_prefix = f"Generating README for {self.pattern}: " + + try: + self.discovery_rule_results = utils.read_csv_to_dict(discovery_rule_results) + self.discovery_rule_results = self.discovery_rule_results[self.pattern.language] + assert isinstance(self.discovery_rule_results, dict) + except Exception: + logger.warning(f"{self.log_prefix}Cannot find discovery rule results for language {self.pattern.language}") + self.discovery_rule_results = None + + if not measurement_results or not measurement_results.is_dir(): + logger.warning(f"{self.log_prefix}Cannot locate `measurement_results` in '{self.measurement_results}'") + self.measurement_results = None + + try: + self.mask = utils.read_json(masking_file) + except Exception: + 
logger.info(f"{self.log_prefix}Continue without masking.") + self.mask = {} + + self.readme_structure = [ + self._comment, + self._heading, + self._tags, + self._pattern_description, + self._pattern_metadata, + self._instances, + ] + + def _comment(self) -> list: + # Generates a Comment for the top of the README file. + return [ + MarkdownComment( + "This file is automatically generated. If you wish to make any changes, please use the JSON files and regenerate this file using the tpframework." + ) + ] + + def _heading(self) -> list: + # Generates the heading for the README file. + return [MarkdownHeading(self.pattern.name, 1)] + + def _pattern_description(self) -> list: + # Generates the description for the pattern. + _, desc = self.pattern.get_description() + return [MarkdownHeading("Description", 2), MarkdownString(desc)] + + def _tags(self) -> list: + # Generates pattern tags. + return [ + MarkdownString(f'Tags: {", ".join(self.pattern.tags)}'), + MarkdownString(f'Version: {self.pattern.version}'), + ] + + def _pattern_metadata(self) -> list: + # Generates a table of pattern metadata, such as the instances, discovery rule discovery method and if the discovery rule is successfull on the instance. + discovery_rule_exists = [] + instance_names = [] + discovery_rule_successfull = [] + discovery_method = [] + for instance in self.pattern.instances: + instance_name = f"{instance.instance_id} Instance" + instance_names += [MarkdownLink(instance_name, MarkdownHeading(instance_name, 2))] + + discovery_rule_exists += [utils.translate_bool(instance.discovery_rule.is_file())] + + if self.discovery_rule_results: + try: + current_result = self.discovery_rule_results[str(self.pattern.pattern_id)][str(instance.instance_id)] + except KeyError: + logger.warning(f'{self.log_prefix}Could not find discovery rule result for {instance}. 
Assuming "error"') + current_result = "error" + discovery_rule_successfull += [current_result] + discovery_method += [instance.discovery_method] + + metadata_dict = { + "0::Instances": instance_names, + "1::has discovery rule": discovery_rule_exists, + "2::discovery method": discovery_method, + "3::rule successfull": discovery_rule_successfull, + } + if not self.discovery_rule_results: + metadata_dict.pop("3::rule successfull") + + return [MarkdownHeading("Overview", 2), MarkdownTable(metadata_dict)] + + def _instances(self) -> list: + # Generates the README elements for all instances. + return InstanceREADMEGenerator( + self.pattern, + self.measurement_results, + mask=self.mask + ).generate_md() + + def generate_README(self) -> str: + md_elements = [] + for f in self.readme_structure: + md_elements += f() + return MarkdownDocument(md_elements).to_markdown() + + +class InstanceREADMEGenerator: + def __init__( + self, + pattern, + path_to_measurements: Path | None, + level: int = 2, + mask: dict = {}, + ) -> None: + self.pattern = pattern + self.log_prefix = f"Generating README for {self.pattern}: " + self.level = level + self.measurements = Path(path_to_measurements) if path_to_measurements else None + self.has_multiple_instances = len(self.pattern.instances) > 1 + self.mask_dict = mask + + self.current_instance = None + + self.instance_structure = [ + self._instance_name, + self._instance_description, + self._instance_code, + self._instance_properties, + self._instance_more, + ] + self.instance_more_structure = [ + self._compile, + self._discovery, + self._measurement, + self._remediation, + ] + + def _instance_name(self) -> list: + # Generates the Markdown heading for the current instance. + return [MarkdownHeading(f"{self.current_instance.instance_id} Instance", self.level)] + + def _instance_description(self) -> list: + # Generates the description for the current instance. 
+ _, desc = self.current_instance.get_description() + return [MarkdownString(desc)] if desc else [] + + def _instance_code(self) -> list: + # Generates the Instance code for the current instance. + heading = MarkdownHeading("Code", self.level + 1) + code = self.current_instance.code_path + source = self.current_instance.expectation_source_file + sink = self.current_instance.expectation_sink_file + if source == sink: + content = self._get_file_content_if_exists(code) + return [heading, MarkdownCode(content, self.pattern.language)] if content else [] + source_content = self._get_file_content_if_exists(source) + sink_content = self._get_file_content_if_exists(sink) + return [ + heading, + MarkdownHeading("Source File", self.level + 2), + MarkdownCode(source_content, self.pattern.language), + MarkdownHeading("Sink File", self.level + 2), + MarkdownCode(sink_content, self.pattern.language), + ] if source_content and sink_content else [] + + def _instance_properties(self) -> list: + # Generates the table of instance properties. + properties_dict = { + "category": [self.current_instance.properties_category], + "feature_vs_internal_api": [self.current_instance.properties_feature_vs_internal_api], + "input_sanitizer": [utils.translate_bool(self.current_instance.properties_input_sanitizer)], + "source_and_sink": [utils.translate_bool(self.current_instance.properties_source_and_sink)], + "negative_test_case": [utils.translate_bool(self.current_instance.properties_negative_test_case)] + } + return [ + MarkdownHeading("Instance Properties", self.level + 1), + MarkdownTable(properties_dict) + ] + + def _instance_more(self) -> list: + # generates the instance more section + ret = [] + for f in self.instance_more_structure: + ret += f() + return [MarkdownCollapsible(ret, MarkdownString("More"))] + + def _compile(self) -> list: + # Generates the compile section for an instance. 
+ compile = self.current_instance.compile_binary + content = self._get_file_content_if_exists(compile) + binary = MarkdownCode(content, utils.get_language_by_file_ending(compile)) + return [MarkdownCollapsible([binary], MarkdownHeading("Compile", self.level + 1))] if content else [] + + def _discovery(self) -> list: + # Generates the 'discovery' section for an instance. + desc = self.current_instance.discovery_notes + desc = MarkdownString(self._get_file_content_if_exists(desc)) + rule_path = self.current_instance.discovery_rule + rule = self._get_file_content_if_exists(rule_path) + # get only necessary content + rule = re.sub(r"@main def main\(name .*{.*$", "", rule, flags=re.M) + rule = re.sub(r"importCpg.*$", "", rule, flags=re.M) + rule = re.sub(r"println\(.*\).*$", "", rule, flags=re.M) + rule = re.sub(r"delete;.*$", "", rule, flags=re.M) + rule = "".join(rule.rsplit("}", 1)) # remove the last } + rule = "\n".join([l.strip() for l in rule.split("\n")]).strip() + rule = ( + MarkdownCode(rule, utils.get_language_by_file_ending(rule_path)) + if rule_path + else MarkdownString("No discovery rule yet.") + ) + discovery_table = { + "discovery method": [self.current_instance.discovery_method], + "expected accuracy": [ + self.current_instance.discovery_rule_accuracy + ], + } + discovery_table = MarkdownTable(discovery_table) + return [ + MarkdownCollapsible( + [desc, rule, discovery_table], + MarkdownHeading("Discovery", self.level + 1), + ) + ] + + def _measurement(self) -> list: + # Generates the 'measurement' section for an instance. 
+ if not self.measurements: + return [] + instance_measurements = self.measurements / self.pattern.path.name / self.current_instance.name + measurement_table = {} + has_measurement = False + dates = [] + ground_truth = self.current_instance.expectation_expectation + for json_file in utils.list_files(instance_measurements, ".json"): + current_json = utils.read_json(json_file) + for c_dict in current_json: + try: + measurement = Measurement.init_from_measurement_dict(c_dict) + except MeasurementInvalid: + logger.warning(f"{self.log_prefix}Could not fetch measurement in {current_json}") + continue + has_measurement = True + tool = f"1::{self._mask(measurement.tool.lower())}" + date = datetime.strptime(measurement.date, "%Y-%m-%d %H:%M:%S").strftime("%d %b %Y") + dates += [date] + sast_tool_result = utils.translate_bool(not (measurement.result ^ ground_truth)) + try: + measurement_table[tool] += [(sast_tool_result, date)] + measurement_table[tool] = sorted( + measurement_table[tool], + key=lambda tup: datetime.strptime(tup[1], "%d %b %Y"), + ) + except KeyError: + measurement_table[tool] = [(sast_tool_result, date)] + if not has_measurement: + return [] + measurement_table, sorted_dates = self._format_measurements(measurement_table, dates) + measurement_table["0::Tool"] = sorted_dates + measurement_table["2::Ground Truth"] = [utils.translate_bool(ground_truth)] * len(sorted_dates) + return [ + MarkdownCollapsible( + [MarkdownTable(measurement_table)], + MarkdownHeading("Measurement", self.level + 1), + is_open=True, + ) + ] + + def _remediation(self) -> list: + # Generates the 'remediation' section for an instance. 
+ note = self.current_instance.remediation_notes + note = MarkdownString(self._get_file_content_if_exists(note)) + transformation = self.current_instance.remediation_transformation + transformation = MarkdownString(self._get_file_content_if_exists(transformation)) + modeling_rule = self.current_instance.remediation_modeling_rule + modeling_rule = MarkdownString(self._get_file_content_if_exists(modeling_rule)) + if any([note, transformation, modeling_rule]): + note = [ + note + if note + else MarkdownString( + "Can you think of a transformation, that makes this tarpit less challenging for SAST tools?" + ) + ] + transformation = ( + [MarkdownHeading("Transformation", self.level + 2), transformation] + if transformation + else [] + ) + modeling_rule = ( + [MarkdownHeading("Modeling Rule", self.level + 2), modeling_rule] + if modeling_rule + else [] + ) + return [ + MarkdownCollapsible( + note + transformation + modeling_rule, + MarkdownHeading("Remediation", self.level + 1), + ) + ] + return [] + + def _get_file_content_if_exists(self, path_to_file: Path) -> str: + if path_to_file and Path(path_to_file).is_file(): + with open(path_to_file, "r") as in_file: + return "".join(in_file.readlines()).strip() + return path_to_file if path_to_file else "" + + def _mask(self, value_to_mask: str): + if value_to_mask in self.mask_dict.keys(): + return self.mask_dict[value_to_mask] + return value_to_mask + + def _format_measurements(self, tool_measurement_dict: dict, dates: list) -> tuple: + """Formats the measurements in the wanted table format: + | | Tool1 | Tool2 | + |--------+--------+--------| + | Date1 | yes | no | + Args: + tool_measurement_dict (dict): dict containing measurement results and date as a list of tuple for each tool. + dates (list): a list of measurement dates. 
+ Returns: + tuple(dict, list): dict of all tools and their measurement results (one column) and a list of sorted measurement dates (first column) + """ + dates_sorted = sorted(list(set(dates))) + formatted_measurement_table = {} + for tool, measurements in tool_measurement_dict.items(): + formatted_measurements = [] + current_measurement = measurements.pop(0) + for date in dates_sorted: + if current_measurement[1] == date: + formatted_measurements += [current_measurement[0]] + if len(measurements): + current_measurement = measurements.pop(0) + else: + break + else: + formatted_measurements += [""] + formatted_measurement_table[tool] = formatted_measurements + return formatted_measurement_table, dates_sorted + + def generate_md(self) -> list: + ret = [] + for idx, self.current_instance in enumerate(self.pattern.instances): + instance_md_elements = [] + for f in self.instance_structure: + instance_md_elements += f() + if self.has_multiple_instances: + ret += [ + MarkdownCollapsible( + instance_md_elements[1:], instance_md_elements[0], idx == 0 + ) + ] + else: + ret = instance_md_elements + return ret \ No newline at end of file diff --git a/tp_framework/core/readme_markdown_elements.py b/tp_framework/core/readme_markdown_elements.py new file mode 100644 index 0000000..4514860 --- /dev/null +++ b/tp_framework/core/readme_markdown_elements.py @@ -0,0 +1,171 @@ +from tabulate import tabulate + + +class MarkdownElement: + """Super class for all MarkdownElements used within generating README files for a testability pattern.""" + + def __init__(self, content: str): + self.content = content.strip() if content else "" + + def linkable(self) -> str: + """Makes it possible for a markdown Element to be used within a link. + Returns: + str: a string representation, that can be used in a markdown link. 
+ """ + raise NotImplementedError + + def to_markdown(self): + raise NotImplementedError + + def strip(self): + return self.to_markdown().strip() + + def __bool__(self): + return bool(self.content) + + +class MarkdownCode(MarkdownElement): + """A markdown code block. + Syntax: + ``` + self.content + ``` + """ + + def __init__(self, content, code_type): + super().__init__(content) + self.code_type = code_type + + def to_markdown(self) -> str: + return f"\n```{self.code_type.lower()}\n{self.content}\n```\n" + + +class MarkdownComment(MarkdownElement): + """A markdown comment + Syntax: + [//]: # () + """ + + def to_markdown(self): + self.content = self.content.replace("\r", " ").replace("\n", " ") + return f"\n[//]: # ({self.content})\n" + + +class MarkdownHeading(MarkdownElement): + """A markdown heading, `self.level` indicates the number of '#' + Syntax example: + # + """ + + def __init__(self, content, level: int): + super().__init__(content) + self.level = int(level) + assert self.level >= 1 + + def to_markdown(self) -> str: + return f'\n{"#" * self.level} {self.content}\n\n' + + def linkable(self) -> str: + return f'#{self.content.replace(" " , "-").lower()}' + + +class MarkdownCollapsible(MarkdownElement): + """A markdown collapsible element. + Syntax example: +
+ + + +
+ """ + + def __init__(self, content: list, heading: MarkdownElement, is_open: bool = False): + self.content = content + self.is_open = is_open + self.heading = heading + + def to_markdown(self) -> str: + final = f'\n
' + heading = ( + self.heading.to_markdown().strip() + if not isinstance(self.heading, MarkdownHeading) + else self.heading.to_markdown() + ) + final += f"\n\n{heading}\n\n" + for element in self.content: + final += element.to_markdown() + final += f"\n
\n" + return final + + +class MarkdownString(MarkdownElement): + """Representation of a String, it is surrounded by newlines.""" + def to_markdown(self) -> str: + return f"\n{self.content}\n" + + +class MarkdownLink(MarkdownElement): + """A markdown link. + Syntax: + [self.content](self.link) + """ + + def __init__(self, content: str | MarkdownElement, link: MarkdownElement): + super().__init__(content) + assert isinstance( + link, MarkdownElement + ), "The link of a MarkdownLink must be a MarkdownElement." + self.link = link.linkable() + + def to_markdown(self): + return f"[{self.content.strip()}]({self.link.strip()})" + + +class MarkdownTable(MarkdownElement): + """A markdown table + Syntax: + | | | + |---|---| + | | | + The content must be provided as a dict, where the value for each key is a list. + The key will be the header and the list contains values for that column. + Columns will be sorted alphabetically, if you wish to sort columns yourself you can prefix them using ::. + """ + + def __init__(self, content: dict): + assert isinstance( + content, dict + ), "content for Markdown table must be provided as dict" + assert all( + [isinstance(v, list) for v in content.values()] + ), "content for Markdowntable must have lists as values" + self.headings = sorted(content.keys(), key=lambda x: x.lower()) + num_rows = max([len(v) for v in content.values()]) + self.lines = [ + [None for _ in range(len(self.headings))] for _ in range(num_rows) + ] + for column_idx, key in enumerate(self.headings): + for row_index, v in enumerate(content[key]): + self.lines[row_index][column_idx] = v.strip() if v else "" + + def to_markdown(self): + return f'\n{tabulate(self.lines, [h.split("::")[-1] if "::" in h else h for h in self.headings], "github")}\n' + + +class MarkdownDocument(MarkdownElement): + """A central point, where all markdown elements are collected into one single markdown document.""" + + def __init__(self, content: list) -> None: + self.content = content + + def 
to_markdown(self) -> str: + final = "" + for element in self.content: + assert isinstance(element, MarkdownElement) + final += element.to_markdown() + import re + + final = re.sub("\n\n\n*", "\n\n", final) + return ( + f"{final.strip()}\n" # GitHub markdown likes a newline at the end of files + ) diff --git a/tp_framework/core/repair_tool.py b/tp_framework/core/repair_tool.py index fed2263..63039c1 100644 --- a/tp_framework/core/repair_tool.py +++ b/tp_framework/core/repair_tool.py @@ -61,7 +61,6 @@ def _ensure_json_file_exists(self): expected_json_name = f"{self.to_repair.path.name}.json" actual_name = self.to_repair.json_path.name - logger.error(f"{expected_json_name}, {actual_name}") if expected_json_name != actual_name: new_path = self.to_repair.path / expected_json_name shutil.move(self.to_repair.json_path, new_path) diff --git a/tp_framework/core/utils.py b/tp_framework/core/utils.py index 9e76105..a601bc5 100644 --- a/tp_framework/core/utils.py +++ b/tp_framework/core/utils.py @@ -1,16 +1,16 @@ import csv -import os +import hashlib import json -from datetime import datetime -from platform import system import shutil +import os +import yaml +from collections import defaultdict +from datetime import datetime from importlib import import_module from pathlib import Path from typing import Tuple, Dict -import yaml -import hashlib import logging from core import loggermgr @@ -185,6 +185,53 @@ def get_relative_paths(file_path: Path, base_path: Path): logger.warning(f"Could not parse filepath {file_path} to a relative path.") return file_path + +def read_csv_to_dict(path_to_file: str) -> dict: + # Reads a csv file into a dictionary, the csv file must contain the columns 'pattern_id', 'instance_id', 'language', 'successful' + # The dict will have the form: {: {: {: }}} + res = [] + with open(path_to_file, "r") as csvfile: + r = csv.reader(csvfile, delimiter=",") + headings = next(r) + wanted_columns = ["pattern_id", "instance_id", "language", "successful"] + 
wanted_idx = [headings.index(w) for w in wanted_columns] + assert len(wanted_idx) == len(wanted_columns), f"Could not find wanted column names in csv {path_to_file}" + sanitized_lines =filter(lambda x: bool(x[0].strip()), r) + res = [[line[i].strip() for i in wanted_idx] for line in sanitized_lines] + + ret = {} + for line in res: + if line[2] not in ret.keys(): + ret[line[2]] = {} + if line[0] not in ret[line[2]].keys(): + ret[line[2]][line[0]] = {} + if line[1] not in ret[line[2]][line[0]].keys(): + ret[line[2]][line[0]][line[1]] = {} + ret[line[2]][line[0]][line[1]] = line[3] + return ret + + +def translate_bool(bool_to_translate: bool): + return "YES" if bool_to_translate else "NO" + +# TODO TESTING +def get_language_by_file_ending(filename: str) -> str: + if not filename: + return "" + if Path(filename).suffix == ".py": + return "python" + if Path(filename).suffix == ".php": + return "php" + if Path(filename).suffix == ".js": + return "javascript" + if Path(filename).suffix == ".java": + return "java" + if Path(filename).suffix == ".sc": + return "scala" + if Path(filename).suffix == ".bash": + return "bash" + return "" + ################################################################################ # OTHER # TODO: Could be sorted alphabetically? 
diff --git a/tp_framework/tmp.py b/tp_framework/tmp.py deleted file mode 100644 index c3c2e0f..0000000 --- a/tp_framework/tmp.py +++ /dev/null @@ -1,17 +0,0 @@ -from pathlib import Path - -class A: - def __init__(self) -> None: - self.path = Path(".") - self.my_path = self.make_path('abc') - - def make_path(self, arg): - yield self.path / arg - - -a = A() -print(a.path) -print(a.my_path) -a.path = Path('/') -print(a.path) -print(a.my_path.is_file()) \ No newline at end of file From b4508cb5318c5252c80d07a476d3c7a115bdb1de Mon Sep 17 00:00:00 2001 From: felix-20 Date: Tue, 4 Jul 2023 14:02:35 +0200 Subject: [PATCH 10/16] removed unnecessary code --- .gitignore | 2 - Dockerfile | 11 +++-- qualitytests/cli/test_interface.py | 45 +++++++++++++++++- qualitytests/core/test_discovery.py | 4 +- qualitytests/core/test_instance.py | 4 +- .../core/test_instance_readme_generation.py | 4 +- qualitytests/core/test_instance_repair_php.py | 2 +- qualitytests/core/test_readme_generator.py | 8 +++- qualitytests/core/test_utils.py | 4 +- .../1_instance_1_static_variables.json | 2 +- .../PHP/1_static_variables/README.md | 19 ++++++-- requirements-dev.txt | Bin 122 -> 138 bytes requirements.txt | Bin 50 -> 68 bytes tp_framework/cli/interface.py | 13 +++-- tp_framework/cli/tpf_commands.py | 13 ++--- tp_framework/core/discovery.py | 28 ++--------- tp_framework/core/errors.py | 12 +---- tp_framework/core/exceptions.py | 21 -------- tp_framework/core/instance.py | 3 +- tp_framework/core/instance_repair.py | 1 - tp_framework/core/measure.py | 2 - tp_framework/core/measurement.py | 8 +--- tp_framework/core/modelling_rules.py | 2 +- tp_framework/core/pattern.py | 25 +++++----- tp_framework/core/pattern_repair.py | 4 -- tp_framework/core/readme_generator.py | 12 +++-- tp_framework/core/sast_job_runner.py | 3 -- tp_framework/core/utils.py | 9 ++-- 28 files changed, 130 insertions(+), 131 deletions(-) diff --git a/.gitignore b/.gitignore index cc6ff62..6bd1c26 100644 --- a/.gitignore +++ 
b/.gitignore @@ -6,8 +6,6 @@ __pycache__/ .pytest_cache venv tp_framework.egg-info/ -tp_framework/.metals/ -tp_framework/.vscode/ coverage_html/ .coverage htmlcov diff --git a/Dockerfile b/Dockerfile index 7c438ea..d045f82 100644 --- a/Dockerfile +++ b/Dockerfile @@ -36,16 +36,21 @@ ARG REQUIREMENTS_FILE COPY ${REQUIREMENTS_FILE} ${TPF_HOME}/${REQUIREMENTS_FILE} RUN pip install -r ${TPF_HOME}/${REQUIREMENTS_FILE} -ARG JOERN_VERSION="v1.2.1" +ARG JOERN_VERSION="v1.1.1538" RUN echo ${JOERN_VERSION} COPY discovery ${DISCOVERY_HOME} RUN chmod +x ${DISCOVERY_HOME}/joern/joern-install.sh -RUN /bin/sh -c 'cd ${DISCOVERY_HOME}/joern/ && ./joern-install.sh --version=v1.2.1 --install-dir=/opt/joern' +RUN /bin/sh -c 'cd ${DISCOVERY_HOME}/joern/ && ./joern-install.sh --version=v1.1.1538 --install-dir=/opt/joern' + +# install js2cpg +# RUN /bin/sh -c 'cd ${DISCOVERY_HOME}/joern/js2cpg/; sbt stage' + # ADD HERE COMMANDS USEFUL FOR OTHER DOCKER-COMPOSE SERVICES +# ENV PYTHONPATH "${PYTHONPATH}:${TPF_HOME}/tp_framework" RUN python setup.py develop -ENTRYPOINT [ "bash" ] +ENTRYPOINT [ "bash" ] \ No newline at end of file diff --git a/qualitytests/cli/test_interface.py b/qualitytests/cli/test_interface.py index 777693f..095101a 100644 --- a/qualitytests/cli/test_interface.py +++ b/qualitytests/cli/test_interface.py @@ -1,5 +1,6 @@ from pathlib import Path from typing import Dict +from unittest.mock import patch, call import json import sys @@ -13,7 +14,7 @@ from qualitytests.qualitytests_utils import join_resources_path, create_mock_cpg, \ get_result_output_dir, get_logfile_path, in_logfile, init_measure_test, \ - init_sastreport_test, init_test + init_sastreport_test, init_test, create_pattern class TestInterface: @@ -252,3 +253,45 @@ def test_check_discovery_rules_3(self, tmp_path, capsys, mocker): logfile = get_logfile_path(captured_out_lines) assert logfile and logfile.is_file() + + def test_repair_patterns_not_including_readme(self): + sample_tp_lib = 
join_resources_path("sample_patlib") + test_pattern = create_pattern() + with patch("core.pattern.Pattern.init_from_id_and_language") as init_pattern_mock, \ + patch("core.pattern.Pattern.repair") as patternrepair_mock, \ + patch("core.utils.check_file_exist") as check_file_exists_mock, \ + patch("core.utils.check_measurement_results_exist") as measurement_result_exist_mock, \ + patch("pathlib.Path.mkdir") as mkdir_mock: + init_pattern_mock.return_value = test_pattern + interface.repair_patterns("JS", [1,2,3], None, True, Path("measurements"), Path("dr_results.csv"), Path("out"), sample_tp_lib) + + patternrepair_mock.assert_called_with(False, + discovery_rule_results=Path("dr_results.csv"), + measurement_results=Path("measurements"), + masking_file=None) + expected_calls = [call(1, "JS", sample_tp_lib), call(2, "JS", sample_tp_lib), call(3, "JS", sample_tp_lib)] + init_pattern_mock.assert_has_calls(expected_calls) + check_file_exists_mock.assert_not_called() + measurement_result_exist_mock.assert_not_called() + mkdir_mock.assert_called() + + def test_repair_patterns_not_including_readme(self): + sample_tp_lib = join_resources_path("sample_patlib") + test_pattern = create_pattern() + with patch("core.pattern.Pattern.init_from_id_and_language") as init_pattern_mock, \ + patch("core.pattern.Pattern.repair") as patternrepair_mock, \ + patch("core.utils.check_file_exist") as check_file_exists_mock, \ + patch("core.utils.check_measurement_results_exist") as measurement_result_exist_mock, \ + patch("pathlib.Path.mkdir") as mkdir_mock: + init_pattern_mock.return_value = test_pattern + interface.repair_patterns("JS", [1,2,3], None, False, Path("measurements"), Path("dr_results.csv"), Path("out"), sample_tp_lib) + + patternrepair_mock.assert_called_with(True, + discovery_rule_results=Path("dr_results.csv"), + measurement_results=Path("measurements"), + masking_file=None) + expected_calls = [call(1, "JS", sample_tp_lib), call(2, "JS", sample_tp_lib), call(3, "JS", 
sample_tp_lib)] + init_pattern_mock.assert_has_calls(expected_calls) + check_file_exists_mock.assert_called() + measurement_result_exist_mock.assert_called_once() + mkdir_mock.assert_called() diff --git a/qualitytests/core/test_discovery.py b/qualitytests/core/test_discovery.py index 270d160..6c89b48 100644 --- a/qualitytests/core/test_discovery.py +++ b/qualitytests/core/test_discovery.py @@ -7,8 +7,8 @@ from pytest_mock import MockerFixture import config -from core import utils, discovery, instance, pattern -from core.exceptions import MeasurementNotFound, CPGGenerationError +from core import utils, discovery +from core.exceptions import CPGGenerationError from qualitytests.qualitytests_utils import join_resources_path, create_instance diff --git a/qualitytests/core/test_instance.py b/qualitytests/core/test_instance.py index 2161759..86b7fdc 100644 --- a/qualitytests/core/test_instance.py +++ b/qualitytests/core/test_instance.py @@ -12,9 +12,9 @@ class TestInstance: sample_tp_lib: Path = join_resources_path("sample_patlib") invalid_instances = [ - # (Path("./test_instance.json"), False, {}, "The provided instance path 'test_instance.json' does not exist."), + (Path("./test_instance.json"), False, {}, "The provided instance path 'test_instance.json' does not exist."), (Path("./1_instance_test_pattern.json"), True, {}, "Could not get id from ''."), - # (Path("./1_instance_test_pattern/1_instance_test_pattern.json"), True, {}, "Pattern 1 - Instance 1 - Please check ") + (Path("./1_instance_test_pattern/1_instance_test_pattern.json"), True, {}, "Pattern 1 - Instance 1 - Please check ") ] @pytest.mark.parametrize("json_file_path, is_file_return, read_json_return, expected_error", invalid_instances) diff --git a/qualitytests/core/test_instance_readme_generation.py b/qualitytests/core/test_instance_readme_generation.py index d11e30c..bd7c4b9 100644 --- a/qualitytests/core/test_instance_readme_generation.py +++ b/qualitytests/core/test_instance_readme_generation.py @@ 
-217,7 +217,9 @@ def test_measurement(self, mask, meas_results): instance_readme_gen.measurements = Path("/") instance_readme_gen.mask_dict = mask with patch("core.utils.list_files") as list_files_mock, \ - patch("core.utils.read_json") as read_json_mock: + patch("core.utils.read_json") as read_json_mock, \ + patch("pathlib.Path.exists") as exist_mock: + exist_mock.return_value = True list_files_mock.return_value = ["file1.json"] read_json_mock.return_value = meas_results diff --git a/qualitytests/core/test_instance_repair_php.py b/qualitytests/core/test_instance_repair_php.py index 2da3e08..c0175d0 100644 --- a/qualitytests/core/test_instance_repair_php.py +++ b/qualitytests/core/test_instance_repair_php.py @@ -3,7 +3,7 @@ from unittest.mock import patch, mock_open from core.instance_repair import InstanceRepairPHP -from qualitytests.qualitytests_utils import create_instance, create_pattern, create_instance_php, join_resources_path +from qualitytests.qualitytests_utils import create_instance_php, join_resources_path class TestInstanceRepairPHP: diff --git a/qualitytests/core/test_readme_generator.py b/qualitytests/core/test_readme_generator.py index d25ce65..a1432da 100644 --- a/qualitytests/core/test_readme_generator.py +++ b/qualitytests/core/test_readme_generator.py @@ -166,16 +166,20 @@ def test_generate_complete_readme(self): with patch("core.utils.list_files") as list_files_mock, \ patch("core.utils.read_json") as read_json_mock, \ - patch("core.measurement.Measurement.init_from_measurement_dict") as measurement_mock: + patch("core.measurement.Measurement.init_from_measurement_dict") as measurement_mock, \ + patch("pathlib.Path.exists") as path_exist_mock: list_files_mock.return_value = ["file1.md", "file2.md"] + path_exist_mock.return_value = True measurement_mock.side_effect = [measurement1, measurement2, measurement3, measurement4] read_json_mock.return_value = [{}, {}] actual = readme_generator.generate_README() + path_exist_mock.assert_called_once() 
path_to_expected_readme = sample_tp_lib / "PHP" / "1_static_variables" / "README.md" with open(path_to_expected_readme, "r") as fp: expected = fp.read() + with open("tmp.md", "w") as f: + f.write(actual) assert expected == actual - diff --git a/qualitytests/core/test_utils.py b/qualitytests/core/test_utils.py index 2b6b4fa..3a5e096 100644 --- a/qualitytests/core/test_utils.py +++ b/qualitytests/core/test_utils.py @@ -208,5 +208,5 @@ def test_read_csv_to_dict(self): actual["PHP"]["1"]["3"] def test_translate_bool(self): - assert "YES" == utils.translate_bool(True) - assert "NO" == utils.translate_bool(False) + assert "yes" == utils.translate_bool(True) + assert "no" == utils.translate_bool(False) diff --git a/qualitytests/resources/sample_patlib/PHP/1_static_variables/1_instance_1_static_variables/1_instance_1_static_variables.json b/qualitytests/resources/sample_patlib/PHP/1_static_variables/1_instance_1_static_variables/1_instance_1_static_variables.json index 93a7a98..71ac204 100644 --- a/qualitytests/resources/sample_patlib/PHP/1_static_variables/1_instance_1_static_variables/1_instance_1_static_variables.json +++ b/qualitytests/resources/sample_patlib/PHP/1_static_variables/1_instance_1_static_variables/1_instance_1_static_variables.json @@ -10,7 +10,7 @@ "notes": "The `BIND_STATIC` opcode is only for static variables that are normally used inside code blocks. The SAST tools may not able to keep the proper values for these static variables. 
As such the discovery rule should be accurate as it is" }, "remediation": { - "notes": null, + "notes": "./docs/remediation_notes.md", "transformation": null, "modeling_rule": null }, diff --git a/qualitytests/resources/sample_patlib/PHP/1_static_variables/README.md b/qualitytests/resources/sample_patlib/PHP/1_static_variables/README.md index dfa467c..260d762 100644 --- a/qualitytests/resources/sample_patlib/PHP/1_static_variables/README.md +++ b/qualitytests/resources/sample_patlib/PHP/1_static_variables/README.md @@ -14,7 +14,7 @@ This pattern targets `static` variables. When a function terminates, its local v | Instances | has discovery rule | discovery method | rule successfull | |---------------------------|----------------------|--------------------|--------------------| -| [1 Instance](#1-instance) | YES | joern | yes | +| [1 Instance](#1-instance) | yes | joern | yes | ## 1 Instance @@ -38,7 +38,7 @@ F('abc'); // print value of $_GET["p1"] | category | feature_vs_internal_api | input_sanitizer | negative_test_case | source_and_sink | |------------|---------------------------|-------------------|----------------------|-------------------| -| S0 | FEATURE | NO | NO | NO | +| S0 | FEATURE | no | no | no |
@@ -114,8 +114,19 @@ val start_line = (name, "1_static_variables_iall", cpg.call(".*BIND_STATIC.*").l | Tool | masked_tool | tool2 | Ground Truth | |-------------|---------------|---------|----------------| -| 01 Jan 1970 | NO | NO | YES | -| 01 Jan 2023 | YES | YES | YES | +| 01 Jan 1970 | no | no | yes | +| 01 Jan 2023 | yes | yes | yes | + +
+ +
+ + +### Remediation + + + +Likely this tarpit should be solved at the SAST tool side. Transforming a static variable into a non-static one is unfeasible. It is unclear how to create a modeling rule for the static keyword.
diff --git a/requirements-dev.txt b/requirements-dev.txt index 4ad98ad61100ce760f62e3b4a0ae0984703d42b4..9be8177f418d28a4968bc75e324d88234a09d1b4 100644 GIT binary patch delta 22 bcmb>GVw_MVP{NSNki<~RkOQPk7*ZJkNJIs4 delta 5 McmeBTteQ{-00qMW5C8xG diff --git a/requirements.txt b/requirements.txt index 4c05084906f8f010933fce4accdbdbc07e2fa215..948a7b0cde1b67905584687b540006ba990bccb0 100644 GIT binary patch delta 23 ccmXqVnP4Qu#ZbbK$dJTP%8&!3OBhla06HfGrvLx| delta 4 LcmZ=UnqUL~0{j6v diff --git a/tp_framework/cli/interface.py b/tp_framework/cli/interface.py index 923d20c..8d469eb 100644 --- a/tp_framework/cli/interface.py +++ b/tp_framework/cli/interface.py @@ -9,7 +9,7 @@ import config from core import utils, pattern_operations, discovery, measure, errors, report_for_sast -from core.exceptions import PatternValueError, PatternInvalid, AddPatternError +from core.exceptions import PatternInvalid, AddPatternError from core.pattern import Pattern @@ -54,8 +54,7 @@ def add_pattern(pattern_dir: str, language: str, measure: bool, tools: list[Dict def run_discovery_for_pattern_list(src_dir: Path, pattern_id_list: list[int], language: str, itools: list[Dict], tp_lib_path: Path = Path(config.DEFAULT_TP_LIBRARY_ROOT_DIR).resolve(), output_dir: Path = Path(config.RESULT_DIR).resolve(), - ignore: bool = False, - cpg: str = None): + ignore: bool = False): print("Discovery for patterns started...") # Set output directory and logger build_name, disc_output_dir = utils.get_operation_build_name_and_dir( @@ -64,7 +63,7 @@ def run_discovery_for_pattern_list(src_dir: Path, pattern_id_list: list[int], la # utils.check_tp_lib(tp_lib_path) d_res = discovery.discovery(Path(src_dir), pattern_id_list, tp_lib_path, itools, language, build_name, - disc_output_dir, ignore=ignore, cpg=cpg) + disc_output_dir, ignore=ignore) print("Discovery for patterns completed.") print(f"- results available here: {disc_output_dir}") print(f"- log file available here: {disc_output_dir / config.logfile}") @@ -182,6 
+181,7 @@ def repair_patterns(language: str, pattern_ids: list, measurement_results: Path, checkdiscoveryrule_results: Path, output_dir: Path, tp_lib_path: Path): print("Pattern Repair started...") + print(measurement_results) should_include_readme = not include_README utils.check_tp_lib(tp_lib_path) if should_include_readme: @@ -197,4 +197,7 @@ def repair_patterns(language: str, pattern_ids: list, except PatternInvalid as e: print(f"Failed to init pattern: {tp_id} due to {e}") continue - pattern.repair(should_include_readme) \ No newline at end of file + pattern.repair(should_include_readme, + discovery_rule_results=checkdiscoveryrule_results, + measurement_results=measurement_results, + masking_file=masking_file) \ No newline at end of file diff --git a/tp_framework/cli/tpf_commands.py b/tp_framework/cli/tpf_commands.py index 584584d..00fb4b4 100644 --- a/tp_framework/cli/tpf_commands.py +++ b/tp_framework/cli/tpf_commands.py @@ -7,7 +7,7 @@ from cli import interface from core import utils -from core.exceptions import InvalidSastTools, PatternInvalid +from core.exceptions import InvalidSastTools from core.errors import invalidSastTools from core.pattern import Pattern @@ -214,12 +214,6 @@ def add_command_subparser(self, subparser): required=True, help="Path to discovery target folder" ) - discovery_parser.add_argument( - "-c", "--cpg", - dest="cpg_existing", - type=str, - help="Specify an already existing CPG in TARGET_DIR instead of letting the framework generate a new one." 
- ) discovery_parser.add_argument( "-i", "--ignore-measurements", action="store_true", @@ -261,7 +255,6 @@ def execute_command(self, args): tp_lib_path: str = parse_tp_lib(args.tp_lib) target_dir = Path(args.target_discovery) utils.check_target_dir(target_dir) - cpg_name: str = args.cpg_existing output_dir: str = parse_output_dir(args.output_dir) tool_parsed: list[Dict] = parse_tool_list(args.tools) l_pattern_id = parse_patterns(args.all_patterns, args.pattern_range, args.patterns, @@ -269,7 +262,7 @@ def execute_command(self, args): language) try: interface.run_discovery_for_pattern_list(target_dir, l_pattern_id, language, tool_parsed, tp_lib_path, - output_dir=output_dir, ignore=args.ignore, cpg=cpg_name) + output_dir=output_dir, ignore=args.ignore) except InvalidSastTools: print(invalidSastTools()) exit(1) @@ -594,6 +587,7 @@ def execute_command(self, args): tp_lib_path, language, init_patterns=False)) output_dir: Path = parse_dir_or_file(args.output_dir) measurement_results: Path = parse_dir_or_file(args.measurement_dir, config.MEASUREMENT_REL_DIR, "Measurement directory") + print('\033[92m', measurement_results, '\033[0m') checkdiscoveryrules_results: Path = parse_dir_or_file(args.checkdiscoveryrules_file, "checkdiscoveryrules.csv", "Checkdiscoveryrules csv file") masking_file: Path or None = parse_dir_or_file(args.masking_file) if args.masking_file else None interface.repair_patterns(language=language, pattern_ids=l_pattern_id, @@ -601,6 +595,7 @@ def execute_command(self, args): measurement_results=measurement_results, checkdiscoveryrule_results=checkdiscoveryrules_results, output_dir=output_dir, tp_lib_path=tp_lib_path) + # class Template(Command): # # # overriding abstract method diff --git a/tp_framework/core/discovery.py b/tp_framework/core/discovery.py index aa6990a..62234ba 100644 --- a/tp_framework/core/discovery.py +++ b/tp_framework/core/discovery.py @@ -18,7 +18,7 @@ CPGLanguageNotSupported, DiscoveryRuleError, DiscoveryRuleParsingResultError, 
InvalidSastTools from core.measurement import Measurement -from core.instance import Instance #, instance_from_dict, load_instance_from_metadata +from core.instance import Instance from core.pattern import Pattern # mand_finding_joern_keys = ["filename", "methodFullName", "lineNumber"] @@ -206,11 +206,8 @@ def run_and_process_discovery_rule(cpg: Path, discovery_rule: Path, f"No discovery method has been specified. Likely you need to modify the discovery->method property in the JSON file of the pattern instance related to the discovery rule {discovery_rule}. We will continue with the default discovery method for Scala discovery rules (aka '{default_discovery_method}').") discovery_method = default_discovery_method if discovery_method == "joern": - try: - cpg_file_name, query_name, raw_findings = run_joern_discovery_rule(cpg, discovery_rule) - findings = process_joern_discovery_rule_findings(discovery_rule, query_name, raw_findings) - except: - findings = None + cpg_file_name, query_name, raw_findings = run_joern_discovery_rule(cpg, discovery_rule) + findings = process_joern_discovery_rule_findings(discovery_rule, query_name, raw_findings) return findings else: e = DiscoveryMethodNotSupported(discovery_method=discovery_method) @@ -249,19 +246,14 @@ def discovery(src_dir: Path, l_tp_id: list[int], tp_lib_path: Path, itools: list build_name: str, disc_output_dir: Path, timeout_sec: int = 0, - ignore=False, - cpg: str = None) -> Dict: + ignore=False) -> Dict: logger.info("Discovery for patterns started...") # TODO: to support multiple discovery methods the following would need major refactoring. # - CPG is specific to Joern # - each discovery rule tells which method to use # - on the other hand you do not want to compute the CPG multiple times - # if a CPG name is specified, expect it in TARGET_DIR. 
Else, generate new CPG from source - if cpg is not None: - cpg: Path = src_dir / cpg - else: - cpg: Path = generate_cpg(src_dir, language, build_name, disc_output_dir, timeout_sec=timeout_sec) + cpg: Path = generate_cpg(src_dir, language, build_name, disc_output_dir, timeout_sec=timeout_sec) if not ignore: return discovery_under_measurement(cpg, l_tp_id, tp_lib_path, itools, language, build_name, disc_output_dir, timeout_sec=timeout_sec) @@ -413,10 +405,6 @@ def discovery_for_tpi(tpi_instance: Instance, tpi_json_path: Path, cpg: Path, di if not measurement_stop and tpi_instance.discovery_rule: # prepare and execute the discovery rule (if not done yet) dr = (tpi_json_path.parent / tpi_instance.discovery_rule).resolve() - if not dr.exists(): - d_tpi_discovery["rule_path"] = str(dr) - logger.exception("Scala rule for {} does not exist".format(dr)) - return d_tpi_discovery logger.info( f"{msgpre}prepare discovery rule {dr}...") @@ -429,7 +417,6 @@ def discovery_for_tpi(tpi_instance: Instance, tpi_json_path: Path, cpg: Path, di f"{msgpre}running discovery rule...") # related to #42 pdr = patch_PHP_discovery_rule(dr, tpi_instance.language, output_dir=disc_output_dir) - findings = [] try: findings = run_and_process_discovery_rule(cpg, pdr, discovery_method=d_tpi_discovery["method"]) d_tpi_discovery["results"] = findings @@ -463,7 +450,6 @@ def post_process_and_export_results(d_res: dict, build_name: str, disc_output_di "method", "queryFile", "queryHash", "queryName", "queryAccuracy", "queryAlreadyExecuted", "discovery", "filename", "lineNumber", "methodFullName"] rows = [] - findings = [] for tp_id in d_res: if d_res[tp_id]["measurement_found"] is False: rows.append( @@ -533,7 +519,6 @@ def post_process_and_export_results(d_res: dict, build_name: str, disc_output_di row["discovery"] = f["discovery"] row["queryName"] = f["queryName"] if f["discovery"]: - findings.append(f) row["filename"] = f["filename"] row["lineNumber"] = f["lineNumber"] row["methodFullName"] = 
f["methodFullName"] @@ -543,9 +528,6 @@ def post_process_and_export_results(d_res: dict, build_name: str, disc_output_di pass ofile = disc_output_dir / f"discovery_{build_name}.csv" utils.write_csv_file(ofile, fields, rows) - findings_file = disc_output_dir / f"findings_{build_name}.json" - with open(findings_file, 'w+') as f: - json.dump(findings, f, sort_keys=True, indent=4) d_results = { "discovery_result_file": str(ofile), "results": d_res diff --git a/tp_framework/core/errors.py b/tp_framework/core/errors.py index 792930e..bedfdf8 100644 --- a/tp_framework/core/errors.py +++ b/tp_framework/core/errors.py @@ -2,10 +2,6 @@ def patternDoesNotExists(pattern_id): return f"Specified Pattern `{pattern_id}` does not exists." -def patternValueError(): # TODO: can we get rid of that? - return f"Error during Pattern initialization." - - def patternInvalidError(e): return f"{e} Pattern is invalid." @@ -25,12 +21,10 @@ def instanceDoesNotExists(instance_id=None, ref_metadata=None): message = f"Specified Pattern Instance at `{ref_metadata}` does not exists." return message + def instanceInvalidError(e): return f"{e} Instance is invalid." -def patternFolderNotFound(pattern_dir_path): - return f"`Pattern source folder {pattern_dir_path}` not found or is not a folder." - def patternDefaultJSONNotFound(default_pattern_json): return f"`{default_pattern_json}` not found in pattern folder. Please specify explicitly a file containing the pattern metadata." @@ -96,10 +90,6 @@ def measurementResultsDirDoesNotExist(): return "The directory with the measurements does not exist." -def fileDoesNotExist(): - return "The file you provided for does not exist or is the wrong file type." - - def templateDirDoesNotExist(not_exisitng_dir_or_file): return f"Your tplib does not have {not_exisitng_dir_or_file}." 
diff --git a/tp_framework/core/exceptions.py b/tp_framework/core/exceptions.py index 23011c4..c5af8f3 100644 --- a/tp_framework/core/exceptions.py +++ b/tp_framework/core/exceptions.py @@ -39,12 +39,6 @@ def __init__(self, message: str) -> None: super().__init__(self.message) -class PatternFolderNotFound(Exception): - def __init__(self): - self.message = errors.patternFolderNotFound() - super().__init__(self.message) - - class MeasurementNotFound(Exception): def __init__(self, pattern_id): self.pattern_id = pattern_id @@ -108,15 +102,6 @@ def __init__(self, message=None, discovery_method=None): super().__init__(self.message) -class PatternValueError(Exception): # TODO: can we get rid of this? - def __init__(self, message=None): - if message: - self.message = message - else: - self.message = errors.patternValueError() - super().__init__(self.message) - - class CPGGenerationError(Exception): def __init__(self, message=errors.cpgGenerationError()): self.message = message @@ -160,12 +145,6 @@ def __init__(self, message) -> None: super().__init__(self.message) -class FileDoesNotExist(Exception): - def __init__(self, message=errors.fileDoesNotExist()): - self.message = message - super().__init__(self.message) - - class TemplateDoesNotExist(Exception): def __init__(self, message=errors.templateDirDoesNotExist('template')) -> None: self.message = message diff --git a/tp_framework/core/instance.py b/tp_framework/core/instance.py index 9f5cc65..cd3336f 100644 --- a/tp_framework/core/instance.py +++ b/tp_framework/core/instance.py @@ -1,11 +1,12 @@ import shutil from pathlib import Path -from typing import Tuple#, Enum +from typing import Tuple from core import utils from core.exceptions import InstanceInvalid from core.instance_repair import InstanceRepair + # class PatternCategory(str, Enum): # S0 = "S0" # D1 = "D1" diff --git a/tp_framework/core/instance_repair.py b/tp_framework/core/instance_repair.py index 79dc79b..70951d0 100644 --- 
a/tp_framework/core/instance_repair.py +++ b/tp_framework/core/instance_repair.py @@ -46,7 +46,6 @@ def _adjust_variable_number_in_discovery_rule(self) -> None: # determine the name for the rule in scala file # if there is more than one instance, it should be _i # if this rule is for multiple patterns, it should be _iall - #assert False, f"{len(self.pattern.instances)}\n{dr_path.parent}\n{self.pattern.path}" rule_name = ( f'{self.pattern.path.name}_i{self.to_repair.instance_id}' if len(self.pattern.instances) > 1 and dr_path.parent != self.pattern.path diff --git a/tp_framework/core/measure.py b/tp_framework/core/measure.py index 2f7f9fb..b7ebf65 100644 --- a/tp_framework/core/measure.py +++ b/tp_framework/core/measure.py @@ -120,5 +120,3 @@ async def measure_list_patterns(l_tp_id: list[int], language: str, } logger.info(f"SAST measurement - done") return d_results - - diff --git a/tp_framework/core/measurement.py b/tp_framework/core/measurement.py index b1fbaf2..4c142fa 100644 --- a/tp_framework/core/measurement.py +++ b/tp_framework/core/measurement.py @@ -127,12 +127,6 @@ def load_last_measurement_for_tool(tool: Dict, language: str, tp_lib: Path, patt # the strict notation pattern_dir_name: str = pattern.path.name instance_dir_name: str = instance.path.name - # TODO: continue here - # instance_dir: Path = pattern_dir / instance_dir_name - # if not instance_dir.is_dir(): - # ee = InstanceDoesNotExists(instance_id=pi_id) - # logger.exception(ee) - # raise ee measurement_dir_for_pattern_instance: Path = utils.get_measurement_dir_for_language(tp_lib, language) / pattern_dir_name / instance_dir_name if not measurement_dir_for_pattern_instance.is_dir(): ee = MeasurementNotFound(pattern.pattern_id) @@ -173,4 +167,4 @@ def any_tool_matching(meas, tools, version=config.discovery_under_measurement["e if not version: return any(meas.tool == tool["name"] for tool in tools) else: - return any(meas.tool == tool["name"] and meas.version == tool["version"] for tool in tools) \ 
No newline at end of file + return any(meas.tool == tool["name"] and meas.version == tool["version"] for tool in tools) diff --git a/tp_framework/core/modelling_rules.py b/tp_framework/core/modelling_rules.py index ca3c5a7..c8afbe4 100644 --- a/tp_framework/core/modelling_rules.py +++ b/tp_framework/core/modelling_rules.py @@ -24,4 +24,4 @@ async def scan(src_dir: Path, tools: list[Dict], language: str, modelling_rules: apply_remediation=True, modelling_rules=modelling_rules) results.append({f"{tool['name']}:{tool['version']}": sast.inspector(res, language)}) - return results, tools \ No newline at end of file + return results, tools diff --git a/tp_framework/core/pattern.py b/tp_framework/core/pattern.py index c4e0fd4..f1798a7 100644 --- a/tp_framework/core/pattern.py +++ b/tp_framework/core/pattern.py @@ -1,22 +1,16 @@ -# import json -import shutil -from os import listdir from pathlib import Path +from typing import Tuple from core.exceptions import PatternInvalid, AddPatternError, InstanceDoesNotExists from core.instance import Instance -from core.pattern_repair import PatternRepair -from tp_framework.core.readme_generator import READMEGenerator -from core import utils -# from core.exceptions import LanguageTPLibDoesNotExist, PatternDoesNotExists, PatternValueError -from typing import Tuple +from core import utils +from core.pattern_repair import PatternRepair import logging from core import loggermgr logger = logging.getLogger(loggermgr.logger_name(__name__)) - class Pattern: @classmethod def init_from_id_and_language(cls, id: int, language: str, tp_lib_path: Path): @@ -29,8 +23,8 @@ def init_from_json_file_without_pattern_id(cls, json_file_path: Path, language: def __init__(self) -> None: # metadata self.pattern_id = None - self.language = None # TODO: needed? - self.tp_lib_path = None # TODO needed? 
+ self.language = None + self.tp_lib_path = None self.language = None self.path = None self.json_path = None @@ -157,8 +151,13 @@ def repair(self, should_include_readme: bool, masking_file: Path = None,): PatternRepair(self).repair(self) if should_include_readme: - # TODO: build README - READMEGenerator().generate_README() + from core.readme_generator import READMEGenerator + readme = READMEGenerator(pattern=self, discovery_rule_results=discovery_rule_results, + measurement_results=measurement_results, + masking_file=masking_file).generate_README() + path_to_readme = self.path / "README.md" + with open(path_to_readme, "w") as readme_file: + readme_file.write(readme) def to_dict(self): return { diff --git a/tp_framework/core/pattern_repair.py b/tp_framework/core/pattern_repair.py index a792570..8918a6a 100644 --- a/tp_framework/core/pattern_repair.py +++ b/tp_framework/core/pattern_repair.py @@ -1,12 +1,8 @@ -from pathlib import Path -from copy import deepcopy - import logging from core import loggermgr logger = logging.getLogger(loggermgr.logger_name(__name__)) -from core.exceptions import InstanceInvalid from core.instance import Instance from core.repair_tool import RepairTool from core import utils diff --git a/tp_framework/core/readme_generator.py b/tp_framework/core/readme_generator.py index a70d5a8..67a84a8 100644 --- a/tp_framework/core/readme_generator.py +++ b/tp_framework/core/readme_generator.py @@ -5,9 +5,7 @@ from pathlib import Path from core.exceptions import MeasurementInvalid -# from core.instance import Instance from core.measurement import Measurement -# from core.pattern import Pattern from core.readme_markdown_elements import * from core import utils @@ -253,6 +251,9 @@ def _measurement(self) -> list: if not self.measurements: return [] instance_measurements = self.measurements / self.pattern.path.name / self.current_instance.name + if not instance_measurements.exists(): + logger.error(f"{self.log_prefix}Could not find `measurement` for 
{self.current_instance}") + return [] measurement_table = {} has_measurement = False dates = [] @@ -326,8 +327,11 @@ def _remediation(self) -> list: return [] def _get_file_content_if_exists(self, path_to_file: Path) -> str: - if path_to_file and Path(path_to_file).is_file(): - with open(path_to_file, "r") as in_file: + if not path_to_file: + return "" + potential_file_path = Path(self.current_instance.path / path_to_file).resolve() + if " " not in str(path_to_file) and potential_file_path.is_file(): + with open(potential_file_path, "r") as in_file: return "".join(in_file.readlines()).strip() return path_to_file if path_to_file else "" diff --git a/tp_framework/core/sast_job_runner.py b/tp_framework/core/sast_job_runner.py index 24d6fb6..50fa758 100644 --- a/tp_framework/core/sast_job_runner.py +++ b/tp_framework/core/sast_job_runner.py @@ -63,9 +63,6 @@ def set_extracted(self, value: bool=True): def set_measurement(self, meas: Measurement): self.measurement = meas - - def __str__(self) -> str: - return f'{self.tp_id} - {self.tpi_id}' def job_list_to_dict(l: list[SASTjob]) -> Dict: diff --git a/tp_framework/core/utils.py b/tp_framework/core/utils.py index a601bc5..efaa434 100644 --- a/tp_framework/core/utils.py +++ b/tp_framework/core/utils.py @@ -5,7 +5,6 @@ import os import yaml -from collections import defaultdict from datetime import datetime from importlib import import_module from pathlib import Path @@ -18,8 +17,8 @@ import config from core.exceptions import PatternDoesNotExists, LanguageTPLibDoesNotExist, TPLibDoesNotExist, InvalidSastTools, \ - DiscoveryMethodNotSupported, TargetDirDoesNotExist, InvalidSastTool, PatternFolderNotFound, InstanceDoesNotExists, \ - FileDoesNotExist, TemplateDoesNotExist, MeasurementResultsDoNotExist + DiscoveryMethodNotSupported, TargetDirDoesNotExist, InvalidSastTool, InstanceDoesNotExists, \ + MeasurementResultsDoNotExist from core import errors @@ -168,7 +167,7 @@ def load_sast_specific_config(tool_name: str, tool_version: 
str) -> Dict: def check_file_exist(file_path: Path, file_suffix = ".csv"): if not file_path.is_file() or not file_path.suffix == file_suffix: - e = FileDoesNotExist(file_path) + e = FileNotFoundError(file_path) logger.error(get_exception_message(e)) raise e @@ -212,7 +211,7 @@ def read_csv_to_dict(path_to_file: str) -> dict: def translate_bool(bool_to_translate: bool): - return "YES" if bool_to_translate else "NO" + return "yes" if bool_to_translate else "no" # TODO TESTING def get_language_by_file_ending(filename: str) -> str: From 7c16259c2044447a8a0db086d5d8ab2b6a343566 Mon Sep 17 00:00:00 2001 From: felix-20 Date: Tue, 4 Jul 2023 14:37:19 +0200 Subject: [PATCH 11/16] added validating against json schema --- requirements-dev.txt | Bin 138 -> 162 bytes requirements.txt | Bin 68 -> 90 bytes tp_framework/cli/interface.py | 1 - tp_framework/core/instance_repair.py | 7 ++++--- tp_framework/core/pattern_repair.py | 4 +++- tp_framework/core/repair_tool.py | 28 +++++++++++++++++++++------ 6 files changed, 29 insertions(+), 11 deletions(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 9be8177f418d28a4968bc75e324d88234a09d1b4..58498d34f753a7e17c776a93a8088fbd58d56e96 100644 GIT binary patch delta 31 lcmeBTT*Nq`OM;hyiy@1lm?57b4@f36WH6*M diff --git a/requirements.txt b/requirements.txt index 948a7b0cde1b67905584687b540006ba990bccb0..240f6876d805d2c350ad778f30a86a16d6660125 100644 GIT binary patch delta 27 gcmZ>Xn&2YF#gN5N%#hEJ2PBgjG8j@Bav2gC09IKAz5oCK delta 4 Lcma!YncxBd1DpYK diff --git a/tp_framework/cli/interface.py b/tp_framework/cli/interface.py index 8d469eb..c493dac 100644 --- a/tp_framework/cli/interface.py +++ b/tp_framework/cli/interface.py @@ -181,7 +181,6 @@ def repair_patterns(language: str, pattern_ids: list, measurement_results: Path, checkdiscoveryrule_results: Path, output_dir: Path, tp_lib_path: Path): print("Pattern Repair started...") - print(measurement_results) should_include_readme = not include_README utils.check_tp_lib(tp_lib_path) 
if should_include_readme: diff --git a/tp_framework/core/instance_repair.py b/tp_framework/core/instance_repair.py index 70951d0..1842e9d 100644 --- a/tp_framework/core/instance_repair.py +++ b/tp_framework/core/instance_repair.py @@ -16,7 +16,8 @@ class InstanceRepair(RepairTool): def __init__(self, instance, pattern: Path) -> None: self.pattern = pattern template = instance.tp_lib_path / "pattern_template" / "ID_pattern_name" / "IID_instance_ID_pattern_name" / "IID_instance_ID_pattern_name.json" - super().__init__(instance, template) + schema = instance.tp_lib_path / "pattern_template" / "schema" / "instance.schema.json" + super().__init__(instance, template, schema) try: self.instance_repair_class = globals()[f"InstanceRepair{self.to_repair.language}"] except KeyError: @@ -101,8 +102,8 @@ def repair(self): if self.to_repair.expectation_expectation == self.to_repair.properties_negative_test_case: logger.warning(f"{self._log_prefix()}Changing properites_negative_test_case, it has to be `not` expectation_expectation") self.to_repair.properties_negative_test_case = not self.to_repair.expectation_expectation - # check other JSON fields - # TODO: check if + # validate the instance json against the scheme + self._validate_against_schema() self.to_json() diff --git a/tp_framework/core/pattern_repair.py b/tp_framework/core/pattern_repair.py index 8918a6a..f3b1708 100644 --- a/tp_framework/core/pattern_repair.py +++ b/tp_framework/core/pattern_repair.py @@ -10,7 +10,8 @@ class PatternRepair(RepairTool): def __init__(self, pattern) -> None: json_template = pattern.tp_lib_path / "pattern_template" / "ID_pattern_name" / "ID_pattern_name.json" - super().__init__(pattern, json_template) + schema = pattern.tp_lib_path / "pattern_template" / "schema" / "pattern.schema.json" + super().__init__(pattern, json_template, schema) def _complete_instances(self): # list pattern directory and try to find all instances @@ -86,5 +87,6 @@ def repair(self, pattern): self._repair_description() 
self._repair_tags() + self._validate_against_schema() # write to json self.to_json() diff --git a/tp_framework/core/repair_tool.py b/tp_framework/core/repair_tool.py index 63039c1..ba01e18 100644 --- a/tp_framework/core/repair_tool.py +++ b/tp_framework/core/repair_tool.py @@ -1,6 +1,8 @@ import shutil from pathlib import Path from copy import deepcopy +from jsonschema import validate + from core.exceptions import PatternRepairError from core import utils @@ -10,17 +12,24 @@ logger = logging.getLogger(loggermgr.logger_name(__name__)) class RepairTool: - def __init__(self, to_repair, template_json_file: Path) -> None: + def __init__(self, to_repair, template_json_file: Path, schema_file: Path) -> None: self.to_repair = to_repair - self.json_template = template_json_file# to_repair.tp_lib_path / "pattern_template" / "ID_pattern_name" / "ID_pattern_name.json" + self.json_template = template_json_file + self.schema_dict = schema_file if not self.json_template.is_file(): - raise PatternRepairError(f"{self._log_prefix()} No template JSON found in {self.json_template}") + raise PatternRepairError(f"{self._log_prefix()}No template JSON found in {self.json_template}") + if not schema_file.is_file(): + raise PatternRepairError(f"{self._log_prefix()}No schema JSON found in {schema_file}") try: self.template_dict = utils.read_json(self.json_template) except Exception: - raise PatternRepairError(f"{self._log_prefix()} The template JSON file {self.json_template} is corrupt, please check") + raise PatternRepairError(f"{self._log_prefix()}The template JSON file {self.json_template} is corrupt, please check") if not self.template_dict: - raise PatternRepairError(f"{self._log_prefix()} The template JSON {self.json_template} is empty") + raise PatternRepairError(f"{self._log_prefix()}The template JSON {self.json_template} is empty") + try: + self.schema_dict = utils.read_json(schema_file) + except Exception: + raise PatternRepairError(f"{self._log_prefix()}The schema JSON file 
{schema_file} is corrupt, please check") def _log_prefix(self): return f"PatternRepair ({self.to_repair}) " @@ -74,13 +83,20 @@ def _check_paths_exists(self): logger.warning(f"{self._log_prefix()}Could not find path {v}") setattr(self.to_repair, k, None) - + def _validate_against_schema(self): + repaired_dict = self.to_repair.to_dict() + try: + validate(instance=repaired_dict, schema=self.schema_dict) + except Exception as e: + msg = utils.get_exception_message(e) + logger.error(f"{self._log_prefix()}Validating against schema failed: {msg}") def repair(self): raise NotImplementedError() def to_json(self): repaired_dict = self.to_repair.to_dict() + original_dict = utils.read_json(self.to_repair.json_path) if repaired_dict != original_dict: utils.write_json(self.to_repair.json_path, repaired_dict) \ No newline at end of file From 0d137915cc9b4a9c81f7f8ae178b10d7a3a0c208 Mon Sep 17 00:00:00 2001 From: felix-20 Date: Wed, 5 Jul 2023 15:57:17 +0200 Subject: [PATCH 12/16] fixed tests --- qualitytests/core/test_instance_repair.py | 8 ++--- qualitytests/core/test_pattern_repair.py | 4 +-- qualitytests/core/test_repair_tool.py | 36 +++++++++++++++-------- 3 files changed, 29 insertions(+), 19 deletions(-) diff --git a/qualitytests/core/test_instance_repair.py b/qualitytests/core/test_instance_repair.py index 85507be..f601506 100644 --- a/qualitytests/core/test_instance_repair.py +++ b/qualitytests/core/test_instance_repair.py @@ -57,8 +57,8 @@ def _get_instance_repair(self) -> InstanceRepair: repair_tool = InstanceRepair(test_instance, test_pattern) global_mock.assert_called_once() - read_json_mock.assert_called_once() - is_file_mock.assert_called_once() + read_json_mock.assert_called() + is_file_mock.assert_called() return repair_tool def test_init_instance_repair_with_wrong_language(self): @@ -72,8 +72,8 @@ def test_init_instance_repair_with_wrong_language(self): is_file_mock.return_value = True InstanceRepair(test_instance, test_pattern) - 
is_file_mock.assert_called_once() - read_json_mock.assert_called_once() + is_file_mock.assert_called() + read_json_mock.assert_called() logger_error.assert_called_once_with("InstanceRepairTEST could not be found, maybe it is not imported?") assert "Could not instantiate language specific instance repair" in str(e_info) diff --git a/qualitytests/core/test_pattern_repair.py b/qualitytests/core/test_pattern_repair.py index 9894ff0..aada9f5 100644 --- a/qualitytests/core/test_pattern_repair.py +++ b/qualitytests/core/test_pattern_repair.py @@ -24,8 +24,8 @@ def _get_pattern_repair(self) -> PatternRepair: repair_tool = PatternRepair(test_pattern) - read_json_mock.assert_called_once() - is_file_mock.assert_called_once() + read_json_mock.assert_called() + is_file_mock.assert_called() return repair_tool def test_complete_instances_no_new_instance0(self): diff --git a/qualitytests/core/test_repair_tool.py b/qualitytests/core/test_repair_tool.py index 8eb11fc..6b69a71 100644 --- a/qualitytests/core/test_repair_tool.py +++ b/qualitytests/core/test_repair_tool.py @@ -22,15 +22,25 @@ class TestRepairTool: "version": "v0.draft" } - def test_init_pattern_repair1(self): + def test_init_pattern_repair0(self): with patch("pathlib.Path.is_file") as is_file_mock, \ pytest.raises(PatternRepairError) as e_info: is_file_mock.return_value = False - RepairTool(TestRepairTool.pattern, Path(".")) + RepairTool(TestRepairTool.pattern, Path("."), Path(".")) is_file_mock.assert_called_once() # logger.assert_called_once() - assert "PatternRepair (JS - p1) No template JSON found in" in str(e_info) + assert "PatternRepair (JS - p1) No template JSON found in " in str(e_info) + + def test_init_pattern_repair1(self): + with patch("pathlib.Path.is_file") as is_file_mock, \ + pytest.raises(PatternRepairError) as e_info: + is_file_mock.side_effect = [True, False] + + RepairTool(TestRepairTool.pattern, Path("."), Path(".")) + is_file_mock.assert_called() + # logger.assert_called_once() + assert 
"PatternRepair (JS - p1) No schema JSON found in " in str(e_info) def test_init_pattern_repair2(self): with patch("pathlib.Path.is_file") as is_file_mock, \ @@ -39,10 +49,10 @@ def test_init_pattern_repair2(self): is_file_mock.return_value = True read_json_mock.return_value = {} - RepairTool(TestRepairTool.pattern, Path(".")) - is_file_mock.assert_called_once() + RepairTool(TestRepairTool.pattern, Path("."), Path(".")) + is_file_mock.assert_called() read_json_mock.assert_called_once() - assert "PatternRepair (JS - p1) The template JSON" in str(e_info) and " is empty" in str(e_info) + assert "PatternRepair (JS - p1) The template JSON" in str(e_info) and " is empty" in str(e_info) def test_copy_template(self): with patch("pathlib.Path.is_file") as is_file_mock, \ @@ -53,7 +63,7 @@ def test_copy_template(self): is_file_mock.return_value = True read_json_mock.return_value = TestRepairTool.template_json_dict - RepairTool(TestRepairTool.pattern, Path("."))._copy_template() + RepairTool(TestRepairTool.pattern, Path("."), Path("."))._copy_template() logger.assert_called_once_with("PatternRepair (JS - p1) Copying template JSON.") copy_file_mock.assert_called_once() @@ -83,7 +93,7 @@ def test_ensure_json_file_exists(self, is_file_mock_ret: bool, is_file_mock.return_value = True read_json_mock.return_value = TestRepairTool.template_json_dict - repair_tool = RepairTool(TestRepairTool.pattern, Path(".")) + repair_tool = RepairTool(TestRepairTool.pattern, Path("."), Path(".")) json_path = get_pattern_json_ret if get_pattern_json_ret else repair_tool.to_repair.json_path is_file_mock.reset_mock() is_file_mock.return_value = is_file_mock_ret @@ -117,7 +127,7 @@ def test_to_json1(self): is_file_mock.return_value = True read_json_mock.return_value = TestRepairTool.template_json_dict - repair_tool = RepairTool(TestRepairTool.pattern, Path(".")) + repair_tool = RepairTool(TestRepairTool.pattern, Path("."), Path(".")) read_json_mock.reset_mock() read_json_mock.return_value = {} @@ 
-136,7 +146,7 @@ def test_to_json2(self): is_file_mock.return_value = True read_json_mock.return_value = TestRepairTool.template_json_dict - repair_tool = RepairTool(TestRepairTool.pattern, Path(".")) + repair_tool = RepairTool(TestRepairTool.pattern, Path("."), Path(".")) read_json_mock.reset_mock() read_json_mock.return_value = {"name": "test"} @@ -154,7 +164,7 @@ def test_check_paths_pattern_exist_all_correct(self): is_file_mock.return_value = True read_json_mock.return_value = TestRepairTool.template_json_dict - repair_tool_pattern = RepairTool(TestRepairTool.pattern, Path(".")) + repair_tool_pattern = RepairTool(TestRepairTool.pattern, Path("."), Path(".")) repair_tool_pattern._check_paths_exists() warn_logger_mock.assert_not_called() @@ -168,7 +178,7 @@ def check_path_instance_exist_all_correct(self): is_file_mock.return_value = True read_json_mock.return_value = TestRepairTool.template_json_dict - repair_tool_instance = RepairTool(test_instance, Path(".")) + repair_tool_instance = RepairTool(test_instance, Path("."), Path(".")) repair_tool_instance._check_paths_exists() warn_logger_mock.assert_not_called() @@ -184,7 +194,7 @@ def check_path_instance_exist_non_correct(self): read_json_mock.return_value = TestRepairTool.template_json_dict exist_mock.return_value = False - repair_tool_instance = RepairTool(test_instance, Path(".")) + repair_tool_instance = RepairTool(test_instance, Path("."), Path(".")) repair_tool_instance._check_paths_exists() warn_logger_mock.assert_called() From 4968ea3262c15a9b5693f85afd1d968f9479d373 Mon Sep 17 00:00:00 2001 From: felix-20 Date: Mon, 21 Aug 2023 11:25:18 +0200 Subject: [PATCH 13/16] implemented review feedback --- qualitytests/cli/test_main.py | 8 ++++++-- qualitytests/core/test_instance.py | 4 ++-- qualitytests/core/test_pattern.py | 12 ++++++------ qualitytests/qualitytests_utils.py | 16 ++++++++-------- tp_framework/cli/interface.py | 22 +++++++++++----------- tp_framework/cli/tpf_commands.py | 12 ++++++------ 
tp_framework/core/discovery.py | 27 +++++++++++++-------------- tp_framework/core/instance.py | 1 - tp_framework/core/utils.py | 1 - 9 files changed, 52 insertions(+), 51 deletions(-) diff --git a/qualitytests/cli/test_main.py b/qualitytests/cli/test_main.py index 9829f52..2bcaf69 100644 --- a/qualitytests/cli/test_main.py +++ b/qualitytests/cli/test_main.py @@ -189,11 +189,13 @@ def test_cli_report_4(self, tmp_path, mocker): def test_cli_report_5(self, tmp_path, mocker): self._init_cli_report(mocker) # Test: valid params, no tools i.e., get all measurements + test_tp_lib_path = join_resources_path("sample_patlib") main.main(['sastreport', '--export', 'whatever.csv', '-a', '-l', self.test_lang, - '--output-dir', str(tmp_path) + '--output-dir', str(tmp_path), + '--tp-lib', str(test_tp_lib_path) # '--output-dir', str(tmp_path), # '--only-last-measurement' ]) @@ -207,9 +209,11 @@ def _init_cli_check_discovery_rules_1(self, mocker): def test_cli_check_discovery_rules_1(self, tmp_path, mocker): self._init_cli_check_discovery_rules_1(mocker) # Test: valid params + test_tp_lib_path = join_resources_path("sample_patlib") main.main(['checkdiscoveryrules', '--export', 'whatever.csv', '-a', '-l', self.test_lang, - '--output-dir', str(tmp_path) + '--output-dir', str(tmp_path), + '--tp-lib', str(test_tp_lib_path) ]) \ No newline at end of file diff --git a/qualitytests/core/test_instance.py b/qualitytests/core/test_instance.py index 86b7fdc..7b208ec 100644 --- a/qualitytests/core/test_instance.py +++ b/qualitytests/core/test_instance.py @@ -5,7 +5,7 @@ from core.instance import Instance from core.exceptions import InstanceInvalid -from qualitytests.qualitytests_utils import join_resources_path, create_instance, example_instance_dict +from qualitytests.qualitytests_utils import join_resources_path, create_instance, example_tpi_dict class TestInstance: @@ -38,7 +38,7 @@ def test_init_valid_instance_from_json_path(self): patch("pathlib.Path.is_dir") as is_dir_mock: 
is_file_mock.return_value = True - read_json_mock.return_value = example_instance_dict + read_json_mock.return_value = example_tpi_dict test_instance = Instance.init_from_json_path(Path("/1_instance_test_pattern/1_instance_test_pattern.json"), 1, "js", TestInstance.sample_tp_lib) read_json_mock.assert_called_once() diff --git a/qualitytests/core/test_pattern.py b/qualitytests/core/test_pattern.py index 0c5b55a..100fb0d 100644 --- a/qualitytests/core/test_pattern.py +++ b/qualitytests/core/test_pattern.py @@ -5,12 +5,12 @@ from core.pattern import Pattern from core.exceptions import PatternDoesNotExists, PatternInvalid, InstanceDoesNotExists -from qualitytests.qualitytests_utils import join_resources_path, create_pattern, example_pattern_dict +from qualitytests.qualitytests_utils import join_resources_path, create_pattern, example_tp_dict class TestPattern: sample_tp_lib: Path = join_resources_path("sample_patlib") - example_pattern_dict = { + example_tp_dict = { "name": "Test Pattern", "description": "./docs/description.md", "family": "test_pattern", @@ -27,8 +27,8 @@ class TestPattern: (3, "php", {"instances": ["test_instances_invalid_relative_path"]}, "Pattern 3 (PHP) - The instance path 'test_instances_invalid_relative_path' is not valid.") ] valid_patterns = [ - (1, "php", example_pattern_dict), - (1, "js", example_pattern_dict) + (1, "php", example_tp_dict), + (1, "js", example_tp_dict) ] valid_patterns_without_id = [ @@ -67,7 +67,7 @@ def test_init_from_json_file_without_pattern_id(self, path_to_json: Path, langua is_dir_mock.return_value = True is_file_mock.return_value = True isinstance_mock.return_value = True - read_json_mock.return_value = TestPattern.example_pattern_dict + read_json_mock.return_value = TestPattern.example_tp_dict pattern = Pattern.init_from_json_file_without_pattern_id(path_to_json, language, pattern_path, TestPattern.sample_tp_lib) read_json_mock.assert_called_once() is_file_mock.assert_called() @@ -128,7 +128,7 @@ def 
test_to_dict(self): rel_path_mock.return_value = None actual = test_pattern.to_dict() - expected = deepcopy(example_pattern_dict) + expected = deepcopy(example_tp_dict) expected["instances"] = [None] assert expected == actual diff --git a/qualitytests/qualitytests_utils.py b/qualitytests/qualitytests_utils.py index 8bea2b1..763d2c6 100644 --- a/qualitytests/qualitytests_utils.py +++ b/qualitytests/qualitytests_utils.py @@ -14,7 +14,7 @@ resource_path = "resources" cpg_binary_rel_path = "sample_joern/cpg_binary.bin" -example_instance_dict = { +example_tpi_dict = { "description": "Some description", "code": { "path": "", @@ -53,7 +53,7 @@ } } -example_pattern_dict = { +example_tp_dict = { "name": "Test Pattern", "description": "./docs/description.md", "family": "test_pattern", @@ -159,7 +159,7 @@ def create_instance(): patch("pathlib.Path.is_dir") as is_dir_mock: is_file_mock.return_value = True - # read_json_mock.return_value = example_instance_dict + # read_json_mock.return_value = example_tpi_dict json_path = sample_tp_lib / "JS" / "1_unset_element_array" / "1_instance_1_unset_element_array" / "1_instance_1_unset_element_array.json" test_instance = Instance.init_from_json_path(json_path, 1, "js", sample_tp_lib) @@ -176,7 +176,7 @@ def create_instance2(): patch("pathlib.Path.is_dir") as is_dir_mock: is_file_mock.return_value = True - # read_json_mock.return_value = example_instance_dict + # read_json_mock.return_value = example_tpi_dict json_path = sample_tp_lib / "JS" / "2_uri" / "1_instance_2_uri" / "1_instance_2_uri.json" test_instance = Instance.init_from_json_path(json_path, 1, "js", sample_tp_lib) @@ -193,7 +193,7 @@ def create_instance_php(): patch("pathlib.Path.is_dir") as is_dir_mock: is_file_mock.return_value = True - # read_json_mock.return_value = example_instance_dict + # read_json_mock.return_value = example_tpi_dict json_path = sample_tp_lib / "PHP" / "1_static_variables" / "1_instance_1_static_variables" / "1_instance_1_static_variables.json" 
test_instance = Instance.init_from_json_path(json_path, 1, "php", sample_tp_lib) @@ -215,13 +215,13 @@ def create_pattern(): is_dir_mock.return_value = True is_file_mock.return_value = True isinstance_mock.return_value = True - read_json_mock.return_value = example_pattern_dict + read_json_mock.return_value = example_tp_dict instance_init_mock.return_value = test_instance - test_pattern = Pattern.init_from_id_and_language(1, "JS", sample_tp_lib) + test_tpi = Pattern.init_from_id_and_language(1, "JS", sample_tp_lib) read_json_mock.assert_called_once() is_file_mock.assert_called() is_dir_mock.assert_called() isinstance_mock.assert_called() instance_init_mock.assert_called_once() - return test_pattern + return test_tpi diff --git a/tp_framework/cli/interface.py b/tp_framework/cli/interface.py index c2e5206..8bc3560 100644 --- a/tp_framework/cli/interface.py +++ b/tp_framework/cli/interface.py @@ -16,26 +16,26 @@ # CRUD patterns # TODO - add_pattern: develop UPDATE, DELETE, READ (maybe this one we do not need)... 
## CREATE/ADD -def add_pattern(pattern_dir: str, language: str, measure: bool, tools: list[Dict], pattern_json: str = None, +def add_pattern(tp_dir: str, language: str, measure: bool, tools: list[Dict], tp_json: str = None, tp_lib_path: Path = Path(config.DEFAULT_TP_LIBRARY_ROOT_DIR).resolve()): # TODO - add_pattern: add some printing message for the user - pattern_dir_path: Path = Path(pattern_dir).resolve() - if not pattern_dir_path.is_dir(): - print(errors.patternFolderNotFound(pattern_dir_path)) + tp_dir_path: Path = Path(tp_dir).resolve() + if not tp_dir_path.is_dir(): + print(errors.patternFolderNotFound(tp_dir_path)) return - pattern_json_path = Path(pattern_json) if pattern_json else utils.get_json_file(pattern_dir_path) - if not pattern_json_path: - print(errors.patternDefaultJSONNotFound(pattern_dir)) + tp_json_path = Path(tp_json) if tp_json else utils.get_json_file(tp_dir_path) + if not tp_json_path: + print(errors.patternDefaultJSONNotFound(tp_dir)) return tp_lib_path.mkdir(exist_ok=True, parents=True) try: - created_pattern: Pattern = pattern_operations.add_testability_pattern_to_lib_from_json( + created_tp: Pattern = pattern_operations.add_testability_pattern_to_lib_from_json( language, - pattern_json_path, - pattern_dir_path, + tp_json_path, + tp_dir_path, tp_lib_path ) except (PatternInvalid, AddPatternError) as e: @@ -47,7 +47,7 @@ def add_pattern(pattern_dir: str, language: str, measure: bool, tools: list[Dict raise if measure: - asyncio.run(measure_list_patterns([created_pattern.pattern_id], language, tools=tools, tp_lib_path=tp_lib_path)) + asyncio.run(measure_list_patterns([created_tp.pattern_id], language, tools=tools, tp_lib_path=tp_lib_path)) # Discovery diff --git a/tp_framework/cli/tpf_commands.py b/tp_framework/cli/tpf_commands.py index bd1b894..827f5db 100644 --- a/tp_framework/cli/tpf_commands.py +++ b/tp_framework/cli/tpf_commands.py @@ -592,11 +592,11 @@ def execute_command(self, args): tp_lib_path: str = parse_tp_lib(args.tp_lib) 
l_pattern_id = sorted(parse_patterns(args.all_patterns, args.pattern_range, args.patterns, tp_lib_path, language, init_patterns=False)) - output_dir: Path = parse_dir_or_file(args.output_dir) + output_dir: Path = parse_dir_or_file(args.output_dir, config.RESULT_DIR, "Output directory") measurement_results: Path = parse_dir_or_file(args.measurement_dir, config.MEASUREMENT_REL_DIR, "Measurement directory") print('\033[92m', measurement_results, '\033[0m') checkdiscoveryrules_results: Path = parse_dir_or_file(args.checkdiscoveryrules_file, "checkdiscoveryrules.csv", "Checkdiscoveryrules csv file") - masking_file: Path or None = parse_dir_or_file(args.masking_file) if args.masking_file else None + masking_file: Path or None = parse_dir_or_file(args.masking_file, "mask.json","Masking file") if args.masking_file else None interface.repair_patterns(language=language, pattern_ids=l_pattern_id, masking_file=masking_file, include_README=args.skip_readme, measurement_results=measurement_results, checkdiscoveryrule_results=checkdiscoveryrules_results, @@ -679,19 +679,19 @@ def parse_patterns(all_patterns: bool, pattern_range: str, patterns, tp_lib_path id_list = patterns # init a Pattern to make sure, all the patterns that should be used for the task are valid. 
# return only the pattern_id, to be compatible with current implementation - # TODO: refactor to use the Pattern instances instead of the ids + # Could refactor this to just use pattern and instance objects, main purpose is validation return sorted([Pattern.init_from_id_and_language(idx, language, tp_lib_path).pattern_id \ for idx in id_list]) if init_patterns else id_list def parse_dir_or_file(path_to_file_or_dir: str, - default_path: str = config.RESULT_DIR, - name: str = "Output directory") -> Path: + default_path: str, + exception_prefix: str = "") -> Path: if not path_to_file_or_dir: path_to_file_or_dir: str = str(default_path) try: path_to_file_or_dir_as_path: Path = Path(path_to_file_or_dir).resolve() return path_to_file_or_dir_as_path except Exception as e: - print(f"{name} is wrong: {path_to_file_or_dir}") + print(f"{exception_prefix} does not exist: {path_to_file_or_dir}") exit(1) diff --git a/tp_framework/core/discovery.py b/tp_framework/core/discovery.py index aa85e0a..0eb5ce3 100644 --- a/tp_framework/core/discovery.py +++ b/tp_framework/core/discovery.py @@ -13,7 +13,6 @@ import config from core import utils, measurement -from core.pattern import Pattern from core.exceptions import DiscoveryMethodNotSupported, MeasurementNotFound, CPGGenerationError, \ CPGLanguageNotSupported, DiscoveryRuleError, DiscoveryRuleParsingResultError, InvalidSastTools from core.measurement import Measurement @@ -155,7 +154,7 @@ def patch_PHP_discovery_rule(discovery_rule: Path, language: str, output_dir: Pa newlines = [] changed = False for l in lines: - newl = l.replace(t_str, p_str) if re.match('\s*val x\d+ = \(name, "[^"]+", cpg\.call.*(\.location\.toJson)\);\s*', l) else l + newl = l.replace(t_str, p_str) if re.match(r'\s*val x\d+ = \(name, "[^"]+", cpg\.call.*(\.location\.toJson)\);\s*', l) else l newlines.append(newl) if newl != l: changed = True @@ -342,9 +341,9 @@ def discovery_under_measurement(cpg: Path, l_tp_id: list[int], tp_lib: Path, ito "jsonpath": 
tpi.json_path } continue - tpi_instance = meas_tpi_by_tools[0].instance + tpi = meas_tpi_by_tools[0].instance d_tpi = { - "instance": tpi_instance, + "instance": tpi, "measurement": "supported", "jsonpath": tpi.json_path, "discovery": {} @@ -363,7 +362,7 @@ def discovery_under_measurement(cpg: Path, l_tp_id: list[int], tp_lib: Path, ito d_tpi["measurement"] = "not_supported" # discovery per tpi measurement_stop: bool = d_tpi["measurement"] not in ["ignore", "not_supported"] - d_tpi["discovery"] = discovery_for_tpi(tpi_instance, tpi.json_path, cpg, disc_output_dir, + d_tpi["discovery"] = discovery_for_tpi(tpi, cpg, disc_output_dir, measurement_stop=measurement_stop, already_executed=d_dr_executed) d_res_tpi[tpi.instance_id] = d_tpi d_res[tp_id]["instances"] = d_res_tpi @@ -745,18 +744,18 @@ def check_discovery_rules(language: str, l_tp_id: list[int], (i + 1, num_patterns, tp_id) # tp_info )) try: - target_pattern = Pattern.init_from_id_and_language(tp_id, language, tp_lib_path) - num_instances = len(target_pattern.instances) + target_tp = Pattern.init_from_id_and_language(tp_id, language, tp_lib_path) + num_instances = len(target_tp.instances) except Exception as e: # should not happen at all! And should be removed and a list of patterns should be parsed to that function logger.warning( f"Either pattern id {tp_id} does not exist, or its file system structure is not valid, or its instances cannot be fetched. 
Exception raised: {utils.get_exception_message(e)}") - res = get_check_discovery_rule_result(pattern=target_pattern) + res = get_check_discovery_rule_result(pattern=target_tp) results.append(res) err += 1 continue instance: Instance - for j, instance in enumerate(target_pattern.instances): + for j, instance in enumerate(target_tp.instances): try: tpi_id = instance.instance_id logger.info(utils.get_tpi_op_status_string( @@ -769,7 +768,7 @@ def check_discovery_rules(language: str, l_tp_id: list[int], if not dr_path.is_file(): logger.warning( f"Instance {tpi_id} of pattern {tp_id}: the discovery rule {dr_path} does not exist") - res = get_check_discovery_rule_result(pattern=target_pattern, instance=instance) + res = get_check_discovery_rule_result(pattern=target_tp, instance=instance) results.append(res) err += 1 continue @@ -783,16 +782,16 @@ def check_discovery_rules(language: str, l_tp_id: list[int], # Inspect the d_results if d_results["findings"] and any( f["result"] == discovery_result_strings["discovery"] for f in d_results["findings"]): - res = get_check_discovery_rule_result(pattern=target_pattern, instance=instance, successful="yes") + res = get_check_discovery_rule_result(pattern=target_tp, instance=instance, successful="yes") success += 1 else: - res = get_check_discovery_rule_result(pattern=target_pattern, instance=instance, successful="no") + res = get_check_discovery_rule_result(pattern=target_tp, instance=instance, successful="no") unsuccess += 1 results.append(res) else: logger.info( f"Instance {tpi_id} of pattern {tp_id}: the discovery rule is not provided for the pattern") - res = get_check_discovery_rule_result(pattern=target_pattern, instance=instance, successful="missing") + res = get_check_discovery_rule_result(pattern=target_tp, instance=instance, successful="missing") results.append(res) missing += 1 logger.info(utils.get_tpi_op_status_string( @@ -803,7 +802,7 @@ def check_discovery_rules(language: str, l_tp_id: list[int], except Exception as 
e: logger.warning( f"Something went wrong for the instance at {instance.path} of the pattern id {tp_id}. Exception raised: {utils.get_exception_message(e)}") - res = get_check_discovery_rule_result(pattern=target_pattern, instance=instance) + res = get_check_discovery_rule_result(pattern=target_tp, instance=instance) results.append(res) err += 1 continue diff --git a/tp_framework/core/instance.py b/tp_framework/core/instance.py index cd3336f..c454f77 100644 --- a/tp_framework/core/instance.py +++ b/tp_framework/core/instance.py @@ -34,7 +34,6 @@ def __init__(self) -> None: self.pattern_id = None self.language = None self.name = None - self.pattern = None self.tp_lib_path = None # JSON fields diff --git a/tp_framework/core/utils.py b/tp_framework/core/utils.py index 160d378..dc004bf 100644 --- a/tp_framework/core/utils.py +++ b/tp_framework/core/utils.py @@ -237,7 +237,6 @@ def get_language_by_file_ending(filename: str) -> str: # Useful for some SAST tools that accepts a zip file of the source code to scan -# Where is it used in the code? 
def zipdir(path, ziph): for root, dirs, files in os.walk(path): for file in files: From bfb6e80713341ddeef31573267a5f04577764e41 Mon Sep 17 00:00:00 2001 From: felix-20 Date: Mon, 21 Aug 2023 14:02:01 +0200 Subject: [PATCH 14/16] remove debug print statement --- tp_framework/cli/tpf_commands.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tp_framework/cli/tpf_commands.py b/tp_framework/cli/tpf_commands.py index 827f5db..e1674f9 100644 --- a/tp_framework/cli/tpf_commands.py +++ b/tp_framework/cli/tpf_commands.py @@ -594,7 +594,6 @@ def execute_command(self, args): tp_lib_path, language, init_patterns=False)) output_dir: Path = parse_dir_or_file(args.output_dir, config.RESULT_DIR, "Output directory") measurement_results: Path = parse_dir_or_file(args.measurement_dir, config.MEASUREMENT_REL_DIR, "Measurement directory") - print('\033[92m', measurement_results, '\033[0m') checkdiscoveryrules_results: Path = parse_dir_or_file(args.checkdiscoveryrules_file, "checkdiscoveryrules.csv", "Checkdiscoveryrules csv file") masking_file: Path or None = parse_dir_or_file(args.masking_file, "mask.json","Masking file") if args.masking_file else None interface.repair_patterns(language=language, pattern_ids=l_pattern_id, From b3b38115bcb0705fd1932d715fc44d4857646c8c Mon Sep 17 00:00:00 2001 From: felix-20 Date: Mon, 21 Aug 2023 15:19:11 +0200 Subject: [PATCH 15/16] added patternrepair documentation --- docs/How-to-run-CLI-Usage.md | 3 +- docs/How-to-run-patternrepair.md | 70 ++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 docs/How-to-run-patternrepair.md diff --git a/docs/How-to-run-CLI-Usage.md b/docs/How-to-run-CLI-Usage.md index 06e062e..83a2b2a 100644 --- a/docs/How-to-run-CLI-Usage.md +++ b/docs/How-to-run-CLI-Usage.md @@ -22,7 +22,8 @@ The following main commands are currently implemented: - [`discovery`](./How-to-run-discover-measured-patterns.md): discover measured patterns within a project source code - 
[`manual-discovery`](./How-to-run-manual-discovery.md): execute discovery rules (normally associated to patterns) within a project source code - reporting: create reports about SAST measurement and/or pattern discovery (**CONTINUE**) - - [`sastreport`](./How-to-run-sastreport.md): fetch last SAST measurements for tools against patterns and aggregate in a common csv file + - [`sastreport`](./How-to-run-sastreport.md): fetch last SAST measurements for tools against patterns and aggregate in a common csv file +- [`patternrepair`](./How-to-run-patternrepair.md): Can repair a pattern in your pattern library, i.e. checks the JSON file, creates a README file etc. The following are under-investigation: diff --git a/docs/How-to-run-patternrepair.md b/docs/How-to-run-patternrepair.md new file mode 100644 index 0000000..4ec989f --- /dev/null +++ b/docs/How-to-run-patternrepair.md @@ -0,0 +1,70 @@ +# How to run: Pattern repair + +## Overview + +This command can be used to repair a pattern in your library. At the moment this is only supported for PHP. + +## Command line + +To repair a pattern use: + +```text +usage: tpframework [OPTIONS] COMMAND patternrepair [-h] -l LANGUAGE (-p PATTERN_ID [PATTERN_ID ...] | --pattern-range RANGE_START-RANGE_END | -a) [--tp-lib TP_LIB_DIR] + [--output-dir OUTPUT_DIR] [--masking-file MASKING_FILE] [--measurement-results MEASUREMENT_DIR] + [--checkdiscoveryrules-results CHECKDISCOVERYRULES_FILE] [--skip-readme] + +options: + -h, --help show this help message and exit + -l LANGUAGE, --language LANGUAGE + Programming language targeted + -p PATTERN_ID [PATTERN_ID ...], --patterns PATTERN_ID [PATTERN_ID ...] + Specify pattern(s) ID(s) to test for discovery + --pattern-range RANGE_START-RANGE_END + Specify pattern ID range separated by`-` (ex. 
10-50) + -a, --all-patterns Test discovery for all available patterns + --tp-lib TP_LIB_DIR Absolute path to alternative pattern library, default resolves to `./testability_patterns` + --output-dir OUTPUT_DIR + Absolute path to the folder where outcomes (e.g., log file, export file if any) will be stored, default resolves to `./out` + --masking-file MASKING_FILE + Absolute path to a json file, that contains a mapping, if the name for some measurement tools should be kept secret, default is None + --measurement-results MEASUREMENT_DIR + Absolute path to the folder where measurement results are stored, default resolves to `./measurements` + --checkdiscoveryrules-results CHECKDISCOVERYRULES_FILE + Absolute path to the csv file, where the results of the `checkdiscoveryrules` command are stored, default resolves to `./checkdiscoveryrules.csv` + --skip-readme If set, the README generation is skipped. +``` + +By default, the `patternrepair` will create a README file for a pattern, where an overview of the pattern is presented together with some measurement results, if available. +For the generation of the README, a few files are mandatory: +First of all, there has to be a csv file that contains the results of the `checkdiscoveryrules` command for the patterns that should be repaired. +Second, the results of the `measurement` command in a directory, structured similarly to the pattern library. +Additionally, you can provide a masking file that can be used to mask the names of tools used for `measurement`. +The masking file should be a JSON file of the format `{<tool_name>: <mask_name>}`. + +If `--skip-readme` is set, none of these files are required and no new README file will be generated. + +## Example + +`tpframework patternrepair -l php -p 1 --skip-readme` + +This command will take a look at PHP pattern 1 and try to repair it, without generating a new README file. +During that process it might provide you some feedback about files that need manual review. 
+The tool checks for the following things: + +- make sure a pattern JSON file exists +- ensure all relative links are correct +- collect all instances within the pattern path (an instance is identified by a directory that contains a JSON file in the instance format) +- make sure the pattern name is correct (therefore the pattern name is derived from the directory name) +- check the description field and warn if there is no description +- check the given tags +- validate the pattern json against the pattern json schema +- for each instance, repairing means: + - ensuring an instance JSON file with the required keys is available + - ensures all relative links exist + - check the scala rule if it exists and, if necessary, adjust the variable names + - check the description and again warn if there is no description provided + - checks that the field `expectation:expectation` is the opposite of `properties:negative_test_case` + - validates the instance json against the instance json schema + - for PHP patterns: + - generates new opcode for each php file + - changes source line and sink line in the pattern JSON, according to the comments `// source`, `// sink` in the php file From a06d4c9b67b32ebcf63464d57d4feabef5c02e1a Mon Sep 17 00:00:00 2001 From: felix-20 Date: Mon, 21 Aug 2023 15:26:08 +0200 Subject: [PATCH 16/16] Moved repair tool into own directory --- qualitytests/cli/test_main.py | 4 ++- .../core/test_instance_readme_generation.py | 20 ++++++------- qualitytests/core/test_instance_repair.py | 28 +++++++++---------- qualitytests/core/test_instance_repair_php.py | 12 ++++---- qualitytests/core/test_markdown_elements.py | 2 +- qualitytests/core/test_pattern_repair.py | 6 ++-- qualitytests/core/test_readme_generator.py | 8 +++--- qualitytests/core/test_repair_tool.py | 12 ++++---- tp_framework/core/instance.py | 2 +- tp_framework/core/pattern.py | 4 +-- tp_framework/core/repair/__init__.py | 0 .../core/{ => repair}/instance_repair.py | 2 +- .../core/{ => 
repair}/pattern_repair.py | 2 +- .../core/{ => repair}/readme_generator.py | 2 +- .../{ => repair}/readme_markdown_elements.py | 0 tp_framework/core/{ => repair}/repair_tool.py | 2 +- 16 files changed, 54 insertions(+), 52 deletions(-) create mode 100644 tp_framework/core/repair/__init__.py rename tp_framework/core/{ => repair}/instance_repair.py (99%) rename tp_framework/core/{ => repair}/pattern_repair.py (98%) rename tp_framework/core/{ => repair}/readme_generator.py (99%) rename tp_framework/core/{ => repair}/readme_markdown_elements.py (100%) rename tp_framework/core/{ => repair}/repair_tool.py (99%) diff --git a/qualitytests/cli/test_main.py b/qualitytests/cli/test_main.py index 2bcaf69..cecb54a 100644 --- a/qualitytests/cli/test_main.py +++ b/qualitytests/cli/test_main.py @@ -7,6 +7,8 @@ from qualitytests.qualitytests_utils import pyexe, join_resources_path from cli import main +from pathlib import Path + class TestMain: testdir = Path(__file__).parent.parent.resolve() @@ -17,7 +19,7 @@ class TestMain: def test_cli_help_1(self): # process call cmd = pyexe + " {0} -h".format(self.tpf) - pr = subprocess.Popen(cmd, shell=False, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + pr = subprocess.Popen(cmd.split(" "), shell=False, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) (output, errdata) = pr.communicate() output = output.decode("utf-8") print(output) diff --git a/qualitytests/core/test_instance_readme_generation.py b/qualitytests/core/test_instance_readme_generation.py index bd7c4b9..78e9298 100644 --- a/qualitytests/core/test_instance_readme_generation.py +++ b/qualitytests/core/test_instance_readme_generation.py @@ -3,8 +3,8 @@ from pathlib import Path from unittest.mock import patch, mock_open -from core.readme_generator import InstanceREADMEGenerator -from core.readme_markdown_elements import * +from core.repair.readme_generator import InstanceREADMEGenerator +from core.repair.readme_markdown_elements import * from qualitytests.qualitytests_utils 
import create_pattern class TestInstanceREADMEGenerator: @@ -40,7 +40,7 @@ def test_instance_code_same_source_and_sink(self): expected_code = instance_readme_gen.current_instance.code_path instance_readme_gen.current_instance.expectation_source_file = "code_file" instance_readme_gen.current_instance.expectation_sink_file = "code_file" - with patch("core.readme_generator.InstanceREADMEGenerator._get_file_content_if_exists") as file_content_mock: + with patch("core.repair.readme_generator.InstanceREADMEGenerator._get_file_content_if_exists") as file_content_mock: file_content_mock.return_value = "x = 1" actual1 = instance_readme_gen._instance_code() @@ -52,7 +52,7 @@ def test_instance_code_same_source_and_sink(self): assert "Code" == actual1[0].content assert isinstance(actual1[1], MarkdownCode) - with patch("core.readme_generator.InstanceREADMEGenerator._get_file_content_if_exists") as file_content_mock: + with patch("core.repair.readme_generator.InstanceREADMEGenerator._get_file_content_if_exists") as file_content_mock: file_content_mock.return_value = None actual2 = instance_readme_gen._instance_code() @@ -63,7 +63,7 @@ def test_instance_code_different_source_and_sink(self): expected_code = instance_readme_gen.current_instance.code_path instance_readme_gen.current_instance.expectation_source_file = "code_file_source" instance_readme_gen.current_instance.expectation_sink_file = "code_file_sink" - with patch("core.readme_generator.InstanceREADMEGenerator._get_file_content_if_exists") as file_content_mock: + with patch("core.repair.readme_generator.InstanceREADMEGenerator._get_file_content_if_exists") as file_content_mock: file_content_mock.return_value = "x = 1" actual1 = instance_readme_gen._instance_code() @@ -82,7 +82,7 @@ def test_instance_code_different_source_and_sink(self): assert "Sink File" == actual1[3].content assert isinstance(actual1[4], MarkdownCode) - with patch("core.readme_generator.InstanceREADMEGenerator._get_file_content_if_exists") as 
file_content_mock: + with patch("core.repair.readme_generator.InstanceREADMEGenerator._get_file_content_if_exists") as file_content_mock: file_content_mock.reset_mock() file_content_mock.return_value = None @@ -110,7 +110,7 @@ def test_instance_more(self): def test_compile(self): instance_readme_gen = self._get_instance_readme_generator() - with patch("core.readme_generator.InstanceREADMEGenerator._get_file_content_if_exists") as file_content_mock: + with patch("core.repair.readme_generator.InstanceREADMEGenerator._get_file_content_if_exists") as file_content_mock: file_content_mock.return_value = "binary" actual1 = instance_readme_gen._compile() @@ -119,7 +119,7 @@ def test_compile(self): assert 1 == len(actual1) assert isinstance(actual1[0], MarkdownCollapsible) - with patch("core.readme_generator.InstanceREADMEGenerator._get_file_content_if_exists") as file_content_mock: + with patch("core.repair.readme_generator.InstanceREADMEGenerator._get_file_content_if_exists") as file_content_mock: file_content_mock.return_value = "" actual2 = instance_readme_gen._compile() @@ -157,7 +157,7 @@ def test_discovery_rule_exists(self, dr_return, expected_dr, rule_path, desc, ex instance_readme_gen = self._get_instance_readme_generator() instance_readme_gen.current_instance.discovery_rule = rule_path instance_readme_gen.current_instance.discovery_notes = desc - with patch("core.readme_generator.InstanceREADMEGenerator._get_file_content_if_exists") as file_content_mock: + with patch("core.repair.readme_generator.InstanceREADMEGenerator._get_file_content_if_exists") as file_content_mock: file_content_mock.side_effect = [desc, dr_return] actual = instance_readme_gen._discovery() file_content_mock.assert_called() @@ -257,7 +257,7 @@ def test_measurement(self, mask, meas_results): @pytest.mark.parametrize("get_file_content_ret, expected_classes, expected_content", remediation_testcases) def test_remediation(self, get_file_content_ret: list, expected_classes: list, expected_content: 
list): instance_readme_gen = self._get_instance_readme_generator() - with patch("core.readme_generator.InstanceREADMEGenerator._get_file_content_if_exists") as file_content_mock: + with patch("core.repair.readme_generator.InstanceREADMEGenerator._get_file_content_if_exists") as file_content_mock: file_content_mock.side_effect = get_file_content_ret actual = instance_readme_gen._remediation() diff --git a/qualitytests/core/test_instance_repair.py b/qualitytests/core/test_instance_repair.py index f601506..bfac1ee 100644 --- a/qualitytests/core/test_instance_repair.py +++ b/qualitytests/core/test_instance_repair.py @@ -2,7 +2,7 @@ from pathlib import Path from unittest.mock import patch, mock_open -from core.instance_repair import InstanceRepair +from core.repair.instance_repair import InstanceRepair from core.exceptions import PatternRepairError from qualitytests.qualitytests_utils import create_instance, create_pattern @@ -50,7 +50,7 @@ def _get_instance_repair(self) -> InstanceRepair: test_pattern = create_pattern() with patch("pathlib.Path.is_file") as is_file_mock, \ patch("core.utils.read_json") as read_json_mock, \ - patch("core.instance_repair.globals") as global_mock: + patch("core.repair.instance_repair.globals") as global_mock: is_file_mock.return_value = True read_json_mock.return_value = TestInstanceRepair.template_json_dict @@ -67,7 +67,7 @@ def test_init_instance_repair_with_wrong_language(self): test_pattern = create_pattern() with patch("pathlib.Path.is_file") as is_file_mock, \ patch("core.utils.read_json") as read_json_mock, \ - patch("core.instance_repair.logger.error") as logger_error, \ + patch("core.repair.instance_repair.logger.error") as logger_error, \ pytest.raises(PatternRepairError) as e_info: is_file_mock.return_value = True @@ -110,8 +110,8 @@ def test_adjust_variable_number_in_discovery_works(self, instances, expected_inf test_instance_repair.to_repair.discovery_rule = dr_rule_path dr_rule = 
TestInstanceRepair.example_rule.replace(dr_rule_replace, "") with patch("builtins.open", mock_open(read_data=dr_rule), create=True), \ - patch("core.instance_repair.logger.info") as info_logger, \ - patch("core.instance_repair.logger.warning") as warn_logger: + patch("core.repair.instance_repair.logger.info") as info_logger, \ + patch("core.repair.instance_repair.logger.warning") as warn_logger: test_instance_repair._adjust_variable_number_in_discovery_rule() if dr_rule_replace: @@ -124,12 +124,12 @@ def test_check_rule_accuracy_given(self): test_instance_repair = self._get_instance_repair() test_instance_repair.to_repair.discovery_rule_accuracy = "FP" - with patch("core.instance_repair.logger.warning") as warn_logger: + with patch("core.repair.instance_repair.logger.warning") as warn_logger: test_instance_repair._check_rule_accuracy() warn_logger.assert_not_called() test_instance_repair.to_repair.discovery_rule_accuracy = "" - with patch("core.instance_repair.logger.warning") as warn_logger: + with patch("core.repair.instance_repair.logger.warning") as warn_logger: test_instance_repair._check_rule_accuracy() warn_logger.assert_called_once_with("PatternRepair (JS - p1:1) Discovery rule given, but no rule accuracy.") @@ -149,10 +149,10 @@ def test_repair_scala_rule(self, dr_rule, is_file_return, warn, info): test_instance_repair = self._get_instance_repair() test_instance_repair.to_repair.discovery_rule = dr_rule with patch("pathlib.Path.is_file") as is_file_mock, \ - patch("core.instance_repair.InstanceRepair._adjust_variable_number_in_discovery_rule") as adjust_mock, \ - patch("core.instance_repair.InstanceRepair._check_rule_accuracy") as check_rule_mock, \ - patch("core.instance_repair.logger.warning") as logger_warn_mock, \ - patch("core.instance_repair.logger.info") as logger_info_mock: + patch("core.repair.instance_repair.InstanceRepair._adjust_variable_number_in_discovery_rule") as adjust_mock, \ + 
patch("core.repair.instance_repair.InstanceRepair._check_rule_accuracy") as check_rule_mock, \ + patch("core.repair.instance_repair.logger.warning") as logger_warn_mock, \ + patch("core.repair.instance_repair.logger.info") as logger_info_mock: is_file_mock.return_value = is_file_return test_instance_repair._repair_scala_rule() @@ -172,9 +172,9 @@ def test_repair_scala_rule(self, dr_rule, is_file_return, warn, info): def test_repair(self): test_instance_repair = self._get_instance_repair() - with patch("core.instance_repair.InstanceRepair._ensure_json_file_exists") as func1_mock, \ - patch("core.instance_repair.InstanceRepair._repair_scala_rule") as func2_mock, \ - patch("core.instance_repair.RepairTool.to_json") as func3_mock: + with patch("core.repair.instance_repair.InstanceRepair._ensure_json_file_exists") as func1_mock, \ + patch("core.repair.instance_repair.InstanceRepair._repair_scala_rule") as func2_mock, \ + patch("core.repair.instance_repair.RepairTool.to_json") as func3_mock: test_instance_repair.repair() func1_mock.assert_called_once() func2_mock.assert_called_once() diff --git a/qualitytests/core/test_instance_repair_php.py b/qualitytests/core/test_instance_repair_php.py index c0175d0..cd73199 100644 --- a/qualitytests/core/test_instance_repair_php.py +++ b/qualitytests/core/test_instance_repair_php.py @@ -2,7 +2,7 @@ from pathlib import Path from unittest.mock import patch, mock_open -from core.instance_repair import InstanceRepairPHP +from core.repair.instance_repair import InstanceRepairPHP from qualitytests.qualitytests_utils import create_instance_php, join_resources_path class TestInstanceRepairPHP: @@ -58,7 +58,7 @@ def test_make_opcode_from_php_file(self): with patch("os.utime") as utime_mock, \ patch("os.system") as system_mock, \ patch("builtins.open", mock_open(read_data="some data"), create=True), \ - patch("core.instance_repair.InstanceRepairPHP._mask_line") as mask_line_mock: + 
patch("core.repair.instance_repair.InstanceRepairPHP._mask_line") as mask_line_mock: actual = test_instance_php_repair._make_opcode_from_php_file(test_instance_php_repair.instance.code_path) assert expected == actual @@ -68,8 +68,8 @@ def test_make_opcode_from_php_file(self): def test_repair_opcode(self): test_instance_php_repair = self._get_instance_repair() - with patch("core.instance_repair.InstanceRepairPHP._remove_bash_files") as bash_file_remove_mock, \ - patch("core.instance_repair.InstanceRepairPHP._make_opcode_from_php_file") as make_opcode_mock, \ + with patch("core.repair.instance_repair.InstanceRepairPHP._remove_bash_files") as bash_file_remove_mock, \ + patch("core.repair.instance_repair.InstanceRepairPHP._make_opcode_from_php_file") as make_opcode_mock, \ patch("core.utils.list_files") as list_files_mock: list_files_mock.return_value = ["file1"] @@ -92,8 +92,8 @@ def test_repair_source_line_sink_line(self, source_sink_ret, warning, exp_source expected_file = test_instance_php_repair.instance.expectation_sink_file test_instance_php_repair.instance.expectation_sink_line = 99 test_instance_php_repair.instance.expectation_source_line = 99 - with patch("core.instance_repair.InstanceRepairPHP._get_source_and_sink_for_file") as source_sink_mock, \ - patch("core.instance_repair.logger.warning") as warn_logger: + with patch("core.repair.instance_repair.InstanceRepairPHP._get_source_and_sink_for_file") as source_sink_mock, \ + patch("core.repair.instance_repair.logger.warning") as warn_logger: source_sink_mock.return_value = source_sink_ret test_instance_php_repair._repair_source_line_sink_line() diff --git a/qualitytests/core/test_markdown_elements.py b/qualitytests/core/test_markdown_elements.py index 3e478e0..ee1cd12 100644 --- a/qualitytests/core/test_markdown_elements.py +++ b/qualitytests/core/test_markdown_elements.py @@ -1,4 +1,4 @@ -from core.readme_markdown_elements import * +from core.repair.readme_markdown_elements import * class 
TestMarkdownElements: diff --git a/qualitytests/core/test_pattern_repair.py b/qualitytests/core/test_pattern_repair.py index aada9f5..9946d36 100644 --- a/qualitytests/core/test_pattern_repair.py +++ b/qualitytests/core/test_pattern_repair.py @@ -1,7 +1,7 @@ import pytest from unittest.mock import patch -from core.pattern_repair import PatternRepair +from core.repair.pattern_repair import PatternRepair from qualitytests.qualitytests_utils import join_resources_path, create_pattern, create_instance2 class TestPatternRepair: @@ -104,8 +104,8 @@ def test_repair_description(self, pattern_description_ret, instance_description_ with patch("core.pattern.Pattern.get_description") as get_pattern_description_mock, \ patch("core.instance.Instance.get_description") as get_instance_description_mock, \ - patch("core.pattern_repair.logger.warn") as warn_logger, \ - patch("core.pattern_repair.logger.info") as info_logger, \ + patch("core.repair.pattern_repair.logger.warn") as warn_logger, \ + patch("core.repair.pattern_repair.logger.info") as info_logger, \ patch("pathlib.Path.mkdir") as mkdir_mock, \ patch("builtins.open") as open_mock: diff --git a/qualitytests/core/test_readme_generator.py b/qualitytests/core/test_readme_generator.py index a1432da..0a8f93f 100644 --- a/qualitytests/core/test_readme_generator.py +++ b/qualitytests/core/test_readme_generator.py @@ -2,8 +2,8 @@ from pathlib import Path from unittest.mock import patch -from core.readme_generator import READMEGenerator -from core.readme_markdown_elements import * +from core.repair.readme_generator import READMEGenerator +from core.repair.readme_markdown_elements import * from qualitytests.qualitytests_utils import create_pattern, join_resources_path class TestREADMEGenerator: @@ -40,7 +40,7 @@ def _get_readme_generator(self): def test_init_readme_generator_discovery_results(self, dr_file, dr_res, is_dir, mask_file, warn): test_pattern = create_pattern() with patch("pathlib.Path.is_dir") as is_dir_mock, \ - 
patch("core.readme_generator.logger.warning") as warn_logger, \ + patch("core.repair.readme_generator.logger.warning") as warn_logger, \ patch("core.utils.read_json") as read_json_mock, \ patch("core.utils.read_csv_to_dict") as csv_to_dict_mock: is_dir_mock.return_value = is_dir @@ -127,7 +127,7 @@ def test_pattern_metadata_without_discovery_rule_results(self): def test_instances(self): test_readme_gen = self._get_readme_generator() - with patch("core.readme_generator.InstanceREADMEGenerator.generate_md") as generate_md_mock: + with patch("core.repair.readme_generator.InstanceREADMEGenerator.generate_md") as generate_md_mock: actual = test_readme_gen._instances() generate_md_mock.assert_called_once() assert generate_md_mock.return_value == actual diff --git a/qualitytests/core/test_repair_tool.py b/qualitytests/core/test_repair_tool.py index 6b69a71..9a2d42d 100644 --- a/qualitytests/core/test_repair_tool.py +++ b/qualitytests/core/test_repair_tool.py @@ -3,7 +3,7 @@ from unittest.mock import patch from core.pattern import Pattern -from core.repair_tool import RepairTool +from core.repair.repair_tool import RepairTool from core.exceptions import PatternRepairError from qualitytests.qualitytests_utils import join_resources_path, create_pattern, create_instance @@ -57,7 +57,7 @@ def test_init_pattern_repair2(self): def test_copy_template(self): with patch("pathlib.Path.is_file") as is_file_mock, \ patch("core.utils.read_json") as read_json_mock, \ - patch("core.repair_tool.logger.info") as logger, \ + patch("core.repair.repair_tool.logger.info") as logger, \ patch("shutil.copy") as copy_file_mock: is_file_mock.return_value = True @@ -83,7 +83,7 @@ def test_ensure_json_file_exists(self, is_file_mock_ret: bool, should_rename_json: bool): with patch("pathlib.Path.is_file") as is_file_mock, \ patch("core.utils.read_json") as read_json_mock, \ - patch("core.repair_tool.logger.info"), \ + patch("core.repair.repair_tool.logger.info"), \ patch("core.utils.get_json_file") as 
get_pattern_json_mock, \ patch("core.utils.read_json") as read_json_mock, \ patch("core.utils.write_json") as write_json_mock, \ @@ -159,7 +159,7 @@ def test_to_json2(self): def test_check_paths_pattern_exist_all_correct(self): with patch("pathlib.Path.is_file") as is_file_mock, \ patch("core.utils.read_json") as read_json_mock, \ - patch("core.repair_tool.logger.warning") as warn_logger_mock: + patch("core.repair.repair_tool.logger.warning") as warn_logger_mock: is_file_mock.return_value = True read_json_mock.return_value = TestRepairTool.template_json_dict @@ -173,7 +173,7 @@ def check_path_instance_exist_all_correct(self): test_instance = create_instance() with patch("pathlib.Path.is_file") as is_file_mock, \ patch("core.utils.read_json") as read_json_mock, \ - patch("core.repair_tool.logger.warning") as warn_logger_mock: + patch("core.repair.repair_tool.logger.warning") as warn_logger_mock: is_file_mock.return_value = True read_json_mock.return_value = TestRepairTool.template_json_dict @@ -188,7 +188,7 @@ def check_path_instance_exist_non_correct(self): with patch("pathlib.Path.is_file") as is_file_mock, \ patch("pathlib.Path.exists") as exist_mock, \ patch("core.utils.read_json") as read_json_mock, \ - patch("core.repair_tool.logger.warning") as warn_logger_mock: + patch("core.repair.repair_tool.logger.warning") as warn_logger_mock: is_file_mock.return_value = True read_json_mock.return_value = TestRepairTool.template_json_dict diff --git a/tp_framework/core/instance.py b/tp_framework/core/instance.py index c454f77..bc2b5fc 100644 --- a/tp_framework/core/instance.py +++ b/tp_framework/core/instance.py @@ -4,7 +4,7 @@ from core import utils from core.exceptions import InstanceInvalid -from core.instance_repair import InstanceRepair +from core.repair.instance_repair import InstanceRepair # class PatternCategory(str, Enum): diff --git a/tp_framework/core/pattern.py b/tp_framework/core/pattern.py index f1798a7..508b0fe 100644 --- a/tp_framework/core/pattern.py +++ 
b/tp_framework/core/pattern.py @@ -5,7 +5,7 @@ from core.instance import Instance from core import utils -from core.pattern_repair import PatternRepair +from core.repair.pattern_repair import PatternRepair import logging from core import loggermgr logger = logging.getLogger(loggermgr.logger_name(__name__)) @@ -151,7 +151,7 @@ def repair(self, should_include_readme: bool, masking_file: Path = None,): PatternRepair(self).repair(self) if should_include_readme: - from core.readme_generator import READMEGenerator + from core.repair.readme_generator import READMEGenerator readme = READMEGenerator(pattern=self, discovery_rule_results=discovery_rule_results, measurement_results=measurement_results, masking_file=masking_file).generate_README() diff --git a/tp_framework/core/repair/__init__.py b/tp_framework/core/repair/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tp_framework/core/instance_repair.py b/tp_framework/core/repair/instance_repair.py similarity index 99% rename from tp_framework/core/instance_repair.py rename to tp_framework/core/repair/instance_repair.py index 1842e9d..95c4c42 100644 --- a/tp_framework/core/instance_repair.py +++ b/tp_framework/core/repair/instance_repair.py @@ -10,7 +10,7 @@ from core import loggermgr logger = logging.getLogger(loggermgr.logger_name(__name__)) -from core.repair_tool import RepairTool +from core.repair.repair_tool import RepairTool class InstanceRepair(RepairTool): def __init__(self, instance, pattern: Path) -> None: diff --git a/tp_framework/core/pattern_repair.py b/tp_framework/core/repair/pattern_repair.py similarity index 98% rename from tp_framework/core/pattern_repair.py rename to tp_framework/core/repair/pattern_repair.py index f3b1708..2c15e92 100644 --- a/tp_framework/core/pattern_repair.py +++ b/tp_framework/core/repair/pattern_repair.py @@ -4,7 +4,7 @@ from core.instance import Instance -from core.repair_tool import RepairTool +from core.repair.repair_tool import RepairTool from core import 
utils class PatternRepair(RepairTool): diff --git a/tp_framework/core/readme_generator.py b/tp_framework/core/repair/readme_generator.py similarity index 99% rename from tp_framework/core/readme_generator.py rename to tp_framework/core/repair/readme_generator.py index 67a84a8..ef404fa 100644 --- a/tp_framework/core/readme_generator.py +++ b/tp_framework/core/repair/readme_generator.py @@ -6,7 +6,7 @@ from core.exceptions import MeasurementInvalid from core.measurement import Measurement -from core.readme_markdown_elements import * +from core.repair.readme_markdown_elements import * from core import utils from core import loggermgr diff --git a/tp_framework/core/readme_markdown_elements.py b/tp_framework/core/repair/readme_markdown_elements.py similarity index 100% rename from tp_framework/core/readme_markdown_elements.py rename to tp_framework/core/repair/readme_markdown_elements.py diff --git a/tp_framework/core/repair_tool.py b/tp_framework/core/repair/repair_tool.py similarity index 99% rename from tp_framework/core/repair_tool.py rename to tp_framework/core/repair/repair_tool.py index ba01e18..94aba5f 100644 --- a/tp_framework/core/repair_tool.py +++ b/tp_framework/core/repair/repair_tool.py @@ -99,4 +99,4 @@ def to_json(self): original_dict = utils.read_json(self.to_repair.json_path) if repaired_dict != original_dict: - utils.write_json(self.to_repair.json_path, repaired_dict) \ No newline at end of file + utils.write_json(self.to_repair.json_path, repaired_dict)