From dfdbb377985cdb230ed2aac1f17705969deee7d2 Mon Sep 17 00:00:00 2001 From: Diogo Gaspar Date: Wed, 20 Nov 2024 11:46:58 +0100 Subject: [PATCH] fix: include all transitive, omitted deps TODO: missing plugin support, as referenced in https://github.com/chains-project/dirty-waters/issues/37#issuecomment-2470852399 --- tool/extract_deps.py | 91 ++++++++++++++++++++++---------------------- 1 file changed, 45 insertions(+), 46 deletions(-) diff --git a/tool/extract_deps.py b/tool/extract_deps.py index b2aa9972..b0d1573a 100644 --- a/tool/extract_deps.py +++ b/tool/extract_deps.py @@ -368,60 +368,59 @@ def extract_deps_from_maven(pom_xml_content): Returns: dict: A dictionary containing the extracted dependencies. """ - try: - deps_list = [] - properties = {} - - # Extract properties - prop_pattern = r"(.*?)" - prop_matches = re.findall(prop_pattern, pom_xml_content, re.DOTALL) - for prop_content in prop_matches: - # print(f"Properties content: {prop_content}") - prop_items = re.findall(r"<([^>]+)>(.*?)", prop_content, re.DOTALL) - properties.update(dict(prop_items)) - - # Extract parent version if exists - parent_version_pattern = r".*?(.*?).*?" - parent_version_match = re.search(parent_version_pattern, pom_xml_content, re.DOTALL) - if parent_version_match: - properties["project.version"] = parent_version_match.group(1) - - # Extract parent artifactId if exists - parent_artifactId_pattern = r".*?(.*?).*?" - parent_artifactId_match = re.search(parent_artifactId_pattern, pom_xml_content, re.DOTALL) - if parent_artifactId_match: - properties["project.artifactId"] = parent_artifactId_match.group(1) - - # Extract parent groupId if exists - parent_groupId_pattern = r".*?(.*?).*?" - parent_groupId_match = re.search(parent_groupId_pattern, pom_xml_content, re.DOTALL) - if parent_groupId_match: - properties["project.groupId"] = parent_groupId_match.group(1) - - pattern = r".*?(.*?).*?(.*?).*?(.*?).*?" - matches = re.findall(pattern, pom_xml_content, re.DOTALL) - - for group_id, artifact_id, version in matches: - # Resolve property placeholders - if group_id.startswith("${"): - group_id = properties.get(group_id[2:-1], group_id) - if artifact_id.startswith("${"): - artifact_id = properties.get(artifact_id[2:-1], artifact_id) - if version.startswith("${"): - version = properties.get(version[2:-1], version) - deps_list.append(f"{group_id}:{artifact_id}@{version}") - - deps_list_data = {"resolutions": deps_list, "patches": []} + def run_commands_in_sequence(commands, initial_input = None): + """ + Runs a sequence of commands where the output of one is piped to the next. + + Args: + commands (List[List[str]]): A list of command arguments to be executed in sequence. + initial_input (bytes): The initial input to be passed to the first command (optional). + + Returns: + bytes: The output of the final command in the sequence. + """ + input_data = initial_input + for command in commands: + print(f"[INFO] Running: {' '.join(command)}") + process = subprocess.run(command, input=input_data, check=True, capture_output=True) + input_data = process.stdout # Pass the output to the next command + return input_data + + # First, pasting the content of the pom.xml file onto a temporary file + # This is needed, since (AFAIK) the maven dependency plugin does not accept input from stdin + temp_pom_path = "/tmp/pom.xml" + with open(temp_pom_path, "w") as f: + f.write(pom_xml_content) + + # TODO: missing plugins; see https://github.com/chains-project/dirty-waters/issues/37#issuecomment-2470852399 + commands = [ + ["mvn", "dependency:tree", "-Dverbose", "-DoutputType=json", "-DoutputFile=/dev/stdout", "-f", temp_pom_path], + ["grep", "-v", "\\[INFO"], + ["jq", '[.children | .. | {version, groupId, artifactId}?] | unique'] + ] + + try: + result = run_commands_in_sequence(commands) + # Parse the JSON output and construct the resolutions list + dependencies = json.loads(result) + resolutions = [ + f"{dep['groupId']}:{dep['artifactId']}@{dep['version']}" + for dep in dependencies + ] + + deps_list_data = {"resolutions": resolutions, "patches": []} + # TODO: confirm resolutions and patches? return deps_list_data - except (IOError, ValueError, KeyError) as e: + except subprocess.CalledProcessError as e: + print(f"An error occurred: {e}") logging.error( "An error occurred while extracting dependencies from pom.xml file: %s", str(e), ) return {"resolutions": [], "patches": []} - + def deps_versions(deps_versions_info_list): """