Skip to content

Commit

Permalink
style: format Python code with Black
Browse files Browse the repository at this point in the history
  • Loading branch information
randomicecube authored and github-actions[bot] committed Jan 30, 2025
1 parent e5d0449 commit 4aba24e
Show file tree
Hide file tree
Showing 10 changed files with 168 additions and 100 deletions.
113 changes: 68 additions & 45 deletions tool/compare_commits.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,29 @@

cache_manager = get_cache_manager()


def tag_format(tag, package_name, repo_name):
_, repo_name = repo_name.split("/") # splits owner and repo name
project_name = repo_name.split("-")[-1] # deals with lots of maven-<project_name> repos (e.g., surefire, etc)
tag_formats = set([
f"{tag}",
f"v{tag}",
f"v_{tag}",
f"r{tag}",
f"release-{tag}",
f"parent-{tag}",
# Below: further tag formats found in the AROMA paper, table 3: https://dl.acm.org/doi/pdf/10.1145/3643764
f"release/{tag}",
f"{tag}-release",
f"v.{tag}",
] + [
f"{name}{suffix}"
for name in [package_name, repo_name, project_name]
for suffix in [f"@{tag}", f"-v{tag}", f"_v{tag}", f"-{tag}", f"_{tag}"]
])
_, repo_name = repo_name.split("/") # splits owner and repo name
project_name = repo_name.split("-")[-1] # deals with lots of maven-<project_name> repos (e.g., surefire, etc)
tag_formats = set(
[
f"{tag}",
f"v{tag}",
f"v_{tag}",
f"r{tag}",
f"release-{tag}",
f"parent-{tag}",
# Below: further tag formats found in the AROMA paper, table 3: https://dl.acm.org/doi/pdf/10.1145/3643764
f"release/{tag}",
f"{tag}-release",
f"v.{tag}",
]
+ [
f"{name}{suffix}"
for name in [package_name, repo_name, project_name]
for suffix in [f"@{tag}", f"-v{tag}", f"_v{tag}", f"-{tag}", f"_{tag}"]
]
)

only_package_name, artifact_id_parts = None, None
if "/" in package_name: # NPM-based
Expand All @@ -45,6 +49,7 @@ def tag_format(tag, package_name, repo_name):

return tag_formats


def find_existing_tags(tag_formats, repo_name):
for tag_format in tag_formats:
tag_url = f"https://api.github.com/repos/{repo_name}/git/ref/tags/{tag_format}"
Expand All @@ -53,6 +58,7 @@ def find_existing_tags(tag_formats, repo_name):
return tag_format
return None


def get_commit_info(commit):
if commit.get("committer") is None:
committer_login = "No committer info"
Expand Down Expand Up @@ -86,6 +92,7 @@ def get_commit_info(commit):
"id": author_id,
}


def get_authors_from_response(url, data, package_info):
result = {
"repo": package_info.get("repo_pure"),
Expand All @@ -103,24 +110,29 @@ def get_authors_from_response(url, data, package_info):
if not commit_info:
commit_info = get_commit_info(commit)
cache_manager.commit_comparison_cache.cache_authors_from_url(commit.get("url"), commit_info)

if commit_info:
authors_info.append(commit_info)
result.update({
"authors": authors_info,
"tag1": package_info.get("chosen_v1"),
"tag2": package_info.get("chosen_v2"),
})
result.update(
{
"authors": authors_info,
"tag1": package_info.get("chosen_v1"),
"tag2": package_info.get("chosen_v2"),
}
)
else:
result.update({
"tag1": package_info.get("version1"),
"tag2": package_info.get("version2"),
"commits_info_message": "No commits found",
"status_code": 200,
})
result.update(
{
"tag1": package_info.get("version1"),
"tag2": package_info.get("version2"),
"commits_info_message": "No commits found",
"status_code": 200,
}
)

return result


def get_authors_from_tags(tag1, tag2, package, package_info):
repo_name = package_info.get("repo_name")
tag_formats_old = tag_format(tag1, package, repo_name)
Expand All @@ -129,7 +141,9 @@ def get_authors_from_tags(tag1, tag2, package, package_info):
existing_tag_format_new = find_existing_tags(tag_formats_new, repo_name)
category = package_info.get("message")

compare_url = f"https://api.github.com/repos/{repo_name}/compare/{existing_tag_format_old}...{existing_tag_format_new}"
compare_url = (
f"https://api.github.com/repos/{repo_name}/compare/{existing_tag_format_old}...{existing_tag_format_new}"
)
response = make_github_request(compare_url, max_retries=2)

if not response:
Expand All @@ -146,7 +160,7 @@ def get_authors_from_tags(tag1, tag2, package, package_info):
status_old = tag_old
old_tag_found = True
break

if not old_tag_found:
for tag_new in tag_formats_new:
new_tag_url = f"https://api.github.com/repos/{repo_name}/git/ref/tags/{tag_new}"
Expand All @@ -167,14 +181,15 @@ def get_authors_from_tags(tag1, tag2, package, package_info):

return get_authors_from_response(compare_url, response, package_info)


def get_patch_authors(repo_name, patch_name, path, release_version_sha, headers):
url = f"https://api.github.com/repos/{repo_name}/commits?path=.yarn/patches/{path}&sha={release_version_sha}"
patch_info = {
"patch_name": patch_name,
"repo_name": repo_name,
"commit_url": url,
}

response = make_github_request(url, headers=headers)
authors_info = []
if response:
Expand Down Expand Up @@ -214,19 +229,24 @@ def get_patch_authors(repo_name, patch_name, path, release_version_sha, headers)
"c_type": committer_type,
}
)
patch_info.update({
"category": "patch",
"authors": authors_info,
})
patch_info.update(
{
"category": "patch",
"authors": authors_info,
}
)
else:
patch_info.update({
"authors": None,
"error": True,
"error_message": response.status_code,
})
patch_info.update(
{
"authors": None,
"error": True,
"error_message": response.status_code,
}
)

return patch_info


def get_commit_authors(packages_data):
logging.info("Getting commits for packages...")
authors_per_package = {}
Expand Down Expand Up @@ -256,6 +276,7 @@ def get_commit_authors(packages_data):

return authors_per_package


def get_patch_commits(headers, repo_name, release_version, patch_data):
logging.info("Getting commits for patches...")
release_version_sha = cache_manager.github_cache.get_tag_to_sha(repo_name, release_version)
Expand All @@ -267,7 +288,9 @@ def get_patch_commits(headers, repo_name, release_version, patch_data):
release_version_sha = response_json.get("object").get("sha")
else:
release_version_sha = None
cache_manager.github_cache.cache_tag_to_sha(repo_name, release_version, "No release found" if release_version_sha is None else release_version_sha)
cache_manager.github_cache.cache_tag_to_sha(
repo_name, release_version, "No release found" if release_version_sha is None else release_version_sha
)
elif release_version_sha == "No release found":
release_version_sha = None

Expand Down Expand Up @@ -295,7 +318,7 @@ def get_patch_commits(headers, repo_name, release_version, patch_data):
}
continue

data = cache_manager.commit_comparison_cache.get_patch_authors(repo_name, path, release_version_sha)
data = cache_manager.commit_comparison_cache.get_patch_authors(repo_name, path, release_version_sha)
if not data:
# Cache miss, get authors from GitHub
data = get_patch_authors(repo_name, changed_patch, path, release_version_sha, headers)
Expand All @@ -311,4 +334,4 @@ def get_commit_results(api_headers, repo_name, release_version, patch_data, pack
authors_per_package_result = get_commit_authors(packages_data)
commit_results = {**authors_per_patches_result, **authors_per_package_result}

return commit_results
return commit_results
1 change: 1 addition & 0 deletions tool/compare_packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"no_change": "No change",
}


def parse_dependencies(file_path):
dependencies = {}
with open(file_path, "r") as file:
Expand Down
16 changes: 9 additions & 7 deletions tool/get_pr_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,12 +117,14 @@ def get_pr_info(data):
if not pr_data:
if commit_node_id:
pr_info = fetch_pull_requests(commit_node_id)
cache_manager.github_cache.cache_pr_info({
"package": package,
"commit_sha": commit_sha,
"commit_node_id": commit_node_id,
"pr_info": pr_info,
})
cache_manager.github_cache.cache_pr_info(
{
"package": package,
"commit_sha": commit_sha,
"commit_node_id": commit_node_id,
"pr_info": pr_info,
}
)
else:
pr_info = pr_data["pr_info"]

Expand Down Expand Up @@ -217,4 +219,4 @@ def get_useful_pr_info(commits_data):

author["commit_merged_info"].append(merged_info)

return commits_data
return commits_data
5 changes: 4 additions & 1 deletion tool/get_pr_review.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

url = "https://api.github.com/graphql"


def get_first_pr_info(repo_name, review_author_login):
query = """
query($query: String!, $type: SearchType!, $last: Int!)
Expand Down Expand Up @@ -95,7 +96,9 @@ def get_pr_review_info(data):
if not first_pr_info:
if review_author_login:
first_pr_info = get_first_pr_info(repo_name, review_author_login)
cache_manager.github_cache.cache_pr_review(package, repo_name, review_author_login, first_pr_info)
cache_manager.github_cache.cache_pr_review(
package, repo_name, review_author_login, first_pr_info
)
useful_info = first_pr_info.get("data", {}).get("search", {}).get("nodes", [])
first_review_info = useful_info[0] if useful_info else {}
all_useful_first_prr_info = first_review_info.get("reviews", {}).get("edges", [])
Expand Down
33 changes: 22 additions & 11 deletions tool/get_user_commit_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

cache_manager = get_cache_manager()


def get_repo_author_commits(api_url):
# Since we can't return the commits in ascending date order, we'll just return the latest commit
# This response also holds the number of pages, so the last page will have the first commit
Expand All @@ -21,6 +22,7 @@ def get_repo_author_commits(api_url):
last_page_url = f"{search_url}&page={last_page}"
return make_github_request(last_page_url, max_retries=2, retry_delay=2, sleep_between_requests=2)


def get_user_first_commit_info(data):
"""
Get the first commit information for each author in the given data.
Expand Down Expand Up @@ -69,7 +71,7 @@ def get_user_first_commit_info(data):
author_login_in_commit,
author_id_in_commit,
) = data
first_time_commit = (earliest_commit_sha == commit_sha)
first_time_commit = earliest_commit_sha == commit_sha

commit_result.update(
{
Expand All @@ -89,18 +91,27 @@ def get_user_first_commit_info(data):
earliest_commit_sha = earliest_commit["sha"]
author_login_in_commit = earliest_commit["author"]["login"]
author_id_in_commit = earliest_commit["author"]["id"]
first_time_commit = (earliest_commit_sha == commit_sha)
first_time_commit = earliest_commit_sha == commit_sha
cache_manager.user_commit_cache.cache_user_commit(
api_url, earliest_commit_sha, repo_name, package, author_login, commit_sha, author_login_in_commit, author_id_in_commit
api_url,
earliest_commit_sha,
repo_name,
package,
author_login,
commit_sha,
author_login_in_commit,
author_id_in_commit,
)
commit_result.update(
{
"api_url": api_url,
"earliest_commit_sha": earliest_commit_sha,
"author_login_in_1st_commit": author_login_in_commit,
"author_id_in_1st_commit": author_id_in_commit,
"is_first_commit": first_time_commit,
"commit_notice": "Data retrieved from API",
}
)
commit_result.update({
"api_url": api_url,
"earliest_commit_sha": earliest_commit_sha,
"author_login_in_1st_commit": author_login_in_commit,
"author_id_in_1st_commit": author_id_in_commit,
"is_first_commit": first_time_commit,
"commit_notice": "Data retrieved from API",
})
else:
commit_result["commit_notice"] = f"Failed to retrieve data from API({api_url})"
author["commit_result"] = commit_result
Expand Down
12 changes: 4 additions & 8 deletions tool/github_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def extract_repo_url(repo_info: str) -> str:
match = GITHUB_URL_PATTERN.search(repo_info)
if not match:
return "not github"

# if there is a match, there's still the possibility of the scm url having been
# put in a different form, e.g.,
# github.com/apache/maven-scm/tree/maven-scm-2.1.0/maven-scm-providers/maven-scm-providers-standard
Expand All @@ -50,6 +50,7 @@ def extract_repo_url(repo_info: str) -> str:
joined = joined if not joined.endswith(".git") else joined[:-4]
return joined


def get_scm_commands(pm: str, package: str) -> List[str]:
"""Get the appropriate command to find a package's source code locations for the package manager."""
if pm == "yarn-berry" or pm == "yarn-classic":
Expand Down Expand Up @@ -79,6 +80,7 @@ def get_scm_commands(pm: str, package: str) -> List[str]:
]
raise ValueError(f"Unsupported package manager: {pm}")


def process_package(
package,
pm,
Expand All @@ -89,12 +91,7 @@ def process_package(
repos_output_json,
):
def check_if_valid_repo_info(repo_info):
if (
repo_info is None
or "Undefined" in repo_info
or "undefined" in repo_info
or "ERR!" in repo_info
):
if repo_info is None or "Undefined" in repo_info or "undefined" in repo_info or "ERR!" in repo_info:
repos_output_json[package] = {"github": "Could not find"}
undefined.append(f"Undefined for {package}, {repo_info}")
return False
Expand All @@ -109,7 +106,6 @@ def check_if_valid_repo_info(repo_info):
some_errors.append(f"No GitHub URL for {package}\n{repo_info}")
return False


repo_info = cache_manager.github_cache.get_github_url(package)
valid_repo_info = False
if not repo_info:
Expand Down
Loading

0 comments on commit 4aba24e

Please sign in to comment.