Skip to content
This repository has been archived by the owner on Feb 15, 2024. It is now read-only.

Commit

Permalink
Merge pull request #295 from RedHatProductSecurity/rhel-deduplication
Browse files Browse the repository at this point in the history
Result deduplication
  • Loading branch information
JakubFrejlach authored Feb 13, 2024
2 parents 256633d + 4aaf3be commit 0e4a6cc
Show file tree
Hide file tree
Showing 5 changed files with 113 additions and 16 deletions.
13 changes: 13 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,21 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## Unreleased
### Added
* added --deduplicate/--no-deduplicate (enabled by default) flag on products-contain-component
which performs additional deduplications (all deduplication steps are listed in the --help)
* added rhel-br/rhel deduplication (GRIF-150)

### Changed
* changed verbosity level 0 to return the component name on service products-contain-component
* verbosity level 0 (without -v option) now deduplicates identical components
per product version

### Fixed
* fixed picking the latest NVR with natural sort instead of normal sort
* fixed listing product streams sorted naturally
* fixed error when using -vvvv (verbosity level 4) without
GRIFFON_MIDDLEWARE_CLI set

## [0.5.5] - 2024-02-02
### Changed
Expand Down
12 changes: 12 additions & 0 deletions griffon/commands/queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
these operations beyond cli
"""

import copy
import logging
import re
Expand Down Expand Up @@ -363,6 +364,15 @@ def retrieve_component_summary(ctx, component_name, strict_name_search):
default=get_config_option("default", "exclude_unreleased", False),
help="Exclude unreleased components.",
)
@click.option(
"--deduplicate/--no-deduplicate",
"deduplicate",
default=get_config_option("default", "deduplicate", True),
help=(
"Deduplicate / do not deduplicate results "
"based on following rules: rhel/rhel-br redundancy"
),
)
@click.pass_context
@progress_bar(is_updatable=True)
def get_product_contain_component(
Expand Down Expand Up @@ -396,6 +406,7 @@ def get_product_contain_component(
regex_name_search,
include_container_roots,
exclude_unreleased,
deduplicate,
):
# with console_status(ctx) as operation_status:
"""List products of a latest component."""
Expand All @@ -419,6 +430,7 @@ def get_product_contain_component(
params.pop("sfm2_flaw_id")
params.pop("flaw_mode")
params.pop("affect_mode")
params.pop("deduplicate")
if component_name:
q = query_service.invoke(
core_queries.products_containing_component_query, params, status=operation_status
Expand Down
8 changes: 8 additions & 0 deletions griffon/helpers.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
"""
Helpers for direct usage or debugging
"""

import json
import re
from enum import Enum
from typing import Callable, Optional, Type, Union

Expand Down Expand Up @@ -106,3 +108,9 @@ class Style(Enum):

def __str__(self):
return str(self.value)


def natural_sort_key(string):
    """Key for builtin sorted function to perform natural sort

    The string is split into alternating runs of text and ASCII digits.
    Digit runs are converted to integers so that e.g. "a2" sorts before
    "a10"; text runs are lower-cased so the ordering is case-insensitive.

    :param string: value to build the sort key for
    :return: list alternating between lower-cased text and integers,
        always starting (and ending) with a text element
    """
    # Because the pattern has a capturing group, re.split keeps the matched
    # digit runs in the result, which therefore always alternates
    # text, digits, text, ... - the digit runs sit at the odd indexes.
    split_by_digit = re.split(r"([0-9]+)", string)

    # Decide by position rather than by str.isdigit(): isdigit() is also true
    # for characters such as superscripts or non-ASCII digits which the
    # [0-9] pattern never isolates. Converting those would either raise
    # ValueError or put an int into a text slot, making keys incomparable.
    return [
        int(part) if index % 2 else part.lower()
        for index, part in enumerate(split_by_digit)
    ]
91 changes: 77 additions & 14 deletions griffon/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Gather up all of the messy 'presentation' logic into one place
"""

import enum
import json
import logging
Expand All @@ -16,6 +17,8 @@
from rich.text import Text
from rich.tree import Tree

from .helpers import natural_sort_key

console = Console(color_system="auto")

logger = logging.getLogger("griffon")
Expand Down Expand Up @@ -426,6 +429,29 @@ def highlight_search_term(search_pattern, text_value):
return re.sub(search_pattern, "[b]\\g<0>[/b]", text_value)


def rhel_br_deduplicate(result_tree: dict) -> dict:
    """
    Remove rhel-br-X records which are redundant to rhel-X records.

    A component listed under a "rhel-br..." product version / product stream
    is dropped when the same component name is also present under the
    corresponding product version / product stream whose name has "rhel-br"
    replaced by "rhel". Product streams and product versions left empty
    are removed as well.

    The tree is modified in place and the very same object is returned.
    """
    br_versions = [name for name in result_tree if name.startswith("rhel-br")]

    for br_version in br_versions:
        br_streams = result_tree[br_version]
        rhel_streams = result_tree.get(br_version.replace("rhel-br", "rhel"), {})

        # iterate over snapshots of the keys as entries get deleted on the way
        for br_stream in list(br_streams):
            br_components = br_streams[br_stream]
            rhel_components = rhel_streams.get(br_stream.replace("rhel-br", "rhel"), {})

            for component_name in list(br_components):
                if component_name in rhel_components:
                    del br_components[component_name]

            # drop the product stream once nothing is left in it
            if not br_components:
                del br_streams[br_stream]

        # drop the product version once nothing is left in it
        if not br_streams:
            del result_tree[br_version]

    return result_tree


def text_output_products_contain_component(
ctx,
output,
Expand Down Expand Up @@ -460,6 +486,10 @@ def text_output_products_contain_component(
)
result_tree = generate_result_tree(normalised_results)

# perform deduplication
if ctx.params["deduplicate"]:
result_tree = rhel_br_deduplicate(result_tree)

# TODO - MAVEN component type will require special handling
if ctx.params["affect_mode"]:
console.no_color = True
Expand All @@ -479,10 +509,13 @@ def text_output_products_contain_component(
else:
if ctx.obj["VERBOSE"] == 0: # product_version X root component nvr
for pv in result_tree.keys():
for ps in result_tree[pv].keys():
used_component_names = set() # store used component names for deduplication
for ps in sorted(result_tree[pv].keys(), key=natural_sort_key):
for cn in sorted(result_tree[pv][ps].keys()):
# select the latest nvr (from sorted list)
nvr = list(result_tree[pv][ps][cn].keys())[-1]
nvr = sorted(
list(result_tree[pv][ps][cn].keys()), key=natural_sort_key
)[-1]
product_color = process_product_color(
result_tree[pv][ps][cn][nvr]["product_stream_relations"],
result_tree[pv][ps][cn][nvr]["build_type"],
Expand All @@ -500,7 +533,7 @@ def text_output_products_contain_component(
dep = f"[{root_component_color}]{dep_name}[/{root_component_color}]" # noqa
if result_tree[pv][ps][cn][nvr]["upstreams"]:
upstream_component_names = sorted(
list(
set(
[
f"{upstream['name']}"
for upstream in result_tree[pv][ps][cn][nvr][
Expand All @@ -509,6 +542,15 @@ def text_output_products_contain_component(
]
)
)

# deduplicate upstream component names
upstream_component_names = sorted(
set(upstream_component_names) - used_component_names
)
used_component_names = used_component_names.union(
set(upstream_component_names)
)

for upstream_component_name in upstream_component_names:
console.print(
Text(pv, style=f"{product_color} b"),
Expand All @@ -528,15 +570,28 @@ def text_output_products_contain_component(
)
)
)

# deduplicate source component names
source_component_names = sorted(
set(source_component_names) - used_component_names
)
used_component_names = used_component_names.union(
set(source_component_names)
)

for source_component_name in source_component_names:
console.print(
Text(pv, style=f"{product_color} b"),
f"[pale_turquoise1]{source_component_name}[/pale_turquoise1]", # noqa
no_wrap=no_wrap,
)
if not (result_tree[pv][ps][cn][nvr]["upstreams"]) and not (
result_tree[pv][ps][cn][nvr]["sources"]
if (
not (result_tree[pv][ps][cn][nvr]["upstreams"])
and not (result_tree[pv][ps][cn][nvr]["sources"])
and dep_name
not in used_component_names # deduplicate single component name
):
used_component_names.add(dep_name)
console.print(
Text(pv, style=f"{product_color} b"),
dep,
Expand All @@ -547,10 +602,12 @@ def text_output_products_contain_component(
ctx.obj["VERBOSE"] == 1
): # product_stream X root component nvr (type) x child components [nvr (type)]
for pv in result_tree.keys():
for ps in result_tree[pv].keys():
for ps in sorted(result_tree[pv].keys(), key=natural_sort_key):
for cn in sorted(result_tree[pv][ps].keys()):
# select the latest nvr (from sorted list)
nvr = list(result_tree[pv][ps][cn].keys())[-1]
nvr = sorted(
list(result_tree[pv][ps][cn].keys()), key=natural_sort_key
)[-1]
product_color = process_product_color(
result_tree[pv][ps][cn][nvr]["product_stream_relations"],
result_tree[pv][ps][cn][nvr]["build_type"],
Expand Down Expand Up @@ -671,10 +728,12 @@ def text_output_products_contain_component(
ctx.obj["VERBOSE"] == 2
): # product_stream X root component nvr (type:arch) x child components [name {versions} (type:{arches})] x related_url x build_source_url # noqa
for pv in result_tree.keys():
for ps in result_tree[pv].keys():
for ps in sorted(result_tree[pv].keys(), key=natural_sort_key):
for cn in sorted(result_tree[pv][ps].keys()):
# select the latest nvr (from sorted list)
nvr = list(result_tree[pv][ps][cn].keys())[-1]
nvr = sorted(
list(result_tree[pv][ps][cn].keys()), key=natural_sort_key
)[-1]
product_color = process_product_color(
result_tree[pv][ps][cn][nvr]["product_stream_relations"],
result_tree[pv][ps][cn][nvr]["build_type"],
Expand Down Expand Up @@ -816,18 +875,20 @@ def text_output_products_contain_component(
# delete once we stop using middleware CLI completely
middleware_cli_purl_verbose_level = (
ctx.obj["VERBOSE"] > 3
and ctx.obj["MIDDLEWARE_CLI"]
and ctx.obj.get("MIDDLEWARE_CLI")
and not ctx.params["no_middleware"]
)

if (
ctx.obj["VERBOSE"] == 3 or middleware_cli_purl_verbose_level
): # product_stream X root component nvr (type:arch) x child components [ nvr (type:arch)] x related_url x build_source_url # noqa
for pv in result_tree.keys():
for ps in result_tree[pv].keys():
for ps in sorted(result_tree[pv].keys(), key=natural_sort_key):
for cn in sorted(result_tree[pv][ps].keys()):
# select the latest nvr (from sorted list)
nvr = list(result_tree[pv][ps][cn].keys())[-1]
nvr = sorted(
list(result_tree[pv][ps][cn].keys()), key=natural_sort_key
)[-1]
product_color = process_product_color(
result_tree[pv][ps][cn][nvr]["product_stream_relations"],
result_tree[pv][ps][cn][nvr]["build_type"],
Expand Down Expand Up @@ -939,10 +1000,12 @@ def text_output_products_contain_component(
ctx.obj["VERBOSE"] > 3 and not middleware_cli_purl_verbose_level
): # product_stream X root component purl x child components [ purl ] x related_url x build_source_url # noqa
for pv in result_tree.keys():
for ps in result_tree[pv].keys():
for ps in sorted(result_tree[pv].keys(), key=natural_sort_key):
for cn in sorted(result_tree[pv][ps].keys()):
# select the latest nvr (from sorted list)
nvr = list(result_tree[pv][ps][cn].keys())[-1]
nvr = sorted(
list(result_tree[pv][ps][cn].keys()), key=natural_sort_key
)[-1]
product_color = process_product_color(
result_tree[pv][ps][cn][nvr]["product_stream_relations"],
result_tree[pv][ps][cn][nvr]["build_type"],
Expand Down
5 changes: 3 additions & 2 deletions griffon/static/default_griffonrc
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,13 @@ exclude_components = -container-source
-common-debuginfo
-doc
-devel
-javadoc
-testlib
-javadoc
-testlib
-repolib
include_container_roots = False
exclude_unreleased = False
filter_rh_naming = True
deduplicate = True

# profile sections (use with --profile {profile} flag)
[cloud]
Expand Down

0 comments on commit 0e4a6cc

Please sign in to comment.