From e54b4a6fb3f7a7782783efbaf30d433ef349f3a7 Mon Sep 17 00:00:00 2001 From: Jakub Frejlach Date: Thu, 8 Feb 2024 15:33:53 +0100 Subject: [PATCH 1/6] Add rhel-X/rhel-br-X deduplication --deduplicate/--no-deduplicate will perform a set of deduplication heuristics however for the user, only the deduplication as a whole really matters --- griffon/commands/queries.py | 12 ++++++++++++ griffon/output.py | 29 +++++++++++++++++++++++++++++ griffon/static/default_griffonrc | 5 +++-- 3 files changed, 44 insertions(+), 2 deletions(-) diff --git a/griffon/commands/queries.py b/griffon/commands/queries.py index e075d7d..3ed449f 100644 --- a/griffon/commands/queries.py +++ b/griffon/commands/queries.py @@ -5,6 +5,7 @@ these operations beyond cli """ + import copy import logging import re @@ -363,6 +364,15 @@ def retrieve_component_summary(ctx, component_name, strict_name_search): default=get_config_option("default", "exclude_unreleased", False), help="Exclude unreleased components.", ) +@click.option( + "--deduplicate/--no-deduplicate", + "deduplicate", + default=get_config_option("default", "deduplicate", True), + help=( + "Deduplicate / do not deduplicate results " + "based on following rules: rhel/rhel-br redundancy" + ), +) @click.pass_context @progress_bar(is_updatable=True) def get_product_contain_component( @@ -396,6 +406,7 @@ def get_product_contain_component( regex_name_search, include_container_roots, exclude_unreleased, + deduplicate, ): # with console_status(ctx) as operation_status: """List products of a latest component.""" @@ -419,6 +430,7 @@ def get_product_contain_component( params.pop("sfm2_flaw_id") params.pop("flaw_mode") params.pop("affect_mode") + params.pop("deduplicate") if component_name: q = query_service.invoke( core_queries.products_containing_component_query, params, status=operation_status diff --git a/griffon/output.py b/griffon/output.py index 44e82f9..f389cf4 100644 --- a/griffon/output.py +++ b/griffon/output.py @@ -2,6 +2,7 @@ Gather up all of 
the messy 'presentation' logic into one place """ + import enum import json import logging @@ -426,6 +427,30 @@ def highlight_search_term(search_pattern, text_value): return re.sub(search_pattern, "[b]\\g<0>[/b]", text_value) +def rhel_br_deduplicate(result_tree: dict) -> dict: + """ + if component exists for both rhel-X and rhel-br-X + product version and product stream keep only the rhel-X record + + """ + filtered_result_tree = result_tree + for pv in list(result_tree.keys()): + if pv.startswith("rhel-br"): + for ps in list(result_tree[pv].keys()): + for cn in list(result_tree[pv][ps].keys()): + if cn in result_tree.get(pv.replace("rhel-br", "rhel"), {}).get( + ps.replace("rhel-br", "rhel"), {} + ): + filtered_result_tree[pv][ps].pop(cn) + + if not filtered_result_tree[pv][ps]: + filtered_result_tree[pv].pop(ps) + + if not filtered_result_tree[pv]: + filtered_result_tree.pop(pv) + return filtered_result_tree + + def text_output_products_contain_component( ctx, output, @@ -460,6 +485,10 @@ def text_output_products_contain_component( ) result_tree = generate_result_tree(normalised_results) + # perform deduplication + if ctx.params["deduplicate"]: + result_tree = rhel_br_deduplicate(result_tree) + # TODO - MAVEN component type will require special handling if ctx.params["affect_mode"]: console.no_color = True diff --git a/griffon/static/default_griffonrc b/griffon/static/default_griffonrc index 32f31a3..9203cfd 100644 --- a/griffon/static/default_griffonrc +++ b/griffon/static/default_griffonrc @@ -17,12 +17,13 @@ exclude_components = -container-source -common-debuginfo -doc -devel - -javadoc - -testlib + -javadoc + -testlib -repolib include_container_roots = False exclude_unreleased = False filter_rh_naming = True +deduplicate = True # profile sections (use with --profile {profile} flag) [cloud] From f4f2e3a951ed86893b2dd1194bcb9a22582f3efe Mon Sep 17 00:00:00 2001 From: Jakub Frejlach Date: Thu, 8 Feb 2024 16:14:14 +0100 Subject: [PATCH 2/6] Fix picking latest 
NVR To pick the latest NVR we need to sort the list of NVRs naturally; otherwise the result may be incorrect, because with a plain string sort, of two almost identical NVRs that differ only in one part of the version, the one with two digits in that part is considered lower than the one with a single digit in the same place --- griffon/helpers.py | 8 ++++++++ griffon/output.py | 22 +++++++++++++++++----- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/griffon/helpers.py b/griffon/helpers.py index c006976..04ac75a 100644 --- a/griffon/helpers.py +++ b/griffon/helpers.py @@ -1,7 +1,9 @@ """ Helpers for direct usage or debbuging """ + import json +import re from enum import Enum from typing import Callable, Optional, Type, Union @@ -106,3 +108,9 @@ class Style(Enum): def __str__(self): return str(self.value) + + +def natural_sort_key(string): + """Key for builtin sorted function to perform natural sort""" + split_by_digit = re.split("([0-9]+)", string) + return [int(part) if part.isdigit() else part.lower() for part in split_by_digit] diff --git a/griffon/output.py b/griffon/output.py index f389cf4..683f8c6 100644 --- a/griffon/output.py +++ b/griffon/output.py @@ -17,6 +17,8 @@ from rich.text import Text from rich.tree import Tree +from .helpers import natural_sort_key + console = Console(color_system="auto") logger = logging.getLogger("griffon") @@ -511,7 +513,9 @@ def text_output_products_contain_component( for ps in result_tree[pv].keys(): for cn in sorted(result_tree[pv][ps].keys()): # select the latest nvr (from sorted list) - nvr = list(result_tree[pv][ps][cn].keys())[-1] + nvr = sorted( + list(result_tree[pv][ps][cn].keys()), key=natural_sort_key + )[-1] product_color = process_product_color( result_tree[pv][ps][cn][nvr]["product_stream_relations"], result_tree[pv][ps][cn][nvr]["build_type"], @@ -579,7 +583,9 @@ def text_output_products_contain_component( for ps in result_tree[pv].keys(): for cn in sorted(result_tree[pv][ps].keys()): # select the latest nvr (from sorted 
list) - nvr = list(result_tree[pv][ps][cn].keys())[-1] + nvr = sorted( + list(result_tree[pv][ps][cn].keys()), key=natural_sort_key + )[-1] product_color = process_product_color( result_tree[pv][ps][cn][nvr]["product_stream_relations"], result_tree[pv][ps][cn][nvr]["build_type"], @@ -703,7 +709,9 @@ def text_output_products_contain_component( for ps in result_tree[pv].keys(): for cn in sorted(result_tree[pv][ps].keys()): # select the latest nvr (from sorted list) - nvr = list(result_tree[pv][ps][cn].keys())[-1] + nvr = sorted( + list(result_tree[pv][ps][cn].keys()), key=natural_sort_key + )[-1] product_color = process_product_color( result_tree[pv][ps][cn][nvr]["product_stream_relations"], result_tree[pv][ps][cn][nvr]["build_type"], @@ -856,7 +864,9 @@ def text_output_products_contain_component( for ps in result_tree[pv].keys(): for cn in sorted(result_tree[pv][ps].keys()): # select the latest nvr (from sorted list) - nvr = list(result_tree[pv][ps][cn].keys())[-1] + nvr = sorted( + list(result_tree[pv][ps][cn].keys()), key=natural_sort_key + )[-1] product_color = process_product_color( result_tree[pv][ps][cn][nvr]["product_stream_relations"], result_tree[pv][ps][cn][nvr]["build_type"], @@ -971,7 +981,9 @@ def text_output_products_contain_component( for ps in result_tree[pv].keys(): for cn in sorted(result_tree[pv][ps].keys()): # select the latest nvr (from sorted list) - nvr = list(result_tree[pv][ps][cn].keys())[-1] + nvr = sorted( + list(result_tree[pv][ps][cn].keys()), key=natural_sort_key + )[-1] product_color = process_product_color( result_tree[pv][ps][cn][nvr]["product_stream_relations"], result_tree[pv][ps][cn][nvr]["build_type"], From 327c4d756529dc97ada92258b7e776d8bb4aea5d Mon Sep 17 00:00:00 2001 From: Jakub Frejlach Date: Mon, 12 Feb 2024 13:36:29 +0100 Subject: [PATCH 3/6] minor - fix accessing the middleware cli on ctx obj --- griffon/output.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/griffon/output.py b/griffon/output.py 
index 683f8c6..7712154 100644 --- a/griffon/output.py +++ b/griffon/output.py @@ -853,7 +853,7 @@ def text_output_products_contain_component( # delete once we stop using middleware CLI completely middleware_cli_purl_verbose_level = ( ctx.obj["VERBOSE"] > 3 - and ctx.obj["MIDDLEWARE_CLI"] + and ctx.obj.get("MIDDLEWARE_CLI") and not ctx.params["no_middleware"] ) From 07a1a93cbd69642ca55336c39abb7f8cebc050bd Mon Sep 17 00:00:00 2001 From: Jakub Frejlach Date: Mon, 12 Feb 2024 14:38:23 +0100 Subject: [PATCH 4/6] Deduplicate component names on verbosity 0 Since on verbosity 0 only the product version and component name are shown, griffon showed a lot of duplicates, e.g. one product version had 3 product streams, each stream had the component, and thus the component showed 3 times with the same product version, as product streams are not shown on verbosity level 0 --- griffon/output.py | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/griffon/output.py b/griffon/output.py index 7712154..68aa14b 100644 --- a/griffon/output.py +++ b/griffon/output.py @@ -433,7 +433,6 @@ def rhel_br_deduplicate(result_tree: dict) -> dict: """ if component exists for both rhel-X and rhel-br-X product version and product stream keep only the rhel-X record - """ filtered_result_tree = result_tree for pv in list(result_tree.keys()): @@ -510,6 +509,7 @@ def text_output_products_contain_component( else: if ctx.obj["VERBOSE"] == 0: # product_version X root component nvr for pv in result_tree.keys(): + used_component_names = set() # store used component names for deduplication for ps in result_tree[pv].keys(): for cn in sorted(result_tree[pv][ps].keys()): # select the latest nvr (from sorted list) @@ -533,7 +533,7 @@ def text_output_products_contain_component( dep = f"[{root_component_color}]{dep_name}[/{root_component_color}]" # noqa if result_tree[pv][ps][cn][nvr]["upstreams"]: upstream_component_names = sorted( - list( + set( [ f"{upstream['name']}" for 
upstream in result_tree[pv][ps][cn][nvr][ @@ -542,6 +542,15 @@ def text_output_products_contain_component( ] ) ) + + # deduplicate upstream component names + upstream_component_names = sorted( + set(upstream_component_names) - used_component_names + ) + used_component_names = used_component_names.union( + set(upstream_component_names) + ) + for upstream_component_name in upstream_component_names: console.print( Text(pv, style=f"{product_color} b"), @@ -561,15 +570,28 @@ def text_output_products_contain_component( ) ) ) + + # deduplicate source component names + source_component_names = sorted( + set(source_component_names) - used_component_names + ) + used_component_names = used_component_names.union( + set(source_component_names) + ) + for source_component_name in source_component_names: console.print( Text(pv, style=f"{product_color} b"), f"[pale_turquoise1]{source_component_name}[/pale_turquoise1]", # noqa no_wrap=no_wrap, ) - if not (result_tree[pv][ps][cn][nvr]["upstreams"]) and not ( - result_tree[pv][ps][cn][nvr]["sources"] + if ( + not (result_tree[pv][ps][cn][nvr]["upstreams"]) + and not (result_tree[pv][ps][cn][nvr]["sources"]) + and dep_name + not in used_component_names # deduplicate single component name ): + used_component_names.add(dep_name) console.print( Text(pv, style=f"{product_color} b"), dep, From c785ccd33c253d9801ea5577af95daa1e2bb6f15 Mon Sep 17 00:00:00 2001 From: Jakub Frejlach Date: Mon, 12 Feb 2024 14:40:58 +0100 Subject: [PATCH 5/6] minor - sort product streams naturally --- griffon/output.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/griffon/output.py b/griffon/output.py index 68aa14b..d6ea34b 100644 --- a/griffon/output.py +++ b/griffon/output.py @@ -510,7 +510,7 @@ def text_output_products_contain_component( if ctx.obj["VERBOSE"] == 0: # product_version X root component nvr for pv in result_tree.keys(): used_component_names = set() # store used component names for deduplication - for ps in 
result_tree[pv].keys(): + for ps in sorted(result_tree[pv].keys(), key=natural_sort_key): for cn in sorted(result_tree[pv][ps].keys()): # select the latest nvr (from sorted list) nvr = sorted( @@ -602,7 +602,7 @@ def text_output_products_contain_component( ctx.obj["VERBOSE"] == 1 ): # product_stream X root component nvr (type) x child components [nvr (type)] for pv in result_tree.keys(): - for ps in result_tree[pv].keys(): + for ps in sorted(result_tree[pv].keys(), key=natural_sort_key): for cn in sorted(result_tree[pv][ps].keys()): # select the latest nvr (from sorted list) nvr = sorted( @@ -728,7 +728,7 @@ def text_output_products_contain_component( ctx.obj["VERBOSE"] == 2 ): # product_stream X root component nvr (type:arch) x child components [name {versions} (type:{arches})] x related_url x build_source_url # noqa for pv in result_tree.keys(): - for ps in result_tree[pv].keys(): + for ps in sorted(result_tree[pv].keys(), key=natural_sort_key): for cn in sorted(result_tree[pv][ps].keys()): # select the latest nvr (from sorted list) nvr = sorted( @@ -883,7 +883,7 @@ def text_output_products_contain_component( ctx.obj["VERBOSE"] == 3 or middleware_cli_purl_verbose_level ): # product_stream X root component nvr (type:arch) x child components [ nvr (type:arch)] x related_url x build_source_url # noqa for pv in result_tree.keys(): - for ps in result_tree[pv].keys(): + for ps in sorted(result_tree[pv].keys(), key=natural_sort_key): for cn in sorted(result_tree[pv][ps].keys()): # select the latest nvr (from sorted list) nvr = sorted( @@ -1000,7 +1000,7 @@ def text_output_products_contain_component( ctx.obj["VERBOSE"] > 3 and not middleware_cli_purl_verbose_level ): # product_stream X root component purl x child components [ purl ] x related_url x build_source_url # noqa for pv in result_tree.keys(): - for ps in result_tree[pv].keys(): + for ps in sorted(result_tree[pv].keys(), key=natural_sort_key): for cn in sorted(result_tree[pv][ps].keys()): # select the latest nvr 
(from sorted list) nvr = sorted( From 4aaf3be9747025de60149afb142efda2d998ac40 Mon Sep 17 00:00:00 2001 From: Jakub Frejlach Date: Mon, 12 Feb 2024 14:50:45 +0100 Subject: [PATCH 6/6] Update changelog --- CHANGELOG.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e30cb36..fd15d26 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,8 +5,21 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## Unreleased +### Added +* added --deduplicate/--no-deduplicate (enabled by default) flag on products-contain-component + which performs additional deduplications (all deduplication steps are listed in the --help) +* added rhel-br/rhel deduplication (GRIF-150) + ### Changed * change verbosity level 0 to return component name on service products-contain-component +* verbosity level 0 (without -v option) now deduplicates multiple same components + per product version + +### Fixed +* fixed picking the latest NVR with natural sort instead of normal sort +* fixed listing product streams sorted naturally +* fixed error when using -vvvv (verbosity level 4) without + GRIFFON_MIDDLEWARE_CLI set ## [0.5.5] - 2024-02-02 ### Changed