Only use sprints from compliance snapshots for issue replication
Compliance snapshots are only created for sprints within a configured
date interval. Before this change, however, the issue replicator
considered all sprints returned by the delivery-service API and grouped
the open findings among them. Since not every sprint necessarily has a
corresponding compliance snapshot (i.e. when the sprint lies outside the
configured date interval), findings assigned to such sprints were not
reported in the GitHub issues.

To fix this, the issue replicator now only considers the sprints
determined from the compliance snapshots and groups the findings among
these sprints. That way, the configured interval of the artefact
enumerator is the single source of truth, and findings are reported only
for these sprints.
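In essence (a minimal sketch, assuming the attribute names shown in the diff below; the dataclass stand-ins are purely illustrative), the set of processing dates is now derived from the compliance snapshots themselves, so only sprints within the configured interval can contribute:

import dataclasses
import datetime

# illustrative stand-ins for the relevant parts of a compliance snapshot
# (attribute names follow the diff below)
@dataclasses.dataclass
class SnapshotData:
    latest_processing_date: datetime.date

@dataclasses.dataclass
class ComplianceSnapshot:
    data: SnapshotData

def processing_dates(
    compliance_snapshots: list[ComplianceSnapshot],
) -> set[datetime.date]:
    # only sprints for which a compliance snapshot exists (i.e. sprints within
    # the artefact enumerator's configured date interval) contribute a date
    return {
        snapshot.data.latest_processing_date
        for snapshot in compliance_snapshots
    }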
8R0WNI3 committed Jan 7, 2025
1 parent b24bae8 commit ff38540
Showing 2 changed files with 8 additions and 34 deletions.
40 changes: 7 additions & 33 deletions issue_replicator/__main__.py
@@ -2,15 +2,12 @@
 import atexit
 import collections.abc
 import datetime
-import functools
 import logging
 import os
 import signal
 import sys
 import time
 
-import dateutil.parser
-
 import ci.log
 import cnudie.iter
 import cnudie.retrieve
@@ -60,23 +57,6 @@ def handle_sigterm_and_sigint(signum, frame):
     wants_to_terminate = True
 
 
-@functools.cache
-def sprint_dates(
-    delivery_client: delivery.client.DeliveryServiceClient,
-    date_name: str='release_decision',
-) -> tuple[datetime.date]:
-    sprints = delivery_client.sprints()
-    sprint_dates = tuple(
-        sprint.find_sprint_date(name=date_name).value.date()
-        for sprint in sprints
-    )
-
-    if not sprint_dates:
-        raise ValueError('no sprints found')
-
-    return sprint_dates
-
-
 def deserialise_issue_replicator_configuration(
     cfg_name: str,
     namespace: str,
@@ -164,9 +144,8 @@ def _iter_findings_for_artefact(
 
 def _group_findings_by_type_and_date(
     issue_replicator_config: config.IssueReplicatorConfig,
-    delivery_client: delivery.client.DeliveryServiceClient,
     findings: collections.abc.Iterable[issue_replicator.github.AggregatedFinding],
-    latest_processing_dates: set[str],
+    dates: collections.abc.Sequence[datetime.date],
 ) -> collections.abc.Generator[
     tuple[
         dso.model.Datatype, # finding type (e.g. vulnerability, license, malware...)
@@ -181,18 +160,14 @@ def _group_findings_by_type_and_date(
     Groups all findings by finding type and latest processing date. Also, thresholds provided by
     configuration are applied on the findings before yielding.
     '''
-    sprints = sprint_dates(delivery_client=delivery_client)
-
     datasource_for_datatype = {
         dso.model.Datatype.VULNERABILITY: dso.model.Datasource.BDBA,
         dso.model.Datatype.LICENSE: dso.model.Datasource.BDBA,
         dso.model.Datatype.MALWARE_FINDING: dso.model.Datasource.CLAMAV,
         dso.model.Datatype.DIKI_FINDING: dso.model.Datasource.DIKI,
     }
 
-    for latest_processing_date in latest_processing_dates:
-        date = dateutil.parser.isoparse(latest_processing_date).date()
-
+    for date in dates:
         for finding_type_cfg in issue_replicator_config.finding_type_issue_replication_cfgs:
             finding_type = finding_type_cfg.finding_type
             finding_source = datasource_for_datatype.get(finding_type)
@@ -203,7 +178,7 @@
                 finding.finding.meta.type == finding_type and
                 finding.finding.meta.datasource == finding_source and
                 finding.calculate_latest_processing_date(
-                    sprints=sprints,
+                    sprints=dates,
                     max_processing_days=issue_replicator_config.max_processing_days,
                 ) == date
             )
@@ -256,9 +231,9 @@ def replicate_issue(
     )
     logger.info(f'{len(active_compliance_snapshots)=}')
 
-    correlation_ids_by_latest_processing_date: dict[str, str] = dict()
+    correlation_ids_by_latest_processing_date: dict[datetime.date, str] = dict()
     for compliance_snapshot in compliance_snapshots:
-        date = compliance_snapshot.data.latest_processing_date.isoformat()
+        date = compliance_snapshot.data.latest_processing_date
 
         if date in correlation_ids_by_latest_processing_date:
             continue
@@ -287,9 +262,8 @@
 
     findings_by_type_and_date = _group_findings_by_type_and_date(
         issue_replicator_config=issue_replicator_config,
-        delivery_client=delivery_client,
         findings=findings,
-        latest_processing_dates=correlation_ids_by_latest_processing_date.keys(),
+        dates=correlation_ids_by_latest_processing_date.keys(),
     )
 
     def _issue_type(
@@ -350,7 +324,7 @@ def _find_finding_type_issue_replication_cfg(
     }
 
    for finding_type, finding_source, date, findings in findings_by_type_and_date:
-        correlation_id = correlation_ids_by_latest_processing_date.get(date.isoformat())
+        correlation_id = correlation_ids_by_latest_processing_date.get(date)
 
        finding_type_issue_replication_cfg = _find_finding_type_issue_replication_cfg(
            finding_cfgs=issue_replicator_config.finding_type_issue_replication_cfgs,
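The simplified lookup above, as a hedged standalone sketch (the correlation-id values are illustrative; in the real code they come from the compliance snapshots): keying by datetime.date removes the isoformat()/isoparse() round-trip between the snapshot loop and the grouping, so both sides use the same date objects.

import datetime

# illustrative correlation ids; in the real code they are taken from the
# compliance snapshots (elided in the hunks above)
correlation_ids_by_latest_processing_date: dict[datetime.date, str] = {
    datetime.date(2025, 1, 20): 'correlation-id-1',
    datetime.date(2025, 2, 3): 'correlation-id-2',
}

# the dict keys double as the `dates` argument for _group_findings_by_type_and_date
dates = correlation_ids_by_latest_processing_date.keys()

for date in dates:
    # no date.isoformat() needed for the lookup anymore
    correlation_id = correlation_ids_by_latest_processing_date.get(date)
    print(date, correlation_id)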
2 changes: 1 addition & 1 deletion issue_replicator/github.py
@@ -55,7 +55,7 @@ class AggregatedFinding:
 
     def calculate_latest_processing_date(
         self,
-        sprints: tuple[datetime.date],
+        sprints: collections.abc.Iterable[datetime.date],
         max_processing_days: gcm.MaxProcessingTimesDays=None,
     ) -> datetime.date | None:
         if not self.severity:
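Why the widened hint is sufficient, as a small sketch (pick_processing_date is a hypothetical helper, not the actual implementation of calculate_latest_processing_date): the dates only need to be iterated, so a tuple of sprint dates and the dict keys passed in from __main__.py both work unchanged.

import collections.abc
import datetime

def pick_processing_date(
    sprints: collections.abc.Iterable[datetime.date],
    today: datetime.date,
) -> datetime.date | None:
    # hypothetical helper: find the next applicable processing date;
    # a single pass over the iterable is all that is required
    upcoming = [date for date in sprints if date >= today]
    return min(upcoming) if upcoming else None

# both a tuple and dict keys (as passed by __main__.py) work unchanged
dates_by_id = {
    datetime.date(2025, 1, 20): 'correlation-id-1',
    datetime.date(2025, 2, 3): 'correlation-id-2',
}
print(pick_processing_date(sprints=dates_by_id.keys(), today=datetime.date(2025, 1, 7)))
print(pick_processing_date(sprints=tuple(dates_by_id), today=datetime.date(2025, 1, 7)))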
