Only use sprints from compliance snapshots for issue replication
Compliance snapshots are only created for sprints within a configured
date interval. Before this change, however, the issue replicator
considered all sprints returned by the delivery-service API and grouped
the open findings among them. Since not every sprint necessarily has a
corresponding compliance snapshot (i.e. when the sprint lies outside the
configured date interval), findings assigned to such sprints were not
reported in the GitHub issues.

To fix this, the issue replicator now only considers the sprints
determined from the compliance snapshots and groups the findings among
these sprints. That way, the configured interval of the artefact
enumerator is the single source of truth, and findings are reported only
for these sprints.
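In essence (a minimal sketch, assuming the attribute names shown in the diff below; the dataclass stand-ins are purely illustrative), the set of processing dates is now derived from the compliance snapshots themselves, so only sprints within the configured interval can contribute:

import dataclasses
import datetime

# illustrative stand-ins for the relevant parts of a compliance snapshot
# (attribute names follow the diff below)
@dataclasses.dataclass
class SnapshotData:
    latest_processing_date: datetime.date

@dataclasses.dataclass
class ComplianceSnapshot:
    data: SnapshotData

def processing_dates(
    compliance_snapshots: list[ComplianceSnapshot],
) -> set[datetime.date]:
    # only sprints for which a compliance snapshot exists (i.e. sprints within
    # the artefact enumerator's configured date interval) contribute a date
    return {
        snapshot.data.latest_processing_date
        for snapshot in compliance_snapshots
    }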
8R0WNI3 committed Jan 7, 2025
1 parent b24bae8 commit ff38540
Showing 2 changed files with 8 additions and 34 deletions.
40 changes: 7 additions & 33 deletions issue_replicator/__main__.py
@@ -2,15 +2,12 @@
 import atexit
 import collections.abc
 import datetime
-import functools
 import logging
 import os
 import signal
 import sys
 import time
 
-import dateutil.parser
-
 import ci.log
 import cnudie.iter
 import cnudie.retrieve
@@ -60,23 +57,6 @@ def handle_sigterm_and_sigint(signum, frame):
     wants_to_terminate = True
 
 
-@functools.cache
-def sprint_dates(
-    delivery_client: delivery.client.DeliveryServiceClient,
-    date_name: str='release_decision',
-) -> tuple[datetime.date]:
-    sprints = delivery_client.sprints()
-    sprint_dates = tuple(
-        sprint.find_sprint_date(name=date_name).value.date()
-        for sprint in sprints
-    )
-
-    if not sprint_dates:
-        raise ValueError('no sprints found')
-
-    return sprint_dates
-
-
 def deserialise_issue_replicator_configuration(
     cfg_name: str,
     namespace: str,
@@ -164,9 +144,8 @@ def _iter_findings_for_artefact(
 
 def _group_findings_by_type_and_date(
     issue_replicator_config: config.IssueReplicatorConfig,
-    delivery_client: delivery.client.DeliveryServiceClient,
     findings: collections.abc.Iterable[issue_replicator.github.AggregatedFinding],
-    latest_processing_dates: set[str],
+    dates: collections.abc.Sequence[datetime.date],
 ) -> collections.abc.Generator[
     tuple[
         dso.model.Datatype, # finding type (e.g. vulnerability, license, malware...)
@@ -181,18 +160,14 @@ def _group_findings_by_type_and_date(
     Groups all findings by finding type and latest processing date. Also, thresholds provided by
     configuration are applied on the findings before yielding.
     '''
-    sprints = sprint_dates(delivery_client=delivery_client)
-
     datasource_for_datatype = {
         dso.model.Datatype.VULNERABILITY: dso.model.Datasource.BDBA,
         dso.model.Datatype.LICENSE: dso.model.Datasource.BDBA,
         dso.model.Datatype.MALWARE_FINDING: dso.model.Datasource.CLAMAV,
         dso.model.Datatype.DIKI_FINDING: dso.model.Datasource.DIKI,
     }
 
-    for latest_processing_date in latest_processing_dates:
-        date = dateutil.parser.isoparse(latest_processing_date).date()
-
+    for date in dates:
         for finding_type_cfg in issue_replicator_config.finding_type_issue_replication_cfgs:
             finding_type = finding_type_cfg.finding_type
             finding_source = datasource_for_datatype.get(finding_type)
@@ -203,7 +178,7 @@
                 finding.finding.meta.type == finding_type and
                 finding.finding.meta.datasource == finding_source and
                 finding.calculate_latest_processing_date(
-                    sprints=sprints,
+                    sprints=dates,
                     max_processing_days=issue_replicator_config.max_processing_days,
                 ) == date
             )
@@ -256,9 +231,9 @@ def replicate_issue(
     )
     logger.info(f'{len(active_compliance_snapshots)=}')
 
-    correlation_ids_by_latest_processing_date: dict[str, str] = dict()
+    correlation_ids_by_latest_processing_date: dict[datetime.date, str] = dict()
     for compliance_snapshot in compliance_snapshots:
-        date = compliance_snapshot.data.latest_processing_date.isoformat()
+        date = compliance_snapshot.data.latest_processing_date
 
         if date in correlation_ids_by_latest_processing_date:
             continue
@@ -287,9 +262,8 @@
 
     findings_by_type_and_date = _group_findings_by_type_and_date(
         issue_replicator_config=issue_replicator_config,
-        delivery_client=delivery_client,
         findings=findings,
-        latest_processing_dates=correlation_ids_by_latest_processing_date.keys(),
+        dates=correlation_ids_by_latest_processing_date.keys(),
     )
 
     def _issue_type(
@@ -350,7 +324,7 @@ def _find_finding_type_issue_replication_cfg(
     }
 
    for finding_type, finding_source, date, findings in findings_by_type_and_date:
-        correlation_id = correlation_ids_by_latest_processing_date.get(date.isoformat())
+        correlation_id = correlation_ids_by_latest_processing_date.get(date)
 
        finding_type_issue_replication_cfg = _find_finding_type_issue_replication_cfg(
            finding_cfgs=issue_replicator_config.finding_type_issue_replication_cfgs,
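The simplified lookup above, as a hedged standalone sketch (the correlation-id values are illustrative; in the real code they come from the compliance snapshots): keying by datetime.date removes the isoformat()/isoparse() round-trip between the snapshot loop and the grouping, so both sides use the same date objects.

import datetime

# illustrative correlation ids; in the real code they are taken from the
# compliance snapshots (elided in the hunks above)
correlation_ids_by_latest_processing_date: dict[datetime.date, str] = {
    datetime.date(2025, 1, 20): 'correlation-id-1',
    datetime.date(2025, 2, 3): 'correlation-id-2',
}

# the dict keys double as the `dates` argument for _group_findings_by_type_and_date
dates = correlation_ids_by_latest_processing_date.keys()

for date in dates:
    # no date.isoformat() needed for the lookup anymore
    correlation_id = correlation_ids_by_latest_processing_date.get(date)
    print(date, correlation_id)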
2 changes: 1 addition & 1 deletion issue_replicator/github.py
@@ -55,7 +55,7 @@ class AggregatedFinding:
 
     def calculate_latest_processing_date(
         self,
-        sprints: tuple[datetime.date],
+        sprints: collections.abc.Iterable[datetime.date],
         max_processing_days: gcm.MaxProcessingTimesDays=None,
     ) -> datetime.date | None:
         if not self.severity:
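Why the widened hint is sufficient, as a small sketch (pick_processing_date is a hypothetical helper, not the actual implementation of calculate_latest_processing_date): the dates only need to be iterated, so a tuple of sprint dates and the dict keys passed in from __main__.py both work unchanged.

import collections.abc
import datetime

def pick_processing_date(
    sprints: collections.abc.Iterable[datetime.date],
    today: datetime.date,
) -> datetime.date | None:
    # hypothetical helper: find the next applicable processing date;
    # a single pass over the iterable is all that is required
    upcoming = [date for date in sprints if date >= today]
    return min(upcoming) if upcoming else None

# both a tuple and dict keys (as passed by __main__.py) work unchanged
dates_by_id = {
    datetime.date(2025, 1, 20): 'correlation-id-1',
    datetime.date(2025, 2, 3): 'correlation-id-2',
}
print(pick_processing_date(sprints=dates_by_id.keys(), today=datetime.date(2025, 1, 7)))
print(pick_processing_date(sprints=tuple(dates_by_id), today=datetime.date(2025, 1, 7)))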
