Skip to content

Commit

Permalink
Check all rpms in sboms have a known repo id
Browse files Browse the repository at this point in the history
- Find all components in all sboms that are rpms
- Require they have a repository_id value in their purl
- Require that the repository_id value is in the big
  list of known repository_ids
- Try to provide useful messages if the repo id is missing,
  or if the list can't be found in the data.

Ref: https://issues.redhat.com/browse/EC-848
  • Loading branch information
simonbaird committed Sep 11, 2024
1 parent 4b82cdc commit 35a11a7
Show file tree
Hide file tree
Showing 5 changed files with 369 additions and 0 deletions.
33 changes: 33 additions & 0 deletions antora/docs/modules/ROOT/pages/release_policy.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ Rules included:
* xref:release_policy.adoc#rpm_signature__allowed[RPM Signature: Allowed RPM signature key]
* xref:release_policy.adoc#rpm_signature__result_format[RPM Signature: Result format]
* xref:release_policy.adoc#rpm_signature__rule_data_provided[RPM Signature: Rule data provided]
* xref:release_policy.adoc#repo_ids__repo_ids_list_provided[Repo IDs: Valid repo id list provided]
* xref:release_policy.adoc#sbom_cyclonedx__allowed[SBOM CycloneDX: Allowed]
* xref:release_policy.adoc#sbom_cyclonedx__allowed_package_external_references[SBOM CycloneDX: Allowed package external references]
* xref:release_policy.adoc#sbom_cyclonedx__disallowed_package_attributes[SBOM CycloneDX: Disallowed package attributes]
Expand Down Expand Up @@ -1052,6 +1053,38 @@ Confirm the expected `allowed_rpm_signature_keys` rule data key has been provide
* Effective from: `2024-10-05T00:00:00Z`
* https://github.com/enterprise-contract/ec-policies/blob/{page-origin-refhash}/policy/release/rpm_signature.rego#L52[Source, window="_blank"]

[#repo_ids_package]
== link:#repo_ids_package[Repo IDs]

Checks that all RPMs listed in the SBOM have a valid and known repository id.

* Package name: `repo_ids`
* Package full path: `policy.release.repo_ids`

[#repo_ids__repo_ids_valid]
=== link:#repo_ids__repo_ids_valid[Valid Repo ID for RPMs]

The list of rpm packages in the SBOM will be inspected. A violation will be produced if any of them do not specify a "repository_id" from the list of known accepted repository ids.

*Solution*: Ensure every rpm is from a known and accepted repository and that the data in the SBOM correctly records that.

* Rule type: [rule-type-indicator failure]#FAILURE#
* FAILURE message: `RPM repo id check failed: %s`
* Code: `repo_ids.repo_ids_valid`
* https://github.com/enterprise-contract/ec-policies/blob/{page-origin-refhash}/policy/release/repo_ids.rego#L32[Source, window="_blank"]

[#repo_ids__repo_ids_list_provided]
=== link:#repo_ids__repo_ids_list_provided[Valid repo id list provided]

A list of valid and known repository ids should be available in the data.

*Solution*: Include a data source that provides a list of known repository ids, for example https://github.com/release-engineering/rhtap-ec-policy/blob/main/data/known_rpm_repositories.yml

* Rule type: [rule-type-indicator failure]#FAILURE#
* FAILURE message: `Valid repo id list not provided: %s`
* Code: `repo_ids.repo_ids_list_provided`
* https://github.com/enterprise-contract/ec-policies/blob/{page-origin-refhash}/policy/release/repo_ids.rego#L14[Source, window="_blank"]

[#sbom_cyclonedx_package]
== link:#sbom_cyclonedx_package[SBOM CycloneDX]

Expand Down
3 changes: 3 additions & 0 deletions antora/docs/modules/ROOT/partials/release_policy_nav.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@
**** xref:release_policy.adoc#rpm_signature__allowed[Allowed RPM signature key]
**** xref:release_policy.adoc#rpm_signature__result_format[Result format]
**** xref:release_policy.adoc#rpm_signature__rule_data_provided[Rule data provided]
*** xref:release_policy.adoc#repo_ids_package[Repo IDs]
**** xref:release_policy.adoc#repo_ids__repo_ids_valid[Valid Repo ID for RPMs]
**** xref:release_policy.adoc#repo_ids__repo_ids_list_provided[Valid repo id list provided]
*** xref:release_policy.adoc#sbom_cyclonedx_package[SBOM CycloneDX]
**** xref:release_policy.adoc#sbom_cyclonedx__allowed[Allowed]
**** xref:release_policy.adoc#sbom_cyclonedx__allowed_package_external_references[Allowed package external references]
Expand Down
23 changes: 23 additions & 0 deletions example/data/known_rpm_repositories.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
---
# Copyright The Enterprise Contract Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0

# Example list of known rpm repository ids. The repo_ids policy reads this
# top-level key and expects a non-empty list of strings.
# See also https://github.com/release-engineering/rhtap-ec-policy/blob/main/data/known_rpm_repositories.yml
known_rpm_repositories:
- "rhel-9-for-x86_64-appstream-rpms"
- "rhel-9-for-x86_64-appstream-source-rpms"
- "rhel-9-for-x86_64-baseos-rpms"
- "rhel-9-for-x86_64-baseos-source-rpms"
170 changes: 170 additions & 0 deletions policy/release/repo_ids.rego
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
#
# METADATA
# title: Repo IDs
# description: >-
# Checks that all RPMs listed in the SBOM have a valid and known
# repository id.
#
package policy.release.repo_ids

import rego.v1

import data.lib

# METADATA
# title: Known repo id list provided
# description: >-
# A list of valid and known repository ids should be available in the data.
# custom:
# short_name: repo_ids_list_provided
# failure_msg: 'Valid repo id list not provided: %s'
# solution: >-
# Include a data source that provides a list of known repository ids, for example
# https://github.com/release-engineering/rhtap-ec-policy/blob/main/data/known_rpm_repositories.yml
# collections:
# - redhat
#
deny contains result if {
	# One violation per problem found with the known repo id data. The reason
	# is passed as the only parameter for this rule's failure_msg.
	some reason in data_problems
	result := lib.result_helper(rego.metadata.chain(), [reason])
}

# METADATA
# title: All rpms have known repo ids
# description: >-
# The list of rpm packages in the SBOM will be inspected. A violation will be produced if any
# of them do not specify a "repository_id" from the list of known accepted repository ids.
# custom:
# short_name: repo_ids_valid
# failure_msg: 'RPM repo id check failed: %s'
# solution: >-
# Ensure every rpm is from a known and accepted repository and that the data in the SBOM
# correctly records that.
# # Todo: Until the sbom generation is updated, this will always fail,
# # so for now we don't include it in the redhat collection.
# # See https://issues.redhat.com/browse/STONEBLD-2638
# #collections:
# #- redhat
#
deny contains result if {
	# Checking the rpms is only meaningful when the known repo id data is
	# usable; problems with that data are reported through data_problems.
	data_problems == set()

	some reason in repo_id_problems
	result := lib.result_helper(rego.metadata.chain(), [reason])
}

data_problems contains reason if {
	reason := "The 'known_rpm_repositories' key was not found in the data"

	# Holds when known_repo_ids is undefined (or false), i.e. the data has
	# no usable 'known_rpm_repositories' key
	not known_repo_ids
}

data_problems contains reason if {
	reason := "The 'known_rpm_repositories' data is not an array"

	# NOTE(review): the missing-data test expects only the "key was not found"
	# reason, so this presumably does not fire when known_repo_ids is
	# undefined -- confirm before relying on it.
	not is_array(known_repo_ids)
}

data_problems contains reason if {
	# Only an array can be an "empty list". Without this guard an empty
	# string, object or set would be reported twice: once as "not an array"
	# and once as "empty".
	is_array(known_repo_ids)
	count(known_repo_ids) == 0
	reason := "The 'known_rpm_repositories' data is empty"
}

data_problems contains reason if {
	is_array(known_repo_ids)

	# A single non-string entry is enough to report the problem. The reason
	# is constant, so several bad entries still yield one set member.
	some entry in known_repo_ids
	not is_string(entry)

	reason := "The 'known_rpm_repositories' data is not a list of strings"
}

# The list of known repo ids, read from the top-level 'known_rpm_repositories'
# data key. Undefined when that key is absent from the data.
# See https://github.com/release-engineering/rhtap-ec-policy/tree/main/data
# This list of repo_ids is not under the 'rule_data' key, so we don't use lib.rule_data.
known_repo_ids := data.known_rpm_repositories

repo_id_problems contains reason if {
	# Raw rpm purls that have no corresponding parsed entry
	unparsable := all_rpm_purls - raw_purls(all_parsed_rpm_purls)
	count(unparsable) > 0

	detail := truncated_list_to_string(unparsable)
	reason := sprintf(
		"Some rpm components in the SBOM have purls that could not be parsed: %s",
		[detail],
	)
}

repo_id_problems contains reason if {
	# Parsed rpm purls that carry no repository_id qualifier
	without_repo_id := all_parsed_rpm_purls - all_parsed_purls_with_repo_ids
	count(without_repo_id) > 0

	detail := truncated_list_to_string(raw_purls(without_repo_id))
	reason := sprintf(
		"Some rpm components in the SBOM did not specify a repository_id value in their purl: %s",
		[detail],
	)
}

repo_id_problems contains reason if {
	# Purls that have a repository_id, but not one from the known list
	unknown_repo_id := all_parsed_purls_with_repo_ids - all_parsed_purls_with_known_repo_ids
	count(unknown_repo_id) > 0

	detail := truncated_list_to_string(raw_purls(unknown_repo_id))
	reason := sprintf(
		"Some rpm components in the SBOM specify an unknown or disallowed repository_id: %s",
		[detail],
	)
}

all_parsed_purls_with_known_repo_ids contains entry if {
	# Keep the entries whose repository_id appears in the known list
	some entry in all_parsed_purls_with_repo_ids
	repo_id := purl_repo_id(entry.parsed)
	repo_id in known_repo_ids
}

all_parsed_purls_with_repo_ids contains entry if {
	some entry in all_parsed_rpm_purls

	# Any repository_id value will do here; whether it is a known one is
	# checked separately
	purl_repo_id(entry.parsed)
}

# Pair every valid rpm purl with its parsed form in a small struct-like
# object, so later rules can inspect the parsed data and still report the
# raw purl string.
all_parsed_rpm_purls contains entry if {
	some raw_purl in all_rpm_purls
	ec.purl.is_valid(raw_purl)
	parsed_purl := ec.purl.parse(raw_purl)
	entry := {"raw": raw_purl, "parsed": parsed_purl}
}

all_rpm_purls contains purl if {
	# Walk every component of every SBOM and pick up its purl
	purl := all_sboms[_].components[_].purl

	# A prefix check is assumed to be cheaper than parsing the purl and
	# checking its type
	startswith(purl, "pkg:rpm")
}

# The SBOMs whose components are inspected; currently the CycloneDX SBOMs from lib.sbom.
# In future there will be SPDX sboms also
all_sboms := lib.sbom.cyclonedx_sboms

# Look up the "repository_id" qualifier of a parsed purl. Undefined when the
# purl has no such qualifier.
purl_repo_id(parsed_purl) := repo_id if {
	repo_id := purl_qualifier("repository_id", parsed_purl)
}

# Return the value of the qualifier named `key` from a parsed purl. Undefined
# when none of the purl's qualifiers has that key.
purl_qualifier(key, parsed_purl) := value if {
	some i
	parsed_purl.qualifiers[i].key == key
	value := parsed_purl.qualifiers[i].value
}

# Avoid including thousands of bad purls in the violation reason:
# truncated_list_to_string lists at most this many items and summarizes the
# remainder as "and N more".
max_bad_purls := 10

# Render items as a quoted, comma separated string. When there are more than
# max_bad_purls items, only the first max_bad_purls are shown, followed by a
# count of how many were left out.
truncated_list_to_string(items) := output if {
	count(items) > max_bad_purls
	hidden_count := count(items) - max_bad_purls
	shown := array.slice(lib.to_array(items), 0, max_bad_purls)
	output := sprintf("%s and %d more", [lib.quoted_values_string(shown), hidden_count])
} else := lib.quoted_values_string(items)

# Collect the "raw" purl string of each struct-like purl entry into a set
raw_purls(purl_structs) := {raw | raw := purl_structs[_].raw}
140 changes: 140 additions & 0 deletions policy/release/repo_ids_test.rego
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
package policy.release.repo_ids_test

import rego.v1

import data.lib
import data.policy.release.repo_ids

test_repo_id_data_missing if {
	# No known_rpm_repositories data is supplied here, and exactly one
	# violation is expected for that
	lib.assert_equal_results(
		{{
			"code": "repo_ids.repo_ids_list_provided",
			"msg": "Valid repo id list not provided: The 'known_rpm_repositories' key was not found in the data",
		}},
		repo_ids.deny,
	)
}

test_repo_id_data_empty if {
	# An empty array is reported as empty data
	lib.assert_equal_results(
		{{
			"code": "repo_ids.repo_ids_list_provided",
			"msg": "Valid repo id list not provided: The 'known_rpm_repositories' data is empty",
		}},
		repo_ids.deny,
	) with data.known_rpm_repositories as []
}

test_repo_id_data_not_an_array if {
	violation := {
		"code": "repo_ids.repo_ids_list_provided",
		"msg": "Valid repo id list not provided: The 'known_rpm_repositories' data is not an array",
	}

	# A string, an object and a number must all be rejected the same way
	every bad_value in ["spam", {"chunky": "bacon"}, 42] {
		lib.assert_equal_results({violation}, repo_ids.deny) with data.known_rpm_repositories as bad_value
	}
}

test_repo_id_data_not_strings if {
	# A single non-string entry makes the whole list unacceptable
	lib.assert_equal_results(
		{{
			"code": "repo_ids.repo_ids_list_provided",
			"msg": "Valid repo id list not provided: The 'known_rpm_repositories' data is not a list of strings",
		}},
		repo_ids.deny,
	) with data.known_rpm_repositories as ["spam", 42]
}

test_repo_id_all if {
	# Everything starting with "pkg:rpm" is collected; the golang purl p6 is not
	found := repo_ids.all_rpm_purls with repo_ids.all_sboms as fake_sboms
	lib.assert_equal_results({p1, p2, p3, p4, p5}, found)
}

test_repo_id_all_parsed if {
	# p5 is expected to be dropped at this stage
	parsed := repo_ids.all_parsed_rpm_purls with repo_ids.all_sboms as fake_sboms
	lib.assert_equal_results({p1, p2, p3, p4}, repo_ids.raw_purls(parsed))
}

test_repo_id_all_with if {
	# p4 has no repository_id qualifier, so it is expected to be dropped here
	with_repo_ids := repo_ids.all_parsed_purls_with_repo_ids with repo_ids.all_sboms as fake_sboms
	lib.assert_equal_results({p1, p2, p3}, repo_ids.raw_purls(with_repo_ids))
}

test_repo_id_all_known if {
	# p3 names a repository_id that is not in fake_repo_id_list
	known := repo_ids.all_parsed_purls_with_known_repo_ids with repo_ids.all_sboms as fake_sboms
		with data.known_rpm_repositories as fake_repo_id_list
	lib.assert_equal_results({p1, p2}, repo_ids.raw_purls(known))
}

test_repo_id_purls_invalid_rpm_purls if {
	# p5 is the only rpm purl in this SBOM that is expected to be reported
	sboms := [fake_sbom({p1, p2, p5, p6})]
	violation := {
		"code": "repo_ids.repo_ids_valid",
		"msg": sprintf(
			# regal ignore:line-length
			"RPM repo id check failed: Some rpm components in the SBOM have purls that could not be parsed: '%s'",
			[p5],
		),
	}

	lib.assert_equal_results({violation}, repo_ids.deny) with repo_ids.all_sboms as sboms
		with data.known_rpm_repositories as fake_repo_id_list
}

test_repo_id_purls_missing_repo_ids if {
	# p4 is the only rpm purl in this SBOM without a repository_id qualifier
	sboms := [fake_sbom({p1, p2, p4, p6})]
	violation := {
		"code": "repo_ids.repo_ids_valid",
		"msg": sprintf(
			# regal ignore:line-length
			"RPM repo id check failed: Some rpm components in the SBOM did not specify a repository_id value in their purl: '%s'",
			[p4],
		),
	}

	lib.assert_equal_results({violation}, repo_ids.deny) with repo_ids.all_sboms as sboms
		with data.known_rpm_repositories as fake_repo_id_list
}

test_repo_id_purls_unknown_repo_ids if {
	# p3 is the only rpm purl in this SBOM whose repository_id is not in
	# fake_repo_id_list
	sboms := [fake_sbom({p1, p2, p3, p6})]
	violation := {
		"code": "repo_ids.repo_ids_valid",
		"msg": sprintf(
			# regal ignore:line-length
			"RPM repo id check failed: Some rpm components in the SBOM specify an unknown or disallowed repository_id: '%s'",
			[p3],
		),
	}

	lib.assert_equal_results({violation}, repo_ids.deny) with repo_ids.all_sboms as sboms
		with data.known_rpm_repositories as fake_repo_id_list
}

test_truncated_list_to_string if {
	letters := ["a", "b", "c", "d", "e"]

	# More items than the limit: the surplus is summarized as a count
	shortened := repo_ids.truncated_list_to_string(letters) with repo_ids.max_bad_purls as 3
	lib.assert_equal("'a', 'b', 'c' and 2 more", shortened)

	# Exactly at the limit: every item is listed
	complete := repo_ids.truncated_list_to_string(letters) with repo_ids.max_bad_purls as 5
	lib.assert_equal("'a', 'b', 'c', 'd', 'e'", complete)
}

test_all_sboms if {
	# (Needed for 100% coverage)
	# all_sboms simply passes through whatever lib.sbom.cyclonedx_sboms holds
	actual := repo_ids.all_sboms with lib.sbom.cyclonedx_sboms as "spam-1000"
	lib.assert_equal("spam-1000", actual)
}

# A single fake SBOM containing every fixture purl below
fake_sboms := [fake_sbom({p1, p2, p3, p4, p5, p6})]

# Build a minimal SBOM-like object with one component per given purl
fake_sbom(fake_purls) := {"components": [{"purl": p} | some p in fake_purls]}

# Stand-in for the known_rpm_repositories data
fake_repo_id_list := ["rhel-23-for-spam-9-rpms", "rhel-42-for-bacon-12-rpms"]

# NOTE(review): the "[email protected]" segment in p1-p4 and p6 looks like an
# e-mail obfuscation artifact that replaced the original name@version part of
# each purl -- confirm against the repository and restore the real values.

# rpm purl with a repository_id that is in fake_repo_id_list
p1 := "pkg:rpm/redhat/[email protected]?arch=amd64&repository_id=rhel-23-for-spam-9-rpms"

# rpm purl with a repository_id that is in fake_repo_id_list
p2 := "pkg:rpm/redhat/[email protected]?arch=amd64&repository_id=rhel-42-for-bacon-12-rpms"

# rpm purl with a repository_id that is not in fake_repo_id_list
p3 := "pkg:rpm/redhat/[email protected]?arch=amd64&repository_id=rhel-23-unrecognized-2-rpms"

# rpm purl without a repository_id qualifier
p4 := "pkg:rpm/redhat/[email protected]?arch=amd64&pastry_id=unknown"

# Starts with "pkg:rpm", but the tests expect it to be reported as unparsable
p5 := "pkg:rpm_borken"

# Not an rpm purl; the tests expect it to be ignored
p6 := "pkg:golang/gitplanet.com/[email protected]?arch=amd64"

0 comments on commit 35a11a7

Please sign in to comment.