-
-
Notifications
You must be signed in to change notification settings - Fork 618
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add a SARIF output formatter (#1113)
This commit adds a formatter that outputs JSON in a specific SARIF format according to spec at [1]. This code is largely leveraged from an existing implementation found here [2]. SARIF format is very useful for integration into ecosystems such as GitHub's Actions. [1] https://docs.oasis-open.org/sarif/sarif/v2.1.0/cs01/sarif-v2.1.0-cs01.html [2] https://github.com/microsoft/bandit-sarif-formatter Closes #646 Signed-off-by: Eric Brown <[email protected]>
- Loading branch information
Showing
8 changed files
with
530 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,372 @@ | ||
# Copyright (c) Microsoft. All Rights Reserved. | ||
# | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# | ||
# Note: this code mostly incorporated from | ||
# https://github.com/microsoft/bandit-sarif-formatter | ||
# | ||
r""" | ||
=============== | ||
SARIF formatter | ||
=============== | ||
This formatter outputs the issues in SARIF formatted JSON. | ||
:Example: | ||
.. code-block:: javascript | ||
{ | ||
"runs": [ | ||
{ | ||
"tool": { | ||
"driver": { | ||
"name": "Bandit", | ||
"organization": "PyCQA", | ||
"rules": [ | ||
{ | ||
"id": "B101", | ||
"name": "assert_used", | ||
"properties": { | ||
"tags": [ | ||
"security", | ||
"external/cwe/cwe-703" | ||
], | ||
"precision": "high" | ||
}, | ||
"helpUri": "https://bandit.readthedocs.io/en/1.7.8/plugins/b101_assert_used.html" | ||
} | ||
], | ||
"version": "1.7.8", | ||
"semanticVersion": "1.7.8" | ||
} | ||
}, | ||
"invocations": [ | ||
{ | ||
"executionSuccessful": true, | ||
"endTimeUtc": "2024-03-05T03:28:48Z" | ||
} | ||
], | ||
"properties": { | ||
"metrics": { | ||
"_totals": { | ||
"loc": 1, | ||
"nosec": 0, | ||
"skipped_tests": 0, | ||
"SEVERITY.UNDEFINED": 0, | ||
"CONFIDENCE.UNDEFINED": 0, | ||
"SEVERITY.LOW": 1, | ||
"CONFIDENCE.LOW": 0, | ||
"SEVERITY.MEDIUM": 0, | ||
"CONFIDENCE.MEDIUM": 0, | ||
"SEVERITY.HIGH": 0, | ||
"CONFIDENCE.HIGH": 1 | ||
}, | ||
"./examples/assert.py": { | ||
"loc": 1, | ||
"nosec": 0, | ||
"skipped_tests": 0, | ||
"SEVERITY.UNDEFINED": 0, | ||
"SEVERITY.LOW": 1, | ||
"SEVERITY.MEDIUM": 0, | ||
"SEVERITY.HIGH": 0, | ||
"CONFIDENCE.UNDEFINED": 0, | ||
"CONFIDENCE.LOW": 0, | ||
"CONFIDENCE.MEDIUM": 0, | ||
"CONFIDENCE.HIGH": 1 | ||
} | ||
} | ||
}, | ||
"results": [ | ||
{ | ||
"message": { | ||
"text": "Use of assert detected. The enclosed code will be removed when compiling to optimised byte code." | ||
}, | ||
"level": "note", | ||
"locations": [ | ||
{ | ||
"physicalLocation": { | ||
"region": { | ||
"snippet": { | ||
"text": "assert True\n" | ||
}, | ||
"endColumn": 12, | ||
"endLine": 1, | ||
"startColumn": 1, | ||
"startLine": 1 | ||
}, | ||
"artifactLocation": { | ||
"uri": "examples/assert.py" | ||
}, | ||
"contextRegion": { | ||
"snippet": { | ||
"text": "assert True\n" | ||
}, | ||
"endLine": 1, | ||
"startLine": 1 | ||
} | ||
} | ||
} | ||
], | ||
"properties": { | ||
"issue_confidence": "HIGH", | ||
"issue_severity": "LOW" | ||
}, | ||
"ruleId": "B101", | ||
"ruleIndex": 0 | ||
} | ||
] | ||
} | ||
], | ||
"version": "2.1.0", | ||
"$schema": "https://json.schemastore.org/sarif-2.1.0.json" | ||
} | ||
.. versionadded:: 1.7.8 | ||
""" # noqa: E501 | ||
import logging | ||
import pathlib | ||
import sys | ||
import urllib.parse as urlparse | ||
from datetime import datetime | ||
|
||
import sarif_om as om | ||
from jschema_to_python.to_json import to_json | ||
|
||
import bandit | ||
from bandit.core import docs_utils | ||
|
||
LOG = logging.getLogger(__name__) | ||
SCHEMA_URI = "https://json.schemastore.org/sarif-2.1.0.json" | ||
SCHEMA_VER = "2.1.0" | ||
TS_FORMAT = "%Y-%m-%dT%H:%M:%SZ" | ||
|
||
|
||
def report(manager, fileobj, sev_level, conf_level, lines=-1):
    """Prints issues in SARIF format

    Builds a SARIF log with a single run, attaches a notification for every
    skipped file and a result for every issue that passes the severity and
    confidence filters, then writes the serialized log to ``fileobj``.

    :param manager: the bandit manager object
    :param fileobj: The output file object, which may be sys.stdout
    :param sev_level: Filtering severity level
    :param conf_level: Filtering confidence level
    :param lines: Number of lines to report, -1 for all (accepted for
        interface compatibility; this function does not read it)
    """
    # Imported locally because the module only pulls ``datetime`` out of the
    # ``datetime`` package. ``datetime.utcnow()`` is deprecated since
    # Python 3.12; an aware "now" in UTC formats to the same timestamp.
    from datetime import timezone

    log = om.SarifLog(
        schema_uri=SCHEMA_URI,
        version=SCHEMA_VER,
        runs=[
            om.Run(
                tool=om.Tool(
                    driver=om.ToolComponent(
                        name="Bandit",
                        organization=bandit.__author__,
                        semantic_version=bandit.__version__,
                        version=bandit.__version__,
                    )
                ),
                invocations=[
                    om.Invocation(
                        end_time_utc=datetime.now(timezone.utc).strftime(
                            TS_FORMAT
                        ),
                        execution_successful=True,
                    )
                ],
                properties={"metrics": manager.metrics.data},
            )
        ],
    )

    run = log.runs[0]
    invocation = run.invocations[0]

    # Files bandit could not process become notifications on the invocation.
    skips = manager.get_skipped()
    add_skipped_file_notifications(skips, invocation)

    issues = manager.get_issue_list(sev_level=sev_level, conf_level=conf_level)

    add_results(issues, run)

    serialized_log = to_json(log)

    # Leaving the ``with`` block exits fileobj's context manager, which
    # closes regular file objects.
    with fileobj:
        fileobj.write(serialized_log)

    # Only announce the destination when it is not standard output.
    if fileobj.name != sys.stdout.name:
        LOG.info("SARIF output written to file: %s", fileobj.name)
|
||
|
||
def add_skipped_file_notifications(skips, invocation):
    """Attach one error-level notification per skipped file.

    :param skips: iterable of ``(file_name, reason)`` pairs; nothing happens
        when it is ``None`` or empty
    :param invocation: object whose ``tool_configuration_notifications``
        list receives the notifications (created here if it is ``None``)
    """
    if not skips:
        return

    if invocation.tool_configuration_notifications is None:
        invocation.tool_configuration_notifications = []

    for file_name, reason in skips:
        # The notification points at the skipped file and carries the
        # skip reason as its message text.
        artifact = om.ArtifactLocation(uri=to_uri(file_name))
        where = om.Location(
            physical_location=om.PhysicalLocation(artifact_location=artifact)
        )
        invocation.tool_configuration_notifications.append(
            om.Notification(
                level="error",
                message=om.Message(text=reason),
                locations=[where],
            )
        )
|
||
|
||
def add_results(issues, run):
    """Append a result to ``run`` for every issue and register the rules.

    :param issues: iterable of issue objects to convert
    :param run: object whose ``results`` list is extended (created here if
        it is ``None``); ``run.tool.driver.rules`` is set only when at
        least one rule was collected
    """
    if run.results is None:
        run.results = []

    # Both maps are keyed by rule id and are filled in by create_result.
    collected_rules = {}
    collected_indices = {}
    run.results.extend(
        create_result(issue, collected_rules, collected_indices)
        for issue in issues
    )

    if collected_rules:
        run.tool.driver.rules = list(collected_rules.values())
|
||
|
||
def create_result(issue, rules, rule_indices):
    """Build the result object for a single issue.

    :param issue: issue object exposing ``as_dict()``
    :param rules: mapping of rule id to rule, shared across issues
    :param rule_indices: mapping of rule id to rule index, shared likewise
    :return: an ``om.Result`` describing the issue
    """
    details = issue.as_dict()
    rule, rule_index = create_or_find_rule(details, rules, rule_indices)

    artifact = om.ArtifactLocation(uri=to_uri(details["filename"]))
    location = om.PhysicalLocation(artifact_location=artifact)

    # Fills in ``region`` (and ``context_region`` when code is available).
    add_region_and_context_region(
        location,
        details["line_range"],
        details["col_offset"],
        details["end_col_offset"],
        details["code"],
    )

    message = om.Message(text=details["issue_text"])
    level = level_from_severity(details["issue_severity"])
    # The raw confidence and severity are kept alongside the mapped level.
    extra = {
        "issue_confidence": details["issue_confidence"],
        "issue_severity": details["issue_severity"],
    }
    return om.Result(
        rule_id=rule.id,
        rule_index=rule_index,
        message=message,
        level=level,
        locations=[om.Location(physical_location=location)],
        properties=extra,
    )
|
||
|
||
def level_from_severity(severity):
    """Translate a severity name into a result level string.

    :param severity: severity name such as ``"HIGH"``, ``"MEDIUM"`` or
        ``"LOW"``
    :return: ``"error"``, ``"warning"`` or ``"note"``; any other value
        yields ``"warning"``
    """
    levels = {
        "HIGH": "error",
        "MEDIUM": "warning",
        "LOW": "note",
    }
    return levels.get(severity, "warning")
|
||
|
||
def add_region_and_context_region(
    physical_location, line_range, col_offset, end_col_offset, code
):
    """Set ``region`` and, when code is given, ``context_region``.

    :param physical_location: object that receives the two attributes
    :param line_range: sequence of line numbers; the first entry is the
        start line and the second, if present, is the end line
    :param col_offset: start column offset; reported as ``col_offset + 1``
    :param end_col_offset: end column offset; reported as
        ``end_col_offset + 1``
    :param code: line-numbered code excerpt as accepted by ``parse_code``,
        or a falsy value when no excerpt is available
    """
    start_line = line_range[0]
    end_line = line_range[1] if len(line_range) > 1 else start_line

    snippet = None
    if code:
        first_line_number, snippet_lines = parse_code(code)
        # Select the excerpt line that corresponds to the start line.
        flagged_line = snippet_lines[start_line - first_line_number]
        snippet = om.ArtifactContent(text=flagged_line)

    # Offsets are shifted by one -- presumably converting 0-based offsets
    # to 1-based columns; confirm against the issue producer.
    physical_location.region = om.Region(
        start_line=start_line,
        end_line=end_line,
        start_column=col_offset + 1,
        end_column=end_col_offset + 1,
        snippet=snippet,
    )

    if not code:
        return

    # The context region spans the whole excerpt.
    last_line_number = first_line_number + len(snippet_lines) - 1
    physical_location.context_region = om.Region(
        start_line=first_line_number,
        end_line=last_line_number,
        snippet=om.ArtifactContent(text="".join(snippet_lines)),
    )
|
||
|
||
def parse_code(code):
    """Split a line-numbered code excerpt into its number and text lines.

    Each line of ``code`` is expected to look like ``"<number> <text>"``:
    everything up to the first space is the line number and the rest is the
    source text.

    :param code: the numbered excerpt as one string
    :return: ``(first_line_number, snippet_lines)`` where
        ``first_line_number`` is the number parsed from the first line
        (``0`` if there are no lines) and ``snippet_lines`` holds each
        line's text terminated by a newline -- except the last one when
        ``code`` itself did not end with a newline
    """
    numbered_lines = code.split("\n")

    # A trailing empty entry only means the last real line ended with a
    # newline; drop it and remember that fact.
    ends_with_newline = len(numbered_lines[-1]) == 0
    if ends_with_newline:
        numbered_lines.pop()

    first_line_number = 0
    snippet_lines = []
    for position, numbered_line in enumerate(numbered_lines):
        fields = numbered_line.split(" ", 1)
        if position == 0:
            first_line_number = int(fields[0])
        snippet_lines.append(fields[1] + "\n")

    if not ends_with_newline:
        # Undo the newline appended above for the unterminated last line.
        snippet_lines[-1] = snippet_lines[-1][:-1]

    return first_line_number, snippet_lines
|
||
|
||
def create_or_find_rule(issue_dict, rules, rule_indices):
    """Return the rule for an issue's test id, creating it on first use.

    :param issue_dict: dictionary form of an issue; ``test_id`` is always
        read, the remaining keys only when a new rule has to be built
    :param rules: mapping of rule id to rule; updated in place
    :param rule_indices: mapping of rule id to rule index; updated in place
    :return: ``(rule, index)`` for the issue's test id
    """
    test_id = issue_dict["test_id"]

    if test_id not in rules:
        # A new rule's index is the number of rules registered before it.
        next_index = len(rules)
        rules[test_id] = om.ReportingDescriptor(
            id=test_id,
            name=issue_dict["test_name"],
            help_uri=docs_utils.get_url(test_id),
            properties={
                "tags": [
                    "security",
                    f"external/cwe/cwe-{issue_dict['issue_cwe'].get('id')}",
                ],
                "precision": issue_dict["issue_confidence"].lower(),
            },
        )
        rule_indices[test_id] = next_index

    return rules[test_id], rule_indices[test_id]
|
||
|
||
def to_uri(file_path):
    """Convert a file path into a URI string.

    :param file_path: the path to convert
    :return: the ``as_uri()`` form for an absolute path; otherwise the
        path with forward slashes and special characters %-encoded
    """
    path = pathlib.PurePath(file_path)
    if not path.is_absolute():
        # as_posix() replaces backslashes with slashes; quote() then
        # %-encodes special characters.
        return urlparse.quote(path.as_posix())
    return path.as_uri()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
----- | ||
sarif | ||
----- | ||
|
||
.. automodule:: bandit.formatters.sarif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.