Skip to content

Commit

Permalink
CM-22319 - Run secrets scanning asynchronously using polling mechanism
Browse files Browse the repository at this point in the history
  • Loading branch information
MarshalX authored Oct 30, 2023
1 parent 640b586 commit ebbca2c
Show file tree
Hide file tree
Showing 9 changed files with 311 additions and 83 deletions.
12 changes: 8 additions & 4 deletions cycode/cli/code_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,13 +546,13 @@ def perform_scan(
is_commit_range: bool,
scan_parameters: dict,
) -> ZippedFileScanResult:
if scan_type in (consts.SCA_SCAN_TYPE, consts.SAST_SCAN_TYPE):
return perform_scan_async(cycode_client, zipped_documents, scan_type, scan_parameters)

if is_commit_range:
return cycode_client.commit_range_zipped_file_scan(scan_type, zipped_documents, scan_id)

return cycode_client.zipped_file_scan(scan_type, zipped_documents, scan_id, scan_parameters, is_git_diff)
if scan_type == consts.INFRA_CONFIGURATION_SCAN_TYPE:
return cycode_client.zipped_file_scan(scan_type, zipped_documents, scan_id, scan_parameters, is_git_diff)

return perform_scan_async(cycode_client, zipped_documents, scan_type, scan_parameters)


def perform_scan_async(
Expand Down Expand Up @@ -1025,6 +1025,10 @@ def _map_detections_per_file(detections: List[dict]) -> List[DetectionsPerFile]:
def _get_file_name_from_detection(detection: dict) -> str:
    """Return the file path that a detection refers to.

    Where the path lives inside ``detection['detection_details']`` depends on
    the detection category:

    * ``SAST`` - ``file_path`` is returned as is.
    * ``SecretDetection`` - ``file_path`` and ``file_name`` are combined with
      ``os.path.join``.
    * any other category, or no ``category`` key at all - ``file_name`` is
      returned as is.

    Raises:
        KeyError: if ``detection_details`` or the field required for the
            category is missing.
    """
    details = detection['detection_details']
    # .get() lets a detection without a "category" key reach the default
    # branch below instead of failing with KeyError.
    category = detection.get('category')

    if category == 'SAST':
        return details['file_path']
    if category == 'SecretDetection':
        return os.path.join(details['file_path'], details['file_name'])

    return details['file_name']

Expand Down
45 changes: 33 additions & 12 deletions cycode/cyclient/scan_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,28 +31,36 @@ def content_scan(self, scan_type: str, file_name: str, content: str, is_git_diff
)
return self.parse_scan_response(response)

def get_zipped_file_scan_url_path(self, scan_type: str) -> str:
    """Build the URL path of the zipped-file scan endpoint for ``scan_type``.

    The path is ``<service name>/<SCAN_CONTROLLER_PATH>/zipped-file``, where
    the service name is resolved by ``self.scan_config``.
    """
    service_name = self.scan_config.get_service_name(scan_type)
    return '{}/{}/zipped-file'.format(service_name, self.SCAN_CONTROLLER_PATH)

def zipped_file_scan(
    self, scan_type: str, zip_file: InMemoryZip, scan_id: str, scan_parameters: dict, is_git_diff: bool = False
) -> models.ZippedFileScanResult:
    """Upload a zip archive to the zipped-file scan endpoint and parse the reply.

    Args:
        scan_type: scan type used to resolve the endpoint path.
        zip_file: archive whose ``read()`` result is uploaded as
            ``multiple_files_scan.zip``.
        scan_id: identifier sent in the form data as ``scan_id``.
        scan_parameters: extra parameters, sent JSON-encoded.
        is_git_diff: forwarded in the form data as ``is_git_diff``.

    Returns:
        Whatever ``parse_zipped_file_scan_response`` produces for the response.
    """
    files = {'file': ('multiple_files_scan.zip', zip_file.read())}

    # The endpoint path comes from the shared builder only; supplying
    # ``url_path`` a second time in the same call is a SyntaxError.
    response = self.scan_cycode_client.post(
        url_path=self.get_zipped_file_scan_url_path(scan_type),
        data={'scan_id': scan_id, 'is_git_diff': is_git_diff, 'scan_parameters': json.dumps(scan_parameters)},
        files=files,
        hide_response_content_log=self._hide_response_log,
    )

    return self.parse_zipped_file_scan_response(response)

def get_zipped_file_scan_async_url_path(self, scan_type: str) -> str:
    """Build the URL path used to start an asynchronous zipped-file scan.

    The path is
    ``<scans prefix>/<SCAN_CONTROLLER_PATH>/<async scan type>/<async entity type>``;
    every segment except the controller path is resolved by ``self.scan_config``.
    """
    scan_segment = self.scan_config.get_async_scan_type(scan_type)
    entity_segment = self.scan_config.get_async_entity_type(scan_type)
    prefix_segment = self.scan_config.get_scans_prefix()

    return '{}/{}/{}/{}'.format(prefix_segment, self.SCAN_CONTROLLER_PATH, scan_segment, entity_segment)

def zipped_file_scan_async(
self, zip_file: InMemoryZip, scan_type: str, scan_parameters: dict, is_git_diff: bool = False
) -> models.ScanInitializationResponse:
url_path = f'{self.scan_config.get_scans_prefix()}/{self.SCAN_CONTROLLER_PATH}/{scan_type}/repository'
files = {'file': ('multiple_files_scan.zip', zip_file.read())}
response = self.scan_cycode_client.post(
url_path=url_path,
url_path=self.get_zipped_file_scan_async_url_path(scan_type),
data={'is_git_diff': is_git_diff, 'scan_parameters': json.dumps(scan_parameters)},
files=files,
)
Expand Down Expand Up @@ -80,13 +88,17 @@ def multiple_zipped_file_scan_async(
)
return models.ScanInitializationResponseSchema().load(response.json())

def get_scan_details_path(self, scan_id: str) -> str:
    """Build the URL path of the scan-details endpoint for ``scan_id``.

    The path is ``<scans prefix>/<SCAN_CONTROLLER_PATH>/<scan_id>``.
    """
    scans_prefix = self.scan_config.get_scans_prefix()
    return '{}/{}/{}'.format(scans_prefix, self.SCAN_CONTROLLER_PATH, scan_id)

def get_scan_details(self, scan_id: str) -> models.ScanDetailsResponse:
    """Fetch the details of a scan and deserialize them.

    Args:
        scan_id: identifier of the scan; it becomes the last segment of the
            path built by ``get_scan_details_path``.

    Returns:
        The JSON body of the response loaded through
        ``models.ScanDetailsResponseSchema``.
    """
    # Exactly one GET: the endpoint path is produced by the shared builder
    # rather than being rebuilt inline and requested a second time.
    response = self.scan_cycode_client.get(url_path=self.get_scan_details_path(scan_id))
    return models.ScanDetailsResponseSchema().load(response.json())

def get_scan_detections_path(self) -> str:
    """Build the URL path of the detections endpoint.

    The path is ``<detections prefix>/<DETECTIONS_SERVICE_CONTROLLER_PATH>``.
    """
    detections_prefix = self.scan_config.get_detections_prefix()
    return '{}/{}'.format(detections_prefix, self.DETECTIONS_SERVICE_CONTROLLER_PATH)

def get_scan_detections(self, scan_id: str) -> List[dict]:
url_path = f'{self.scan_config.get_detections_prefix()}/{self.DETECTIONS_SERVICE_CONTROLLER_PATH}'
params = {'scan_id': scan_id}

page_size = 200
Expand All @@ -100,7 +112,9 @@ def get_scan_detections(self, scan_id: str) -> List[dict]:
params['page_number'] = page_number

response = self.scan_cycode_client.get(
url_path=url_path, params=params, hide_response_content_log=self._hide_response_log
url_path=self.get_scan_detections_path(),
params=params,
hide_response_content_log=self._hide_response_log,
).json()
detections.extend(response)

Expand All @@ -109,9 +123,13 @@ def get_scan_detections(self, scan_id: str) -> List[dict]:

return detections

def get_get_scan_detections_count_path(self) -> str:
    """Build the URL path of the detections count endpoint.

    The path is ``<detections prefix>/<DETECTIONS_SERVICE_CONTROLLER_PATH>/count``.
    """
    detections_prefix = self.scan_config.get_detections_prefix()
    return '{}/{}/count'.format(detections_prefix, self.DETECTIONS_SERVICE_CONTROLLER_PATH)

def get_scan_detections_count(self, scan_id: str) -> int:
    """Return the number of detections reported for a scan.

    Args:
        scan_id: identifier of the scan, sent as the ``scan_id`` query
            parameter.

    Returns:
        The ``count`` field of the JSON response, or ``0`` when the field is
        absent.
    """
    # Exactly one GET, addressed through the shared path builder; the
    # endpoint is not rebuilt inline and queried a second time.
    response = self.scan_cycode_client.get(
        url_path=self.get_get_scan_detections_count_path(), params={'scan_id': scan_id}
    )
    return response.json().get('count', 0)

def commit_range_zipped_file_scan(
Expand All @@ -126,9 +144,11 @@ def commit_range_zipped_file_scan(
)
return self.parse_zipped_file_scan_response(response)

def get_report_scan_status_path(self, scan_type: str, scan_id: str) -> str:
    """Build the URL path used to report the status of a scan.

    The path is ``<service name>/<SCAN_CONTROLLER_PATH>/<scan_id>/status``,
    where the service name is resolved by ``self.scan_config``.
    """
    service_name = self.scan_config.get_service_name(scan_type)
    return '{}/{}/{}/status'.format(service_name, self.SCAN_CONTROLLER_PATH, scan_id)

def report_scan_status(self, scan_type: str, scan_id: str, scan_status: dict) -> None:
    """Send the status of a scan to the status endpoint.

    Args:
        scan_type: scan type used to resolve the endpoint path.
        scan_id: identifier of the scan whose status is reported.
        scan_status: payload passed as the request body.
    """
    # The status is posted exactly once; posting it again through an
    # inline-built copy of the same path would duplicate the report.
    self.scan_cycode_client.post(url_path=self.get_report_scan_status_path(scan_type, scan_id), body=scan_status)

@staticmethod
def parse_scan_response(response: Response) -> models.ScanResult:
Expand All @@ -140,6 +160,7 @@ def parse_zipped_file_scan_response(response: Response) -> models.ZippedFileScan

@staticmethod
def get_service_name(scan_type: str) -> Optional[str]:
# TODO(MarshalX): get_service_name should be removed from ScanClient? Because it exists in ScanConfig
if scan_type == 'secret':
return 'secret'
if scan_type == 'iac':
Expand Down
16 changes: 16 additions & 0 deletions cycode/cyclient/scan_config_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,22 @@ class ScanConfigBase(ABC):
def get_service_name(self, scan_type: str) -> str:
...

@staticmethod
def get_async_scan_type(scan_type: str) -> str:
if scan_type == 'secret':
return 'Secrets'
if scan_type == 'iac':
return 'InfraConfiguration'

return scan_type.upper()

@staticmethod
def get_async_entity_type(scan_type: str) -> str:
if scan_type == 'secret':
return 'zippedfile'

return 'repository'

# Abstract hook with no implementation here: a concrete config returns the
# string that is placed first in scan URL paths (for example by
# get_scan_details_path and get_zipped_file_scan_async_url_path).
# NOTE(review): assumes the scan client's ``scan_config`` is a ScanConfigBase - confirm.
@abstractmethod
def get_scans_prefix(self) -> str:
...
Expand Down
11 changes: 5 additions & 6 deletions tests/cli/test_main.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import json
from typing import TYPE_CHECKING
from uuid import UUID

import pytest
import responses
from click.testing import CliRunner

from cycode.cli.main import main_cli
from tests.conftest import CLI_ENV_VARS, TEST_FILES_PATH
from tests.cyclient.test_scan_client import get_zipped_file_scan_response, get_zipped_file_scan_url
from tests.conftest import CLI_ENV_VARS, TEST_FILES_PATH, ZIP_CONTENT_PATH
from tests.cyclient.mocked_responses.scan_client import mock_scan_async_responses

_PATH_TO_SCAN = TEST_FILES_PATH.joinpath('zip_content').absolute()

Expand All @@ -27,12 +28,10 @@ def _is_json(plain: str) -> bool:
@pytest.mark.parametrize('output', ['text', 'json'])
def test_passing_output_option(output: str, scan_client: 'ScanClient', api_token_response: responses.Response) -> None:
scan_type = 'secret'
scan_id = UUID('12345678-418f-47ee-abb0-012345678901')

responses.add(get_zipped_file_scan_response(get_zipped_file_scan_url(scan_type, scan_client)))
responses.add(api_token_response)
# Scan report is not mocked.
# This raises connection error on the attempt to report scan.
# It doesn't perform real request
mock_scan_async_responses(responses, scan_type, scan_client, scan_id, ZIP_CONTENT_PATH)

args = ['--output', output, 'scan', '--soft-fail', 'path', str(_PATH_TO_SCAN)]
result = CliRunner().invoke(main_cli, args, env=CLI_ENV_VARS)
Expand Down
2 changes: 2 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
CLI_ENV_VARS = {'CYCODE_CLIENT_ID': _CLIENT_ID, 'CYCODE_CLIENT_SECRET': _CLIENT_SECRET}

TEST_FILES_PATH = Path(__file__).parent.joinpath('test_files').absolute()
MOCKED_RESPONSES_PATH = Path(__file__).parent.joinpath('cyclient/mocked_responses/data').absolute()
ZIP_CONTENT_PATH = TEST_FILES_PATH.joinpath('zip_content').absolute()


@pytest.fixture(scope='session')
Expand Down
Empty file.
83 changes: 83 additions & 0 deletions tests/cyclient/mocked_responses/data/detections.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
[
{
"source_policy_name": "Secrets detection",
"source_policy_type": "SensitiveContent",
"source_entity_name": null,
"source_entity_id": null,
"detection_type_id": "7dff932a-418f-47ee-abb0-703e0f6592cd",
"root_id": null,
"status": "Open",
"status_updated_at": null,
"status_reason": null,
"status_change_message": null,
"source_entity_type": "Audit",
"detection_details": {
"organization_name": null,
"organization_id": "",
"sha512": "6e6c867188c04340d9ecfa1b7e56a356e605f2a70fbda865f11b4a57eb07e634",
"provider": "CycodeCli",
"concrete_provider": "CycodeCli",
"length": 55,
"start_position": 19,
"line": 0,
"commit_id": null,
"member_id": "",
"member_name": "",
"member_email": "",
"author_name": "",
"author_email": "",
"branch_name": "",
"committer_name": "",
"committed_at": "0001-01-01T00:00:00+00:00",
"file_path": "%FILEPATH%",
"file_name": "secrets.py",
"file_extension": ".py",
"url": null,
"should_resolve_upon_branch_deletion": false,
"position_in_line": 19,
"repository_name": null,
"repository_id": null,
"old_detection_id": "f35c42f99d3712d4593d5ee16a9ceb36ca9fb20b33e68edd0e00847e6a02a7b6"
},
"severity": "Medium",
"remediable": false,
"correlation_message": "Secret of type 'Slack Token' was found in filename 'secrets.py' within '' repository",
"provider": "CycodeCli",
"scan_id": "%SCAN_ID%",
"assignee_id": null,
"type": "slack-token",
"is_hidden": false,
"tags": [],
"detection_rule_id": "26ab3395-2522-4061-a50a-c69c2d622ca1",
"classification": null,
"priority": 0,
"metadata": null,
"labels": [],
"detection_id": "f35c42f99d3712d4593d5ee16a9ceb36ca9fb20b33e68edd0e00847e6a02a7b6",
"internal_note": null,
"sdlc_stages": [
"Code",
"Container Registry",
"Productivity Tools",
"Cloud",
"Build"
],
"policy_labels": [],
"category": "SecretDetection",
"sub_category": "SensitiveContent",
"sub_category_v2": "Messaging Systems",
"policy_tags": [],
"remediations": [],
"instruction_details": {
"instruction_name_to_single_id_map": null,
"instruction_name_to_multiple_ids_map": null,
"instruction_tags": null
},
"external_detection_references": [],
"project_ids": [],
"tenant_id": "123456-663f-4e27-9170-e559c2379292",
"id": "123456-895a-4830-b6c1-b948e99b71a4",
"created_date": "2023-10-25T11:07:14.7516793+00:00",
"updated_date": "2023-10-25T11:07:14.7616063+00:00"
}
]
Loading

0 comments on commit ebbca2c

Please sign in to comment.