Skip to content

Commit

Permalink
CM-40699, CM-40700 - Implement new sync flow for Secrets and IaC (#251)
Browse files Browse the repository at this point in the history
  • Loading branch information
MarshalX authored Oct 1, 2024
1 parent 7e528ea commit 70d3cce
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 27 deletions.
41 changes: 24 additions & 17 deletions cycode/cli/commands/scan/code_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,12 +100,17 @@ def _should_use_scan_service(scan_type: str, scan_parameters: dict) -> bool:
return scan_type == consts.SECRET_SCAN_TYPE and scan_parameters.get('report') is True


def _should_use_sync_flow(scan_type: str, sync_option: bool, scan_parameters: Optional[dict] = None) -> bool:
def _should_use_sync_flow(
command_scan_type: str, scan_type: str, sync_option: bool, scan_parameters: Optional[dict] = None
) -> bool:
if not sync_option:
return False

if scan_type not in (consts.SCA_SCAN_TYPE,):
raise ValueError(f'Sync scan is not available for {scan_type} scan type.')
if command_scan_type not in {'path', 'repository'}:
raise ValueError(f'Sync flow is not available for "{command_scan_type}" command type. Remove --sync option.')

if scan_type is consts.SAST_SCAN_TYPE:
raise ValueError('Sync scan is not available for SAST scan type.')

if scan_parameters.get('report') is True:
raise ValueError('You can not use sync flow with report option. Either remove "report" or "sync" option.')
Expand Down Expand Up @@ -163,7 +168,7 @@ def _scan_batch_thread_func(batch: List[Document]) -> Tuple[str, CliError, Local
scan_completed = False

should_use_scan_service = _should_use_scan_service(scan_type, scan_parameters)
should_use_sync_flow = _should_use_sync_flow(scan_type, sync_option, scan_parameters)
should_use_sync_flow = _should_use_sync_flow(command_scan_type, scan_type, sync_option, scan_parameters)

try:
logger.debug('Preparing local files, %s', {'batch_size': len(batch)})
Expand Down Expand Up @@ -217,7 +222,7 @@ def _scan_batch_thread_func(batch: List[Document]) -> Tuple[str, CliError, Local
zip_file_size,
command_scan_type,
error_message,
should_use_scan_service,
should_use_scan_service or should_use_sync_flow, # sync flow implies scan service
)

return scan_id, error, local_scan_result
Expand Down Expand Up @@ -359,6 +364,8 @@ def scan_commit_range_documents(
scan_parameters: Optional[dict] = None,
timeout: Optional[int] = None,
) -> None:
"""Used by SCA only"""

cycode_client = context.obj['client']
scan_type = context.obj['scan_type']
severity_threshold = context.obj['severity_threshold']
Expand Down Expand Up @@ -484,7 +491,8 @@ def perform_scan(
should_use_sync_flow: bool = False,
) -> ZippedFileScanResult:
if should_use_sync_flow:
return perform_scan_sync(cycode_client, zipped_documents, scan_type, scan_parameters)
# it does not support commit range scans; should_use_sync_flow handles it
return perform_scan_sync(cycode_client, zipped_documents, scan_type, scan_parameters, is_git_diff)

if scan_type in (consts.SCA_SCAN_TYPE, consts.SAST_SCAN_TYPE) or should_use_scan_service:
return perform_scan_async(cycode_client, zipped_documents, scan_type, scan_parameters, is_commit_range)
Expand Down Expand Up @@ -520,12 +528,13 @@ def perform_scan_sync(
zipped_documents: 'InMemoryZip',
scan_type: str,
scan_parameters: dict,
is_git_diff: bool = False,
) -> ZippedFileScanResult:
scan_results = cycode_client.zipped_file_scan_sync(zipped_documents, scan_type, scan_parameters)
scan_results = cycode_client.zipped_file_scan_sync(zipped_documents, scan_type, scan_parameters, is_git_diff)
logger.debug('Sync scan request has been triggered successfully, %s', {'scan_id': scan_results.id})
return ZippedFileScanResult(
did_detect=True,
detections_per_file=_map_detections_per_file_and_commit_id(scan_results.detection_messages),
detections_per_file=_map_detections_per_file_and_commit_id(scan_type, scan_results.detection_messages),
scan_id=scan_results.id,
)

Expand Down Expand Up @@ -610,7 +619,7 @@ def get_document_detections(
commit_id = detections_per_file.commit_id

logger.debug(
'Going to find the document of the violated file., %s', {'file_name': file_name, 'commit_id': commit_id}
'Going to find the document of the violated file, %s', {'file_name': file_name, 'commit_id': commit_id}
)

document = _get_document_by_file_name(documents_to_scan, file_name, commit_id)
Expand Down Expand Up @@ -874,7 +883,7 @@ def _get_scan_result(

return ZippedFileScanResult(
did_detect=True,
detections_per_file=_map_detections_per_file_and_commit_id(scan_raw_detections),
detections_per_file=_map_detections_per_file_and_commit_id(scan_type, scan_raw_detections),
scan_id=scan_id,
report_url=_try_get_report_url_if_needed(cycode_client, should_get_report, scan_id, scan_type),
)
Expand Down Expand Up @@ -904,7 +913,7 @@ def _try_get_report_url_if_needed(
logger.debug('Failed to get report URL', exc_info=e)


def _map_detections_per_file_and_commit_id(raw_detections: List[dict]) -> List[DetectionsPerFile]:
def _map_detections_per_file_and_commit_id(scan_type: str, raw_detections: List[dict]) -> List[DetectionsPerFile]:
"""Converts list of detections (async flow) to list of DetectionsPerFile objects (sync flow).
Args:
Expand All @@ -923,7 +932,7 @@ def _map_detections_per_file_and_commit_id(raw_detections: List[dict]) -> List[D
# FIXME(MarshalX): investigate this field mapping
raw_detection['message'] = raw_detection['correlation_message']

file_name = _get_file_name_from_detection(raw_detection)
file_name = _get_file_name_from_detection(scan_type, raw_detection)
detection: Detection = DetectionSchema().load(raw_detection)
commit_id: Optional[str] = detection.detection_details.get('commit_id') # could be None
group_by_key = (file_name, commit_id)
Expand All @@ -942,12 +951,10 @@ def _map_detections_per_file_and_commit_id(raw_detections: List[dict]) -> List[D
]


def _get_file_name_from_detection(raw_detection: dict) -> str:
category = raw_detection.get('category')

if category == 'SAST':
def _get_file_name_from_detection(scan_type: str, raw_detection: dict) -> str:
if scan_type == consts.SAST_SCAN_TYPE:
return raw_detection['detection_details']['file_path']
if category == 'SecretDetection':
if scan_type == consts.SECRET_SCAN_TYPE:
return _get_secret_file_name_from_detection(raw_detection)

return raw_detection['detection_details']['file_name']
Expand Down
7 changes: 3 additions & 4 deletions cycode/cli/commands/scan/repository/repository_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,10 @@ def repository_command(context: click.Context, path: str, branch: str) -> None:

documents_to_scan = exclude_irrelevant_documents_to_scan(scan_type, documents_to_scan)

perform_pre_scan_documents_actions(context, scan_type, documents_to_scan, is_git_diff=False)
perform_pre_scan_documents_actions(context, scan_type, documents_to_scan)

logger.debug('Found all relevant files for scanning %s', {'path': path, 'branch': branch})
scan_documents(
context, documents_to_scan, is_git_diff=False, scan_parameters=get_scan_parameters(context, (path,))
)
scan_parameters = get_scan_parameters(context, (path,))
scan_documents(context, documents_to_scan, scan_parameters=scan_parameters)
except Exception as e:
handle_scan_exception(context, e)
9 changes: 8 additions & 1 deletion cycode/cyclient/cycode_token_based_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@
from cycode.cli.user_settings.jwt_creator import JwtCreator
from cycode.cyclient.cycode_client import CycodeClient

# Plain-text response bodies that signal a JWT problem. They are matched as
# byte prefixes of the response content, because the backend can answer with
# HTTP 200 and plain text, which `.raise_for_status()` cannot detect.
# NOTE(review): the name suggests these come from an nginx layer — confirm.
_NGINX_PLAIN_ERRORS = [
b'Invalid JWT Token',
b'JWT Token Needed',
b'JWT Token validation failed',
]


class CycodeTokenBasedClient(CycodeClient):
"""Send requests with JWT."""
Expand Down Expand Up @@ -82,7 +88,8 @@ def _execute(
response = super()._execute(*args, **kwargs)

# backend returns 200 and plain text. no way to catch it with .raise_for_status()
if response.status_code == 200 and response.content in {b'Invalid JWT Token\n\n', b'JWT Token Needed\n\n'}:
nginx_error_response = any(response.content.startswith(plain_error) for plain_error in _NGINX_PLAIN_ERRORS)
if response.status_code == 200 and nginx_error_response:
# if cached token is invalid, try to refresh it and retry the request
self.refresh_access_token()
response = super()._execute(*args, **kwargs)
Expand Down
27 changes: 22 additions & 5 deletions cycode/cyclient/scan_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def __init__(
self._hide_response_log = hide_response_log

def get_scan_controller_path(self, scan_type: str, should_use_scan_service: bool = False) -> str:
if scan_type == consts.INFRA_CONFIGURATION_SCAN_TYPE:
if not should_use_scan_service and scan_type == consts.INFRA_CONFIGURATION_SCAN_TYPE:
# we don't use async flow for IaC scan yet
return self._SCAN_SERVICE_CONTROLLER_PATH
if not should_use_scan_service and scan_type == consts.SECRET_SCAN_TYPE:
Expand Down Expand Up @@ -106,14 +106,31 @@ def get_zipped_file_scan_async_url_path(self, scan_type: str, should_use_sync_fl
)
return f'{scan_service_url_path}/{async_scan_type}/{async_entity_type}'

def get_zipped_file_scan_sync_url_path(self, scan_type: str) -> str:
    """Return the URL path used to submit a zipped-file scan in the sync flow.

    The path has three segments: the scan service prefix (requested with
    both ``should_use_scan_service`` and ``should_use_sync_flow`` enabled),
    the scan type as returned by ``scan_config.get_async_scan_type``, and
    the fixed ``repository`` suffix.

    Args:
        scan_type: The CLI scan type to build the path for.

    Returns:
        The ``<prefix>/<server scan type>/repository`` URL path.
    """
    # Resolve the server-side scan type first, mirroring the original call order.
    remote_scan_type = self.scan_config.get_async_scan_type(scan_type)
    base_path = self.get_scan_service_url_path(
        scan_type,
        should_use_scan_service=True,
        should_use_sync_flow=True,
    )
    return f'{base_path}/{remote_scan_type}/repository'

def zipped_file_scan_sync(
self, zip_file: InMemoryZip, scan_type: str, scan_parameters: dict
self,
zip_file: InMemoryZip,
scan_type: str,
scan_parameters: dict,
is_git_diff: bool = False,
) -> models.ScanResultsSyncFlow:
files = {'file': ('multiple_files_scan.zip', zip_file.read())}
del scan_parameters['report'] # BE raises validation error instead of ignoring it

if 'report' in scan_parameters:
del scan_parameters['report'] # BE raises validation error instead of ignoring it

response = self.scan_cycode_client.post(
url_path=self.get_zipped_file_scan_async_url_path(scan_type, should_use_sync_flow=True),
data={'scan_parameters': json.dumps(scan_parameters)},
url_path=self.get_zipped_file_scan_sync_url_path(scan_type),
data={
'is_git_diff': is_git_diff,
'scan_parameters': json.dumps(scan_parameters),
},
files=files,
hide_response_content_log=self._hide_response_log,
timeout=60,
Expand Down

0 comments on commit 70d3cce

Please sign in to comment.