Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CM-29446 - Performance improvements for SCA (new sync flow) #209

Merged
merged 8 commits into from
Feb 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 40 additions & 4 deletions cycode/cli/commands/scan/code_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,21 @@ def set_issue_detected_by_scan_results(context: click.Context, scan_results: Lis
set_issue_detected(context, any(scan_result.issue_detected for scan_result in scan_results))


def _should_use_scan_service(scan_type: str, scan_parameters: Optional[dict] = None) -> bool:
return scan_type == consts.SECRET_SCAN_TYPE and scan_parameters is not None and scan_parameters['report'] is True
def _should_use_scan_service(scan_type: str, scan_parameters: dict) -> bool:
return scan_type == consts.SECRET_SCAN_TYPE and scan_parameters.get('report') is True


def _should_use_sync_flow(scan_type: str, sync_option: bool, scan_parameters: Optional[dict] = None) -> bool:
if not sync_option:
return False

if scan_type not in (consts.SCA_SCAN_TYPE,):
raise ValueError(f'Sync scan is not available for {scan_type} scan type.')

if scan_parameters.get('report') is True:
raise ValueError('You can not use sync flow with report option. Either remove "report" or "sync" option.')

return True


def _enrich_scan_result_with_data_from_detection_rules(
Expand Down Expand Up @@ -141,6 +154,7 @@ def _get_scan_documents_thread_func(
cycode_client = context.obj['client']
scan_type = context.obj['scan_type']
severity_threshold = context.obj['severity_threshold']
sync_option = context.obj['sync']
command_scan_type = context.info_name

scan_parameters['aggregation_id'] = str(_generate_unique_id())
Expand All @@ -151,7 +165,9 @@ def _scan_batch_thread_func(batch: List[Document]) -> Tuple[str, CliError, Local

scan_id = str(_generate_unique_id())
scan_completed = False

should_use_scan_service = _should_use_scan_service(scan_type, scan_parameters)
should_use_sync_flow = _should_use_sync_flow(scan_type, sync_option, scan_parameters)

try:
logger.debug('Preparing local files, %s', {'batch_size': len(batch)})
Expand All @@ -166,6 +182,7 @@ def _scan_batch_thread_func(batch: List[Document]) -> Tuple[str, CliError, Local
is_commit_range,
scan_parameters,
should_use_scan_service,
should_use_sync_flow,
)

_enrich_scan_result_with_data_from_detection_rules(cycode_client, scan_type, scan_result)
Expand Down Expand Up @@ -439,7 +456,11 @@ def perform_scan(
is_commit_range: bool,
scan_parameters: dict,
should_use_scan_service: bool = False,
should_use_sync_flow: bool = False,
) -> ZippedFileScanResult:
if should_use_sync_flow:
return perform_scan_sync(cycode_client, zipped_documents, scan_type, scan_parameters)

if scan_type in (consts.SCA_SCAN_TYPE, consts.SAST_SCAN_TYPE) or should_use_scan_service:
return perform_scan_async(cycode_client, zipped_documents, scan_type, scan_parameters)

Expand All @@ -466,6 +487,21 @@ def perform_scan_async(
)


def perform_scan_sync(
    cycode_client: 'ScanClient',
    zipped_documents: 'InMemoryZip',
    scan_type: str,
    scan_parameters: dict,
) -> ZippedFileScanResult:
    """Scan the zipped documents via the client's sync call and wrap the response.

    Args:
        cycode_client: Client whose ``zipped_file_scan_sync`` performs the request.
        zipped_documents: Zip archive with the documents to scan.
        scan_type: Scan type identifier forwarded to the client.
        scan_parameters: Scan parameters forwarded to the client.

    Returns:
        A ``ZippedFileScanResult`` holding the response's scan id and its
        detection messages grouped per file; ``did_detect`` is always ``True``.
    """
    sync_response = cycode_client.zipped_file_scan_sync(zipped_documents, scan_type, scan_parameters)
    scan_id = sync_response.id
    logger.debug('scan request has been triggered successfully, scan id: %s', scan_id)

    detections_per_file = _map_detections_per_file(sync_response.detection_messages)
    return ZippedFileScanResult(
        did_detect=True,
        detections_per_file=detections_per_file,
        scan_id=scan_id,
    )


def perform_commit_range_scan_async(
cycode_client: 'ScanClient',
from_commit_zipped_documents: 'InMemoryZip',
Expand Down Expand Up @@ -888,10 +924,10 @@ def _map_detections_per_file(detections: List[dict]) -> List[DetectionsPerFile]:


def _get_file_name_from_detection(detection: dict) -> str:
if detection['category'] == 'SAST':
if detection.get('category') == 'SAST':
return detection['detection_details']['file_path']

if detection['category'] == 'SecretDetection':
if detection.get('category') == 'SecretDetection':
return _get_secret_file_name_from_detection(detection)

return detection['detection_details']['file_name']
Expand Down
12 changes: 11 additions & 1 deletion cycode/cli/commands/scan/scan_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
'--scan-type',
'-t',
default='secret',
help='Specify the type of scan you wish to execute (the default is Secrets)',
help='Specify the type of scan you wish to execute (the default is Secrets).',
type=click.Choice(config['scans']['supported_scans']),
)
@click.option(
Expand Down Expand Up @@ -100,6 +100,14 @@
type=bool,
required=False,
)
@click.option(
'--sync',
is_flag=True,
default=False,
help='Run scan synchronously (the default is asynchronous).',
type=bool,
required=False,
)
@click.pass_context
def scan_command(
context: click.Context,
Expand All @@ -113,6 +121,7 @@ def scan_command(
monitor: bool,
report: bool,
no_restore: bool,
sync: bool,
) -> int:
"""Scans for Secrets, IaC, SCA or SAST violations."""
if show_secret:
Expand All @@ -127,6 +136,7 @@ def scan_command(

context.obj['client'] = get_scan_cycode_client(client_id, secret, not context.obj['show_secret'])
context.obj['scan_type'] = scan_type
context.obj['sync'] = sync
context.obj['severity_threshold'] = severity_threshold
context.obj['monitor'] = monitor
context.obj['report'] = report
Expand Down
4 changes: 1 addition & 3 deletions cycode/cli/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from enum import Enum
from typing import Dict, List, NamedTuple, Optional, Type

from cycode.cyclient import logger
from cycode.cyclient.models import Detection


Expand Down Expand Up @@ -46,8 +45,7 @@ def try_get_value(name: str) -> any:
@staticmethod
def get_member_weight(name: str) -> any:
    """Return the weight of the severity called *name*.

    Falls back to ``-2`` when ``Severity.try_get_value`` yields ``None``.
    NOTE(review): per the inline remark this covers License Compliance
    detections, which presumably carry no severity — confirm against callers.
    """
    weight = Severity.try_get_value(name)
    if weight is None:  # if License Compliance
        return -2
    return weight

Expand Down
7 changes: 6 additions & 1 deletion cycode/cyclient/cycode_client_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,14 @@ def _execute(
url = self.build_full_url(self.api_url, endpoint)
logger.debug(f'Executing {method.upper()} request to {url}')

timeout = self.timeout
if 'timeout' in kwargs:
timeout = kwargs['timeout']
del kwargs['timeout']

try:
headers = self.get_request_headers(headers, without_auth=without_auth)
response = request(method=method, url=url, timeout=self.timeout, headers=headers, **kwargs)
response = request(method=method, url=url, timeout=timeout, headers=headers, **kwargs)

content = 'HIDDEN' if hide_response_content_log else response.text
logger.debug(f'Response {response.status_code} from {url}. Content: {content}')
Expand Down
18 changes: 18 additions & 0 deletions cycode/cyclient/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,3 +453,21 @@ class Meta:
@post_load
def build_dto(self, data: Dict[str, Any], **_) -> DetectionRule:
    """Build a ``DetectionRule`` from the loaded field values."""
    detection_rule = DetectionRule(**data)
    return detection_rule


@dataclass
class ScanResultsSyncFlow:
    """Result of a scan executed through the sync flow."""

    # Identifier of the scan.
    id: str
    # Detection payloads, one dict per detection.
    detection_messages: List[Dict]


class ScanResultsSyncFlowSchema(Schema):
    """Schema that turns a sync-flow scan response into ``ScanResultsSyncFlow``."""

    class Meta:
        # Fields not declared on this schema are excluded rather than rejected.
        unknown = EXCLUDE

    id = fields.String()
    detection_messages = fields.List(fields.Dict())

    @post_load
    def build_dto(self, data: Dict[str, Any], **_) -> ScanResultsSyncFlow:
        """Wrap the loaded field values in a ``ScanResultsSyncFlow``."""
        sync_flow_result = ScanResultsSyncFlow(**data)
        return sync_flow_result
34 changes: 30 additions & 4 deletions cycode/cyclient/scan_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,20 @@ def get_detections_service_controller_path(self, scan_type: str) -> str:

return self._DETECTIONS_SERVICE_CLI_CONTROLLER_PATH

@staticmethod
def get_scan_flow_type(should_use_sync_flow: bool = False) -> str:
    """Return the URL suffix that selects the scan flow.

    Returns:
        ``'/sync'`` for the sync flow, an empty string otherwise.
    """
    return '/sync' if should_use_sync_flow else ''

def get_scan_service_url_path(
    self, scan_type: str, should_use_scan_service: bool = False, should_use_sync_flow: bool = False
) -> str:
    """Build the base URL path of the scan endpoint for *scan_type*.

    Args:
        scan_type: Scan type identifier used to resolve service and controller.
        should_use_scan_service: Forwarded to ``scan_config.get_service_name``.
        should_use_sync_flow: When ``True`` the ``/sync`` suffix is appended.

    Returns:
        ``<service>/<controller>`` followed by the flow suffix, if any.
    """
    service_path = self.scan_config.get_service_name(scan_type, should_use_scan_service)
    controller_path = self.get_scan_controller_path(scan_type)
    # The suffix already carries its leading slash (or is empty), so it is
    # concatenated without a separator.
    flow_type = self.get_scan_flow_type(should_use_sync_flow)
    return f'{service_path}/{controller_path}{flow_type}'

def content_scan(self, scan_type: str, file_name: str, content: str, is_git_diff: bool = True) -> models.ScanResult:
path = f'{self.get_scan_service_url_path(scan_type)}/content'
Expand Down Expand Up @@ -82,12 +92,28 @@ def get_scan_report_url(self, scan_id: str, scan_type: str) -> models.ScanReport
response = self.scan_cycode_client.get(url_path=self.get_scan_report_url_path(scan_id, scan_type))
return models.ScanReportUrlResponseSchema().build_dto(response.json())

def get_zipped_file_scan_async_url_path(self, scan_type: str, should_use_sync_flow: bool = False) -> str:
    """Build the URL path of the zipped-file scan endpoint for *scan_type*.

    Args:
        scan_type: Scan type identifier.
        should_use_sync_flow: Forwarded to ``get_scan_service_url_path`` to
            select the sync variant of the endpoint.

    Returns:
        ``<scan service path>/<async scan type>/<async entity type>``.
    """
    async_scan_type = self.scan_config.get_async_scan_type(scan_type)
    async_entity_type = self.scan_config.get_async_entity_type(scan_type)
    # This endpoint always goes through the scan service.
    scan_service_url_path = self.get_scan_service_url_path(
        scan_type, should_use_scan_service=True, should_use_sync_flow=should_use_sync_flow
    )
    return f'{scan_service_url_path}/{async_scan_type}/{async_entity_type}'

def zipped_file_scan_sync(
    self, zip_file: InMemoryZip, scan_type: str, scan_parameters: dict
) -> models.ScanResultsSyncFlow:
    """Upload a zip archive to the sync scan endpoint and return its results.

    Args:
        zip_file: In-memory zip archive whose content is uploaded.
        scan_type: Scan type identifier used to resolve the endpoint.
        scan_parameters: Scan parameters sent as JSON. The ``report`` key, if
            present, is omitted from the request; the dict itself is not modified.

    Returns:
        The response body loaded through ``ScanResultsSyncFlowSchema``.
    """
    files = {'file': ('multiple_files_scan.zip', zip_file.read())}
    # BE raises validation error instead of ignoring "report", so it must not be sent.
    # Filter into a copy rather than ``del``: the caller's dict stays intact (it
    # may be reused for further requests) and a missing key is not an error.
    request_parameters = {key: value for key, value in scan_parameters.items() if key != 'report'}
    response = self.scan_cycode_client.post(
        url_path=self.get_zipped_file_scan_async_url_path(scan_type, should_use_sync_flow=True),
        data={'scan_parameters': json.dumps(request_parameters)},
        files=files,
        hide_response_content_log=self._hide_response_log,
        timeout=60,  # explicit per-request timeout passed to the client
    )
    return models.ScanResultsSyncFlowSchema().load(response.json())

def zipped_file_scan_async(
self,
zip_file: InMemoryZip,
Expand Down
Loading