Skip to content

Commit

Permalink
CM-29953 - Scanning many paths in one CLI run (#183)
Browse files Browse the repository at this point in the history
  • Loading branch information
MarshalX authored Dec 7, 2023
1 parent 16bc696 commit 0366b40
Show file tree
Hide file tree
Showing 7 changed files with 50 additions and 33 deletions.
6 changes: 3 additions & 3 deletions cycode/cli/commands/report/sbom/path/path_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from cycode.cli import consts
from cycode.cli.commands.report.sbom.common import create_sbom_report, send_report_feedback
from cycode.cli.exceptions.handle_report_sbom_errors import handle_report_exception
from cycode.cli.files_collector.path_documents import get_relevant_document
from cycode.cli.files_collector.path_documents import get_relevant_documents
from cycode.cli.files_collector.sca.sca_code_scanner import perform_pre_scan_documents_actions
from cycode.cli.files_collector.zip_documents import zip_documents
from cycode.cli.utils.get_api_client import get_report_cycode_client
Expand All @@ -28,8 +28,8 @@ def path_command(context: click.Context, path: str) -> None:
report_execution_id = -1

try:
documents = get_relevant_document(
progress_bar, SbomReportProgressBarSection.PREPARE_LOCAL_FILES, consts.SCA_SCAN_TYPE, path
documents = get_relevant_documents(
progress_bar, SbomReportProgressBarSection.PREPARE_LOCAL_FILES, consts.SCA_SCAN_TYPE, (path,)
)
# TODO(MarshalX): combine perform_pre_scan_documents_actions with get_relevant_documents.
# unhardcode usage of context in perform_pre_scan_documents_actions
Expand Down
37 changes: 23 additions & 14 deletions cycode/cli/commands/scan/code_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from cycode.cli.exceptions.handle_scan_errors import handle_scan_exception
from cycode.cli.files_collector.excluder import exclude_irrelevant_documents_to_scan
from cycode.cli.files_collector.models.in_memory_zip import InMemoryZip
from cycode.cli.files_collector.path_documents import get_relevant_document
from cycode.cli.files_collector.path_documents import get_relevant_documents
from cycode.cli.files_collector.repository_documents import (
get_commit_range_modified_documents,
get_diff_file_path,
Expand Down Expand Up @@ -68,7 +68,7 @@ def scan_sca_commit_range(context: click.Context, path: str, commit_range: str)
scan_type = context.obj['scan_type']
progress_bar = context.obj['progress_bar']

scan_parameters = get_scan_parameters(context, path)
scan_parameters = get_scan_parameters(context, (path,))
from_commit_rev, to_commit_rev = parse_commit_range(commit_range, path)
from_commit_documents, to_commit_documents = get_commit_range_modified_documents(
progress_bar, ScanProgressBarSection.PREPARE_LOCAL_FILES, path, from_commit_rev, to_commit_rev
Expand All @@ -82,13 +82,13 @@ def scan_sca_commit_range(context: click.Context, path: str, commit_range: str)
scan_commit_range_documents(context, from_commit_documents, to_commit_documents, scan_parameters=scan_parameters)


def scan_disk_files(context: click.Context, path: str) -> None:
scan_parameters = get_scan_parameters(context, path)
def scan_disk_files(context: click.Context, paths: Tuple[str]) -> None:
scan_parameters = get_scan_parameters(context, paths)
scan_type = context.obj['scan_type']
progress_bar = context.obj['progress_bar']

try:
documents = get_relevant_document(progress_bar, ScanProgressBarSection.PREPARE_LOCAL_FILES, scan_type, path)
documents = get_relevant_documents(progress_bar, ScanProgressBarSection.PREPARE_LOCAL_FILES, scan_type, paths)
perform_pre_scan_documents_actions(context, scan_type, documents)
scan_documents(context, documents, scan_parameters=scan_parameters)
except Exception as e:
Expand Down Expand Up @@ -535,22 +535,31 @@ def get_default_scan_parameters(context: click.Context) -> dict:
}


def get_scan_parameters(context: click.Context, path: str) -> dict:
def get_scan_parameters(context: click.Context, paths: Tuple[str]) -> dict:
scan_parameters = get_default_scan_parameters(context)
remote_url = try_get_git_remote_url(path)

if len(paths) != 1:
# ignore remote url if multiple paths are provided
return scan_parameters

remote_url = try_get_git_remote_url(paths[0])
if remote_url:
# TODO(MarshalX): remove hardcode
# TODO(MarshalX): remove hardcode in context
context.obj['remote_url'] = remote_url
scan_parameters.update(remote_url)
scan_parameters.update(
{
'remote_url': remote_url,
}
)

return scan_parameters


def try_get_git_remote_url(path: str) -> Optional[dict]:
def try_get_git_remote_url(path: str) -> Optional[str]:
try:
git_remote_url = Repo(path).remotes[0].config_reader.get('url')
return {
'remote_url': git_remote_url,
}
remote_url = Repo(path).remotes[0].config_reader.get('url')
logger.debug(f'Found Git remote URL "{remote_url}" in path "{path}"')
return remote_url
except Exception as e:
logger.debug('Failed to get git remote URL. %s', {'exception_message': str(e)})
return None
Expand Down
10 changes: 6 additions & 4 deletions cycode/cli/commands/scan/path/path_command.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
from typing import Tuple

import click

from cycode.cli.commands.scan.code_scanner import scan_disk_files
from cycode.cyclient import logger


@click.command(short_help='Scan the files in the path provided in the command.')
@click.argument('path', nargs=1, type=click.Path(exists=True, resolve_path=True), required=True)
@click.argument('paths', nargs=-1, type=click.Path(exists=True, resolve_path=True), required=True)
@click.pass_context
def path_command(context: click.Context, path: str) -> None:
def path_command(context: click.Context, paths: Tuple[str]) -> None:
progress_bar = context.obj['progress_bar']
progress_bar.start()

logger.debug('Starting path scan process, %s', {'path': path})
scan_disk_files(context, path)
logger.debug('Starting path scan process, %s', {'paths': paths})
scan_disk_files(context, paths)
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
required=False,
)
@click.pass_context
def repisotiry_command(context: click.Context, path: str, branch: str) -> None:
def repository_command(context: click.Context, path: str, branch: str) -> None:
try:
logger.debug('Starting repository scan process, %s', {'path': path, 'branch': branch})

Expand Down Expand Up @@ -54,7 +54,7 @@ def repisotiry_command(context: click.Context, path: str, branch: str) -> None:

logger.debug('Found all relevant files for scanning %s', {'path': path, 'branch': branch})
scan_documents(
context, documents_to_scan, is_git_diff=False, scan_parameters=get_scan_parameters(context, path)
context, documents_to_scan, is_git_diff=False, scan_parameters=get_scan_parameters(context, (path,))
)
except Exception as e:
handle_scan_exception(context, e)
2 changes: 2 additions & 0 deletions cycode/cli/commands/scan/scan_ci/scan_ci_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from cycode.cli.commands.scan.code_scanner import scan_commit_range
from cycode.cli.commands.scan.scan_ci.ci_integrations import get_commit_range

# This command is not finished yet. It is not used in the codebase.


@click.command(
short_help='Execute scan in a CI environment which relies on the '
Expand Down
6 changes: 3 additions & 3 deletions cycode/cli/commands/scan/scan_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from cycode.cli.commands.scan.path.path_command import path_command
from cycode.cli.commands.scan.pre_commit.pre_commit_command import pre_commit_command
from cycode.cli.commands.scan.pre_receive.pre_receive_command import pre_receive_command
from cycode.cli.commands.scan.repository.repisotiry_command import repisotiry_command
from cycode.cli.commands.scan.repository.repository_command import repository_command
from cycode.cli.config import config
from cycode.cli.consts import (
ISSUE_DETECTED_STATUS_CODE,
Expand All @@ -21,14 +21,14 @@

@click.group(
commands={
'repository': repisotiry_command,
'repository': repository_command,
'commit_history': commit_history_command,
'path': path_command,
'pre_commit': pre_commit_command,
'pre_receive': pre_receive_command,
},
short_help='Scan the content for Secrets/IaC/SCA/SAST violations. '
'You`ll need to specify which scan type to perform: ci/commit_history/path/repository/etc.',
'You`ll need to specify which scan type to perform: commit_history/path/repository/etc.',
)
@click.option(
'--scan-type',
Expand Down
18 changes: 11 additions & 7 deletions cycode/cli/files_collector/path_documents.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import os
from typing import TYPE_CHECKING, Iterable, List
from typing import TYPE_CHECKING, Iterable, List, Tuple

import pathspec

Expand Down Expand Up @@ -48,9 +48,13 @@ def _get_relevant_files_in_path(path: str, exclude_patterns: Iterable[str]) -> L


def _get_relevant_files(
progress_bar: 'BaseProgressBar', progress_bar_section: 'ProgressBarSection', scan_type: str, path: str
progress_bar: 'BaseProgressBar', progress_bar_section: 'ProgressBarSection', scan_type: str, paths: Tuple[str]
) -> List[str]:
all_files_to_scan = _get_relevant_files_in_path(path=path, exclude_patterns=['**/.git/**', '**/.cycode/**'])
all_files_to_scan = []
for path in paths:
all_files_to_scan.extend(
_get_relevant_files_in_path(path=path, exclude_patterns=['**/.git/**', '**/.cycode/**'])
)

# we double the progress bar section length because we are going to process the files twice:
# the first time to get the file list with respect to the excluded patterns (excluding takes seconds to execute)
Expand All @@ -70,7 +74,7 @@ def _get_relevant_files(
progress_bar.set_section_length(progress_bar_section, progress_bar_section_len)

logger.debug(
'Found all relevant files for scanning %s', {'path': path, 'file_to_scan_count': len(relevant_files_to_scan)}
'Found all relevant files for scanning %s', {'paths': paths, 'file_to_scan_count': len(relevant_files_to_scan)}
)

return relevant_files_to_scan
Expand All @@ -89,15 +93,15 @@ def _handle_tfplan_file(file: str, content: str, is_git_diff: bool) -> Document:
return Document(document_name, tf_content, is_git_diff)


def get_relevant_document(
def get_relevant_documents(
progress_bar: 'BaseProgressBar',
progress_bar_section: 'ProgressBarSection',
scan_type: str,
path: str,
paths: Tuple[str],
*,
is_git_diff: bool = False,
) -> List[Document]:
relevant_files = _get_relevant_files(progress_bar, progress_bar_section, scan_type, path)
relevant_files = _get_relevant_files(progress_bar, progress_bar_section, scan_type, paths)

documents: List[Document] = []
for file in relevant_files:
Expand Down

0 comments on commit 0366b40

Please sign in to comment.