From 1ff231e2e1ae32d1cdff41e0af878285e4490293 Mon Sep 17 00:00:00 2001 From: KlemenSkrlj <47853619+klemen1999@users.noreply.github.com> Date: Mon, 17 Feb 2025 16:50:20 +0100 Subject: [PATCH] Added semgrep workflow (#176) --- .github/workflows/semgrep.yaml | 65 +++++++++++++++++++ .pre-commit-config.yaml | 7 ++ luxonis_train/__main__.py | 7 +- luxonis_train/callbacks/upload_checkpoint.py | 4 +- luxonis_train/core/utils/infer_utils.py | 2 +- luxonis_train/models/luxonis_lightning.py | 5 +- luxonis_train/nodes/base_node.py | 7 +- luxonis_train/nodes/blocks/blocks.py | 7 +- luxonis_train/nodes/necks/svtr_neck/blocks.py | 15 ++--- requirements-dev.txt | 2 +- tests/integration/test_cli_commands.py | 2 +- 11 files changed, 99 insertions(+), 24 deletions(-) create mode 100644 .github/workflows/semgrep.yaml diff --git a/.github/workflows/semgrep.yaml b/.github/workflows/semgrep.yaml new file mode 100644 index 00000000..c0e755b6 --- /dev/null +++ b/.github/workflows/semgrep.yaml @@ -0,0 +1,65 @@ +name: Semgrep SAST Scan + +on: + pull_request: + +jobs: + semgrep: + # User definable name of this GitHub Actions job. + name: semgrep/ci + # If you are self-hosting, change the following `runs-on` value: + runs-on: ubuntu-latest + container: + # A Docker image with Semgrep installed. Do not change this. + image: returntocorp/semgrep + # Skip any PR created by dependabot to avoid permission issues: + if: (github.actor != 'dependabot[bot]') + permissions: + # required for all workflows + security-events: write + # only required for workflows in private repositories + actions: read + contents: read + + steps: + # Fetch project source with GitHub Actions Checkout. + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Perform Semgrep Analysis + # @NOTE: This is the actual semgrep command to scan your code. 
+ # Modify the --config option to 'r/all' to scan using all rules, + # or use multiple flags to specify particular rules, such as + # --config r/all --config custom/rules + run: semgrep scan -q --sarif --config auto --config "p/secrets" . > semgrep-results.sarif + + - name: Pretty-Print SARIF Output + run: | + jq . semgrep-results.sarif > formatted-semgrep-results.sarif || echo "{}" + echo "Formatted SARIF Output (First 20 lines):" + head -n 20 formatted-semgrep-results.sarif || echo "{}" + + - name: Validate JSON Output + run: | + if ! jq empty formatted-semgrep-results.sarif > /dev/null 2>&1; then + echo "⚠️ Semgrep output is not valid JSON. Skipping annotations." + exit 0 + fi + + - name: Add PR Annotations for Semgrep Findings + run: | + total_issues=$(jq '.runs[0].results | length' formatted-semgrep-results.sarif) + if [[ "$total_issues" -eq 0 ]]; then + echo "✅ No Semgrep issues found!" + exit 0 + fi + + jq -c '.runs[0].results[]' formatted-semgrep-results.sarif | while IFS= read -r issue; do + file=$(echo "$issue" | jq -r '.locations[0].physicalLocation.artifactLocation.uri') + line=$(echo "$issue" | jq -r '.locations[0].physicalLocation.region.startLine') + message=$(echo "$issue" | jq -r '.message.text') + + if [[ -n "$file" && -n "$line" && -n "$message" ]]; then + echo "::error file=$file,line=$line,title=Semgrep Issue::${message}" + fi + done \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c9355abb..95ae0b89 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,3 +21,10 @@ repos: - id: mdformat additional_dependencies: - mdformat-gfm==0.3.6 + + - repo: https://github.com/semgrep/pre-commit + rev: 'v1.108.0' + hooks: + - id: semgrep + args: ['--config', 'auto', '--config', 'p/secrets', '--error', '--quiet'] + diff --git a/luxonis_train/__main__.py b/luxonis_train/__main__.py index bf747afa..68152d5d 100644 --- a/luxonis_train/__main__.py +++ b/luxonis_train/__main__.py @@ -1,3 +1,4 @@ 
+import importlib.util from enum import Enum from importlib.metadata import version from pathlib import Path @@ -254,7 +255,11 @@ def common( ] = None, ): if source: - exec(source.read_text(), globals(), globals()) + spec = importlib.util.spec_from_file_location(source.stem, source) + if spec: + module = importlib.util.module_from_spec(spec=spec) + if spec.loader: + spec.loader.exec_module(module) if __name__ == "__main__": diff --git a/luxonis_train/callbacks/upload_checkpoint.py b/luxonis_train/callbacks/upload_checkpoint.py index 540ed0fd..450e4b9d 100644 --- a/luxonis_train/callbacks/upload_checkpoint.py +++ b/luxonis_train/callbacks/upload_checkpoint.py @@ -47,7 +47,9 @@ def on_save_checkpoint( .parent.with_suffix(".ckpt") .name ) - torch.save(checkpoint, temp_filename) + torch.save( # nosemgrep + checkpoint, temp_filename + ) module.logger.upload_artifact(temp_filename, typ="weights") os.remove(temp_filename) diff --git a/luxonis_train/core/utils/infer_utils.py b/luxonis_train/core/utils/infer_utils.py index b9917766..50bdb311 100644 --- a/luxonis_train/core/utils/infer_utils.py +++ b/luxonis_train/core/utils/infer_utils.py @@ -221,7 +221,7 @@ def generator() -> DatasetIterator: keep_aspect_ratio=model.cfg_preprocessing.keep_aspect_ratio, ) loader = torch_data.DataLoader( - loader, batch_size=model.cfg.trainer.batch_size + loader, batch_size=model.cfg.trainer.batch_size, pin_memory=True ) infer_from_loader(model, loader, save_dir, img_paths) diff --git a/luxonis_train/models/luxonis_lightning.py b/luxonis_train/models/luxonis_lightning.py index af5b7367..5c5919ff 100644 --- a/luxonis_train/models/luxonis_lightning.py +++ b/luxonis_train/models/luxonis_lightning.py @@ -1018,7 +1018,10 @@ def load_checkpoint(self, path: str | Path | None) -> None: return path = str(path) - checkpoint = torch.load(path, map_location=self.device) + + checkpoint = torch.load( # nosemgrep + path, map_location=self.device + ) if "state_dict" not in checkpoint: raise 
ValueError("Checkpoint does not contain state_dict.") diff --git a/luxonis_train/nodes/base_node.py b/luxonis_train/nodes/base_node.py index 24627afb..876ad277 100644 --- a/luxonis_train/nodes/base_node.py +++ b/luxonis_train/nodes/base_node.py @@ -396,11 +396,10 @@ def load_checkpoint(self, path: str, strict: bool = True) -> None: """ local_path = safe_download(url=path) if local_path: - state_dict = torch.load( + # load explicitly to cpu, PL takes care of transferring to CUDA if needed + state_dict = torch.load( # nosemgrep local_path, weights_only=False, map_location="cpu" - )[ - "state_dict" - ] # load explicitly to cpu, PL takes care of transfering to CUDA is needed + )["state_dict"] self.load_state_dict(state_dict, strict=strict) logging.info(f"Checkpoint for {self.name} loaded.") else: diff --git a/luxonis_train/nodes/blocks/blocks.py b/luxonis_train/nodes/blocks/blocks.py index 9c56ca6e..a3984085 100644 --- a/luxonis_train/nodes/blocks/blocks.py +++ b/luxonis_train/nodes/blocks/blocks.py @@ -1,7 +1,6 @@ import math from typing import Literal, TypeVar -import numpy as np import torch import torch.nn.functional as F from torch import Tensor, nn @@ -537,12 +536,12 @@ def _fuse_bn_tensor( assert isinstance(branch, nn.BatchNorm2d) if not hasattr(self, "id_tensor"): input_dim = self.in_channels // self.groups - kernel_value = np.zeros( - (self.in_channels, input_dim, 3, 3), dtype=np.float32 + kernel_value = torch.zeros( + (self.in_channels, input_dim, 3, 3), dtype=torch.float32 ) for i in range(self.in_channels): kernel_value[i, i % input_dim, 1, 1] = 1 - self.id_tensor = torch.from_numpy(kernel_value) + self.id_tensor = kernel_value kernel = self.id_tensor running_mean = branch.running_mean running_var = branch.running_var diff --git a/luxonis_train/nodes/necks/svtr_neck/blocks.py b/luxonis_train/nodes/necks/svtr_neck/blocks.py 
@@ -171,15 +171,12 @@ def __init__( attn_drop: float = 0.0, drop_path: float = 0.0, act_layer: type[nn.Module] = nn.GELU, - norm_layer: type[nn.Module] | str = "nn.LayerNorm", + norm_layer: type[nn.Module] = nn.LayerNorm, epsilon: float = 1e-6, prenorm: bool = True, ): super().__init__() - if isinstance(norm_layer, str): - self.norm1 = eval(norm_layer)(dim, eps=epsilon) - else: - self.norm1 = norm_layer(dim) + self.norm1 = norm_layer(dim, eps=epsilon) if mixer == "Global" or mixer == "Local": self.mixer = Attention( dim, @@ -211,10 +208,8 @@ def __init__( self.drop_path = ( DropPath(drop_path) if drop_path > 0.0 else nn.Identity() ) - if isinstance(norm_layer, str): - self.norm2 = eval(norm_layer)(dim, eps=epsilon) - else: - self.norm2 = norm_layer(dim) + + self.norm2 = norm_layer(dim, eps=epsilon) mlp_hidden_dim = int(dim * mlp_ratio) self.mlp_ratio = mlp_ratio self.mlp = Mlp( @@ -284,7 +279,7 @@ def __init__( act_layer=nn.ReLU, attn_drop=attn_drop_rate, drop_path=drop_path, - norm_layer="nn.LayerNorm", + norm_layer=nn.LayerNorm, epsilon=1e-05, prenorm=False, ) diff --git a/requirements-dev.txt b/requirements-dev.txt index b8b82a23..3a456f3b 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -5,4 +5,4 @@ opencv-stubs>=0.0.8 pytest-cov>=4.1.0 pytest-subtests>=0.12.1 pytest-md>=0.2.0 -pytest-order>=1.3.0 +pytest-order>=1.3.0 \ No newline at end of file diff --git a/tests/integration/test_cli_commands.py b/tests/integration/test_cli_commands.py index 19a2e3e1..e39f9874 100644 --- a/tests/integration/test_cli_commands.py +++ b/tests/integration/test_cli_commands.py @@ -26,7 +26,7 @@ def prepare(): def run_command(command: str) -> tuple[str, str, int]: result = subprocess.run( command, - shell=True, + shell=True, # nosemgrep text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE,