From 1ff231e2e1ae32d1cdff41e0af878285e4490293 Mon Sep 17 00:00:00 2001
From: KlemenSkrlj <47853619+klemen1999@users.noreply.github.com>
Date: Mon, 17 Feb 2025 16:50:20 +0100
Subject: [PATCH] Added semgrep workflow (#176)

---
 .github/workflows/semgrep.yaml                | 65 +++++++++++++++++++
 .pre-commit-config.yaml                       |  7 ++
 luxonis_train/__main__.py                     |  7 +-
 luxonis_train/callbacks/upload_checkpoint.py  |  4 +-
 luxonis_train/core/utils/infer_utils.py       |  2 +-
 luxonis_train/models/luxonis_lightning.py     |  5 +-
 luxonis_train/nodes/base_node.py              |  7 +-
 luxonis_train/nodes/blocks/blocks.py          |  7 +-
 luxonis_train/nodes/necks/svtr_neck/blocks.py | 15 ++---
 requirements-dev.txt                          |  2 +-
 tests/integration/test_cli_commands.py        |  2 +-
 11 files changed, 99 insertions(+), 24 deletions(-)
 create mode 100644 .github/workflows/semgrep.yaml

diff --git a/.github/workflows/semgrep.yaml b/.github/workflows/semgrep.yaml
new file mode 100644
index 00000000..c0e755b6
--- /dev/null
+++ b/.github/workflows/semgrep.yaml
@@ -0,0 +1,65 @@
+name: Semgrep SAST Scan
+
+on:
+  pull_request:
+
+jobs:
+  semgrep:
+    # User definable name of this GitHub Actions job.
+    name: semgrep/ci
+    # If you are self-hosting, change the following `runs-on` value:
+    runs-on: ubuntu-latest
+    container:
+      # A Docker image with Semgrep installed. Do not change this.
+      image: returntocorp/semgrep
+    # Skip any PR created by dependabot to avoid permission issues:
+    if: (github.actor != 'dependabot[bot]')
+    permissions:
+      # required for all workflows
+      security-events: write
+      # only required for workflows in private repositories
+      actions: read
+      contents: read
+
+    steps:
+      # Fetch project source with GitHub Actions Checkout.
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Perform Semgrep Analysis
+      # @NOTE: This is the actual semgrep command to scan your code.
+      # Modify the --config option to 'r/all' to scan using all rules,
+      # or use multiple flags to specify particular rules, such as
+      # --config r/all --config custom/rules
+        run: semgrep scan -q --sarif --config auto --config "p/secrets" . > semgrep-results.sarif
+        
+      - name: Pretty-Print SARIF Output  # falls back to "{}" in the file so later steps always read valid JSON
+        run: |
+          jq . semgrep-results.sarif > formatted-semgrep-results.sarif || echo "{}" > formatted-semgrep-results.sarif
+          echo "Formatted SARIF Output (First 20 lines):"
+          head -n 20 formatted-semgrep-results.sarif || echo "{}"
+
+      - name: Validate JSON Output
+        run: |
+          if ! jq empty formatted-semgrep-results.sarif > /dev/null 2>&1; then
+            echo "⚠️ Semgrep output is not valid JSON. Skipping annotations."
+            exit 0
+          fi
+
+      - name: Add PR Annotations for Semgrep Findings
+        run: |
+          total_issues=$(jq '.runs[0].results | length' formatted-semgrep-results.sarif)
+          if [[ "$total_issues" -eq 0 ]]; then
+            echo "✅ No Semgrep issues found!"
+            exit 0
+          fi
+          # Emit one ::error annotation per finding; `// empty` yields "" for a missing field instead of the string "null".
+          jq -c '.runs[0].results[]' formatted-semgrep-results.sarif | while IFS= read -r issue; do
+            file=$(echo "$issue" | jq -r '.locations[0].physicalLocation.artifactLocation.uri // empty')
+            line=$(echo "$issue" | jq -r '.locations[0].physicalLocation.region.startLine // empty')
+            message=$(echo "$issue" | jq -r '.message.text // empty')
+            # Skip findings that lack a file, line or message rather than annotating "null".
+            if [[ -n "$file" && -n "$line" && -n "$message" ]]; then
+              echo "::error file=$file,line=$line,title=Semgrep Issue::${message}"
+            fi
+          done
\ No newline at end of file
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index c9355abb..95ae0b89 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -21,3 +21,10 @@ repos:
       - id: mdformat
         additional_dependencies:
           - mdformat-gfm==0.3.6
+
+  - repo: https://github.com/semgrep/pre-commit
+    rev: 'v1.108.0'
+    hooks:
+      - id: semgrep
+        args: ['--config', 'auto', '--config', 'p/secrets', '--error', '--quiet']
+
diff --git a/luxonis_train/__main__.py b/luxonis_train/__main__.py
index bf747afa..68152d5d 100644
--- a/luxonis_train/__main__.py
+++ b/luxonis_train/__main__.py
@@ -1,3 +1,4 @@
+import importlib.util
 from enum import Enum
 from importlib.metadata import version
 from pathlib import Path
@@ -254,7 +255,11 @@ def common(
     ] = None,
 ):
     if source:
-        exec(source.read_text(), globals(), globals())
+        spec = importlib.util.spec_from_file_location(source.stem, source)
+        if spec:
+            module = importlib.util.module_from_spec(spec=spec)
+            if spec.loader:
+                spec.loader.exec_module(module)
 
 
 if __name__ == "__main__":
diff --git a/luxonis_train/callbacks/upload_checkpoint.py b/luxonis_train/callbacks/upload_checkpoint.py
index 540ed0fd..450e4b9d 100644
--- a/luxonis_train/callbacks/upload_checkpoint.py
+++ b/luxonis_train/callbacks/upload_checkpoint.py
@@ -47,7 +47,9 @@ def on_save_checkpoint(
                         .parent.with_suffix(".ckpt")
                         .name
                     )
-                    torch.save(checkpoint, temp_filename)
+                    torch.save(  # nosemgrep
+                        checkpoint, temp_filename
+                    )
                     module.logger.upload_artifact(temp_filename, typ="weights")
 
                     os.remove(temp_filename)
diff --git a/luxonis_train/core/utils/infer_utils.py b/luxonis_train/core/utils/infer_utils.py
index b9917766..50bdb311 100644
--- a/luxonis_train/core/utils/infer_utils.py
+++ b/luxonis_train/core/utils/infer_utils.py
@@ -221,7 +221,7 @@ def generator() -> DatasetIterator:
         keep_aspect_ratio=model.cfg_preprocessing.keep_aspect_ratio,
     )
     loader = torch_data.DataLoader(
-        loader, batch_size=model.cfg.trainer.batch_size
+        loader, batch_size=model.cfg.trainer.batch_size, pin_memory=True
     )
 
     infer_from_loader(model, loader, save_dir, img_paths)
diff --git a/luxonis_train/models/luxonis_lightning.py b/luxonis_train/models/luxonis_lightning.py
index af5b7367..5c5919ff 100644
--- a/luxonis_train/models/luxonis_lightning.py
+++ b/luxonis_train/models/luxonis_lightning.py
@@ -1018,7 +1018,10 @@ def load_checkpoint(self, path: str | Path | None) -> None:
             return
 
         path = str(path)
-        checkpoint = torch.load(path, map_location=self.device)
+
+        checkpoint = torch.load(  # nosemgrep
+            path, map_location=self.device
+        )
 
         if "state_dict" not in checkpoint:
             raise ValueError("Checkpoint does not contain state_dict.")
diff --git a/luxonis_train/nodes/base_node.py b/luxonis_train/nodes/base_node.py
index 24627afb..876ad277 100644
--- a/luxonis_train/nodes/base_node.py
+++ b/luxonis_train/nodes/base_node.py
@@ -396,11 +396,10 @@ def load_checkpoint(self, path: str, strict: bool = True) -> None:
         """
         local_path = safe_download(url=path)
         if local_path:
-            state_dict = torch.load(
+            # load explicitly to cpu, PL takes care of transferring to CUDA if needed
+            state_dict = torch.load(  # nosemgrep
                 local_path, weights_only=False, map_location="cpu"
-            )[
-                "state_dict"
-            ]  # load explicitly to cpu, PL takes care of transfering to CUDA is needed
+            )["state_dict"]
             self.load_state_dict(state_dict, strict=strict)
             logging.info(f"Checkpoint for {self.name} loaded.")
         else:
diff --git a/luxonis_train/nodes/blocks/blocks.py b/luxonis_train/nodes/blocks/blocks.py
index 9c56ca6e..a3984085 100644
--- a/luxonis_train/nodes/blocks/blocks.py
+++ b/luxonis_train/nodes/blocks/blocks.py
@@ -1,7 +1,6 @@
 import math
 from typing import Literal, TypeVar
 
-import numpy as np
 import torch
 import torch.nn.functional as F
 from torch import Tensor, nn
@@ -537,12 +536,12 @@ def _fuse_bn_tensor(
             assert isinstance(branch, nn.BatchNorm2d)
             if not hasattr(self, "id_tensor"):
                 input_dim = self.in_channels // self.groups
-                kernel_value = np.zeros(
-                    (self.in_channels, input_dim, 3, 3), dtype=np.float32
+                kernel_value = torch.zeros(
+                    (self.in_channels, input_dim, 3, 3), dtype=torch.float32
                 )
                 for i in range(self.in_channels):
                     kernel_value[i, i % input_dim, 1, 1] = 1
-                self.id_tensor = torch.from_numpy(kernel_value)
+                self.id_tensor = kernel_value
             kernel = self.id_tensor
             running_mean = branch.running_mean
             running_var = branch.running_var
diff --git a/luxonis_train/nodes/necks/svtr_neck/blocks.py b/luxonis_train/nodes/necks/svtr_neck/blocks.py
index f186d473..5a11a018 100644
--- a/luxonis_train/nodes/necks/svtr_neck/blocks.py
+++ b/luxonis_train/nodes/necks/svtr_neck/blocks.py
@@ -171,15 +171,12 @@ def __init__(
         attn_drop: float = 0.0,
         drop_path: float = 0.0,
         act_layer: type[nn.Module] = nn.GELU,
-        norm_layer: type[nn.Module] | str = "nn.LayerNorm",
+        norm_layer: type[nn.Module] = nn.LayerNorm,
         epsilon: float = 1e-6,
         prenorm: bool = True,
     ):
         super().__init__()
-        if isinstance(norm_layer, str):
-            self.norm1 = eval(norm_layer)(dim, eps=epsilon)
-        else:
-            self.norm1 = norm_layer(dim)
+        self.norm1 = norm_layer(dim, eps=epsilon)
         if mixer == "Global" or mixer == "Local":
             self.mixer = Attention(
                 dim,
@@ -211,10 +208,8 @@ def __init__(
         self.drop_path = (
             DropPath(drop_path) if drop_path > 0.0 else nn.Identity()
         )
-        if isinstance(norm_layer, str):
-            self.norm2 = eval(norm_layer)(dim, eps=epsilon)
-        else:
-            self.norm2 = norm_layer(dim)
+
+        self.norm2 = norm_layer(dim, eps=epsilon)
         mlp_hidden_dim = int(dim * mlp_ratio)
         self.mlp_ratio = mlp_ratio
         self.mlp = Mlp(
@@ -284,7 +279,7 @@ def __init__(
                     act_layer=nn.ReLU,
                     attn_drop=attn_drop_rate,
                     drop_path=drop_path,
-                    norm_layer="nn.LayerNorm",
+                    norm_layer=nn.LayerNorm,
                     epsilon=1e-05,
                     prenorm=False,
                 )
diff --git a/requirements-dev.txt b/requirements-dev.txt
index b8b82a23..3a456f3b 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -5,4 +5,4 @@ opencv-stubs>=0.0.8
 pytest-cov>=4.1.0
 pytest-subtests>=0.12.1
 pytest-md>=0.2.0
-pytest-order>=1.3.0
+pytest-order>=1.3.0
\ No newline at end of file
diff --git a/tests/integration/test_cli_commands.py b/tests/integration/test_cli_commands.py
index 19a2e3e1..e39f9874 100644
--- a/tests/integration/test_cli_commands.py
+++ b/tests/integration/test_cli_commands.py
@@ -26,7 +26,7 @@ def prepare():
 def run_command(command: str) -> tuple[str, str, int]:
     result = subprocess.run(
         command,
-        shell=True,
+        shell=True,  # nosemgrep
         text=True,
         stdout=subprocess.PIPE,
         stderr=subprocess.PIPE,