From bbfba73be3188156fdf681b6134873ee36e55786 Mon Sep 17 00:00:00 2001
From: srozb
Date: Fri, 24 Jan 2025 11:11:16 +0100
Subject: [PATCH] hunt, utils and scraper tests

---
 tests/test_hunt.py        | 161 ++++++++++++++++++++++++++++++++++++++
 tests/test_utils.py       |  37 +++++++++
 tests/test_web_scraper.py |  52 ++++++++++++
 3 files changed, 250 insertions(+)
 create mode 100644 tests/test_hunt.py
 create mode 100644 tests/test_utils.py
 create mode 100644 tests/test_web_scraper.py

diff --git a/tests/test_hunt.py b/tests/test_hunt.py
new file mode 100644
index 0000000..f4d963d
--- /dev/null
+++ b/tests/test_hunt.py
@@ -0,0 +1,161 @@
+# pylint: disable=missing-module-docstring, missing-function-docstring, redefined-outer-name
+
+import pytest
+from thsensai.hunt import Scope, HuntMeta, Hunt
+from thsensai.ioc import IOCs, IOC
+from thsensai.hyp import Hypotheses
+from thsensai.infer import LLMInference
+
+
+@pytest.fixture
+def mock_llm(mocker):
+    return mocker.Mock(spec=LLMInference)
+
+
+@pytest.fixture
+def sample_iocs():
+    return IOCs(
+        iocs=[
+            IOC(type="ip", value="192.168.1.1", context="Sample context"),
+            IOC(type="domain", value="example.com", context="Another context"),
+        ]
+    )
+
+
+sample_hypotheses = [
+    {
+        "Hypothesis_ID": "HYP-001",
+        "Hypothesis": "Test Hypothesis 1",
+        "Rationale": "Test Rationale",
+        "Log_Sources": ["log1", "log2"],
+        "Detection_Techniques": ["technique1", "technique2"],
+        "Priority_Level": "High",
+    },
+    {
+        "Hypothesis_ID": "HYP-002",
+        "Hypothesis": "Test Hypothesis 2",
+        "Rationale": "Test Rationale",
+        "Log_Sources": ["log1", "log2"],
+        "Detection_Techniques": ["technique1", "technique2"],
+        "Priority_Level": "High",
+    },
+]
+
+
+def test_scope_generate_targets(mock_llm, mocker):
+    mock_llm.invoke_model.return_value = Scope(targets=["Target 1", "Target 2"])
+    scope = Scope()
+    hunt = Hunt(hypotheses=Hypotheses(hypotheses=[]))
+    mocker.patch(
+        "builtins.open",
+        mocker.mock_open(read_data="scope_id;description\n1;Scope 1\n2;Scope 2"),
+    )
+    scope.generate_targets("scopes.csv", hunt, mock_llm)
+    assert scope.targets == ["Target 1", "Target 2"]
+
+
+def test_scope_generate_playbooks(mock_llm, mocker):
+    mock_llm.invoke_model.return_value = Scope(playbooks=["Playbook 1", "Playbook 2"])
+    scope = Scope()
+    hunt = Hunt(hypotheses=Hypotheses(hypotheses=[]))
+    mocker.patch(
+        "builtins.open",
+        mocker.mock_open(
+            read_data="playbook_name;description\n1;Playbook 1\n2;Playbook 2"
+        ),
+    )
+    scope.generate_playbooks("playbooks.csv", hunt, mock_llm)
+    assert scope.playbooks == ["Playbook 1", "Playbook 2"]
+
+
+def test_huntmeta_generate(mock_llm, sample_iocs):
+    mock_llm.invoke_model.return_value = HuntMeta(
+        name="Test Hunt",
+        purpose="Test Purpose",
+        scope=Scope(
+            targets=["Target 1"],
+            timeframe_days=30,
+            datasources=["DataSource 1"],
+            playbooks=["Playbook 1"],
+        ),
+        expected_outcome="Test Outcome",
+    )
+    hunt_meta = HuntMeta()
+    hunt_meta.generate(sample_iocs.as_csv(), mock_llm)
+    assert hunt_meta.name == "Test Hunt"
+    assert hunt_meta.purpose == "Test Purpose"
+    assert hunt_meta.scope.targets == ["Target 1"]
+    assert hunt_meta.scope.timeframe_days == 30
+    assert hunt_meta.scope.datasources == ["DataSource 1"]
+    assert hunt_meta.scope.playbooks == ["Playbook 1"]
+    assert hunt_meta.expected_outcome == "Test Outcome"
+
+
+def test_hunt_generate_meta(mock_llm, sample_iocs):
+    hunt = Hunt(iocs=sample_iocs)
+    mock_llm.invoke_model.return_value = HuntMeta(
+        name="Test Hunt",
+        purpose="Test Purpose",
+        scope=Scope(
+            targets=["Target 1"],
+            timeframe_days=30,
+            datasources=["DataSource 1"],
+            playbooks=["Playbook 1"],
+        ),
+        expected_outcome="Test Outcome",
+    )
+    hunt.generate_meta(mock_llm)
+    assert hunt.meta.name == "Test Hunt"
+    assert hunt.meta.purpose == "Test Purpose"
+    assert hunt.meta.scope.targets == ["Target 1"]
+    assert hunt.meta.scope.timeframe_days == 30
+    assert hunt.meta.scope.datasources == ["DataSource 1"]
+    assert hunt.meta.scope.playbooks == ["Playbook 1"]
+    assert hunt.meta.expected_outcome == "Test Outcome"
+
+
+def test_hunt_generate_hypotheses(mock_llm, sample_iocs):
+    hunt = Hunt(iocs=sample_iocs)
+    mock_llm.invoke_model.return_value = Hypotheses(
+        hypotheses=sample_hypotheses
+    )
+    hunt.generate_hypotheses(mock_llm)
+    assert len(hunt.hypotheses.hypotheses) == 2
+    assert hunt.hypotheses.hypotheses[0].Hypothesis_ID == "HYP-001"
+    assert hunt.hypotheses.hypotheses[0].Hypothesis == "Test Hypothesis 1"
+    assert hunt.hypotheses.hypotheses[1].Hypothesis_ID == "HYP-002"
+    assert hunt.hypotheses.hypotheses[1].Hypothesis == "Test Hypothesis 2"
+
+
+def test_hunt_generate(mock_llm, sample_iocs):
+    hunt = Hunt(iocs=sample_iocs)
+    mock_llm.invoke_model.side_effect = [
+        HuntMeta(
+            name="Test Hunt",
+            purpose="Test Purpose",
+            scope=Scope(
+                targets=["Target 1"],
+                timeframe_days=30,
+                datasources=["DataSource 1"],
+                playbooks=["Playbook 1"],
+            ),
+            expected_outcome="Test Outcome",
+        ),
+        Hypotheses(
+            hypotheses=sample_hypotheses
+        ),
+    ]
+    hunt.generate(mock_llm)
+    assert hunt.meta.name == "Test Hunt"
+    assert hunt.meta.purpose == "Test Purpose"
+    assert hunt.meta.scope.targets == ["Target 1"]
+    assert hunt.meta.scope.timeframe_days == 30
+    assert hunt.meta.scope.datasources == ["DataSource 1"]
+    assert hunt.meta.scope.playbooks == ["Playbook 1"]
+    assert hunt.meta.expected_outcome == "Test Outcome"
+    assert len(hunt.hypotheses.hypotheses) == 2
+    assert hunt.hypotheses.hypotheses[0].Hypothesis_ID == "HYP-001"
+    assert hunt.hypotheses.hypotheses[0].Hypothesis == "Test Hypothesis 1"
+    assert hunt.hypotheses.hypotheses[1].Hypothesis_ID == "HYP-002"
+    assert hunt.hypotheses.hypotheses[1].Hypothesis == "Test Hypothesis 2"
diff --git a/tests/test_utils.py b/tests/test_utils.py
new file mode 100644
index 0000000..bb3bec9
--- /dev/null
+++ b/tests/test_utils.py
@@ -0,0 +1,37 @@
+# pylint: disable=missing-module-docstring, missing-function-docstring, redefined-outer-name
+
+import pytest
+from thsensai.utils import generate_report_name
+from thsensai.intel import Intel
+from thsensai.infer import LLMInference
+
+
+@pytest.fixture
+def intel_obj():
+    return Intel(
+        source="https://example.com",
+        chunk_size=3000,
+        chunk_overlap=100,
+        content_chunks=[],
+    )
+
+
+@pytest.fixture
+def llm():
+    return LLMInference(model="sample-model", num_predict=-1, num_ctx=4096)
+
+
+def test_generate_report_name(intel_obj, llm):
+    report_name = generate_report_name(
+        intel_obj, llm, report_type="ioc", extension="csv"
+    )
+    expected_name = "ioc_example-com_cs-3000_co-100_nc-4096_np--1.csv"
+    assert report_name == expected_name
+
+    report_name_no_type = generate_report_name(intel_obj, llm, extension="csv")
+    expected_name_no_type = "example-com_cs-3000_co-100_nc-4096_np--1.csv"
+    assert report_name_no_type == expected_name_no_type
+
+    report_name_no_extension = generate_report_name(intel_obj, llm, report_type="ioc")
+    expected_name_no_extension = "ioc_example-com_cs-3000_co-100_nc-4096_np--1"
+    assert report_name_no_extension == expected_name_no_extension
diff --git a/tests/test_web_scraper.py b/tests/test_web_scraper.py
new file mode 100644
index 0000000..6e2b117
--- /dev/null
+++ b/tests/test_web_scraper.py
@@ -0,0 +1,52 @@
+# pylint: disable=missing-module-docstring, missing-function-docstring, redefined-outer-name
+
+from unittest.mock import patch, MagicMock
+import pytest
+from thsensai.web_scraper import scrape_web
+
+
+@pytest.fixture
+def mock_loader():
+    with patch("thsensai.web_scraper.WebBaseLoader") as mock_loader:
+        yield mock_loader
+
+
+def test_scrape_web_success(mock_loader):
+    mock_doc = MagicMock()
+    mock_doc.page_content = "Sample content"
+    mock_loader.return_value.load.return_value = [mock_doc]
+
+    urls = ("https://example.com",)
+    css_selectors = ("content",)
+    docs = scrape_web(urls, css_selectors)
+
+    assert len(docs) == 1
+    assert docs[0].page_content == "Sample content"
+
+
+def test_scrape_web_empty_content(mock_loader):
+    mock_doc = MagicMock()
+    mock_doc.page_content = ""
+    mock_doc.metadata = {"source": "https://example.com"}
+    mock_loader.return_value.load.return_value = [mock_doc]
+
+    urls = ("https://example.com",)
+    css_selectors = ("content",)
+
+    with pytest.raises(
+        ValueError,
+        match="No content extracted from the following sources: https://example.com",
+    ):
+        scrape_web(urls, css_selectors)
+
+
+def test_scrape_web_error(mock_loader):
+    mock_loader.return_value.load.side_effect = Exception("Scraping error")
+
+    urls = ("https://example.com",)
+    css_selectors = ("content",)
+
+    with pytest.raises(
+        ValueError, match="An error occurred during web scraping: Scraping error"
+    ):
+        scrape_web(urls, css_selectors)
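
The suites above rely on the `mocker` fixture, which comes from the pytest-mock plugin rather than pytest itself, so pytest-mock must be installed alongside pytest for the tests to collect. Below is a minimal sketch of running just the files added by this patch programmatically; the helper module name is illustrative and assumes the files land under tests/ as in the diff (running `pytest -q tests/` from the command line is equivalent).

# run_new_tests.py -- illustrative helper, not part of the patch
import sys

import pytest  # pytest-mock must also be installed to provide the `mocker` fixture

if __name__ == "__main__":
    # Run only the three suites introduced by this patch and propagate pytest's exit code.
    sys.exit(
        pytest.main(
            [
                "-q",
                "tests/test_hunt.py",
                "tests/test_utils.py",
                "tests/test_web_scraper.py",
            ]
        )
    )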