From bbfba73be3188156fdf681b6134873ee36e55786 Mon Sep 17 00:00:00 2001
From: srozb
Date: Fri, 24 Jan 2025 11:11:16 +0100
Subject: [PATCH] hunt, utils and scraper tests

---
 tests/test_hunt.py        | 161 ++++++++++++++++++++++++++++++++++++++
 tests/test_utils.py       |  37 +++++++++
 tests/test_web_scraper.py |  52 ++++++++++++
 3 files changed, 250 insertions(+)
 create mode 100644 tests/test_hunt.py
 create mode 100644 tests/test_utils.py
 create mode 100644 tests/test_web_scraper.py

diff --git a/tests/test_hunt.py b/tests/test_hunt.py
new file mode 100644
index 0000000..f4d963d
--- /dev/null
+++ b/tests/test_hunt.py
@@ -0,0 +1,161 @@
+# pylint: disable=missing-module-docstring, missing-function-docstring, redefined-outer-name
+
+import pytest
+from thsensai.hunt import Scope, HuntMeta, Hunt
+from thsensai.ioc import IOCs, IOC
+from thsensai.hyp import Hypotheses
+from thsensai.infer import LLMInference
+
+
+@pytest.fixture
+def mock_llm(mocker):
+    return mocker.Mock(spec=LLMInference)
+
+
+@pytest.fixture
+def sample_iocs():
+    return IOCs(
+        iocs=[
+            IOC(type="ip", value="192.168.1.1", context="Sample context"),
+            IOC(type="domain", value="example.com", context="Another context"),
+        ]
+    )
+
+
+sample_hypotheses = [
+    {
+        "Hypothesis_ID": "HYP-001",
+        "Hypothesis": "Test Hypothesis 1",
+        "Rationale": "Test Rationale",
+        "Log_Sources": ["log1", "log2"],
+        "Detection_Techniques": ["technique1", "technique2"],
+        "Priority_Level": "High",
+    },
+    {
+        "Hypothesis_ID": "HYP-002",
+        "Hypothesis": "Test Hypothesis 2",
+        "Rationale": "Test Rationale",
+        "Log_Sources": ["log1", "log2"],
+        "Detection_Techniques": ["technique1", "technique2"],
+        "Priority_Level": "High",
+    },
+]
+
+
+def test_scope_generate_targets(mock_llm, mocker):
+    mock_llm.invoke_model.return_value = Scope(targets=["Target 1", "Target 2"])
+    scope = Scope()
+    hunt = Hunt(hypotheses=Hypotheses(hypotheses=[]))
+    mocker.patch(
+        "builtins.open",
+        mocker.mock_open(read_data="scope_id;description\n1;Scope 1\n2;Scope 2"),
+    )
+    scope.generate_targets("scopes.csv", hunt, mock_llm)
+    assert scope.targets == ["Target 1", "Target 2"]
+
+
+def test_scope_generate_playbooks(mock_llm, mocker):
+    mock_llm.invoke_model.return_value = Scope(playbooks=["Playbook 1", "Playbook 2"])
+    scope = Scope()
+    hunt = Hunt(hypotheses=Hypotheses(hypotheses=[]))
+    mocker.patch(
+        "builtins.open",
+        mocker.mock_open(
+            read_data="playbook_name;description\n1;Playbook 1\n2;Playbook 2"
+        ),
+    )
+    scope.generate_playbooks("playbooks.csv", hunt, mock_llm)
+    assert scope.playbooks == ["Playbook 1", "Playbook 2"]
+
+
+def test_huntmeta_generate(mock_llm, sample_iocs):
+    mock_llm.invoke_model.return_value = HuntMeta(
+        name="Test Hunt",
+        purpose="Test Purpose",
+        scope=Scope(
+            targets=["Target 1"],
+            timeframe_days=30,
+            datasources=["DataSource 1"],
+            playbooks=["Playbook 1"],
+        ),
+        expected_outcome="Test Outcome",
+    )
+    hunt_meta = HuntMeta()
+    hunt_meta.generate(sample_iocs.as_csv(), mock_llm)
+    assert hunt_meta.name == "Test Hunt"
+    assert hunt_meta.purpose == "Test Purpose"
+    assert hunt_meta.scope.targets == ["Target 1"]
+    assert hunt_meta.scope.timeframe_days == 30
+    assert hunt_meta.scope.datasources == ["DataSource 1"]
+    assert hunt_meta.scope.playbooks == ["Playbook 1"]
+    assert hunt_meta.expected_outcome == "Test Outcome"
+
+
+def test_hunt_generate_meta(mock_llm, sample_iocs):
+    hunt = Hunt(iocs=sample_iocs)
+    mock_llm.invoke_model.return_value = HuntMeta(
+        name="Test Hunt",
+        purpose="Test Purpose",
+        scope=Scope(
+            targets=["Target 1"],
+            timeframe_days=30,
+            datasources=["DataSource 1"],
+            playbooks=["Playbook 1"],
+        ),
+        expected_outcome="Test Outcome",
+    )
+    hunt.generate_meta(mock_llm)
+    assert hunt.meta.name == "Test Hunt"
+    assert hunt.meta.purpose == "Test Purpose"
+    assert hunt.meta.scope.targets == ["Target 1"]
+    assert hunt.meta.scope.timeframe_days == 30
+    assert hunt.meta.scope.datasources == ["DataSource 1"]
+    assert hunt.meta.scope.playbooks == ["Playbook 1"]
+    assert hunt.meta.expected_outcome == "Test Outcome"
+
+
+def test_hunt_generate_hypotheses(mock_llm, sample_iocs):
+    hunt = Hunt(iocs=sample_iocs)
+    mock_llm.invoke_model.return_value = Hypotheses(
+        hypotheses=sample_hypotheses
+    )
+    hunt.generate_hypotheses(mock_llm)
+    assert len(hunt.hypotheses.hypotheses) == 2
+    assert hunt.hypotheses.hypotheses[0].Hypothesis_ID == "HYP-001"
+    assert hunt.hypotheses.hypotheses[0].Hypothesis == "Test Hypothesis 1"
+    assert hunt.hypotheses.hypotheses[1].Hypothesis_ID == "HYP-002"
+    assert hunt.hypotheses.hypotheses[1].Hypothesis == "Test Hypothesis 2"
+
+
+def test_hunt_generate(mock_llm, sample_iocs):
+    hunt = Hunt(iocs=sample_iocs)
+    mock_llm.invoke_model.side_effect = [
+        HuntMeta(
+            name="Test Hunt",
+            purpose="Test Purpose",
+            scope=Scope(
+                targets=["Target 1"],
+                timeframe_days=30,
+                datasources=["DataSource 1"],
+                playbooks=["Playbook 1"],
+            ),
+            expected_outcome="Test Outcome",
+        ),
+        Hypotheses(
+            hypotheses=sample_hypotheses
+        ),
+    ]
+    hunt.generate(mock_llm)
+    assert hunt.meta.name == "Test Hunt"
+    assert hunt.meta.purpose == "Test Purpose"
+    assert hunt.meta.scope.targets == ["Target 1"]
+    assert hunt.meta.scope.timeframe_days == 30
+    assert hunt.meta.scope.datasources == ["DataSource 1"]
+    assert hunt.meta.scope.playbooks == ["Playbook 1"]
+    assert hunt.meta.expected_outcome == "Test Outcome"
+    assert len(hunt.hypotheses.hypotheses) == 2
+    assert hunt.hypotheses.hypotheses[0].Hypothesis_ID == "HYP-001"
+    assert hunt.hypotheses.hypotheses[0].Hypothesis == "Test Hypothesis 1"
+    assert hunt.hypotheses.hypotheses[1].Hypothesis_ID == "HYP-002"
+    assert hunt.hypotheses.hypotheses[1].Hypothesis == "Test Hypothesis 2"
diff --git a/tests/test_utils.py b/tests/test_utils.py
new file mode 100644
index 0000000..bb3bec9
--- /dev/null
+++ b/tests/test_utils.py
@@ -0,0 +1,37 @@
+# pylint: disable=missing-module-docstring, missing-function-docstring, redefined-outer-name
+
+import pytest
+from thsensai.utils import generate_report_name
+from thsensai.intel import Intel
+from thsensai.infer import LLMInference
+
+
+@pytest.fixture
+def intel_obj():
+    return Intel(
+        source="https://example.com",
+        chunk_size=3000,
+        chunk_overlap=100,
+        content_chunks=[],
+    )
+
+
+@pytest.fixture
+def llm():
+    return LLMInference(model="sample-model", num_predict=-1, num_ctx=4096)
+
+
+def test_generate_report_name(intel_obj, llm):
+    report_name = generate_report_name(
+        intel_obj, llm, report_type="ioc", extension="csv"
+    )
+    expected_name = "ioc_example-com_cs-3000_co-100_nc-4096_np--1.csv"
+    assert report_name == expected_name
+
+    report_name_no_type = generate_report_name(intel_obj, llm, extension="csv")
+    expected_name_no_type = "example-com_cs-3000_co-100_nc-4096_np--1.csv"
+    assert report_name_no_type == expected_name_no_type
+
+    report_name_no_extension = generate_report_name(intel_obj, llm, report_type="ioc")
+    expected_name_no_extension = "ioc_example-com_cs-3000_co-100_nc-4096_np--1"
+    assert report_name_no_extension == expected_name_no_extension
diff --git a/tests/test_web_scraper.py b/tests/test_web_scraper.py
new file mode 100644
index 0000000..6e2b117
--- /dev/null
+++ b/tests/test_web_scraper.py
@@ -0,0 +1,52 @@
+# pylint: disable=missing-module-docstring, missing-function-docstring, redefined-outer-name
+
+from unittest.mock import patch, MagicMock
+import pytest
+from thsensai.web_scraper import scrape_web
+
+
+@pytest.fixture
+def mock_loader():
+    with patch("thsensai.web_scraper.WebBaseLoader") as mock_loader:
+        yield mock_loader
+
+
+def test_scrape_web_success(mock_loader):
+    mock_doc = MagicMock()
+    mock_doc.page_content = "Sample content"
+    mock_loader.return_value.load.return_value = [mock_doc]
+
+    urls = ("https://example.com",)
+    css_selectors = ("content",)
+    docs = scrape_web(urls, css_selectors)
+
+    assert len(docs) == 1
+    assert docs[0].page_content == "Sample content"
+
+
+def test_scrape_web_empty_content(mock_loader):
+    mock_doc = MagicMock()
+    mock_doc.page_content = ""
+    mock_doc.metadata = {"source": "https://example.com"}
+    mock_loader.return_value.load.return_value = [mock_doc]
+
+    urls = ("https://example.com",)
+    css_selectors = ("content",)
+
+    with pytest.raises(
+        ValueError,
+        match="No content extracted from the following sources: https://example.com",
+    ):
+        scrape_web(urls, css_selectors)
+
+
+def test_scrape_web_error(mock_loader):
+    mock_loader.return_value.load.side_effect = Exception("Scraping error")
+
+    urls = ("https://example.com",)
+    css_selectors = ("content",)
+
+    with pytest.raises(
+        ValueError, match="An error occurred during web scraping: Scraping error"
+    ):
+        scrape_web(urls, css_selectors)
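
The suites above rely on the `mocker` fixture, which comes from the pytest-mock plugin rather than pytest itself, so pytest-mock must be installed alongside pytest for the tests to collect. Below is a minimal sketch of running just the files added by this patch programmatically; the helper module name is illustrative and assumes the files land under tests/ as in the diff (running `pytest -q tests/` from the command line is equivalent).

# run_new_tests.py -- illustrative helper, not part of the patch
import sys

import pytest  # pytest-mock must also be installed to provide the `mocker` fixture

if __name__ == "__main__":
    # Run only the three suites introduced by this patch and propagate pytest's exit code.
    sys.exit(
        pytest.main(
            [
                "-q",
                "tests/test_hunt.py",
                "tests/test_utils.py",
                "tests/test_web_scraper.py",
            ]
        )
    )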