Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Basic metadata #357

Merged
merged 27 commits into from
May 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
1c6c930
TaskResult: Add task
Apr 7, 2022
1fd98ad
TaskResult: convert to attrs
Apr 7, 2022
8445c87
TaskResult: new_tasks -> subtasks
Apr 7, 2022
2c8f798
Report: move `handler` attribute to the proper subclass
Apr 7, 2022
3b1676c
Report: make it possible to add non-error related reports
Apr 7, 2022
e8e5ba4
processing: store some metadata in reports
Apr 8, 2022
517bb77
processing: remove multiple input files support
Apr 8, 2022
0b831c9
processing: introduce `ProcessResult`
e3krisztian May 5, 2022
f1c2620
processing: process_files -> process_file
Apr 21, 2022
72178f4
remove unused function get_existing_extract_dirs
Apr 21, 2022
1f6c0b2
refactor/rename variables to better differentiate extraction and carv…
e3krisztian Apr 19, 2022
3c33785
Drop support for directory as input
e3krisztian Apr 21, 2022
5d2a082
processing: _process_one_file function is no longer needed
e3krisztian May 5, 2022
0abb454
write metadata result
Apr 25, 2022
829d750
CLI: optionally create JSON report file
e3krisztian Apr 25, 2022
ab97550
CLI: also get help with -h
e3krisztian Apr 25, 2022
df9aea8
Simplify command output capture
e3krisztian Apr 27, 2022
61a52e3
JSON output: handle bytes, and disregard JSON encoding failures
e3krisztian Apr 27, 2022
ebf1f28
Report recognized chunks
e3krisztian Apr 27, 2022
9c4b792
refactor: get_extract_dir_for_input -> ExtractionConfig.get_extract_d…
e3krisztian Apr 27, 2022
d13ee37
fix(tests): remove import conftest
e3krisztian Apr 28, 2022
60e3d6e
existing JSON report output file prevents run without --force
e3krisztian Apr 29, 2022
b17742b
Generate unique identifiers
e3krisztian Apr 29, 2022
1601c99
Add id-s to chunks, and chunk_id-s to tasks
e3krisztian Apr 29, 2022
6cff605
feat(metadata): report unknown chunks
e3krisztian May 2, 2022
ded0daf
github: run tests verbosely
e3krisztian May 6, 2022
5a7ada4
Add tests for the processing report
e3krisztian Apr 28, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/build-publish-image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ jobs:
run: docker run --rm ${{ env.DOCKER_IMAGE }} --show-external-dependencies

- name: Check unblob - run for a file with --verbose
run: docker run --rm -v "$(pwd)"/tests/integration/archive/zip/regular:/test ${{ env.DOCKER_IMAGE }} -v -e /tmp /test
run: docker run --rm -v "$(pwd)"/tests/integration/archive/zip/regular:/test ${{ env.DOCKER_IMAGE }} -v -e /tmp /test/__input__/apple.zip

- name: Build and push
if: ${{ github.event_name == 'push' && github.ref_name == 'main' }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,4 +66,4 @@ jobs:
uses: ./.github/actions/setup-git-lfs

- name: Run pytest
run: poetry run pytest
run: poetry run pytest -vvv
2 changes: 1 addition & 1 deletion default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ self // {

# romfs sample file contains some funky symlinks which get
# removed when source is copied to the nix store.
pytest -k "not test_all_handlers[filesystem.romfs]" --no-cov
pytest -vvv -k "not test_all_handlers[filesystem.romfs]" --no-cov
)
'';
});
Expand Down
18 changes: 9 additions & 9 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
from unblob.extractors import Command
from unblob.models import Handler, HexString
from pathlib import Path

import pytest

from unblob.models import Task, TaskResult
from unblob.testing import ( # noqa: F401 (module imported but unused)
configure_logging,
extraction_config,
)


class TestHandler(Handler):
NAME = "test_handler"
PATTERNS = [HexString("21 3C")]
EXTRACTOR = Command("testcommand", "for", "test", "handler")

def calculate_chunk(self, *args, **kwargs):
pass
@pytest.fixture
def task_result():
    """Provide a ``TaskResult`` built around a throwaway ``Task``.

    The task uses the path ``/nonexistent``, depth ``0`` and an empty
    ``chunk_id``.
    """
    return TaskResult(Task(path=Path("/nonexistent"), depth=0, chunk_id=""))
2 changes: 0 additions & 2 deletions tests/extractors/test_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ def test_command_execution_failure(tmpdir: Path):
except ExtractError as e:
assert list(e.reports) == [
ExtractCommandFailedReport(
handler=None,
command=mock.ANY,
stdout=b"stdout",
stderr=b"stderr",
Expand All @@ -70,7 +69,6 @@ def test_command_not_found(tmpdir: Path):
except ExtractError as e:
assert list(e.reports) == [
ExtractorDependencyNotFoundReport(
handler=None,
dependencies=["this-command-should-not-exist-in-any-system"],
)
]
41 changes: 20 additions & 21 deletions tests/test_cleanup.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
import pytest

from unblob.models import File, Handler, Regex, ValidChunk
from unblob.processing import ExtractionConfig, process_files
from unblob.processing import ExtractionConfig, process_file
from unblob.testing import check_result

_ZIP_CONTENT = b"good file"
# replacing _ZIP_CONTENT with _DAMAGED_ZIP_CONTENT will result in CRC error at unpacking time
Expand All @@ -33,10 +34,8 @@ def wrapzip(filename: str, content: bytes) -> bytes:


@pytest.fixture()
def input_dir(tmp_path: Path):
input_dir = tmp_path / "input"
input_dir.mkdir()
return input_dir
def input_file(tmp_path: Path):
    """Return the path named ``input_file`` directly under ``tmp_path``.

    Only the path is computed here; nothing is created on disk.
    """
    target = tmp_path / "input_file"
    return target


@pytest.fixture()
Expand All @@ -46,41 +45,41 @@ def output_dir(tmp_path):
return output_dir


def test_remove_extracted_chunks(input_dir: Path, output_dir: Path):
(input_dir / "blob").write_bytes(ZIP_BYTES)
def test_remove_extracted_chunks(input_file: Path, output_dir: Path):
input_file.write_bytes(ZIP_BYTES)
config = ExtractionConfig(
extract_root=output_dir,
entropy_depth=0,
)

all_reports = process_files(config, input_dir)
all_reports = process_file(config, input_file)
assert list(output_dir.glob("**/*.zip")) == []
assert all_reports == [], f"Unexpected error reports: {all_reports}"
check_result(all_reports)


def test_keep_all_problematic_chunks(input_dir: Path, output_dir: Path):
(input_dir / "blob").write_bytes(DAMAGED_ZIP_BYTES)
def test_keep_all_problematic_chunks(input_file: Path, output_dir: Path):
input_file.write_bytes(DAMAGED_ZIP_BYTES)
config = ExtractionConfig(
extract_root=output_dir,
entropy_depth=0,
)

all_reports = process_files(config, input_dir)
all_reports = process_file(config, input_file)
# damaged zip file should not be removed
assert all_reports != [], "Unexpectedly no errors found!"
assert all_reports.errors != [], "Unexpectedly no errors found!"
assert list(output_dir.glob("**/*.zip"))


def test_keep_all_unknown_chunks(input_dir: Path, output_dir: Path):
(input_dir / "blob").write_bytes(b"unknown1" + ZIP_BYTES + b"unknown2")
def test_keep_all_unknown_chunks(input_file: Path, output_dir: Path):
input_file.write_bytes(b"unknown1" + ZIP_BYTES + b"unknown2")
config = ExtractionConfig(
extract_root=output_dir,
entropy_depth=0,
)

all_reports = process_files(config, input_dir)
all_reports = process_file(config, input_file)
assert list(output_dir.glob("**/*.unknown"))
assert all_reports == [], f"Unexpected error reports: {all_reports}"
check_result(all_reports)


class _HandlerWithNullExtractor(Handler):
Expand All @@ -92,13 +91,13 @@ def calculate_chunk(self, file: File, start_offset: int) -> ValidChunk:
return ValidChunk(start_offset=start_offset, end_offset=start_offset + 1)


def test_keep_chunks_with_null_extractor(input_dir: Path, output_dir: Path):
(input_dir / "blob").write_text("some text")
def test_keep_chunks_with_null_extractor(input_file: Path, output_dir: Path):
input_file.write_bytes(b"some text")
config = ExtractionConfig(
extract_root=output_dir,
entropy_depth=0,
handlers=(_HandlerWithNullExtractor,),
)
all_reports = process_files(config, input_dir)
all_reports = process_file(config, input_file)
assert list(output_dir.glob("**/*.null"))
assert all_reports == [], f"Unexpected error reports: {all_reports}"
check_result(all_reports)
43 changes: 27 additions & 16 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,23 @@

import pytest
from click.testing import CliRunner
from conftest import TestHandler

import unblob.cli
from unblob.extractors import Command
from unblob.handlers import BUILTIN_HANDLERS
from unblob.models import Handler, HexString
from unblob.processing import DEFAULT_DEPTH, DEFAULT_PROCESS_NUM, ExtractionConfig


class TestHandler(Handler):
    """Handler subclass with fixed attributes and a no-op chunk calculation.

    It declares a name, a single hex-string pattern and a command-based
    extractor; ``calculate_chunk`` accepts anything and yields nothing.
    """

    NAME = "test_handler"
    PATTERNS = [HexString("21 3C")]
    EXTRACTOR = Command("testcommand", "for", "test", "handler")

    def calculate_chunk(self, *args, **kwargs):
        """Accept any arguments and return ``None``."""
        return None


class ExistingCommandHandler(TestHandler):
EXTRACTOR = Command("sh", "something")

Expand Down Expand Up @@ -86,7 +95,7 @@ def test_help(params):
result = runner.invoke(unblob.cli.cli, params)
assert result.exit_code == 0
# NOTE: In practice, it writes "Usage: unblob ...", this is done in the `cli.main` with `click.make_context`
assert result.output.startswith("Usage: cli [OPTIONS] FILES...")
assert result.output.startswith("Usage: cli [OPTIONS] FILE")


@pytest.mark.parametrize(
Expand Down Expand Up @@ -118,19 +127,19 @@ def test_without_file(params: List[str]):
runner = CliRunner()
result = runner.invoke(unblob.cli.cli, params)
assert result.exit_code == 2
assert "Missing argument 'FILES...'" in result.output
assert "Missing argument 'FILE'" in result.output


def test_non_existing_file(tmp_path: Path):
runner = CliRunner()
path = Path("non/existing/path/54")
result = runner.invoke(unblob.cli.cli, ["--extract-dir", str(tmp_path), str(path)])
assert result.exit_code == 2
assert "Invalid value for 'FILES...'" in result.output
assert f"Path '{str(path)}' does not exist" in result.output
assert "Invalid value for 'FILE'" in result.output
assert f"File '{str(path)}' does not exist" in result.output


def test_empty_dir_as_file(tmp_path: Path):
def test_dir_for_file(tmp_path: Path):
runner = CliRunner()
out_path = tmp_path.joinpath("out")
out_path.mkdir()
Expand All @@ -139,7 +148,7 @@ def test_empty_dir_as_file(tmp_path: Path):
result = runner.invoke(
unblob.cli.cli, ["--extract-dir", str(out_path), str(in_path)]
)
assert result.exit_code == 0
assert result.exit_code != 0


@pytest.mark.parametrize(
Expand Down Expand Up @@ -172,13 +181,14 @@ def test_archive_success(
/ "archive"
/ "zip"
/ "regular"
/ "__input__/"
/ "__input__"
/ "apple.zip"
)
process_files_mock = mock.MagicMock()
process_file_mock = mock.MagicMock()
logger_config_mock = mock.MagicMock()
new_params = params + ["--extract-dir", str(tmp_path), str(in_path)]
with mock.patch.object(
unblob.cli, "process_files", process_files_mock
unblob.cli, "process_file", process_file_mock
), mock.patch.object(unblob.cli, "configure_logger", logger_config_mock):
result = runner.invoke(unblob.cli.cli, new_params)
assert result.exit_code == 0
Expand All @@ -192,7 +202,7 @@ def test_archive_success(
process_num=expected_process_num,
handlers=BUILTIN_HANDLERS,
)
process_files_mock.assert_called_once_with(config, in_path)
process_file_mock.assert_called_once_with(config, in_path, None)
logger_config_mock.assert_called_once_with(expected_verbosity, tmp_path)


Expand All @@ -214,17 +224,18 @@ def test_keep_extracted_chunks(
/ "archive"
/ "zip"
/ "regular"
/ "__input__/"
/ "__input__"
/ "apple.zip"
)
params = args + ["--extract-dir", str(tmp_path), str(in_path)]

process_files_mock = mock.MagicMock()
with mock.patch.object(unblob.cli, "process_files", process_files_mock):
process_file_mock = mock.MagicMock()
with mock.patch.object(unblob.cli, "process_file", process_file_mock):
result = runner.invoke(unblob.cli.cli, params)

assert result.exit_code == 0
process_files_mock.assert_called_once()
process_file_mock.assert_called_once()
assert (
process_files_mock.call_args.args[0].keep_extracted_chunks
process_file_mock.call_args.args[0].keep_extracted_chunks
== keep_extracted_chunks
), fail_message
Loading