Skip to content

Commit

Permalink
existing JSON report output file prevents run without --force
Browse files Browse the repository at this point in the history
  • Loading branch information
e3krisztian committed Apr 29, 2022
1 parent 0afe366 commit f5e4bce
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 25 deletions.
2 changes: 1 addition & 1 deletion tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ def test_archive_success(
process_num=expected_process_num,
handlers=BUILTIN_HANDLERS,
)
process_file_mock.assert_called_once_with(config, in_path)
process_file_mock.assert_called_once_with(config, in_path, None)
logger_config_mock.assert_called_once_with(expected_verbosity, tmp_path)


Expand Down
14 changes: 2 additions & 12 deletions unblob/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def __init__(
"--force",
is_flag=True,
show_default=True,
help="Force extraction removing previously extracted files.",
help="Force extraction even if outputs already exist (they are removed).",
)
@click.option(
"-d",
Expand Down Expand Up @@ -201,17 +201,7 @@ def cli(
)

logger.info("Start processing file", file=file)
results = process_file(config, file)

if report_file:
try:
report_file.write_text(results.to_json())
except IOError as e:
logger.error("Can not write JSON report", path=report_file, msg=str(e))
except Exception:
logger.exception("Can not write JSON report", path=report_file)
else:
logger.info("JSON report written", path=report_file)
results = process_file(config, file, report_file)

return results

Expand Down
83 changes: 71 additions & 12 deletions unblob/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import statistics
from operator import attrgetter
from pathlib import Path
from typing import Iterable, List
from typing import Iterable, List, Optional

import attr
import plotext as plt
Expand All @@ -30,6 +30,7 @@
from .report import (
ExtractDirectoryExistsReport,
FileMagicReport,
Report,
StatReport,
UnknownError,
)
Expand Down Expand Up @@ -79,19 +80,36 @@ def get_extract_dir_for(self, path: Path) -> Path:


@terminate_gracefully
def process_file(config: ExtractionConfig, path: Path) -> ProcessResult:
if not path.is_file():
raise ValueError("path is not a file", path)

def process_file(
config: ExtractionConfig, input_path: Path, report_file: Optional[Path] = None
) -> ProcessResult:
task = Task(
path=path,
path=input_path,
depth=0,
)

errors = check_extract_directory(task, config)
if not input_path.is_file():
raise ValueError("input_path is not a file", input_path)

errors = prepare_extract_dir(config, input_path)
if not prepare_report_file(config, report_file):
logger.error(
"File not processed, as report could not be written", file=input_path
)
return ProcessResult({})

if errors:
return ProcessResult(errors)
process_result = ProcessResult({task: TaskResult(task, errors)})
else:
process_result = _process_task(config, task)

if report_file:
write_json_report(report_file, process_result)

return process_result


def _process_task(config: ExtractionConfig, task: Task) -> ProcessResult:
processor = Processor(config)
aggregated_result = ProcessResult()

Expand All @@ -113,21 +131,62 @@ def process_result(pool, result):
return aggregated_result


def check_extract_directory(task: Task, config: ExtractionConfig):
errors = {}
def prepare_extract_dir(config: ExtractionConfig, input_file: Path) -> List[Report]:
errors = []

extract_dir = config.get_extract_dir_for(task.path)
extract_dir = config.get_extract_dir_for(input_file)
if extract_dir.exists():
if config.force_extract:
logger.info("Removing extract dir", path=extract_dir)
shutil.rmtree(extract_dir)
else:
report = ExtractDirectoryExistsReport(path=extract_dir)
logger.error("Extraction directory already exist", path=str(extract_dir))
errors[task] = TaskResult(task, [report])
errors.append(report)

return errors


def prepare_report_file(config: ExtractionConfig, report_file: Optional[Path]) -> bool:
    """Check up front whether the JSON report can be written.

    Intended to run before an expensive extraction, so that a report
    location that is already known to be unusable is detected early.

    :param config: extraction settings; only ``force_extract`` is read here.
    :param report_file: path the report will be written to, or ``None`` when
        no report was requested.
    :return: ``True`` when no report is requested or the write probe
        succeeded; ``False`` when the report file already exists without
        ``force_extract`` or the location could not be written.
    """
    if not report_file:
        # we will not write report at all
        return True

    try:
        # Keep the existence check inside the ``try``: ``Path.exists`` can
        # itself raise ``OSError`` (e.g. a permission error on a parent
        # directory), which must be reported like any other report problem.
        report_exists = report_file.exists()

        if report_exists and not config.force_extract:
            logger.error(
                "Report file exists and --force not specified.", path=report_file
            )
            return False

        if report_exists:
            logger.info("Removing existing report file", path=report_file)

        # Probe writability by creating (or truncating) the file, then remove
        # it again so that no empty report is left behind.
        report_file.write_text("")
        report_file.unlink()
    except OSError as e:
        logger.error(
            "There is a problem with the report file",
            path=report_file,
            msg=str(e),
        )
        return False

    return True


def write_json_report(report_file: Path, process_result: ProcessResult) -> None:
    """Write the JSON form of *process_result* to *report_file*.

    Errors derived from ``Exception`` are logged instead of being propagated,
    so a failing report write does not raise into the caller.

    :param report_file: destination path of the report.
    :param process_result: result object; its ``to_json()`` output is written.
    """
    try:
        # Pin the encoding: without it ``write_text`` uses the locale's
        # default, which may not be able to represent the report text.
        report_file.write_text(process_result.to_json(), encoding="utf-8")
    except OSError as e:
        logger.error("Can not write JSON report", path=report_file, msg=str(e))
    except Exception:
        # Anything else (e.g. serialization problems) is logged with traceback.
        logger.exception("Can not write JSON report", path=report_file)
    else:
        logger.info("JSON report written", path=report_file)


class Processor:
def __init__(self, config: ExtractionConfig):
self._config = config
Expand Down

0 comments on commit f5e4bce

Please sign in to comment.