existing JSON report output file prevents run without --force

onekey-sec · Apr 29, 2022 · f5e4bce · f5e4bce
1 parent 0afe366
commit f5e4bce
Show file tree

Hide file tree

Showing 3 changed files with 74 additions and 25 deletions.
diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -202,7 +202,7 @@ def test_archive_success(
         process_num=expected_process_num,
         handlers=BUILTIN_HANDLERS,
     )
-    process_file_mock.assert_called_once_with(config, in_path)
+    process_file_mock.assert_called_once_with(config, in_path, None)
     logger_config_mock.assert_called_once_with(expected_verbosity, tmp_path)
 
 

diff --git a/unblob/cli.py b/unblob/cli.py
@@ -97,7 +97,7 @@ def __init__(
     "--force",
     is_flag=True,
     show_default=True,
-    help="Force extraction removing previously extracted files.",
+    help="Force extraction even if outputs already exist (they are removed).",
 )
 @click.option(
     "-d",
@@ -201,17 +201,7 @@ def cli(
     )
 
     logger.info("Start processing file", file=file)
-    results = process_file(config, file)
-
-    if report_file:
-        try:
-            report_file.write_text(results.to_json())
-        except IOError as e:
-            logger.error("Can not write JSON report", path=report_file, msg=str(e))
-        except Exception:
-            logger.exception("Can not write JSON report", path=report_file)
-        else:
-            logger.info("JSON report written", path=report_file)
+    results = process_file(config, file, report_file)
 
     return results
 

diff --git a/unblob/processing.py b/unblob/processing.py
@@ -3,7 +3,7 @@
 import statistics
 from operator import attrgetter
 from pathlib import Path
-from typing import Iterable, List
+from typing import Iterable, List, Optional
 
 import attr
 import plotext as plt
@@ -30,6 +30,7 @@
 from .report import (
     ExtractDirectoryExistsReport,
     FileMagicReport,
+    Report,
     StatReport,
     UnknownError,
 )
@@ -79,19 +80,36 @@ def get_extract_dir_for(self, path: Path) -> Path:
 
 
 @terminate_gracefully
-def process_file(config: ExtractionConfig, path: Path) -> ProcessResult:
-    if not path.is_file():
-        raise ValueError("path is not a file", path)
-
+def process_file(
+    config: ExtractionConfig, input_path: Path, report_file: Optional[Path] = None
+) -> ProcessResult:
     task = Task(
-        path=path,
+        path=input_path,
         depth=0,
     )
 
-    errors = check_extract_directory(task, config)
+    if not input_path.is_file():
+        raise ValueError("input_path is not a file", input_path)
+
+    errors = prepare_extract_dir(config, input_path)
+    if not prepare_report_file(config, report_file):
+        logger.error(
+            "File not processed, as report could not be written", file=input_path
+        )
+        return ProcessResult({})
+
     if errors:
-        return ProcessResult(errors)
+        process_result = ProcessResult({task: TaskResult(task, errors)})
+    else:
+        process_result = _process_task(config, task)
 
+    if report_file:
+        write_json_report(report_file, process_result)
+
+    return process_result
+
+
+def _process_task(config: ExtractionConfig, task: Task) -> ProcessResult:
     processor = Processor(config)
     aggregated_result = ProcessResult()
 
@@ -113,21 +131,62 @@ def process_result(pool, result):
     return aggregated_result
 
 
-def check_extract_directory(task: Task, config: ExtractionConfig):
-    errors = {}
+def prepare_extract_dir(config: ExtractionConfig, input_file: Path) -> List[Report]:
+    errors = []
 
-    extract_dir = config.get_extract_dir_for(task.path)
+    extract_dir = config.get_extract_dir_for(input_file)
     if extract_dir.exists():
         if config.force_extract:
+            logger.info("Removing extract dir", path=extract_dir)
             shutil.rmtree(extract_dir)
         else:
             report = ExtractDirectoryExistsReport(path=extract_dir)
             logger.error("Extraction directory already exist", path=str(extract_dir))
-            errors[task] = TaskResult(task, [report])
+            errors.append(report)
 
     return errors
 
 
+def prepare_report_file(config: ExtractionConfig, report_file: Optional[Path]) -> bool:
+    """An in advance preparation to prevent report writing failing after an expensive extraction.
+
+    Returns True if report writing is not known in advance to fail.
+    """
+    if not report_file:
+        # we will not write report at all
+        return True
+
+    if report_file.exists() and not config.force_extract:
+        logger.error("Report file exists and --force not specified.", path=report_file)
+        return False
+
+    try:
+        if report_file.exists():
+            logger.info("Removing existing report file", path=report_file)
+        report_file.write_text("")
+        report_file.unlink()
+    except IOError as e:
+        logger.error(
+            "There is a problem with the report file",
+            path=report_file,
+            msg=str(e),
+        )
+        return False
+
+    return True
+
+
+def write_json_report(report_file: Path, process_result: ProcessResult):
+    try:
+        report_file.write_text(process_result.to_json())
+    except IOError as e:
+        logger.error("Can not write JSON report", path=report_file, msg=str(e))
+    except Exception:
+        logger.exception("Can not write JSON report", path=report_file)
+    else:
+        logger.info("JSON report written", path=report_file)
+
+
 class Processor:
     def __init__(self, config: ExtractionConfig):
         self._config = config