def get_app_cache_extract_filter(
    count_threshold: int = 10_000,
    size_threshold: int = 1_000_000_000,
):  # -> Callable[[TarInfo, str], TarInfo | None]
    """Build a stateful ``filter`` callable for ``TarFile.extractall``.

    The returned callable keeps running totals across every archive member
    it is shown and aborts the extraction once either limit is crossed.
    Each member is then delegated to ``tarfile.data_filter`` when the
    running interpreter provides it.

    Args:
        count_threshold: Maximum number of archive members allowed.
        size_threshold: Maximum cumulative ``TarInfo.size`` (in bytes)
            allowed.

    Returns:
        A function ``(member, dest_path)`` that returns the (possibly
        adjusted) member to extract, or ``None`` to skip the member.
        It raises ``RuntimeError`` when a threshold is exceeded; any
        error from ``data_filter`` other than ``AbsoluteLinkError`` is
        propagated unchanged.
    """
    # Running totals shared by every call of the returned closure.
    state = dict(count=0, size=0)

    # Feature-detect rather than compare version numbers: ``data_filter`` and
    # ``AbsoluteLinkError`` were added in Python 3.12 and only backported to
    # later security releases of 3.8-3.11, so "minor > 7" is not a reliable
    # indicator that the names can be imported.
    try:
        from tarfile import AbsoluteLinkError, data_filter
    except ImportError:

        class AbsoluteLinkError(Exception):
            """Placeholder; never raised by the pass-through filter below."""

        def data_filter(m: TarInfo, _: str) -> TarInfo:
            # Pass-through used when the interpreter has no extraction filters.
            return m

    def filter_function(member: TarInfo, dest_path: str) -> Optional[TarInfo]:
        # Members are counted before filtering, so skipped members still
        # contribute to both totals.
        state["count"] += 1
        state["size"] += member.size

        if state["count"] > count_threshold:
            raise RuntimeError(f"Number of entries exceeds threshold ({state['count']})")

        if state["size"] > size_threshold:
            raise RuntimeError(f"Extracted size exceeds threshold ({state['size']})")

        try:
            return data_filter(member, dest_path)
        except AbsoluteLinkError:
            # Links created by `frappe` after extraction
            return None

    return filter_function