diff --git a/.github/workflows/on-push.yml b/.github/workflows/on-push.yml index 9a782d0..8f9e5c5 100644 --- a/.github/workflows/on-push.yml +++ b/.github/workflows/on-push.yml @@ -1,8 +1,32 @@ name: Linting & Test on: - push + push + +env: + PYTHON_VERSION: 3.9.18 + POETRY_VERSION: 1.6.1 jobs: - call-linting-from-utils: - uses: aarhusstadsarkiv/acautils/.github/workflows/linting_ruff-mypy.yml@main \ No newline at end of file + linting: + uses: aarhusstadsarkiv/acautils/.github/workflows/linting_ruff-mypy.yml@main + pytest: + name: pytest + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v4 + with: + python-version: ${{ env.PYTHON_VERSION }} + - uses: abatilo/actions-poetry@v2 + with: + poetry-version: ${{ env.POETRY_VERSION }} + - uses: actions/setup-go@v4 + - run: poetry install + - run: go install github.com/richardlehane/siegfried/cmd/sf@latest + - name: Unit test + env: + GOPATH: /home/runner/go + run: | + poetry run coverage run -m pytest + poetry run coverage report -m --fail-under=80 --skip-empty --skip-covered diff --git a/.gitignore b/.gitignore index 846de60..2398bca 100644 --- a/.gitignore +++ b/.gitignore @@ -162,3 +162,5 @@ cython_debug/ /*.db .ruff_cache/ + +/tests/tmp/ diff --git a/acacore/__version__.py b/acacore/__version__.py index 493f741..260c070 100644 --- a/acacore/__version__.py +++ b/acacore/__version__.py @@ -1 +1 @@ -__version__ = "0.3.0" +__version__ = "0.3.1" diff --git a/acacore/database/column.py b/acacore/database/column.py index c133906..f22aea8 100644 --- a/acacore/database/column.py +++ b/acacore/database/column.py @@ -56,7 +56,9 @@ def _schema_to_column(name: str, schema: dict, defs: Optional[dict[str, dict]] = sql_type = _sql_schema_types.get(schema_type, None) type_name: str = schema.get("format", schema_type) - if type_name in _sql_schema_type_converters: + if schema.get("enum"): + to_entry, from_entry = lambda e: e.value, str + elif type_name in _sql_schema_type_converters: to_entry, from_entry = _sql_schema_type_converters[type_name] else: raise TypeError(f"Cannot recognize type from schema {schema!r}") diff --git a/acacore/database/files_db.py b/acacore/database/files_db.py index b58072d..254d9bc 100644 --- a/acacore/database/files_db.py +++ b/acacore/database/files_db.py @@ -126,6 +126,7 @@ def init(self): self.files.create(True) self.metadata.create(True) self.converted_files.create(True) + self.history.create(True) self.not_converted.create(True) self.identification_warnings.create(True) self.signature_count.create(True) diff --git a/acacore/models/base.py b/acacore/models/base.py index e4562c4..a81a789 100644 --- a/acacore/models/base.py +++ b/acacore/models/base.py @@ -1,4 +1,3 @@ -import json from pathlib import Path from typing import Any @@ -12,4 +11,4 @@ def dump(self, to_file: Path) -> None: to_file.write_text(super().model_dump_json(), encoding="utf-8") def encode(self) -> Any: # noqa: ANN401 - return json.loads(super().model_dump_json()) + return super().model_dump(mode="json") diff --git a/acacore/models/file.py b/acacore/models/file.py index 6e88225..dd159db 100644 --- a/acacore/models/file.py +++ b/acacore/models/file.py @@ -20,10 +20,10 @@ class Action(Enum): - CONVERT = "Convertool: To convert." - REPLACE = "Convertool: Replace with template. File is not preservable." - MANUAL = "Manual: File should be converted manually. [info about the manual conversion from reference_files]." - RENAME = "Renamer: File has extension mismatch. Should be renamed" + CONVERT = "CONVERT" # To convert. + REPLACE = "REPLACE" # Replace with template. File is not preservable. + MANUAL = "MANUAL" # File should be converted manually. [info about the manual conversion from reference_files]. + RENAME = "RENAME" # File has extension mismatch. Should be renamed # ----------------------------------------------------------------------------- @@ -150,14 +150,11 @@ def size_fmt(self) -> str: str File size in human-readable format. """ - return str(size_fmt(self.get_absolute_path().stat().st_size)) + return size_fmt(self.get_absolute_path().stat().st_size) def get_bof_and_eof(self) -> Tuple[str, str]: """Get the first and last kilobyte of the file. - Args: - file (Path): Path to file - Returns: Tuple[str,str]: BOF and then EOF as `str`. """ @@ -172,7 +169,7 @@ def get_bof_and_eof(self) -> Tuple[str, str]: # File too small :) file_bytes.seek(-file_bytes.tell(), 2) eof = file_bytes.read(1024).hex() - return (bof, eof) + return bof, eof class ArchiveFile(Identification, File): diff --git a/acacore/utils/functions.py b/acacore/utils/functions.py index 6aa828e..020f8b9 100644 --- a/acacore/utils/functions.py +++ b/acacore/utils/functions.py @@ -1,3 +1,4 @@ +from pathlib import Path from typing import Callable from typing import Optional from typing import TypeVar @@ -18,3 +19,14 @@ def or_none(func: Callable[[T], R]) -> Callable[[T], Optional[R]]: object: A function of type (T) -> R | None. """ return lambda x: None if x is None else func(x) + + +def rm_tree(path: Path): + if not path.is_dir(): + path.unlink(missing_ok=True) + return + + for item in path.iterdir(): + rm_tree(item) if item.is_dir() else item.unlink(missing_ok=True) + + path.rmdir() diff --git a/acacore/utils/io.py b/acacore/utils/io.py index 75904ad..7eb0f78 100644 --- a/acacore/utils/io.py +++ b/acacore/utils/io.py @@ -9,13 +9,12 @@ def size_fmt(size: float) -> str: Parameters ---------- - size: float + size: The file size in bytes. Returns: ------- - str - Human readable string representing size in binary multiples. + Human readable string representing size in binary multiples. """ unit: int = int(log2(size) // 10) unit = unit if unit < len(binary_units) else len(binary_units) - 1 diff --git a/poetry.lock b/poetry.lock index dd49ea2..e802b13 100644 --- a/poetry.lock +++ b/poetry.lock @@ -82,6 +82,95 @@ files = [ {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +[[package]] +name = "coverage" +version = "7.3.2" +description = "Code coverage measurement for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "coverage-7.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d872145f3a3231a5f20fd48500274d7df222e291d90baa2026cc5152b7ce86bf"}, + {file = "coverage-7.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:310b3bb9c91ea66d59c53fa4989f57d2436e08f18fb2f421a1b0b6b8cc7fffda"}, + {file = "coverage-7.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f47d39359e2c3779c5331fc740cf4bce6d9d680a7b4b4ead97056a0ae07cb49a"}, + {file = "coverage-7.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aa72dbaf2c2068404b9870d93436e6d23addd8bbe9295f49cbca83f6e278179c"}, + {file = "coverage-7.3.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:beaa5c1b4777f03fc63dfd2a6bd820f73f036bfb10e925fce067b00a340d0f3f"}, + {file = "coverage-7.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:dbc1b46b92186cc8074fee9d9fbb97a9dd06c6cbbef391c2f59d80eabdf0faa6"}, + {file = "coverage-7.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:315a989e861031334d7bee1f9113c8770472db2ac484e5b8c3173428360a9148"}, + {file = "coverage-7.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d1bc430677773397f64a5c88cb522ea43175ff16f8bfcc89d467d974cb2274f9"}, + {file = "coverage-7.3.2-cp310-cp310-win32.whl", hash = "sha256:a889ae02f43aa45032afe364c8ae84ad3c54828c2faa44f3bfcafecb5c96b02f"}, + {file = "coverage-7.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:c0ba320de3fb8c6ec16e0be17ee1d3d69adcda99406c43c0409cb5c41788a611"}, + {file = "coverage-7.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ac8c802fa29843a72d32ec56d0ca792ad15a302b28ca6203389afe21f8fa062c"}, + {file = "coverage-7.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:89a937174104339e3a3ffcf9f446c00e3a806c28b1841c63edb2b369310fd074"}, + {file = "coverage-7.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e267e9e2b574a176ddb983399dec325a80dbe161f1a32715c780b5d14b5f583a"}, + {file = "coverage-7.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2443cbda35df0d35dcfb9bf8f3c02c57c1d6111169e3c85fc1fcc05e0c9f39a3"}, + {file = "coverage-7.3.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4175e10cc8dda0265653e8714b3174430b07c1dca8957f4966cbd6c2b1b8065a"}, + {file = "coverage-7.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0cbf38419fb1a347aaf63481c00f0bdc86889d9fbf3f25109cf96c26b403fda1"}, + {file = "coverage-7.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:5c913b556a116b8d5f6ef834038ba983834d887d82187c8f73dec21049abd65c"}, + {file = "coverage-7.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1981f785239e4e39e6444c63a98da3a1db8e971cb9ceb50a945ba6296b43f312"}, + {file = "coverage-7.3.2-cp311-cp311-win32.whl", hash = "sha256:43668cabd5ca8258f5954f27a3aaf78757e6acf13c17604d89648ecc0cc66640"}, + {file = "coverage-7.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:e10c39c0452bf6e694511c901426d6b5ac005acc0f78ff265dbe36bf81f808a2"}, + {file = "coverage-7.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:4cbae1051ab791debecc4a5dcc4a1ff45fc27b91b9aee165c8a27514dd160836"}, + {file = "coverage-7.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:12d15ab5833a997716d76f2ac1e4b4d536814fc213c85ca72756c19e5a6b3d63"}, + {file = "coverage-7.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c7bba973ebee5e56fe9251300c00f1579652587a9f4a5ed8404b15a0471f216"}, + {file = "coverage-7.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fe494faa90ce6381770746077243231e0b83ff3f17069d748f645617cefe19d4"}, + {file = "coverage-7.3.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6e9589bd04d0461a417562649522575d8752904d35c12907d8c9dfeba588faf"}, + {file = "coverage-7.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d51ac2a26f71da1b57f2dc81d0e108b6ab177e7d30e774db90675467c847bbdf"}, + {file = "coverage-7.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:99b89d9f76070237975b315b3d5f4d6956ae354a4c92ac2388a5695516e47c84"}, + {file = "coverage-7.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:fa28e909776dc69efb6ed975a63691bc8172b64ff357e663a1bb06ff3c9b589a"}, + {file = "coverage-7.3.2-cp312-cp312-win32.whl", hash = "sha256:289fe43bf45a575e3ab10b26d7b6f2ddb9ee2dba447499f5401cfb5ecb8196bb"}, + {file = "coverage-7.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:7dbc3ed60e8659bc59b6b304b43ff9c3ed858da2839c78b804973f613d3e92ed"}, + {file = "coverage-7.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f94b734214ea6a36fe16e96a70d941af80ff3bfd716c141300d95ebc85339738"}, + {file = "coverage-7.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:af3d828d2c1cbae52d34bdbb22fcd94d1ce715d95f1a012354a75e5913f1bda2"}, + {file = "coverage-7.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:630b13e3036e13c7adc480ca42fa7afc2a5d938081d28e20903cf7fd687872e2"}, + {file = "coverage-7.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c9eacf273e885b02a0273bb3a2170f30e2d53a6d53b72dbe02d6701b5296101c"}, + {file = "coverage-7.3.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d8f17966e861ff97305e0801134e69db33b143bbfb36436efb9cfff6ec7b2fd9"}, + {file = "coverage-7.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:b4275802d16882cf9c8b3d057a0839acb07ee9379fa2749eca54efbce1535b82"}, + {file = "coverage-7.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:72c0cfa5250f483181e677ebc97133ea1ab3eb68645e494775deb6a7f6f83901"}, + {file = "coverage-7.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:cb536f0dcd14149425996821a168f6e269d7dcd2c273a8bff8201e79f5104e76"}, + {file = "coverage-7.3.2-cp38-cp38-win32.whl", hash = "sha256:307adb8bd3abe389a471e649038a71b4eb13bfd6b7dd9a129fa856f5c695cf92"}, + {file = "coverage-7.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:88ed2c30a49ea81ea3b7f172e0269c182a44c236eb394718f976239892c0a27a"}, + {file = "coverage-7.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b631c92dfe601adf8f5ebc7fc13ced6bb6e9609b19d9a8cd59fa47c4186ad1ce"}, + {file = "coverage-7.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d3d9df4051c4a7d13036524b66ecf7a7537d14c18a384043f30a303b146164e9"}, + {file = "coverage-7.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f7363d3b6a1119ef05015959ca24a9afc0ea8a02c687fe7e2d557705375c01f"}, + {file = "coverage-7.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2f11cc3c967a09d3695d2a6f03fb3e6236622b93be7a4b5dc09166a861be6d25"}, + {file = "coverage-7.3.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:149de1d2401ae4655c436a3dced6dd153f4c3309f599c3d4bd97ab172eaf02d9"}, + {file = "coverage-7.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:3a4006916aa6fee7cd38db3bfc95aa9c54ebb4ffbfc47c677c8bba949ceba0a6"}, + {file = "coverage-7.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9028a3871280110d6e1aa2df1afd5ef003bab5fb1ef421d6dc748ae1c8ef2ebc"}, + {file = "coverage-7.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9f805d62aec8eb92bab5b61c0f07329275b6f41c97d80e847b03eb894f38d083"}, + {file = "coverage-7.3.2-cp39-cp39-win32.whl", hash = "sha256:d1c88ec1a7ff4ebca0219f5b1ef863451d828cccf889c173e1253aa84b1e07ce"}, + {file = "coverage-7.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b4767da59464bb593c07afceaddea61b154136300881844768037fd5e859353f"}, + {file = "coverage-7.3.2-pp38.pp39.pp310-none-any.whl", hash = "sha256:ae97af89f0fbf373400970c0a21eef5aa941ffeed90aee43650b81f7d7f47637"}, + {file = "coverage-7.3.2.tar.gz", hash = "sha256:be32ad29341b0170e795ca590e1c07e81fc061cb5b10c74ce7203491484404ef"}, +] + +[package.extras] +toml = ["tomli"] + +[[package]] +name = "exceptiongroup" +version = "1.1.3" +description = "Backport of PEP 654 (exception groups)" +optional = false +python-versions = ">=3.7" +files = [ + {file = "exceptiongroup-1.1.3-py3-none-any.whl", hash = "sha256:343280667a4585d195ca1cf9cef84a4e178c4b6cf2274caef9859782b567d5e3"}, + {file = "exceptiongroup-1.1.3.tar.gz", hash = "sha256:097acd85d473d75af5bb98e41b61ff7fe35efe6675e4f9370ec6ec5126d160e9"}, +] + +[package.extras] +test = ["pytest (>=6)"] + +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.7" +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + [[package]] name = "mypy-extensions" version = "1.0.0" @@ -130,6 +219,21 @@ files = [ docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.1)", "sphinx-autodoc-typehints (>=1.24)"] test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "pytest-mock (>=3.11.1)"] +[[package]] +name = "pluggy" +version = "1.3.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pluggy-1.3.0-py3-none-any.whl", hash = "sha256:d89c696a773f8bd377d18e5ecda92b7a3793cbe66c87060a6fb58c7b6e1061f7"}, + {file = "pluggy-1.3.0.tar.gz", hash = "sha256:cf61ae8f126ac6f7c451172cf30e3e43d3ca77615509771b3a984a0730651e12"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + [[package]] name = "pydantic" version = "2.4.2" @@ -267,6 +371,28 @@ files = [ [package.dependencies] typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" +[[package]] +name = "pytest" +version = "7.4.3" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pytest-7.4.3-py3-none-any.whl", hash = "sha256:0d009c083ea859a71b76adf7c1d502e4bc170b80a8ef002da5806527b9591fac"}, + {file = "pytest-7.4.3.tar.gz", hash = "sha256:d989d136982de4e3b29dabcc838ad581c64e8ed52c11fbe86ddebd9da0818cd5"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=0.12,<2.0" +tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} + +[package.extras] +testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + [[package]] name = "ruff" version = "0.0.286" @@ -338,4 +464,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "0ea02aad62004aa8056fd027dab074af00d2b20bbf73c72cc64a19d10aafbb81" +content-hash = "96e596ec609dcd7cd6925e4a9de8a841634930cf0dae33defb9e97e31d54b3f7" diff --git a/pyproject.toml b/pyproject.toml index e754f2c..7947a23 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "acacore" -version = "0.3.0" +version = "0.3.1" description = "" authors = ["Matteo Campinoti "] license = "GPL-3.0" @@ -10,11 +10,13 @@ readme = "README.md" python = "^3.9" pydantic = "^2.4.2" tqdm = "^4.66.1" +coverage = "^7.3.2" [tool.poetry.group.dev.dependencies] ruff = "^0.0.286" black = "^23.7.0" +pytest = "^7.4.3" [build-system] requires = ["poetry-core"] diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..39c433b --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,32 @@ +from json import loads +from pathlib import Path + +import pytest + +from acacore.utils.functions import rm_tree + + +@pytest.fixture(scope="session") +def test_folder() -> Path: + return Path(__file__).parent + + +@pytest.fixture(scope="session") +def temp_folder(test_folder: Path) -> Path: + return test_folder / "tmp" + + +@pytest.fixture(scope="session") +def test_files(test_folder: Path) -> Path: + return test_folder / "files" + + +@pytest.fixture(scope="session") +def test_files_data(test_files: Path) -> dict[str, dict]: + return loads(test_files.joinpath("files.json").read_text()) + + +@pytest.fixture(autouse=True, scope="session") +def _pre_test(temp_folder: Path): + rm_tree(temp_folder) + temp_folder.mkdir(parents=True, exist_ok=True) diff --git a/tests/file_action_enum_tests.py b/tests/file_action_enum_tests.py deleted file mode 100644 index d3ac8e0..0000000 --- a/tests/file_action_enum_tests.py +++ /dev/null @@ -1,51 +0,0 @@ -import unittest -from uuid import uuid4 - -from acacore.models.file import Action -from acacore.models.file import File - - -class TestFileActionEnum(unittest.TestCase): - def setUp(self): - self.file = File( - id=1, - uuid=uuid4(), - checksum="abc123", - puid="fmt/18", - relative_path="test.txt", - is_binary=False, - file_size_in_bytes=1024, - signature="test", - ) - - def test_action_enum_values(self): - assert Action.CONVERT.value == "Convertool: To convert." - assert Action.REPLACE.value == "Convertool: Replace with template. File is not preservable." - assert Action.MANUAL.value == ( - "Manual: File should be converted manually. [info about the manual conversion " "from reference_files]." - ) - assert Action.RENAME.value == "Renamer: File has extension mismatch. Should be renamed" - - def test_file_action(self): - self.file.action = Action.CONVERT - assert self.file.action == Action.CONVERT - assert self.file.action.value == "Convertool: To convert." - - self.file.action = Action.REPLACE - assert self.file.action == Action.REPLACE - assert self.file.action.value == "Convertool: Replace with template. File is not preservable." - - self.file.action = Action.MANUAL - assert self.file.action == Action.MANUAL - assert ( - self.file.action.value - == "Manual: File should be converted manually. [info about the manual conversion from reference_files]." - ) - - self.file.action = Action.RENAME - assert self.file.action == Action.RENAME - assert self.file.action.value == "Renamer: File has extension mismatch. Should be renamed" - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/files/files.json b/tests/files/files.json new file mode 100644 index 0000000..051d01b --- /dev/null +++ b/tests/files/files.json @@ -0,0 +1,98 @@ +{ + "json.json": { + "filesize": 355, + "errors": "", + "matches": { + "ns": "pronom", + "id": "fmt/817", + "format": "JSON Data Interchange Format", + "version": "", + "mime": "application/json", + "match_class": "", + "basis": ["extension match json"], + "warning": ["match on extension only"], + "URI": null, + "permalink": null + } + }, + "lwp.lwp": { + "filesize": 19172, + "errors": "", + "matches": { + "ns": "pronom", + "id": "x-fmt/340", + "format": "Lotus WordPro Document", + "version": "96", + "mime": "application/lwp", + "match_class": "Word Processor", + "basis": ["extension match lwp", "byte match at 0, 32"], + "warning": [], + "URI": null, + "permalink": null + } + }, + "mp3.mp3": { + "filesize": 109374, + "errors": "", + "matches": { + "ns": "pronom", + "id": "fmt/134", + "format": "MPEG 1/2 Audio Layer 3", + "version": "", + "mime": "audio/mpeg", + "match_class": "Audio", + "basis": ["extension match mp3", "byte match at 0, 1521 (signature 5/9)"], + "warning": [], + "URI": null, + "permalink": null + } + }, + "mp4.mp4": { + "filesize": 146693, + "errors": "", + "matches": { + "ns": "pronom", + "id": "fmt/199", + "format": "MPEG-4 Media File", + "version": "", + "mime": "application/mp4", + "match_class": "Audio, Video", + "basis": ["extension match mp4", "byte match at [[4 8] [135072 4]]"], + "warning": [], + "URI": null, + "permalink": null + } + }, + "pdf.pdf": { + "filesize": 38468, + "errors": "", + "matches": { + "ns": "pronom", + "id": "fmt/276", + "format": "Acrobat PDF 1.7 - Portable Document Format", + "version": "1.7", + "mime": "application/pdf", + "match_class": "Page Description", + "basis": ["extension match pdf", "byte match at [[0 8] [38463 5]]"], + "warning": [], + "URI": null, + "permalink": null + } + }, + "txt.txt": { + "filesize": 2136, + "errors": "", + "matches": { + "ns": "pronom", + "id": "x-fmt/111", + "format": "Plain Text File", + "version": "", + "mime": "text/plain", + "match_class": "", + "basis": ["extension match txt", "text match UTF-8 Unicode"], + "warning": [], + "URI": null, + "permalink": null + } + } +} \ No newline at end of file diff --git a/tests/files/json.json b/tests/files/json.json new file mode 100644 index 0000000..d48b6e5 --- /dev/null +++ b/tests/files/json.json @@ -0,0 +1,10 @@ +{ + "test": { + "1": [ + "tests\\test_data\\gis_test\\_test\\docCollection\\1\\test.id", + "tests\\test_data\\gis_test\\_test\\docCollection\\2\\test.tab", + "tests\\test_data\\gis_test\\_test\\docCollection\\3\\test.map", + "tests\\test_data\\gis_test\\_test\\docCollection\\4\\test.dat" + ] + } +} diff --git a/tests/files/lwp.lwp b/tests/files/lwp.lwp new file mode 100644 index 0000000..dd80c03 Binary files /dev/null and b/tests/files/lwp.lwp differ diff --git a/tests/files/mp3.mp3 b/tests/files/mp3.mp3 new file mode 100644 index 0000000..ec34b75 Binary files /dev/null and b/tests/files/mp3.mp3 differ diff --git a/tests/files/mp4.mp4 b/tests/files/mp4.mp4 new file mode 100644 index 0000000..0ec1a5b Binary files /dev/null and b/tests/files/mp4.mp4 differ diff --git a/tests/files/pdf.pdf b/tests/files/pdf.pdf new file mode 100644 index 0000000..b4c4b6e Binary files /dev/null and b/tests/files/pdf.pdf differ diff --git a/tests/files/txt.txt b/tests/files/txt.txt new file mode 100644 index 0000000..9a72be6 --- /dev/null +++ b/tests/files/txt.txt @@ -0,0 +1,7 @@ +Vil bare teste at vores "warning" bliver genereret! + +Så her kommer noget lorem ipsum. + +Lorem ipsum dolor sit amet, consectetur adipiscing elit. Mauris eleifend, neque a faucibus tincidunt, massa mauris cursus massa, et aliquam mauris mi sed quam. Aliquam erat volutpat. Nullam condimentum metus et lorem consequat mollis. Duis volutpat nisi ac ligula volutpat vulputate. Aliquam tempus leo risus, vitae placerat velit euismod sit amet. Nunc facilisis orci mollis nisi scelerisque dignissim. In in urna vitae neque fermentum venenatis vitae ac augue. Nulla pulvinar molestie imperdiet. Fusce eget hendrerit ex, ut pharetra arcu. Morbi lorem risus, hendrerit quis metus nec, consectetur volutpat ante. Sed bibendum auctor diam, quis pretium ligula congue in. In hac habitasse platea dictumst. Nullam a dolor sodales mauris fringilla volutpat vel vitae sem. Nulla fringilla dictum arcu, at feugiat nibh dapibus ac. Integer commodo, ligula laoreet eleifend molestie, enim risus vestibulum felis, at pulvinar purus libero luctus sapien. Donec ac dui ut quam lacinia efficitur eget ornare ipsum. Ut at sollicitudin enim. Morbi egestas tellus turpis, at fringilla nisi interdum vitae. Quisque orci turpis, scelerisque quis mauris nec, viverra fermentum ipsum. Vestibulum sit amet nibh sit amet est dignissim vulputate. Quisque volutpat augue augue, nec accumsan ante lacinia sed. Maecenas mattis nunc rutrum justo vehicula, a pellentesque felis fringilla. Nullam tempus lacus in lorem lacinia tempus. Morbi finibus magna vitae ullamcorper porta. Nullam nec velit libero. Sed in sapien lorem. Nullam eu cursus elit, nec interdum sem. Quisque accumsan dolor nibh, sed tempus purus rhoncus pulvinar. Integer mollis, mauris eget feugiat dictum, risus velit finibus nibh, eu fringilla tellus erat non magna. Praesent volutpat purus nec purus iaculis, quis tristique magna tempus. Cras nec faucibus ante. Nam faucibus eros enim, et iaculis orci hendrerit at. Donec leo nulla, feugiat nec nulla vel, fermentum luctus augue. Sed commodo ultricies lectus. Duis ut arcu at quam aliquam sollicitudin a at nunc. Etiam. + +Forhåbentlig får vi en warning! diff --git a/tests/test_database.py b/tests/test_database.py new file mode 100644 index 0000000..aecb6ce --- /dev/null +++ b/tests/test_database.py @@ -0,0 +1,147 @@ +from hashlib import sha256 +from pathlib import Path +from random import randint +from uuid import uuid4 + +import pytest + +from acacore.database import FileDB +from acacore.database import model_to_columns +from acacore.database.base import ModelTable +from acacore.database.base import ModelView +from acacore.models.file import Action +from acacore.models.file import ConvertedFile +from acacore.models.file import File +from acacore.models.history import HistoryEntry +from acacore.models.identification import SignatureCount +from acacore.models.metadata import Metadata + + +@pytest.fixture(scope="session") +def database_path(temp_folder: Path) -> Path: + return temp_folder / "files.db" + + +@pytest.fixture(scope="session") +def test_file(test_files: Path, test_files_data: dict[str, dict]) -> File: + filename, filedata = next(iter(test_files_data.items())) + file: Path = test_files / filename + return File( + id=randint(1, 10000), + uuid=uuid4(), + checksum=sha256(file.read_bytes()).hexdigest(), + puid=filedata["matches"]["id"], + relative_path=file.relative_to(test_files), + is_binary=True, + file_size_in_bytes=file.stat().st_size, + signature=filedata["matches"]["format"], + warning="; ".join(filedata["matches"]["warning"]), + action=Action.CONVERT, + ) + + +def test_database_classes(database_path: Path): + db: FileDB = FileDB(database_path) + + # Check tables classes + assert isinstance(db.files, ModelTable) + assert issubclass(db.files.model, File) + assert isinstance(db.metadata, ModelTable) + assert issubclass(db.metadata.model, Metadata) + assert isinstance(db.converted_files, ModelTable) + assert issubclass(db.converted_files.model, ConvertedFile) + assert isinstance(db.history, ModelTable) + assert issubclass(db.history.model, HistoryEntry) + + # Check views classes + assert isinstance(db.not_converted, ModelView) + assert issubclass(db.not_converted.model, File) + assert isinstance(db.identification_warnings, ModelView) + assert issubclass(db.identification_warnings.model, File) + assert isinstance(db.signature_count, ModelView) + assert issubclass(db.signature_count.model, SignatureCount) + + +# noinspection SqlResolve,SqlNoDataSourceInspection +def test_database_tables(database_path: Path): + database_path.unlink(missing_ok=True) + + db: FileDB = FileDB(database_path) + + # Create tables + db.init() + db.commit() + + # Test tables existence + tables: list[str] = [ + t for [t] in db.execute("select name from sqlite_master where type = 'table' and name != 'sqlite_master'") + ] + assert db.files.name in tables + assert db.metadata.name in tables + assert db.converted_files.name in tables + assert db.history.name in tables + + # Test views existence + views: list[str] = [ + t for [t] in db.execute("select name from sqlite_master where type = 'view' and name != 'sqlite_master'") + ] + assert db.not_converted.name in views + assert db.identification_warnings.name in views + assert db.signature_count.name in views + + +def test_database_columns(database_path: Path): + assert database_path.is_file() + + db: FileDB = FileDB(database_path) + + for table in (db.files, db.metadata, db.converted_files, db.history): + columns_from_model = tuple( + ( + column.name, + column.sql_type.lower(), + column.not_null, + ("null" if column.default is None else column.to_entry(column.default)) + if column.default is not Ellipsis + else None, + column.primary_key, + ) + for column in model_to_columns(table.model) + ) + columns_from_sql = tuple( + (column[1], column[2].lower(), bool(column[3]), column[4], bool(column[5])) + for column in db.execute(f'pragma table_info("{table.name}")').fetchall() + ) + assert columns_from_model == columns_from_sql + + +def test_insert_select(database_path: Path, test_file: File): + assert database_path.is_file() + + db: FileDB = FileDB(database_path) + test_file2 = test_file.model_copy(deep=True) + test_file2.id = test_file.id // 2 + test_file2.uuid = uuid4() + + db.files.insert(test_file) + db.files.insert(test_file2) + db.commit() + + cursor = db.files.select(where="uuid = ?", parameters=[str(test_file.uuid)]) + result_file = cursor.fetchone() + + assert issubclass(cursor.model, File) + assert cursor.table.name == db.files.name + assert test_file.model_dump() == result_file.model_dump() + + cursor = db.files.select(order_by=[("ID", "asc")]) + result_files = list(cursor) + assert len(result_files) == 2 + assert result_files[0].uuid == test_file2.uuid + assert result_files[1].uuid == test_file.uuid + + cursor = db.files.select(order_by=[("ID", "desc")]) + result_files = list(cursor) + assert len(result_files) == 2 + assert result_files[0].uuid == test_file.uuid + assert result_files[1].uuid == test_file2.uuid diff --git a/tests/test_exceptions.py b/tests/test_exceptions.py new file mode 100644 index 0000000..f734704 --- /dev/null +++ b/tests/test_exceptions.py @@ -0,0 +1,20 @@ +import pytest + +from acacore.exceptions.base import ACAException +from acacore.exceptions.files import FileCollectionError +from acacore.exceptions.files import FileParseError +from acacore.exceptions.files import IdentificationError + + +def test_subclasses(): + with pytest.raises(ACAException): + raise ACAException + + with pytest.raises(ACAException): + raise IdentificationError + + with pytest.raises(ACAException): + raise FileCollectionError + + with pytest.raises(ACAException): + raise FileParseError diff --git a/tests/test_siegfried.py b/tests/test_siegfried.py new file mode 100644 index 0000000..fbddc1b --- /dev/null +++ b/tests/test_siegfried.py @@ -0,0 +1,69 @@ +from os import environ +from pathlib import Path + +import pytest + +from acacore.exceptions.files import IdentificationError +from acacore.siegfried import Siegfried + + +@pytest.fixture() +def siegfried() -> Siegfried: + return Siegfried(Path(environ["GOPATH"], "bin", "sf"), "pronom.sig") + + +@pytest.fixture() +def siegfried_folder() -> Path: + return Path.home() / "siegfried" + + +def test_fail(siegfried: Siegfried): + with pytest.raises(IdentificationError): + siegfried.run("-version") + + +def test_update(siegfried: Siegfried, siegfried_folder: Path): + siegfried.update("pronom") + assert siegfried_folder.joinpath("pronom.sig").is_file() + assert siegfried.signature == "pronom.sig" + + siegfried.update("loc") + assert siegfried_folder.joinpath("loc.sig").is_file() + assert siegfried.signature == "loc.sig" + + siegfried.update("tika") + assert siegfried_folder.joinpath("tika.sig").is_file() + assert siegfried.signature == "tika.sig" + + siegfried.update("freedesktop") + assert siegfried_folder.joinpath("freedesktop.sig").is_file() + assert siegfried.signature == "freedesktop.sig" + + siegfried.update("pronom-tika-loc") + assert siegfried_folder.joinpath("pronom-tika-loc.sig").is_file() + assert siegfried.signature == "pronom-tika-loc.sig" + + siegfried.update("deluxe") + assert siegfried_folder.joinpath("deluxe.sig").is_file() + assert siegfried.signature == "deluxe.sig" + + # TODO: add archivematica + + +def test_identify(siegfried: Siegfried, test_files: Path, test_files_data: dict[str, dict]): + for filename, filedata in test_files_data.items(): + result = siegfried.identify(test_files / filename).files[0] + assert result.filesize == filedata["filesize"] + assert result.matches + assert result.matches[0].model_dump() == filedata["matches"] + assert result.best_match().model_dump() == filedata["matches"] + + +def test_identify_many(siegfried: Siegfried, test_files: Path, test_files_data: dict[str, dict]): + results = siegfried.identify_many([test_files / name for name in test_files_data]) + for [_, result], [filename, filedata] in zip(results, test_files_data.items()): + assert result.filename == str(test_files / filename) + assert result.filesize == filedata["filesize"] + assert result.matches + assert result.matches[0].model_dump() == filedata["matches"] + assert result.best_match().model_dump() == filedata["matches"] diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000..316f17c --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,36 @@ +from pathlib import Path +from re import match + +from acacore.utils.functions import or_none +from acacore.utils.io import size_fmt +from acacore.utils.log import setup_logger + + +def test_functions(): + # or_none + func = or_none(lambda _: 5) + assert func(1) == 5 + assert func(None) is None + + +def test_io(): + # size_fmt + assert size_fmt(2) == "2.0 B" + assert size_fmt(2**10) == "1.0 KiB" + assert size_fmt(2**20) == "1.0 MiB" + assert size_fmt(2**30) == "1.0 GiB" + assert size_fmt(2**40) == "1.0 TiB" + assert size_fmt(2**12 + 128) == "4.1 KiB" + + +def test_log(temp_folder: Path): + log_file: Path = temp_folder / "test.log" + logger = setup_logger("test", log_file) + logger.info("test info message") + logger.warning("test warning message") + logger.error("test error message") + log_lines: list[str] = log_file.read_text().strip().splitlines() + + assert match(r"\d{4}-\d\d-\d\d \d\d:\d\d:\d\d INFO: test info message", log_lines[0]) + assert match(r"\d{4}-\d\d-\d\d \d\d:\d\d:\d\d WARNING: test warning message", log_lines[1]) + assert match(r"\d{4}-\d\d-\d\d \d\d:\d\d:\d\d ERROR: test error message", log_lines[2])