diff --git a/requirements-dev.txt b/requirements-dev.txt index ed2d778770..d05997fcd7 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -41,7 +41,7 @@ charset-normalizer==3.3.2 # via # -r requirements.txt # requests -clamd==1.0.2 +clamav-client==0.6.0 # via -r requirements.txt click==8.1.7 # via pip-tools diff --git a/requirements.in b/requirements.in index 4393cd65fe..14ab817211 100644 --- a/requirements.in +++ b/requirements.in @@ -5,7 +5,7 @@ amclient ammcpc git+https://github.com/artefactual-labs/bagit-python.git@902051d8410219f6c5f4ce6d43e5b272cf29e89b#egg=bagit brotli -clamd +clamav-client django-autoslug django-csp django-forms-bootstrap diff --git a/requirements.txt b/requirements.txt index 28c3e1fcc1..c88495ad5d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -26,7 +26,7 @@ cffi==1.17.1 # via cryptography charset-normalizer==3.3.2 # via requests -clamd==1.0.2 +clamav-client==0.6.0 # via -r requirements.in cryptography==43.0.1 # via diff --git a/src/MCPClient/lib/archivematicaClientModules b/src/MCPClient/lib/archivematicaClientModules index 4dff8ceab2..1a17491124 100644 --- a/src/MCPClient/lib/archivematicaClientModules +++ b/src/MCPClient/lib/archivematicaClientModules @@ -23,7 +23,7 @@ assignfileuuids_v0.0 = assign_file_uuids bindpid_v0.0 = bind_pid removeunneededfiles_v0.0 = remove_unneeded_files -archivematicaclamscan_v0.0 = archivematica_clamscan +antivirus_v0.0 = antivirus createevent_v0.0 = create_event examinecontents_v0.0 = examine_contents identifydspacefiles_v0.0 = identify_dspace_files diff --git a/src/MCPClient/lib/clientScripts/antivirus.py b/src/MCPClient/lib/clientScripts/antivirus.py new file mode 100755 index 0000000000..6e14605544 --- /dev/null +++ b/src/MCPClient/lib/clientScripts/antivirus.py @@ -0,0 +1,219 @@ +#!/usr/bin/env python +# This file is part of Archivematica. +# +# Copyright 2010-2017 Artefactual Systems Inc. 
+# +# Archivematica is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Archivematica is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Archivematica. If not, see . +import argparse +import multiprocessing +import os +import uuid + +import django + +django.setup() +from clamav_client.scanner import get_scanner +from custom_handlers import get_script_logger +from databaseFunctions import insertIntoEvents +from django.conf import settings as mcpclient_settings +from django.core.exceptions import ValidationError +from django.db import transaction +from main.models import Event +from main.models import File + +logger = get_script_logger("archivematica.mcp.client.clamscan") + + +def concurrent_instances(): + return multiprocessing.cpu_count() + + +def file_already_scanned(file_uuid): + return ( + file_uuid != "None" + and Event.objects.filter( + file_uuid_id=file_uuid, event_type="virus check" + ).exists() + ) + + +def queue_event(file_uuid, date, scanner, passed, queue): + if passed is None or file_uuid == "None": + return + + event_detail = "" + if scanner is not None: + info = scanner.info() # This is cached. 
+ event_detail = f'program="{info.name}"; version="{info.version}"; virusDefinitions="{info.virus_definitions}"' + + outcome = "Pass" if passed else "Fail" + logger.info("Recording new event for file %s (outcome: %s)", file_uuid, outcome) + + queue.append( + { + "fileUUID": file_uuid, + "eventIdentifierUUID": str(uuid.uuid4()), + "eventType": "virus check", + "eventDateTime": date, + "eventDetail": event_detail, + "eventOutcome": outcome, + } + ) + + +def get_parser(): + """Return an ``ArgumentParser`` for the script's arguments.""" + parser = argparse.ArgumentParser() + parser.add_argument("file_uuid", metavar="fileUUID") + parser.add_argument("path", metavar="PATH", help="File or directory location") + parser.add_argument("date", metavar="DATE") + parser.add_argument( + "task_uuid", metavar="taskUUID", help="Currently unused, feel free to ignore." + ) + return parser + + +# Map user-provided backend names from configuration to the corresponding +# internal values used in the clamav_client package. Default to "clamdscanner" if +# no valid backend is specified in the configuration. +SCANNERS = {"clamscanner": "clamscan", "clamdscanner": "clamd"} +DEFAULT_SCANNER = "clamdscanner" + + +def create_scanner(): + """Return the ClamAV client configured by the user and found in the + installation's environment variables. Clamdscanner may perform quicker + than Clamscanner given a larger number of objects. Return clamdscanner + object as a default if no other, or an incorrect value is specified. 
+ """ + choice = str(mcpclient_settings.CLAMAV_CLIENT_BACKEND).lower() + backend = SCANNERS.get(choice) + if backend is None: + logger.warning( + 'Unexpected antivirus scanner (CLAMAV_CLIENT_BACKEND): "%s"; using "%s".', + choice, + DEFAULT_SCANNER, + ) + backend = SCANNERS[DEFAULT_SCANNER] + if backend == "clamd": + return get_scanner( + { + "backend": "clamd", + "address": str(mcpclient_settings.CLAMAV_SERVER), + "timeout": int(mcpclient_settings.CLAMAV_CLIENT_TIMEOUT), + "stream": bool(mcpclient_settings.CLAMAV_PASS_BY_STREAM), + } + ) + if backend == "clamscan": + return get_scanner( + { + "backend": "clamscan", + "max_file_size": float(mcpclient_settings.CLAMAV_CLIENT_MAX_FILE_SIZE), + "max_scan_size": float(mcpclient_settings.CLAMAV_CLIENT_MAX_SCAN_SIZE), + } + ) + raise ValueError("Unexpected backend configuration.") + + +def get_size(file_uuid, path): + # We're going to see this happening when files are not part of `objects/`. + if file_uuid != "None": + try: + return File.objects.get(uuid=file_uuid).size + except (File.DoesNotExist, ValidationError): + pass + # Our fallback. + try: + return os.path.getsize(path) + except Exception: + return None + + +def scan_file(event_queue, file_uuid, path, date): + if file_already_scanned(file_uuid): + logger.info("Virus scan already performed, not running scan again") + return 0 + + scanner, passed = None, False + + try: + size = get_size(file_uuid, path) + if size is None: + logger.error("Getting file size returned: %s", size) + return 1 + + max_file_size = mcpclient_settings.CLAMAV_CLIENT_MAX_FILE_SIZE * 1024 * 1024 + max_scan_size = mcpclient_settings.CLAMAV_CLIENT_MAX_SCAN_SIZE * 1024 * 1024 + + valid_scan = True + + if size > max_file_size: + logger.info( + "File will not be scanned. Size %s bytes greater than scanner " + "max file size %s bytes", + size, + max_file_size, + ) + valid_scan = False + elif size > max_scan_size: + logger.info( + "File will not be scanned. 
Size %s bytes greater than scanner " + "max scan size %s bytes", + size, + max_scan_size, + ) + valid_scan = False + + if valid_scan: + scanner = create_scanner() + info = scanner.info() + logger.info( + "Using scanner %s (%s - %s)", + info.name, + info.version, + info.virus_definitions, + ) + + result = scanner.scan(path) + passed, state, details = result.passed, result.state, result.details + else: + passed, state, details = None, None, None + + except Exception: + logger.error("Unexpected error scanning file %s", path, exc_info=True) + return 1 + else: + # record pass or fail, but not None if the file hasn't + # been scanned, e.g. Max File Size thresholds being too low. + if passed is not None: + logger.info("File %s scanned!", path) + logger.debug("passed=%s state=%s details=%s", passed, state, details) + finally: + queue_event(file_uuid, date, scanner, passed, event_queue) + + # If True or None, then we have no error, the file can move through the + # process as expected... + return 1 if passed is False else 0 + + +def call(jobs): + event_queue = [] + + for job in jobs: + with job.JobContext(logger=logger): + job.set_status(scan_file(event_queue, *job.args[1:])) + + with transaction.atomic(): + for e in event_queue: + insertIntoEvents(**e) diff --git a/src/MCPClient/lib/clientScripts/archivematica_clamscan.py b/src/MCPClient/lib/clientScripts/archivematica_clamscan.py deleted file mode 100755 index f3af3a2c91..0000000000 --- a/src/MCPClient/lib/clientScripts/archivematica_clamscan.py +++ /dev/null @@ -1,373 +0,0 @@ -#!/usr/bin/env python -# This file is part of Archivematica. -# -# Copyright 2010-2017 Artefactual Systems Inc. -# -# Archivematica is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. 
-# -# Archivematica is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with Archivematica. If not, see . -import abc -import argparse -import errno -import multiprocessing -import os -import re -import subprocess -import uuid - -import django - -django.setup() -from clamd import BufferTooLongError -from clamd import ClamdNetworkSocket -from clamd import ClamdUnixSocket -from clamd import ConnectionError -from custom_handlers import get_script_logger -from databaseFunctions import insertIntoEvents -from django.conf import settings as mcpclient_settings -from django.core.exceptions import ValidationError -from django.db import transaction -from main.models import Event -from main.models import File - -logger = get_script_logger("archivematica.mcp.client.clamscan") - - -def concurrent_instances(): - return multiprocessing.cpu_count() - - -def clamav_version_parts(ver): - """Both clamscan and clamd return a version string that looks like the - following:: - - ClamAV 0.99.2/23992/Fri Oct 27 05:04:12 2017 - - Given the example above, this function returns a tuple as follows:: - - ("ClamAV 0.99.2", "23992/Fri Oct 27 05:04:12 2017") - - Both elements may be None if the matching failed. - """ - parts = ver.split("/") - n = len(parts) - if n == 1: - version = parts[0] - if re.match("^ClamAV", version): - return version, None - elif n == 3: - version, defs, date = parts - return version, f"{defs}/{date}" - return None, None - - -class ScannerBase(metaclass=abc.ABCMeta): - @abc.abstractmethod - def scan(self, path): - """Scan a file and return a tuple of three elements reporting the - results. These are the three elements expected: - 1. passed (bool) - 2. state (str - "OK", "ERROR", or "FOUND") - 3. 
details (str - extra info when ERROR or FOUND) - """ - - @abc.abstractproperty - def version_attrs(self): - """Obtain the version details. It is expected to return a tuple of two - elements: ClamAV version number and virus definition version number. - The implementor can cache the results. - """ - - def program(self): - return self.PROGRAM - - def version(self): - return self.version_attrs()[0] - - def virus_definitions(self): - return self.version_attrs()[1] - - -class ClamdScanner(ScannerBase): - PROGRAM = "ClamAV (clamd)" - - def __init__(self): - self.addr = mcpclient_settings.CLAMAV_SERVER - self.timeout = mcpclient_settings.CLAMAV_CLIENT_TIMEOUT - self.stream = mcpclient_settings.CLAMAV_PASS_BY_STREAM - self.client = self.get_client() - - def scan(self, path): - if self.stream: - method_name = "pass_by_stream" - result_key = "stream" - else: - method_name = "pass_by_reference" - result_key = path - - passed, state, details = (False, None, None) - try: - result = getattr(self, method_name)(path) - state, details = result[result_key] - except Exception as err: - passed = ClamdScanner.clamd_exception_handler(err) - if state == "OK": - passed = True - return passed, state, details - - @staticmethod - def clamd_exception_handler(err): - """Manage each decision for an exception when it is raised. Ensure - that each decision can be tested to meet the documented Archivematica - antivirus feature definition. - """ - if isinstance(err, IOError): - if err.errno == errno.EPIPE: - logger.error( - "[Errno 32] Broken pipe. File not scanned. Check Clamd " - "StreamMaxLength" - ) - return None - elif isinstance(err, BufferTooLongError): - logger.error( - "Clamd BufferTooLongError. File not scanned. Check Clamd " - "StreamMaxLength" - ) - return None - elif isinstance(err, ConnectionError): - logger.error( - "Clamd ConnectionError. File not scanned. 
Check Clamd " "output: %s", - err, - ) - return None - # Return False and provide some information to the user for all other - # failures. - logger.error("Virus scanning failed: %s", err, exc_info=True) - return False - - def version_attrs(self): - try: - self._version_attrs - except AttributeError: - self._version_attrs = clamav_version_parts(self.client.version()) - return self._version_attrs - - def get_client(self): - if ":" not in self.addr: - return ClamdUnixSocket(path=self.addr) - host, port = self.addr.split(":") - return ClamdNetworkSocket(host=host, port=int(port), timeout=self.timeout) - - def pass_by_reference(self, path): - logger.info( - "File being being read by Clamdscan from filesystem \ - reference." - ) - return self.client.scan(path) - - def pass_by_stream(self, path): - logger.info("File contents being streamed to Clamdscan.") - return self.client.instream(open(path, "rb")) - - -class ClamScanner(ScannerBase): - PROGRAM = "ClamAV (clamscan)" - COMMAND = "clamscan" - - def _call(self, *args): - return subprocess.check_output((self.COMMAND,) + args) - - def scan(self, path): - passed, state, details = (False, "ERROR", None) - try: - max_file_size = ( - "--max-filesize=%dM" % mcpclient_settings.CLAMAV_CLIENT_MAX_FILE_SIZE - ) - max_scan_size = ( - "--max-scansize=%dM" % mcpclient_settings.CLAMAV_CLIENT_MAX_SCAN_SIZE - ) - self._call(max_file_size, max_scan_size, path) - except subprocess.CalledProcessError as err: - if err.returncode == 1: - state = "FOUND" - else: - logger.error("Virus scanning failed: %s", err.output, exc_info=True) - else: - passed, state = (True, "OK") - return passed, state, details - - def version_attrs(self): - try: - self._version_attrs - except AttributeError: - try: - self._version_attrs = clamav_version_parts(self._call("-V")) - except subprocess.CalledProcessError: - self._version_attrs = (None, None) - return self._version_attrs - - -def file_already_scanned(file_uuid): - return ( - file_uuid != "None" - and 
Event.objects.filter( - file_uuid_id=file_uuid, event_type="virus check" - ).exists() - ) - - -def queue_event(file_uuid, date, scanner, passed, queue): - if passed is None or file_uuid == "None": - return - - event_detail = "" - if scanner is not None: - event_detail = f'program="{scanner.program()}"; version="{scanner.version()}"; virusDefinitions="{scanner.virus_definitions()}"' - - outcome = "Pass" if passed else "Fail" - logger.info("Recording new event for file %s (outcome: %s)", file_uuid, outcome) - - queue.append( - { - "fileUUID": file_uuid, - "eventIdentifierUUID": str(uuid.uuid4()), - "eventType": "virus check", - "eventDateTime": date, - "eventDetail": event_detail, - "eventOutcome": outcome, - } - ) - - -def get_parser(): - """Return a ``Namespace`` with the parsed arguments.""" - parser = argparse.ArgumentParser() - parser.add_argument("file_uuid", metavar="fileUUID") - parser.add_argument("path", metavar="PATH", help="File or directory location") - parser.add_argument("date", metavar="DATE") - parser.add_argument( - "task_uuid", metavar="taskUUID", help="Currently unused, feel free to ignore." - ) - return parser - - -SCANNERS = (ClamScanner, ClamdScanner) -SCANNERS_NAMES = tuple(b.__name__.lower() for b in SCANNERS) -DEFAULT_SCANNER = ClamdScanner - - -def get_scanner(): - """Return the ClamAV client configured by the user and found in the - installation's environment variables. Clamdscanner may perform quicker - than Clamscanner given a larger number of objects. Return clamdscanner - object as a default if no other, or an incorrect value is specified. 
- """ - choice = str(mcpclient_settings.CLAMAV_CLIENT_BACKEND).lower() - if choice not in SCANNERS_NAMES: - logger.warning( - "Unexpected antivirus scanner (CLAMAV_CLIENT_BACKEND):" ' "%s"; using %s.', - choice, - DEFAULT_SCANNER.__name__, - ) - return DEFAULT_SCANNER() - return SCANNERS[SCANNERS_NAMES.index(choice)]() - - -def get_size(file_uuid, path): - # We're going to see this happening when files are not part of `objects/`. - if file_uuid != "None": - try: - return File.objects.get(uuid=file_uuid).size - except (File.DoesNotExist, ValidationError): - pass - # Our fallback. - try: - return os.path.getsize(path) - except Exception: - return None - - -def scan_file(event_queue, file_uuid, path, date, task_uuid): - if file_already_scanned(file_uuid): - logger.info("Virus scan already performed, not running scan again") - return 0 - - scanner, passed = None, False - - try: - size = get_size(file_uuid, path) - if size is None: - logger.error("Getting file size returned: %s", size) - return 1 - - max_file_size = mcpclient_settings.CLAMAV_CLIENT_MAX_FILE_SIZE * 1024 * 1024 - max_scan_size = mcpclient_settings.CLAMAV_CLIENT_MAX_SCAN_SIZE * 1024 * 1024 - - valid_scan = True - - if size > max_file_size: - logger.info( - "File will not be scanned. Size %s bytes greater than scanner " - "max file size %s bytes", - size, - max_file_size, - ) - valid_scan = False - elif size > max_scan_size: - logger.info( - "File will not be scanned. 
Size %s bytes greater than scanner " - "max scan size %s bytes", - size, - max_scan_size, - ) - valid_scan = False - - if valid_scan: - scanner = get_scanner() - logger.info( - "Using scanner %s (%s - %s)", - scanner.program(), - scanner.version(), - scanner.virus_definitions(), - ) - - passed, state, details = scanner.scan(path) - else: - passed, state, details = None, None, None - - except Exception: - logger.error("Unexpected error scanning file %s", path, exc_info=True) - return 1 - else: - # record pass or fail, but not None if the file hasn't - # been scanned, e.g. Max File Size thresholds being too low. - if passed is not None: - logger.info("File %s scanned!", path) - logger.debug("passed=%s state=%s details=%s", passed, state, details) - finally: - queue_event(file_uuid, date, scanner, passed, event_queue) - - # If True or None, then we have no error, the file can move through the - # process as expected... - return 1 if passed is False else 0 - - -def call(jobs): - event_queue = [] - - for job in jobs: - with job.JobContext(logger=logger): - job.set_status(scan_file(event_queue, *job.args[1:])) - - with transaction.atomic(): - for e in event_queue: - insertIntoEvents(**e) diff --git a/src/MCPServer/lib/assets/workflow.json b/src/MCPServer/lib/assets/workflow.json index a1d72cf024..b8ffd22db6 100644 --- a/src/MCPServer/lib/assets/workflow.json +++ b/src/MCPServer/lib/assets/workflow.json @@ -2524,8 +2524,8 @@ "config": { "@manager": "linkTaskManagerFiles", "@model": "StandardTaskConfig", - "arguments": "\"%fileUUID%\" \"%relativeLocation%\" \"%date%\" \"%taskUUID%\"", - "execute": "archivematicaClamscan_v0.0", + "arguments": "\"%fileUUID%\" \"%relativeLocation%\" \"%date%\"", + "execute": "antivirus_v0.0", "filter_subdir": "objects/submissionDocumentation", "stderr_file": "%SIPLogsDirectory%clamAVScan.txt" }, @@ -2555,8 +2555,8 @@ "config": { "@manager": "linkTaskManagerFiles", "@model": "StandardTaskConfig", - "arguments": "\"%fileUUID%\" 
\"%relativeLocation%\" \"%date%\" \"%taskUUID%\"", - "execute": "archivematicaClamscan_v0.0", + "arguments": "\"%fileUUID%\" \"%relativeLocation%\" \"%date%\"", + "execute": "antivirus_v0.0", "stderr_file": "%SIPLogsDirectory%clamAVScan.txt" }, "description": { @@ -2884,8 +2884,8 @@ "config": { "@manager": "linkTaskManagerFiles", "@model": "StandardTaskConfig", - "arguments": "\"%fileUUID%\" \"%relativeLocation%\" \"%date%\" \"%taskUUID%\"", - "execute": "archivematicaClamscan_v0.0", + "arguments": "\"%fileUUID%\" \"%relativeLocation%\" \"%date%\"", + "execute": "antivirus_v0.0", "stderr_file": "%SIPLogsDirectory%clamAVScan.txt" }, "description": { @@ -6623,8 +6623,8 @@ "config": { "@manager": "linkTaskManagerFiles", "@model": "StandardTaskConfig", - "arguments": "\"%fileUUID%\" \"%relativeLocation%\" \"%date%\" \"%taskUUID%\"", - "execute": "archivematicaClamscan_v0.0", + "arguments": "\"%fileUUID%\" \"%relativeLocation%\" \"%date%\"", + "execute": "antivirus_v0.0", "filter_subdir": "objects/" }, "description": { @@ -7202,8 +7202,8 @@ "config": { "@manager": "linkTaskManagerFiles", "@model": "StandardTaskConfig", - "arguments": "\"%fileUUID%\" \"%relativeLocation%\" \"%date%\" \"%taskUUID%\"", - "execute": "archivematicaClamscan_v0.0", + "arguments": "\"%fileUUID%\" \"%relativeLocation%\" \"%date%\"", + "execute": "antivirus_v0.0", "filter_subdir": "objects/metadata", "stderr_file": "%SIPLogsDirectory%clamAVScan.txt" }, diff --git a/tests/MCPClient/test_antivirus.py b/tests/MCPClient/test_antivirus.py index b1d48ee1fd..4de3eb00e0 100644 --- a/tests/MCPClient/test_antivirus.py +++ b/tests/MCPClient/test_antivirus.py @@ -1,60 +1,60 @@ -"""Tests for the archivematica_clamscan.py client script.""" +"""Tests for the antivirus.py client script.""" from collections import OrderedDict from collections import namedtuple +from unittest import mock -import archivematica_clamscan import pytest +from antivirus import create_scanner +from antivirus import scan_file +from 
clamav_client.scanner import ClamdScanner +from clamav_client.scanner import ClamscanScanner +from clamav_client.scanner import Scanner +from clamav_client.scanner import ScanResult -from . import test_antivirus_clamdscan - -def test_get_scanner(settings): - """Test that get_scanner returns the correct instance of antivirus +def test_create_scanner(settings): + """Test that create_scanner returns the correct instance of antivirus per the user's configuration. Test return of clamdscanner by default.""" - # Ensure that environment settings are available to the mock classes. - test_antivirus_clamdscan.setup_clamdscanner(settings) - # Testing to ensure clamscanner is returned when explicitly set. settings.CLAMAV_CLIENT_BACKEND = "clamscanner" - scanner = archivematica_clamscan.get_scanner() - assert isinstance(scanner, archivematica_clamscan.ClamScanner) + scanner = create_scanner() + assert isinstance(scanner, ClamscanScanner) # Testing to ensure that clamdscanner is returned when explicitly set. settings.CLAMAV_CLIENT_BACKEND = "clamdscanner" - scanner = archivematica_clamscan.get_scanner() - assert isinstance(scanner, archivematica_clamscan.ClamdScanner) + scanner = create_scanner() + assert isinstance(scanner, ClamdScanner) # Testing to ensure that clamdscanner is the default returned scanner. settings.CLAMAV_CLIENT_BACKEND = "fprot" - scanner = archivematica_clamscan.get_scanner() - assert isinstance(scanner, archivematica_clamscan.ClamdScanner) + scanner = create_scanner() + assert isinstance(scanner, ClamdScanner) # Testing to ensure that clamdscanner is the default returned scanner when # the user configures an empty string. 
settings.CLAMAV_CLIENT_BACKEND = "" - scanner = archivematica_clamscan.get_scanner() - assert isinstance(scanner, archivematica_clamscan.ClamdScanner) + scanner = create_scanner() + assert isinstance(scanner, ClamdScanner) # Testing to ensure that clamdscanner is returned when the environment # hasn't been configured appropriately and None is returned. settings.CLAMAV_CLIENT_BACKEND = None - scanner = archivematica_clamscan.get_scanner() - assert isinstance(scanner, archivematica_clamscan.ClamdScanner) + scanner = create_scanner() + assert isinstance(scanner, ClamdScanner) # Testing to ensure that clamdscanner is returned when another variable # type is specified, e.g. in this instance, an integer. settings.CLAMAV_CLIENT_BACKEND = 10 - scanner = archivematica_clamscan.get_scanner() - assert isinstance(scanner, archivematica_clamscan.ClamdScanner) + scanner = create_scanner() + assert isinstance(scanner, ClamdScanner) args = OrderedDict() args["file_uuid"] = "ec26199f-72a4-4fd8-a94a-29144b02ddd8" args["path"] = "/path" args["date"] = "2019-12-01" -args["task_uuid"] = "c380e94e-7a7b-4ab8-aa72-ec0644cc3f5d" class FileMock: @@ -62,8 +62,9 @@ def __init__(self, size): self.size = size -class ScannerMock(archivematica_clamscan.ScannerBase): - PROGRAM = "Mock" +class ScannerMock(Scanner): + _program = "ClamAV (clamd)" + _command = "mock" def __init__(self, should_except=False, passed=False): self.should_except = should_except @@ -72,10 +73,17 @@ def __init__(self, should_except=False, passed=False): def scan(self, path): if self.should_except: raise Exception("Something really bad happened!") - return self.passed, None, None + result = ScanResult(filename=path, state="OK", details="details", err=None) + mock.patch.object( + result.__class__, + "passed", + new_callable=mock.PropertyMock, + return_value=self.passed, + ).start() + return result - def version_attrs(self): - return ("version", "virus_definitions") + def _get_version(self): + return "ClamAV 0.103.11/27400/Mon 
Sep 16 10:52:36 2024" def setup_test_scan_file_mocks( @@ -87,7 +95,7 @@ def setup_test_scan_file_mocks( ): deps = namedtuple("deps", ["file_already_scanned", "file_get", "scanner"])( file_already_scanned=mocker.patch( - "archivematica_clamscan.file_already_scanned", + "antivirus.file_already_scanned", return_value=file_already_scanned, ), file_get=mocker.patch( @@ -96,7 +104,7 @@ def setup_test_scan_file_mocks( scanner=ScannerMock(should_except=scanner_should_except, passed=scanner_passed), ) - mocker.patch("archivematica_clamscan.get_scanner", return_value=deps.scanner) + mocker.patch("antivirus.get_scanner", return_value=deps.scanner) return deps @@ -104,7 +112,7 @@ def setup_test_scan_file_mocks( def test_scan_file_already_scanned(mocker): deps = setup_test_scan_file_mocks(mocker, file_already_scanned=True) - exit_code = archivematica_clamscan.scan_file([], **dict(args)) + exit_code = scan_file([], **dict(args)) assert exit_code == 0 deps.file_already_scanned.assert_called_once_with(args["file_uuid"]) @@ -172,11 +180,11 @@ def test_scan_file(mocker, setup_kwargs, exit_code, queue_event_params, settings event_queue = [] - ret = archivematica_clamscan.scan_file(event_queue, **dict(args)) + ret = scan_file(event_queue, **dict(args)) # The integer returned by scan_file() is going to be used as the exit code - # of the archivematica_clamscan.py script which is important for the AM - # workflow in order to control what to do next. + # of the antivirus.py script which is important for the AM workflow in order + # to control what to do next. 
assert exit_code == ret # A side effect of scan_file() is to queue an event to be created in the diff --git a/tests/MCPClient/test_antivirus_clamdscan.py b/tests/MCPClient/test_antivirus_clamdscan.py deleted file mode 100644 index f9f1a2b2b1..0000000000 --- a/tests/MCPClient/test_antivirus_clamdscan.py +++ /dev/null @@ -1,158 +0,0 @@ -"""Tests for the archivematica_clamscan.py client script.""" - -import errno -from collections import namedtuple - -import archivematica_clamscan -from clamd import BufferTooLongError -from clamd import ClamdNetworkSocket -from clamd import ClamdUnixSocket -from clamd import ConnectionError - - -def setup_clamdscanner( - settings, addr="/var/run/clamav/clamd.ctl", timeout=10, stream=False -): - settings.CLAMAV_SERVER = addr - settings.CLAMAV_CLIENT_TIMEOUT = timeout - settings.CLAMAV_PASS_BY_STREAM = stream - - return archivematica_clamscan.ClamdScanner() - - -def test_clamdscanner_version_props(mocker, settings): - scanner = setup_clamdscanner(settings) - mocker.patch.object( - scanner, - "version_attrs", - return_value=("ClamAV 0.99.2", "23992/Fri Oct 27 05:04:12 2017"), - ) - - assert scanner.program() == "ClamAV (clamd)" - assert scanner.version() == "ClamAV 0.99.2" - assert scanner.virus_definitions() == "23992/Fri Oct 27 05:04:12 2017" - - -def test_clamdscanner_version_attrs(mocker, settings): - scanner = setup_clamdscanner(settings, addr="/var/run/clamav/clamd.ctl") - version = mocker.patch.object( - scanner.client, - "version", - return_value="ClamAV 0.99.2/23992/Fri Oct 27 05:04:12 2017", - ) - - assert scanner.version_attrs() == ( - "ClamAV 0.99.2", - "23992/Fri Oct 27 05:04:12 2017", - ) - version.assert_called_once() - - -def test_clamdscanner_get_client(settings): - scanner = setup_clamdscanner(settings, addr="/var/run/clamav/clamd.ctl") - assert isinstance(scanner.client, ClamdUnixSocket) - - scanner = setup_clamdscanner(settings, addr="127.0.0.1:1234", timeout=15.5) - assert isinstance(scanner.client, 
ClamdNetworkSocket) - assert scanner.client.host == "127.0.0.1" - assert scanner.client.port == 1234 - assert scanner.client.timeout == 15.5 - - -def test_clamdscanner_scan(mocker, settings): - OKAY_RET = ("OK", None) - ERROR_RET = ("ERROR", "Permission denied") - FOUND_RET = ("FOUND", "Eicar-Test-Signature") - - def patch(scanner, ret=OKAY_RET, excepts=False): - """Patch the scanner function and enable testing of exceptions raised - by clamdscanner that we want to control. excepts can take an argument - of True to pass a generic exception. excepts can also take an exception - as an argument for better granularity. - """ - deps = namedtuple("deps", ["pass_by_stream", "pass_by_reference"])( - pass_by_stream=mocker.patch.object( - scanner, "pass_by_stream", return_value={"stream": ret} - ), - pass_by_reference=mocker.patch.object( - scanner, "pass_by_reference", return_value={"/file": ret} - ), - ) - if excepts is not False: - e = excepts - if excepts is True: - e = Exception("Testing an unmanaged exception.") - deps.pass_by_stream.side_effect = e - deps.pass_by_reference.side_effect = e - return deps - - scanner = setup_clamdscanner(settings, stream=False) - deps = patch(scanner, ret=OKAY_RET) - passed, state, details = scanner.scan("/file") - assert passed is True - assert state == "OK" - assert details is None - deps.pass_by_stream.assert_not_called() - deps.pass_by_reference.assert_called_once() - - scanner = setup_clamdscanner(settings, stream=True) - deps = patch(scanner, ret=OKAY_RET) - passed, state, details = scanner.scan("/file") - assert passed is True - assert state == "OK" - assert details is None - deps.pass_by_stream.assert_called_once() - deps.pass_by_reference.assert_not_called() - - patch(scanner, ret=ERROR_RET) - passed, state, details = scanner.scan("/file") - assert passed is False - assert state == "ERROR" - assert details == "Permission denied" - - patch(scanner, ret=FOUND_RET) - passed, state, details = scanner.scan("/file") - assert passed is 
False - assert state == "FOUND" - assert details == "Eicar-Test-Signature" - - # Testing a generic Exception returned by the clamdscan micorservice. - patch(scanner, ret=OKAY_RET, excepts=True) - passed, state, details = scanner.scan("/file") - assert passed is False - assert state is None - assert details is None - - # Testing a generic IOError that is not a broken pipe error that we're - # expecting to be able to manage from clamdscan. - patch(scanner, ret=OKAY_RET, excepts=OSError("Testing a generic IO Error")) - passed, state, details = scanner.scan("/file") - assert passed is False - assert state is None - assert details is None - - # Broken pipe is a known error from the clamd library. - brokenpipe_error = OSError("Testing a broken pipe error") - brokenpipe_error.errno = errno.EPIPE - patch(scanner, ret=OKAY_RET, excepts=brokenpipe_error) - passed, state, details = scanner.scan("/file") - assert passed is None - assert state is None - assert details is None - - # The INSTREAM size limit error is known to us; test it here. - instream_error = BufferTooLongError("INSTREAM size limit exceeded. ERROR.") - patch(scanner, ret=OKAY_RET, excepts=instream_error) - passed, state, details = scanner.scan("/file") - assert passed is None - assert state is None - assert details is None - - # The clamd library can return a further error code here, and we we test it - # to make sure that if it does, it is managed. 
- connection_error = ConnectionError("Error while reading from socket.") - patch(scanner, ret=OKAY_RET, excepts=connection_error) - passed, state, details = scanner.scan("/file") - assert passed is None - assert state is None - assert details is None diff --git a/tests/MCPClient/test_antivirus_clamscan.py b/tests/MCPClient/test_antivirus_clamscan.py deleted file mode 100644 index f783bbe904..0000000000 --- a/tests/MCPClient/test_antivirus_clamscan.py +++ /dev/null @@ -1,78 +0,0 @@ -"""Tests for the archivematica_clamscan.py client script.""" - -import subprocess - -import archivematica_clamscan -import pytest - - -@pytest.mark.parametrize( - "version, want", - [ - ( - "ClamAV 0.99.2/23992/Fri Oct 27 05:04:12 2017", - ("ClamAV 0.99.2", "23992/Fri Oct 27 05:04:12 2017"), - ), - ("ClamAV 0.99.2", ("ClamAV 0.99.2", None)), - ("Unexpected value", (None, None)), - ], -) -def test_clamav_version_parts(version, want): - got = archivematica_clamscan.clamav_version_parts(version) - assert got == want - - -def setup_clamscanner(): - return archivematica_clamscan.ClamScanner() - - -def test_clamscanner_version_props(mocker): - scanner = setup_clamscanner() - mocker.patch.object( - scanner, - "version_attrs", - return_value=("ClamAV 0.99.2", "23992/Fri Oct 27 05:04:12 2017"), - ) - - assert scanner.program() == "ClamAV (clamscan)" - assert scanner.version() == "ClamAV 0.99.2" - assert scanner.virus_definitions() == "23992/Fri Oct 27 05:04:12 2017" - - -def test_clamscanner_version_attrs(mocker, settings): - scanner = setup_clamscanner() - mock = mocker.patch.object( - scanner, "_call", return_value="ClamAV 0.99.2/23992/Fri Oct 27 05:04:12 2017" - ) - - assert scanner.version_attrs() == ( - "ClamAV 0.99.2", - "23992/Fri Oct 27 05:04:12 2017", - ) - mock.assert_called_once_with("-V") - - -def test_clamscanner_scan(mocker, settings): - scanner = setup_clamscanner() - mock = mocker.patch.object(scanner, "_call", return_value="Output of clamscan") - - # User configured thresholds 
need to be sent through to clamscanner and - # executed as part of the call to it. - settings.CLAMAV_CLIENT_MAX_FILE_SIZE = 20 - settings.CLAMAV_CLIENT_MAX_SCAN_SIZE = 20 - - max_file_size = "--max-filesize=%dM" % settings.CLAMAV_CLIENT_MAX_FILE_SIZE - max_scan_size = "--max-scansize=%dM" % settings.CLAMAV_CLIENT_MAX_SCAN_SIZE - - assert scanner.scan("/file") == (True, "OK", None) - mock.assert_called_once_with(max_file_size, max_scan_size, "/file") - - mock.side_effect = subprocess.CalledProcessError( - 1, "clamscan", "Output of clamscan" - ) - assert scanner.scan("/file") == (False, "FOUND", None) - - mock.side_effect = subprocess.CalledProcessError( - 2, "clamscan", "Output of clamscan" - ) - assert scanner.scan("/file") == (False, "ERROR", None)