From 99ee44c87002e0abdaf6ed3e756b0e99ab3c7795 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jason=20=28=EC=A0=9C=EC=9D=B4=EC=8A=A8=29?= Date: Mon, 25 May 2020 15:09:11 -0400 Subject: [PATCH] Added support for crc32 and adler32 - Also added support for a custom file reading buffer size in the settings.ini file --- FileSync/__main__.py | 2 +- FileSync/main.py | 92 ++++++++++++++++++++++++++--------- FileSync/resources/strings.py | 3 ++ FileSync/settings.ini | 4 +- 4 files changed, 75 insertions(+), 26 deletions(-) diff --git a/FileSync/__main__.py b/FileSync/__main__.py index 7b05f12..5066d7d 100644 --- a/FileSync/__main__.py +++ b/FileSync/__main__.py @@ -17,7 +17,7 @@ parser.add_argument('--scan-interval', dest='scan_interval', default=5, help='Sets the time interval in seconds between directory scans (recommended - 2-5s)') parser.add_argument('--hash', dest='hash_algorithm', default='sha256', help='Sets the hashing algorithm to use for checksums (recommended - sha256)\n' - 'Supported hashing algorithms: [md5, sha1, sha224, sha256, sha384, sha512]') + 'Supported hashing algorithms: [crc32, adler32, md5, sha1, sha224, sha256, sha384, sha512]') parser.add_argument('--batch-size', dest='batch_size', default=-1, help='Sets the batch size for multi-core processing, if enabled (recommended - 100+ for large quantities of data)') parser.add_argument('--no-live-scan', dest='live_scan', action='store_true', default=False, help='Disables live scanning for changes in the directories which makes the program only sync once') parser.add_argument('--quiet', dest='quiet_feature', action='store_true', default=False, help='Suppresses all standard output messages. This is preferable for a headless environment') diff --git a/FileSync/main.py b/FileSync/main.py index 5a348af..4815b5b 100644 --- a/FileSync/main.py +++ b/FileSync/main.py @@ -1,7 +1,8 @@ -import hashlib import shutil import errno import multiprocessing +from hashlib import sha512, sha224, sha256, sha384, sha1, md5 +from zlib import crc32, adler32 from os import makedirs, walk from pathlib import Path from time import sleep, time @@ -17,17 +18,17 @@ def __init__(self, debug=False): @staticmethod def hash_classify(given_hash: str): if given_hash.lower() == H_SHA_256: - return hashlib.sha256() + return sha256() if given_hash.lower() == H_SHA_224: - return hashlib.sha224() + return sha224() if given_hash.lower() == H_SHA_384: - return hashlib.sha3_384() + return sha384() if given_hash.lower() == H_SHA_512: - return hashlib.sha512() + return sha512() elif given_hash.lower() == H_MD5: - return hashlib.md5() + return md5() elif given_hash.lower() == H_SHA_1: - return hashlib.sha1() + return sha1() else: return None @@ -73,9 +74,10 @@ def __init__(self, config, multi, no_live_scan, batch_size, hash_algo, benchmark self.multi = multi self.hash = hash_algo # Reports an error if an unsupported hash algorithm is used by the end-user. - if HashResolver.hash_classify(self.hash) is None: - print(f"Encountered an error while resolving the hash algorithm type: {self.hash}\nPlease use a supported hash.") - return + if self.hash != H_CRC_32 and self.hash != H_ADLER_32: + if HashResolver.hash_classify(self.hash) is None: + print(f"Encountered an error while resolving the hash algorithm type: {self.hash}\nPlease use a supported hash.") + return self.benchmark = benchmark self.scan_interval = scan_interval self.hasher = None @@ -120,29 +122,50 @@ def live_scan(self): def check_file_multi(self, file, file_hashes, debug) -> bool: self.hasher = HashResolver.hash_classify(self.hash) + use_crc32 = False + use_adler32 = False + if self.hash == H_CRC_32: + use_crc32 = True + if self.hash == H_ADLER_32: + use_adler32 = True with open(file, 'rb') as cur_file: - buffer = cur_file.read(1024) + buffer = cur_file.read(int(self.config[C_MAIN_SETTINGS][P_FILE_BUFFER])) try: if self.hasher is not None: - self.hasher.update(buffer) + if not use_crc32 and not use_adler32: + self.hasher.update(buffer) else: - return False + if use_crc32: + self.hasher = crc32(buffer, 0) + elif use_adler32: + self.hasher = adler32(buffer, 0) + else: + return False except RuntimeError as e: print(f"Encountered error while hashing:\n{e}") return False while len(buffer) > 0: - buffer = cur_file.read(1024) + buffer = cur_file.read(int(self.config[C_MAIN_SETTINGS][P_FILE_BUFFER])) try: if self.hasher is not None: - self.hasher.update(buffer) + if not use_crc32 and not use_adler32: + self.hasher.update(buffer) else: - return False + if use_crc32: + self.hasher = crc32(buffer, self.hasher) + elif use_adler32: + self.hasher = adler32(buffer, 0) + else: + return False except RuntimeError as e: print(f"Encountered error while hashing:\n{e}") return False - cur_hash = self.hasher.hexdigest() + if not use_crc32 and not use_adler32: + cur_hash = self.hasher.hexdigest() + else: + cur_hash = format(self.hasher & 0xFFFFFFF, '08x') try: if file_hashes[file.as_posix()] != cur_hash: file_hashes[file.as_posix()] = cur_hash @@ -159,29 +182,50 @@ def check_file_multi(self, file, file_hashes, debug) -> bool: def check_file_single(self, file) -> bool: self.hasher = HashResolver.hash_classify(self.hash) + use_crc32 = False + use_adler32 = False + if self.hash == H_CRC_32: + use_crc32 = True + if self.hash == H_ADLER_32: + use_adler32 = True with open(file, 'rb') as cur_file: - buffer = cur_file.read(1024) + buffer = cur_file.read(int(self.config[C_MAIN_SETTINGS][P_FILE_BUFFER])) try: if self.hasher is not None: - self.hasher.update(buffer) + if not use_crc32 and not use_adler32: + self.hasher.update(buffer) else: - return False + if use_crc32: + self.hasher = crc32(buffer, 0) + elif use_adler32: + self.hasher = adler32(buffer, 0) + else: + return False except RuntimeError as e: print(f"Encountered error while hashing:\n{e}") return False while len(buffer) > 0: - buffer = cur_file.read(1024) + buffer = cur_file.read(int(self.config[C_MAIN_SETTINGS][P_FILE_BUFFER])) try: if self.hasher is not None: - self.hasher.update(buffer) + if not use_crc32 and not use_adler32: + self.hasher.update(buffer) else: - return False + if use_crc32: + self.hasher = crc32(buffer, self.hasher) + elif use_adler32: + self.hasher = adler32(buffer, self.hasher) + else: + return False except RuntimeError as e: print(f"Encountered error while hashing:\n{e}") return False - cur_hash = self.hasher.hexdigest() + if not use_crc32 and not use_adler32: + cur_hash = self.hasher.hexdigest() + else: + cur_hash = format(self.hasher & 0xFFFFFFF, '08x') try: if self.hash_dict[file.as_posix()] != cur_hash: self.hash_dict[file.as_posix()] = cur_hash diff --git a/FileSync/resources/strings.py b/FileSync/resources/strings.py index feeccd8..3822096 100644 --- a/FileSync/resources/strings.py +++ b/FileSync/resources/strings.py @@ -8,6 +8,7 @@ P_SRC_DIR = 'SourceDirectory' P_DEST_DIR = 'DestinationDirectories' P_BATCH_SIZE = 'BatchProcessingGroupSize' +P_FILE_BUFFER = 'FileReadBuffer' # SUPPORTED HASHES H_SHA_256 = 'sha256' H_SHA_224 = 'sha224' @@ -15,3 +16,5 @@ H_SHA_512 = 'sha512' H_SHA_1 = 'sha1' H_MD5 = 'md5' +H_CRC_32 = 'crc32' +H_ADLER_32 = 'adler32' diff --git a/FileSync/settings.ini b/FileSync/settings.ini index aea37f5..65335fb 100644 --- a/FileSync/settings.ini +++ b/FileSync/settings.ini @@ -8,4 +8,6 @@ SourceDirectory = YourSourceDirectory ; Destination directory to copy files/folders to DestinationDirectories = YourDestinationDirectory, YourDestinationDirectory2(optionally have multiple destination directories) ; Batch processing group number -BatchProcessingGroupSize = 100 \ No newline at end of file +BatchProcessingGroupSize = 100 +; File reading buffer +FileReadBuffer = 1024 \ No newline at end of file