Added support for crc32 and adler32

- Also added support for a custom file-read buffer size, set via the FileReadBuffer key in the settings.ini file
DuckBoss · May 25, 2020 · 99ee44c · 99ee44c
1 parent cb41de2
commit 99ee44c
Show file tree

Hide file tree

Showing 4 changed files with 75 additions and 26 deletions.
diff --git a/FileSync/__main__.py b/FileSync/__main__.py
@@ -17,7 +17,7 @@
     parser.add_argument('--scan-interval', dest='scan_interval', default=5, help='Sets the time interval in seconds between directory scans (recommended - 2-5s)')
     parser.add_argument('--hash', dest='hash_algorithm', default='sha256',
                         help='Sets the hashing algorithm to use for checksums (recommended - sha256)\n'
-                             'Supported hashing algorithms: [md5, sha1, sha224, sha256, sha384, sha512]')
+                             'Supported hashing algorithms: [crc32, adler32, md5, sha1, sha224, sha256, sha384, sha512]')
     parser.add_argument('--batch-size', dest='batch_size', default=-1, help='Sets the batch size for multi-core processing, if enabled (recommended - 100+ for large quantities of data)')
     parser.add_argument('--no-live-scan', dest='live_scan', action='store_true', default=False, help='Disables live scanning for changes in the directories which makes the program only sync once')
     parser.add_argument('--quiet', dest='quiet_feature', action='store_true', default=False, help='Suppresses all standard output messages. This is preferable for a headless environment')

diff --git a/FileSync/main.py b/FileSync/main.py
@@ -1,7 +1,8 @@
-import hashlib
 import shutil
 import errno
 import multiprocessing
+from hashlib import sha512, sha224, sha256, sha384, sha1, md5
+from zlib import crc32, adler32
 from os import makedirs, walk
 from pathlib import Path
 from time import sleep, time
@@ -17,17 +18,17 @@ def __init__(self, debug=False):
     @staticmethod
     def hash_classify(given_hash: str):
         if given_hash.lower() == H_SHA_256:
-            return hashlib.sha256()
+            return sha256()
         if given_hash.lower() == H_SHA_224:
-            return hashlib.sha224()
+            return sha224()
         if given_hash.lower() == H_SHA_384:
-            return hashlib.sha3_384()
+            return sha384()
         if given_hash.lower() == H_SHA_512:
-            return hashlib.sha512()
+            return sha512()
         elif given_hash.lower() == H_MD5:
-            return hashlib.md5()
+            return md5()
         elif given_hash.lower() == H_SHA_1:
-            return hashlib.sha1()
+            return sha1()
         else:
             return None
 
@@ -73,9 +74,10 @@ def __init__(self, config, multi, no_live_scan, batch_size, hash_algo, benchmark
         self.multi = multi
         self.hash = hash_algo
         # Reports an error if an unsupported hash algorithm is used by the end-user.
-        if HashResolver.hash_classify(self.hash) is None:
-            print(f"Encountered an error while resolving the hash algorithm type: {self.hash}\nPlease use a supported hash.")
-            return
+        if self.hash != H_CRC_32 and self.hash != H_ADLER_32:
+            if HashResolver.hash_classify(self.hash) is None:
+                print(f"Encountered an error while resolving the hash algorithm type: {self.hash}\nPlease use a supported hash.")
+                return
         self.benchmark = benchmark
         self.scan_interval = scan_interval
         self.hasher = None
@@ -120,29 +122,50 @@ def live_scan(self):
 
     def check_file_multi(self, file, file_hashes, debug) -> bool:
         self.hasher = HashResolver.hash_classify(self.hash)
+        use_crc32 = False
+        use_adler32 = False
+        if self.hash == H_CRC_32:
+            use_crc32 = True
+        if self.hash == H_ADLER_32:
+            use_adler32 = True
         with open(file, 'rb') as cur_file:
-            buffer = cur_file.read(1024)
+            buffer = cur_file.read(int(self.config[C_MAIN_SETTINGS][P_FILE_BUFFER]))
             try:
                 if self.hasher is not None:
-                    self.hasher.update(buffer)
+                    if not use_crc32 and not use_adler32:
+                        self.hasher.update(buffer)
                 else:
-                    return False
+                    if use_crc32:
+                        self.hasher = crc32(buffer, 0)
+                    elif use_adler32:
+                        self.hasher = adler32(buffer, 0)
+                    else:
+                        return False
             except RuntimeError as e:
                 print(f"Encountered error while hashing:\n{e}")
                 return False
 
             while len(buffer) > 0:
-                buffer = cur_file.read(1024)
+                buffer = cur_file.read(int(self.config[C_MAIN_SETTINGS][P_FILE_BUFFER]))
                 try:
                     if self.hasher is not None:
-                        self.hasher.update(buffer)
+                        if not use_crc32 and not use_adler32:
+                            self.hasher.update(buffer)
                     else:
-                        return False
+                        if use_crc32:
+                            self.hasher = crc32(buffer, self.hasher)
+                        elif use_adler32:
+                            self.hasher = adler32(buffer, 0)
+                        else:
+                            return False
                 except RuntimeError as e:
                     print(f"Encountered error while hashing:\n{e}")
                     return False
 
-        cur_hash = self.hasher.hexdigest()
+        if not use_crc32 and not use_adler32:
+            cur_hash = self.hasher.hexdigest()
+        else:
+            cur_hash = format(self.hasher & 0xFFFFFFF, '08x')
         try:
             if file_hashes[file.as_posix()] != cur_hash:
                 file_hashes[file.as_posix()] = cur_hash
@@ -159,29 +182,50 @@ def check_file_multi(self, file, file_hashes, debug) -> bool:
 
     def check_file_single(self, file) -> bool:
         self.hasher = HashResolver.hash_classify(self.hash)
+        use_crc32 = False
+        use_adler32 = False
+        if self.hash == H_CRC_32:
+            use_crc32 = True
+        if self.hash == H_ADLER_32:
+            use_adler32 = True
         with open(file, 'rb') as cur_file:
-            buffer = cur_file.read(1024)
+            buffer = cur_file.read(int(self.config[C_MAIN_SETTINGS][P_FILE_BUFFER]))
             try:
                 if self.hasher is not None:
-                    self.hasher.update(buffer)
+                    if not use_crc32 and not use_adler32:
+                        self.hasher.update(buffer)
                 else:
-                    return False
+                    if use_crc32:
+                        self.hasher = crc32(buffer, 0)
+                    elif use_adler32:
+                        self.hasher = adler32(buffer, 0)
+                    else:
+                        return False
             except RuntimeError as e:
                 print(f"Encountered error while hashing:\n{e}")
                 return False
 
             while len(buffer) > 0:
-                buffer = cur_file.read(1024)
+                buffer = cur_file.read(int(self.config[C_MAIN_SETTINGS][P_FILE_BUFFER]))
                 try:
                     if self.hasher is not None:
-                        self.hasher.update(buffer)
+                        if not use_crc32 and not use_adler32:
+                            self.hasher.update(buffer)
                     else:
-                        return False
+                        if use_crc32:
+                            self.hasher = crc32(buffer, self.hasher)
+                        elif use_adler32:
+                            self.hasher = adler32(buffer, self.hasher)
+                        else:
+                            return False
                 except RuntimeError as e:
                     print(f"Encountered error while hashing:\n{e}")
                     return False
 
-        cur_hash = self.hasher.hexdigest()
+        if not use_crc32 and not use_adler32:
+            cur_hash = self.hasher.hexdigest()
+        else:
+            cur_hash = format(self.hasher & 0xFFFFFFF, '08x')
         try:
             if self.hash_dict[file.as_posix()] != cur_hash:
                 self.hash_dict[file.as_posix()] = cur_hash

diff --git a/FileSync/resources/strings.py b/FileSync/resources/strings.py
@@ -8,10 +8,13 @@
 P_SRC_DIR = 'SourceDirectory'
 P_DEST_DIR = 'DestinationDirectories'
 P_BATCH_SIZE = 'BatchProcessingGroupSize'
+P_FILE_BUFFER = 'FileReadBuffer'
 # SUPPORTED HASHES
 H_SHA_256 = 'sha256'
 H_SHA_224 = 'sha224'
 H_SHA_384 = 'sha384'
 H_SHA_512 = 'sha512'
 H_SHA_1 = 'sha1'
 H_MD5 = 'md5'
+H_CRC_32 = 'crc32'
+H_ADLER_32 = 'adler32'
diff --git a/FileSync/settings.ini b/FileSync/settings.ini
@@ -8,4 +8,6 @@ SourceDirectory = YourSourceDirectory
 ; Destination directory to copy files/folders to
 DestinationDirectories = YourDestinationDirectory, YourDestinationDirectory2(optionally have multiple destination directories)
 ; Batch processing group number
-BatchProcessingGroupSize = 100
+BatchProcessingGroupSize = 100
+; File read buffer size in bytes (how much of a file is read per chunk while hashing)
+FileReadBuffer = 1024