Skip to content

Commit

Permalink
Added support for crc32 and adler32
Browse files Browse the repository at this point in the history
- Also added support for a custom file reading buffer size in the settings.ini file
  • Loading branch information
DuckBoss committed May 25, 2020
1 parent cb41de2 commit 99ee44c
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 26 deletions.
2 changes: 1 addition & 1 deletion FileSync/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
# Command-line options. Values supplied on the command line arrive as strings
# (no ``type=`` is set), while the defaults for --scan-interval and
# --batch-size are ints.
parser.add_argument('--scan-interval', dest='scan_interval', default=5, help='Sets the time interval in seconds between directory scans (recommended - 2-5s)')
# Only the current list of supported algorithms is kept in the help text; the
# stale pre-crc32/adler32 line duplicated the string and unbalanced the call.
parser.add_argument('--hash', dest='hash_algorithm', default='sha256',
                    help='Sets the hashing algorithm to use for checksums (recommended - sha256)\n'
                         'Supported hashing algorithms: [crc32, adler32, md5, sha1, sha224, sha256, sha384, sha512]')
parser.add_argument('--batch-size', dest='batch_size', default=-1, help='Sets the batch size for multi-core processing, if enabled (recommended - 100+ for large quantities of data)')
# NOTE: the destination is named ``live_scan`` but, being a store_true flag,
# it is True exactly when --no-live-scan is passed (i.e. live scanning is off).
parser.add_argument('--no-live-scan', dest='live_scan', action='store_true', default=False, help='Disables live scanning for changes in the directories which makes the program only sync once')
parser.add_argument('--quiet', dest='quiet_feature', action='store_true', default=False, help='Suppresses all standard output messages. This is preferable for a headless environment')
Expand Down
92 changes: 68 additions & 24 deletions FileSync/main.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import hashlib
import shutil
import errno
import multiprocessing
from hashlib import sha512, sha224, sha256, sha384, sha1, md5
from zlib import crc32, adler32
from os import makedirs, walk
from pathlib import Path
from time import sleep, time
Expand All @@ -17,17 +18,17 @@ def __init__(self, debug=False):
@staticmethod
def hash_classify(given_hash: str):
    """Return a new hashlib hash object for the named algorithm.

    The name is lower-cased before it is compared against the supported
    ``H_*`` constants (sha256, sha224, sha384, sha512, md5, sha1).

    Args:
        given_hash: Algorithm name as supplied by the user.

    Returns:
        A fresh hash object exposing ``update()`` / ``hexdigest()``, or
        ``None`` when the name is not one of the hashlib-backed algorithms
        listed above.
    """
    # One entry per supported algorithm; the constructor is only called for
    # the algorithm actually requested.
    constructors = {
        H_SHA_256: sha256,
        H_SHA_224: sha224,
        # SHA-384 from the SHA-2 family. The earlier code returned
        # hashlib.sha3_384() here, which is a different algorithm and
        # produces different digests.
        H_SHA_384: sha384,
        H_SHA_512: sha512,
        H_MD5: md5,
        H_SHA_1: sha1,
    }
    constructor = constructors.get(given_hash.lower())
    if constructor is None:
        return None
    return constructor()

Expand Down Expand Up @@ -73,9 +74,10 @@ def __init__(self, config, multi, no_live_scan, batch_size, hash_algo, benchmark
self.multi = multi
self.hash = hash_algo
# Reports an error if an unsupported hash algorithm is used by the end-user.
if HashResolver.hash_classify(self.hash) is None:
print(f"Encountered an error while resolving the hash algorithm type: {self.hash}\nPlease use a supported hash.")
return
if self.hash != H_CRC_32 and self.hash != H_ADLER_32:
if HashResolver.hash_classify(self.hash) is None:
print(f"Encountered an error while resolving the hash algorithm type: {self.hash}\nPlease use a supported hash.")
return
self.benchmark = benchmark
self.scan_interval = scan_interval
self.hasher = None
Expand Down Expand Up @@ -120,29 +122,50 @@ def live_scan(self):

def check_file_multi(self, file, file_hashes, debug) -> bool:
self.hasher = HashResolver.hash_classify(self.hash)
use_crc32 = False
use_adler32 = False
if self.hash == H_CRC_32:
use_crc32 = True
if self.hash == H_ADLER_32:
use_adler32 = True
with open(file, 'rb') as cur_file:
buffer = cur_file.read(1024)
buffer = cur_file.read(int(self.config[C_MAIN_SETTINGS][P_FILE_BUFFER]))
try:
if self.hasher is not None:
self.hasher.update(buffer)
if not use_crc32 and not use_adler32:
self.hasher.update(buffer)
else:
return False
if use_crc32:
self.hasher = crc32(buffer, 0)
elif use_adler32:
self.hasher = adler32(buffer, 0)
else:
return False
except RuntimeError as e:
print(f"Encountered error while hashing:\n{e}")
return False

while len(buffer) > 0:
buffer = cur_file.read(1024)
buffer = cur_file.read(int(self.config[C_MAIN_SETTINGS][P_FILE_BUFFER]))
try:
if self.hasher is not None:
self.hasher.update(buffer)
if not use_crc32 and not use_adler32:
self.hasher.update(buffer)
else:
return False
if use_crc32:
self.hasher = crc32(buffer, self.hasher)
elif use_adler32:
self.hasher = adler32(buffer, 0)
else:
return False
except RuntimeError as e:
print(f"Encountered error while hashing:\n{e}")
return False

cur_hash = self.hasher.hexdigest()
if not use_crc32 and not use_adler32:
cur_hash = self.hasher.hexdigest()
else:
cur_hash = format(self.hasher & 0xFFFFFFF, '08x')
try:
if file_hashes[file.as_posix()] != cur_hash:
file_hashes[file.as_posix()] = cur_hash
Expand All @@ -159,29 +182,50 @@ def check_file_multi(self, file, file_hashes, debug) -> bool:

def check_file_single(self, file) -> bool:
self.hasher = HashResolver.hash_classify(self.hash)
use_crc32 = False
use_adler32 = False
if self.hash == H_CRC_32:
use_crc32 = True
if self.hash == H_ADLER_32:
use_adler32 = True
with open(file, 'rb') as cur_file:
buffer = cur_file.read(1024)
buffer = cur_file.read(int(self.config[C_MAIN_SETTINGS][P_FILE_BUFFER]))
try:
if self.hasher is not None:
self.hasher.update(buffer)
if not use_crc32 and not use_adler32:
self.hasher.update(buffer)
else:
return False
if use_crc32:
self.hasher = crc32(buffer, 0)
elif use_adler32:
self.hasher = adler32(buffer, 0)
else:
return False
except RuntimeError as e:
print(f"Encountered error while hashing:\n{e}")
return False

while len(buffer) > 0:
buffer = cur_file.read(1024)
buffer = cur_file.read(int(self.config[C_MAIN_SETTINGS][P_FILE_BUFFER]))
try:
if self.hasher is not None:
self.hasher.update(buffer)
if not use_crc32 and not use_adler32:
self.hasher.update(buffer)
else:
return False
if use_crc32:
self.hasher = crc32(buffer, self.hasher)
elif use_adler32:
self.hasher = adler32(buffer, self.hasher)
else:
return False
except RuntimeError as e:
print(f"Encountered error while hashing:\n{e}")
return False

cur_hash = self.hasher.hexdigest()
if not use_crc32 and not use_adler32:
cur_hash = self.hasher.hexdigest()
else:
cur_hash = format(self.hasher & 0xFFFFFFF, '08x')
try:
if self.hash_dict[file.as_posix()] != cur_hash:
self.hash_dict[file.as_posix()] = cur_hash
Expand Down
3 changes: 3 additions & 0 deletions FileSync/resources/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,13 @@
# Option names; each string matches a key in settings.ini.
P_SRC_DIR = 'SourceDirectory'
P_DEST_DIR = 'DestinationDirectories'
P_BATCH_SIZE = 'BatchProcessingGroupSize'
# Size passed to file.read() for each chunk while hashing (converted with int()).
P_FILE_BUFFER = 'FileReadBuffer'
# SUPPORTED HASHES
# hashlib-backed algorithms, resolved by HashResolver.hash_classify.
H_SHA_256 = 'sha256'
H_SHA_224 = 'sha224'
H_SHA_384 = 'sha384'
H_SHA_512 = 'sha512'
H_SHA_1 = 'sha1'
H_MD5 = 'md5'
# zlib checksums; these bypass hash_classify and are computed with
# zlib.crc32 / zlib.adler32 directly.
H_CRC_32 = 'crc32'
H_ADLER_32 = 'adler32'
4 changes: 3 additions & 1 deletion FileSync/settings.ini
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,6 @@ SourceDirectory = YourSourceDirectory
; Destination directory to copy files/folders to
DestinationDirectories = YourDestinationDirectory, YourDestinationDirectory2(optionally have multiple destination directories)
; Batch processing group size (used for multi-core processing)
BatchProcessingGroupSize = 100
BatchProcessingGroupSize = 100
; File read buffer size in bytes (chunk size read at a time when hashing files)
FileReadBuffer = 1024

0 comments on commit 99ee44c

Please sign in to comment.