codespell: add config and action to codespell the code to avoid known typos #72

Open · wants to merge 5 commits into base: master
4 changes: 4 additions & 0 deletions .codespellrc
@@ -0,0 +1,4 @@
[codespell]
skip = .git,*.pdf,*.svg
# tbe - traceback variable name
ignore-words-list = tbe
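
The same settings apply to local runs, since codespell looks for a [codespell] section in a .codespellrc file at the root of the checkout. A minimal sketch of a local check, assuming codespell is installed from PyPI:

    # run from the repository root; skip and ignore-words-list are read from .codespellrc
    pip install codespell
    codespell
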
22 changes: 22 additions & 0 deletions .github/workflows/codespell.yml
@@ -0,0 +1,22 @@
---
name: Codespell

on:
  push:
    branches: [master]
  pull_request:
    branches: [master]

permissions:
  contents: read

jobs:
  codespell:
    name: Check for spelling errors
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v3
      - name: Codespell
        uses: codespell-project/actions-codespell@v2
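
This job runs the same check on every push and pull request targeting master, with read-only contents permission, so a new typo surfaces as a failed status check. Flagged words can then be corrected locally before pushing again; a short sketch, assuming codespell 2.x (the --write-changes flag applies the suggested corrections in place):

    # list the findings first, then let codespell rewrite the affected files
    codespell
    codespell --write-changes
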
8 changes: 4 additions & 4 deletions NDATools/Download.py
@@ -156,7 +156,7 @@ def __init__(self, download_config, args):
self.default_download_batch_size = 50
self.metadata_file_path = os.path.join(self.package_download_directory, NDATools.NDA_TOOLS_PACKAGE_FILE_METADATA)

# exlcude arg list is the long-parameter name
# exclude arg list is the long-parameter name
def build_rerun_download_cmd(self, exclude_arg_list):
download_cmd = 'downloadcmd -dp {}'.format(self.package_id)

@@ -323,7 +323,7 @@ def download(file_id_to_cred_list):
# check if these exist, and if not, get and set:
download_record = self.download_from_s3link(package_file, cred,
failed_s3_links_file=failed_s3_links_file)
# dont add bytes if file-existed and didnt need to be downloaded
# dont add bytes if file-existed and didn't need to be downloaded
if download_record['download_complete_time']:
trailing_50_file_bytes.append(download_record['actual_file_size'])
success_files.add(package_file['package_file_id'])
@@ -512,7 +512,7 @@ def print_upload_part_info(bytes):

except Exception as e:
if not s3_link and not source_uri:
# we couldnt get credentials, which means the service has become un-responsive.
# we couldn't get credentials, which means the service has become un-responsive.
# Instruct the user to retry at another time
logger.info('')
logger.info(
@@ -669,7 +669,7 @@ def initialize_job_manifest_file(fp):
for the given target directory and download mode
2. Make a copy of the download-progress-report and save into verification folder. Name it 'download-verification-report.csv'
3. Consider everything currently in the download-verification-report.csv where expected file-size=actual file-size as being downloaded
a. there really shouldnt be any entries where the expected filesize doesnt match actual size , but run the code anyway
a. there really shouldn't be any entries where the expected filesize doesn't match actual size , but run the code anyway
4. Read file-names of download-verification-report.csv into set() in memory
*5. Get the complete file-listing for the download (using the provided arguments -d, -ds, -t and -dp)
6. Add anything that is not in the set in step 4 into the download-verification-report.csv
4 changes: 2 additions & 2 deletions NDATools/Submission.py
@@ -439,7 +439,7 @@ def run(self):
If the source file is from S3:
a) check settings.cfg for permanent user credentials (aws_access_key, aws_secret_key)
b) if permanent credentials are provided, use them to retrieve the source file,
c) if not provided use a FederationUser token from DataManager API to retreive the source file,
c) if not provided use a FederationUser token from DataManager API to retrieve the source file,
d) use credentials supplied by the submission API to upload to remote S3 location.

If the file was uploaded using multi-part, it will first complete the multi part uploads.
@@ -460,7 +460,7 @@
values, which specify where the object should be copied from (i.e., 100206 subject directory can be
located in s3://hcp-openaccess-temp, with a prefix of HCP_1200).

Creates source and destination clients for S3 tranfer. If supplied in settings.cfg uses permanent
Creates source and destination clients for S3 transfer. If supplied in settings.cfg uses permanent
credentials for accessing source buckets. If permanent credentials are not supplied in
settings.cfg, uses a tempoary FederationUser Token from DataManager API to access source bucket.

2 changes: 1 addition & 1 deletion NDATools/Utils.py
@@ -72,7 +72,7 @@ def print_and_exit(r):
message = r.text

if r.status_code == 401:
# provide default message if one doesnt already exist
# provide default message if one doesn't already exist
message = message or 'The NDA username or password is not recognized.'
else:
message = '\nAn unexpected error was encountered and the program could not continue. Error message from service was: \n%s' % message
2 changes: 1 addition & 1 deletion NDATools/Validation.py
@@ -123,7 +123,7 @@ def validate(self):
}

if self.pending_changes:
# remove the associated_files that have already been uplaoded
# remove the associated_files that have already been uploaded
structure_to_new_associated_files = {}
unrecognized_ds = set()
for uuid in self.uuid_dict:
10 changes: 5 additions & 5 deletions NDATools/clientscripts/downloadcmd.py
@@ -113,24 +113,24 @@ def parse_args():
For example, if the user runs:
downloadcmd -dp 12345 --verify
The download-verification-report.csv file will contain a record for each file in the package 12345. Since no -d/--directory argument is provided, the program
will check for the existance of the files in the default download location.
will check for the existence of the files in the default download location.

If the user runs:
downloadcmd -dp 12345 -d /home/myuser/customdirectory --verify
The download-verification-report.csv file will contain a record for each file in the package 12345 and will check for the existance of files in the /foo/bar
The download-verification-report.csv file will contain a record for each file in the package 12345 and will check for the existence of files in the /foo/bar

If the user runs:
downloadcmd -dp 12345 -d /home/myuser/customdirectory -t file-with-s3-links.csv --verify
The download-verification-report.csv file will contain a record for each file listed in the file-with-s3-links.csv and will check for the existance of files in /foo/bar
The download-verification-report.csv file will contain a record for each file listed in the file-with-s3-links.csv and will check for the existence of files in /foo/bar

If the user runs:
downloadcmd -dp 12345 -d /home/myuser/customdirectory -ds image03 --verify
The download-verification-report.csv file will contain a record for each file in the package's image03 data-structure and will check for the existance of files in /foo/bar
The download-verification-report.csv file will contain a record for each file in the package's image03 data-structure and will check for the existence of files in /foo/bar

If the user runs:
downloadcmd -dp 12345 -d /home/myuser/customdirectory -ds image03 --file-regex --verify
The download-verification-report.csv file will contain a record for each file in the package's image03 data-structure which also matches the file-regex and will check
for the existance of files in /foo/bar
for the existence of files in /foo/bar

NOTE - at the moment, this option cannot be used to verify downloads to s3 locations (see -s3 option below). That will be implemented in the near
future.''')
4 changes: 2 additions & 2 deletions NDATools/clientscripts/vtcmd.py
@@ -73,7 +73,7 @@ def parse_args():
help='Flag whether to validate using a custom scope. Must enter a custom scope')

parser.add_argument('-rs', '--replace-submission', metavar='<arg>', type=str, action='store', default=0,
help='Use this arugment to replace a submission that has QA errors or that NDA staff has authorized manually to replace.')
help='Use this argument to replace a submission that has QA errors or that NDA staff has authorized manually to replace.')

parser.add_argument('-r', '--resume', action='store_true',
help='Restart an in-progress submission, resuming from the last successful part in a multi-part'
@@ -352,7 +352,7 @@ def retrieve_replacement_submission_params(config, submission_id):
exit_client(signal=signal.SIGTERM,
message='There was a General Error communicating with the NDA server. Please try again later')

# get list of associated-files that have already been uplaoded for pending changes
# get list of associated-files that have already been uploaded for pending changes
pending_changes = []
original_submission_id = submission_id
original_uuids = {uuid for uuid in response['validation_uuids']}
2 changes: 1 addition & 1 deletion tests/test_download.py
@@ -374,7 +374,7 @@ def test(config,args, batch_size, completed_files, get_package_files_by_page_arg

assert wrap_download_batch_file_ids.call_count == 1 # its a generator method so it will always be called once, although it may yield many values
assert list(map(lambda x: x[0], mock_get_package_files_by_page_method.call_args_list)) == get_package_files_by_page_args_list
assert mock_download_method.call_count == len(all_file_ids) - len(completed_file_ids) # call download method for each file that hasnt been downloaded
assert mock_download_method.call_count == len(all_file_ids) - len(completed_file_ids) # call download method for each file that hasn't been downloaded
assert set(map(lambda x: x[0][0], mock_download_method.call_args_list)) == {f for f in all_file_ids if f not in completed_file_ids}
# we started batching calls to this endpoint, so it would be the
if d.download_mode=='package':