diff --git a/CHANGES b/CHANGES index 81f5506..7c00e93 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,12 @@ +portal_client 1.4.4 + + * Fixed authentication handling when processing manifests with + multiple FASP resources to download. Was interactively querying + the password for each download instead of just once for all. + Reported by @pbieberstein. + + - Victor Thu, 21 Mar 2019 14:00:00 -0400 + portal_client 1.4.3 * Logging in convert_to_manifest.py. diff --git a/INSTALL.md b/INSTALL.md index be2b730..74c5577 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -1,3 +1,4 @@ + # Installation There are several ways to install portal_client: @@ -18,7 +19,6 @@ python libraries: - [google-cloud-storage](https://pypi.org/project/google-cloud-storage/) - ## Using easy_install Download or clone the portal_client code from github. @@ -39,16 +39,17 @@ or with sudo: $ sudo easy_install . -If you are performing a non-root installation, you can still use easy_install. First, -pick an installation directory. In this example we'll use /tmp. Then add the installation -directory to your PYTHONPATH environment variable if it isn't already there: +If you are performing a non-root installation, you can still use +easy_install. First, pick an installation directory. In this example we'll +use /tmp. Then add the installation directory to your PYTHONPATH environment +variable if it isn't already there:
   $ export PYTHONPATH=$PYTHONPATH:/tmp
   
-Then invoke easy_install with the --install-dir option. Note the final '.', which tells -easy_install where to look for the setup.py script. +Then invoke easy_install with the --install-dir option. Note the final '.', +which tells easy_install where to look for the setup.py script.
   $ easy_install --install-dir /tmp .
@@ -56,9 +57,9 @@ easy_install where to look for the setup.py script.
   
 ## Using pip
 
-Another tool that is commonly used to install Python modules is pip. To use pip to 
-install portal_client, download the source code as shown above, then invoke pip as root or using
-sudo:
+Another tool that is commonly used to install Python modules is pip. To use
+pip to install portal_client, download the source code as shown above, then
+invoke pip as root or using sudo:
 
   
   $ cd portal_client
@@ -143,5 +144,5 @@ the HTTP endpoint:
 ```bash
 docker run -ti --rm -v "$PWD:/tmp" -w /tmp portal_client portal_client \
      --endpoint-priority=HTTP \
-    --url=https://raw.githubusercontent.com/IGS/portal_client/master/example_manifests/example_manifest.tsv 
+    --url=https://raw.githubusercontent.com/IGS/portal_client/master/example_manifests/example_manifest.tsv
 ```
diff --git a/example_manifests/aspera_manifest.tsv b/example_manifests/aspera_manifest.tsv
index 5210a1c..5d08343 100644
--- a/example_manifests/aspera_manifest.tsv
+++ b/example_manifests/aspera_manifest.tsv
@@ -1,2 +1,4 @@
 id	md5	size	urls
 07c6c17c2b6f11e98849ef9e212bebbd	ed8e04692fc34da01ee9c430612f6b58	3363519	fasp://aspera.hmpdacc.org/ptb/genome/microbiome/16s/hm16str/EP843963_K10_MCKD.trimmedseqset.fasta.gz
+1f5ef5664be511e98a1697c032097bbd	bd3f7931c3b642c89b0068b678cae7c8	378182	fasp://aspera.hmpdacc.org/ptb/genome/microbiome/16s/hm16str/EP368327_K90_BRCD.trimmedseqset.fasta.gz
+2fd573b64be511e9848fcf7d5558bff2	aa065a1baf32be2bd3472d2e0dcd1190	1326800	fasp://aspera.hmpdacc.org/ptb/genome/microbiome/16s/hm16str/EP129845_K60_MV1D.trimmedseqset.fasta.gz
diff --git a/lib/aspera.py b/lib/aspera.py
index b2c0c1f..511db8a 100644
--- a/lib/aspera.py
+++ b/lib/aspera.py
@@ -27,8 +27,10 @@
 ASCP_COMMAND = "ascp"
 ASCP_MIN_VERSION = '3.5'
 
-
 def is_ascp_installed():
+    """
+    Determine if the Aspera 'ascp' utility is installed and available for use.
+    """
     logger.debug("In is_ascp_installed.")
     ascp_path = shutil.which('ascp')
 
@@ -36,10 +38,9 @@ def is_ascp_installed():
     if ascp_path is not None:
         installed = True
 
-    logger.debug("Installed? {}".format(installed))
+    logger.debug("Installed? %s", installed)
     return installed
 
-# compare version numbers
 def version_cmp(v1, v2):
     """
     Compare version/release numbers.
@@ -50,13 +51,14 @@ def normalize(v):
         return [int(x) for x in re.sub(r'(\.0+)*$', '', v).split(".")]
 
     if sys.version_info[0] == 2:
-        return cmp(normalize(v1), normalize(v2))
+        # pylint: disable=E0602
+        result = cmp(normalize(v1), normalize(v2))
     else:
         nv1 = normalize(v1)
         nv2 = normalize(v2)
         result = (nv1 > nv2) - (nv1 < nv2)
 
-        return result
+    return result
 
 def get_ascp_version():
     """
@@ -109,9 +111,11 @@ def get_ascp_env(password):
     if 'ASPERA_SCP_PASS' in environment:
         logger.info("Honoring previously set ASPERA_SCP_PASS environment variable.")
     else:
-        if password != None:
+        if password is not None:
             logger.info("Setting ASPERA_SCP_PASS environment variable.")
             environment['ASPERA_SCP_PASS'] = password
+        else:
+            logger.warning("Password isn't set!")
 
     return environment
 
@@ -152,10 +156,12 @@ def run_ascp(ascp_cmd, password, keyfile=None):
             if re.match(r"^.*failed to authenticate", s_err):
                 logger.error("Aspera authentication failure.")
             else:
-                if s_err != None:
+                if s_err is not None:
                     logger.error("Unexpected STDERR from ascp: %s", s_err)
-                if s_out != None:
+
+                if s_out is not None:
                     logger.error("Unexpected STDOUT from ascp: %s", s_out)
+
     except subprocess.CalledProcessError as cpe:
         logger.error("Encountered an error when running ascp: %s", cpe)
 
@@ -187,9 +193,9 @@ def upload_file(server, username, password, local_file, remote_path,
     logger.debug("In upload_file.")
     check_ascp_version()
 
-    # check that local file exists
+    # Check that local file exists
     if not os.path.isfile(local_file):
-        logger.warn("local file " + local_file + " does not exist")
+        logger.warning("Local file %s does not exist.", local_file)
         return False
 
     remote_clause = username + "@" + server + ":" + remote_path
diff --git a/lib/manifest_processor.py b/lib/manifest_processor.py
index f768c81..30d1c59 100644
--- a/lib/manifest_processor.py
+++ b/lib/manifest_processor.py
@@ -101,7 +101,7 @@ def _get_gcp_obj(self, url, file_name):
         return result
 
     def _get_ftp_obj(self, url, file_name):
-        self.logger.debug("In _get_ftp_obj: {}".format(url))
+        self.logger.debug("In _get_ftp_obj: %s", url)
 
         if not url.startswith('ftp://'):
             self.logger.error("Detected an invalid FTP url.")
@@ -120,7 +120,7 @@ def _get_ftp_obj(self, url, file_name):
         return result
 
     def _get_http_obj(self, url, file_name):
-        self.logger.debug("In _get_http_obj: {}".format(url))
+        self.logger.debug("In _get_http_obj: %s", url)
 
         if not (url.startswith('http://') or url.startswith('https://')):
             self.logger.error("Detected an invalid HTTP/HTTPS url.")
@@ -134,12 +134,12 @@ def _get_http_obj(self, url, file_name):
             self.logger.error(e)
             result = "error"
 
-        self.logger.debug("Returning {}".format(result))
+        self.logger.debug("Returning %s", result)
 
         return result
 
     def _get_s3_obj(self, url, file_name):
-        self.logger.debug("In _get_s3_obj: {}".format(url))
+        self.logger.debug("In _get_s3_obj: %s", url)
 
         if not url.startswith('s3://'):
             self.logger.error("Detected an invalid Amazon S3 url.")
@@ -153,7 +153,7 @@ def _get_s3_obj(self, url, file_name):
             self.logger.error(e)
             result = "error"
 
-        self.logger.debug("Returning {}".format(result))
+        self.logger.debug("Returning %s", result)
 
         return result
 
@@ -201,7 +201,7 @@ def download_manifest(self, manifest, destination, priorities):
 
             # Only need to download if the file is not present
             if os.path.exists(file_name):
-                self.logger.info("File {} already exists. Skipping.".format(file_name))
+                self.logger.info("File %s already exists. Skipping.", file_name)
                 failed_files.append(0)
             else:
                 self.logger.debug("File not present. Proceeding.")
@@ -235,7 +235,7 @@ def download_manifest(self, manifest, destination, priorities):
                 # If all attempts resulted in error, move on to next file
                 if res == "error":
                     print("Skipping file ID {0} as none of the URLs {1} succeeded."
-                        .format(mfile['id'], endpoints))
+                          .format(mfile['id'], endpoints))
                     failed_files.append(2)
                     continue
 
@@ -243,7 +243,7 @@ def download_manifest(self, manifest, destination, priorities):
                     # Now that the download is complete, verify the checksum,
                     # and then establish the final file
                     if self._checksum_matches(tmp_file_name, mfile['md5']):
-                        self.logger.debug("Renaming {} to {}".format(tmp_file_name, file_name))
+                        self.logger.debug("Renaming %s to %s", tmp_file_name, file_name)
                         shutil.move(tmp_file_name, file_name)
                         failed_files.append(0)
                     else:
@@ -253,8 +253,9 @@ def download_manifest(self, manifest, destination, priorities):
                         print(msg.format(mfile['id']))
                         failed_files.append(3)
                 else:
-                    self.logger.debug("Skipping checksumming. " + \
-                                      "Renaming {} to {}".format(tmp_file_name, file_name))
+                    self.logger.debug(
+                        "Skipping checksumming. Renaming %s to %s", tmp_file_name, file_name
+                    )
                     shutil.move(tmp_file_name, file_name)
                     failed_files.append(0)
 
diff --git a/lib/portal_client.py b/lib/portal_client.py
index 7fd7ee9..b74ada0 100644
--- a/lib/portal_client.py
+++ b/lib/portal_client.py
@@ -1,10 +1,9 @@
 #!/usr/bin/env python3
 
-# The portal client downloads data from the portal instance when provided with
-# a manifest file (locally stored or at a URL/FTP endpoint)
+# The portal client downloads data from a portal instance when provided with
+# a manifest file (locally stored or at an HTTP endpoint)
 
 import argparse
-import getpass
 import logging
 import os
 import errno
@@ -17,38 +16,69 @@
 
 logger = logging.getLogger()
 
+def obtain_password():
+    """
+    Interactively obtain the user's password (securely).
+    """
+    logger.debug("In obtain_password.")
+    import getpass
+
+    print("Enter your password: (masked)")
+    password = getpass.getpass('')
+
+    return password
 
 def set_logging():
-    """ Setup logging. """
+    """
+    Setup logging.
+    """
     root = logging.getLogger()
     root.setLevel(logging.DEBUG)
     ch = logging.StreamHandler(sys.stdout)
     ch.setLevel(logging.DEBUG)
-    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    formatter = logging.Formatter(
+        '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+    )
     ch.setFormatter(formatter)
     root.addHandler(ch)
 
-def version():
+def get_version():
+    """
+    Determine the version of the installed portal_client.
+    """
     import pkg_resources
-    version = pkg_resources.get_distribution('portal_client').version
+
+    version = None
+
+    try:
+        version = pkg_resources.get_distribution('portal_client').version
+    except Exception:
+        logger.warning("Unable to determine version.")
+        version = "?"
+
     return version
 
 def parse_cli():
+    """
+    Establishes the CLI interface by defining the parameter names and
+    what types of data they accept. Creates the command line parser
+    and parses the command line arguments.
+    """
     parser = argparse.ArgumentParser(
-        description='Client to download files given a manifest file ' + \
+        description='Client to download data described by a manifest file ' + \
                     'generated from a portal instance.'
     )
 
     parser.add_argument(
         '--version',
         action='version',
-        version='%(prog)s ' + version()
+        version='%(prog)s ' + get_version()
     )
 
     parser.add_argument(
         '-m', '--manifest',
         type=str,
-        help='Location of a locally stored manifest file from.'
+        help='Location of a locally stored manifest file.'
     )
 
     parser.add_argument(
@@ -137,9 +167,17 @@ def parse_cli():
 
     args = parser.parse_args()
 
+    # This is later populated if the user specifies the --user argument.
+    args.password = None
+
     return args
 
 def validate_cli(args, endpoints):
+    """
+    Determine if the user has invoked the tool properly based on what
+    endpoints, if any, were specified. Different endpoints have different
+    requirements (for authentication), so invocations can vary.
+    """
     logger.debug("In validate_cli.")
     cli_error = False
 
@@ -160,11 +198,31 @@ def validate_cli(args, endpoints):
                          "--google-project-id when retrieving data from Google.\n")
         cli_error = True
 
+    if args.user is not None:
+        password = obtain_password()
+        args.password = password
+
     if cli_error:
         logger.error("Aborting execution.")
         sys.exit(1)
 
+def retry_results_msg(file_count, failure_1, failure_2, failure_3):
+    """
+    Outputs the results of those files that failed to download.
+    """
+
+    msg = "Not all files (total of {0}) were downloaded successfully. Number of failures:\n" \
+        "{1} -- no valid URL in the manifest file\n" \
+        "{2} -- URL is present in manifest, but not accessible at the location specified\n" \
+        "{3} -- MD5 checksum failed for file (file is corrupted or the wrong MD5 is associated)"
+
+    print()
+    print(msg.format(file_count, failure_1, failure_2, failure_3))
+
 def main():
+    """
+    The entrypoint into the portal_client code.
+    """
     args = parse_cli()
 
     if args.debug:
@@ -179,7 +237,7 @@ def main():
         for endpoint in endpoints:
             if endpoint not in valid_endpoints:
                 sys.stderr.write("Error: Invalid protocol. Please check " + \
-                    "the endpoint-priority option for valid entries.\n");
+                    "the endpoint-priority option for valid entries.\n")
                 sys.exit(1)
     else:
         endpoints = default_endpoint_priority
@@ -195,7 +253,7 @@ def main():
 
     username = args.user
     destination = args.destination
-    password = None
+    password = args.password
 
     # GCP Information
     client_secrets = args.client_secrets
@@ -207,7 +265,8 @@ def main():
 
     logger.debug("Creating ManifestProcessor.")
     mp = ManifestProcessor(username, password,
-            google_client_secrets=client_secrets, google_project_id=project_id)
+                           google_client_secrets=client_secrets,
+                           google_project_id=project_id)
 
     # Turn off MD5 checksumming if specified by the user
     if args.disable_validation:
@@ -253,16 +312,5 @@ def main():
                 if result.count(1) == len(result):
                     keep_trying = False
 
-# Outputs the results of those files that failed to download
-def retry_results_msg(file_count, failure_1, failure_2, failure_3):
-
-    msg = "Not all files (total of {0}) were downloaded successfully. Number of failures:\n" \
-        "{1} -- no valid URL in the manifest file\n" \
-        "{2} -- URL is present in manifest, but not accessible at the location specified\n" \
-        "{3} -- MD5 checksum failed for file (file is corrupted or the wrong MD5 is associated)"
-
-    print()
-    print(msg.format(file_count, failure_1, failure_2, failure_3))
-
 if __name__ == '__main__':
     main()
diff --git a/setup.py b/setup.py
index 4ee272e..34ebff3 100644
--- a/setup.py
+++ b/setup.py
@@ -1,16 +1,20 @@
+""" Setup for the portal_client module. """
+
 import os
 
 from setuptools import setup
 
-# Utility function to read files. Used for the long_description.
 def read(fname):
+    """
+    Utility function to read files. Used for the long_description.
+    """
     return open(os.path.join(os.path.dirname(__file__), fname)).read()
 
 setup(
     name='portal-client',
     description='Download client tool for IGS Portal servers.',
     long_description=read('DESC'),
-    version='1.4.3',
+    version='1.4.4',
     py_modules=['portal_client'],
     author='Victor F',
     author_email='victor73@github.com',