Merge branch 'dev' of github.com:CDDLeiden/QSPRPred into dev

CDDLeiden · Jan 10, 2024 · bd7c032 · bd7c032
2 parents 5217a87 + 47bb906
commit bd7c032
Show file tree

Hide file tree

Showing 4 changed files with 147 additions and 107 deletions.
diff --git a/.github/workflows/dev_ci.yml b/.github/workflows/dev_ci.yml
@@ -28,35 +28,55 @@ jobs:
       with:
         python-version: ${{ matrix.python-version }}
 
-    - name: Install Clustal Omega and MAFFT
+    - name: Install Clustal Omega, MAFFT & add to PATH
       run: |
+        export HOME_DIR=$PWD
+        echo "HOME_DIR=$PWD" >> $GITHUB_ENV # used later in the workflow
         if [ "${{ runner.os }}" = "macOS" ]; then
-          wget http://www.clustal.org/omega/clustal-omega-1.2.3-macosx -O clustalo && chmod +x clustalo
-          echo "$PWD" >> $GITHUB_PATH
-          brew install mafft && unset MAFFT_BINARIES
+        wget http://www.clustal.org/omega/clustal-omega-1.2.3-macosx -O clustalo && chmod +x clustalo
+        brew install mafft && unset MAFFT_BINARIES
+        echo "$HOME_DIR" >> $GITHUB_PATH # make clustalo available in the next steps
         elif [ "${{ runner.os }}" = "Linux" ]; then
-          wget http://www.clustal.org/omega/clustalo-1.2.4-Ubuntu-x86_64 -O clustalo && chmod +x clustalo
-          wget https://mafft.cbrc.jp/alignment/software/mafft-7.520-linux.tgz -O mafft.tgz && tar -xzvf mafft.tgz && chmod +x mafft-linux64/mafftdir/bin/mafft
-          export PATH=$PATH:$PWD/mafft-linux64/mafftdir/bin/:$PWD
-          export MAFFT_BINARIES=$PWD/mafft-linux64/mafftdir/libexec/
+        wget http://www.clustal.org/omega/clustalo-1.2.4-Ubuntu-x86_64 -O clustalo && chmod +x clustalo
+        wget https://mafft.cbrc.jp/alignment/software/mafft-7.520-linux.tgz -O mafft.tgz && tar -xzvf mafft.tgz && chmod +x mafft-linux64/mafftdir/bin/mafft
+        echo "MAFFT_BINARIES=$PWD/mafft-linux64/mafftdir/libexec/" >> $GITHUB_ENV
+        echo "$HOME_DIR/mafft-linux64/mafftdir/bin/" >> $GITHUB_PATH
+        echo "$HOME_DIR" >> $GITHUB_PATH # make clustalo available in the next steps
+        elif [ "${{ runner.os }}" = "Windows" ]; then
+        choco install clustal-omega mafft
+        echo "$PROGRAMFILES\Clustal Omega" >> $GITHUB_PATH # bash step: use $GITHUB_PATH (::add-path:: is disabled) and $PROGRAMFILES, not PowerShell's $env:
+        echo "$PROGRAMFILES\MAFFT" >> $GITHUB_PATH # NOTE(review): install directory names kept from the original; confirm where choco places the binaries
         fi
-        echo "$PWD" >> $GITHUB_PATH
+
+    - name: Print Clustal Omega & MAFFT versions
+      run: |
         clustalo --version  # For debugging clustalo version
         mafft --version  # For debugging mafft version
 
     - name: Install dependencies
       run: |
-        python --version
+        python -c "print('Python version: ' + '$(python --version)')"
+        python -c "import platform; print('System info: ', platform.system(), platform.release())" # For debugging OS version
         python -m pip install ".[full]"  --no-cache-dir
         python -c "import qsprpred; print(qsprpred.__version__)" # For debugging package version
         python -m pip install pytest
         python -m pip install jupyterlab
         python -m pip freeze # For debugging environment
     
-    - name: Run tests
-      run: pytest qsprpred
+    - name: Run pytest
+      run: pytest -xv qsprpred --junitxml=$HOME_DIR/test_report.xml # -x: stop on first failure, -v: verbose
 
-    - name: Run notebooks
-      run: |
-        cd clitest && ./run.sh
-        cd ../tutorial && ./run_all.sh
+    - name: Publish Test Report
+      uses: mikepenz/action-junit-report@v4
+      if: success() || failure() # always run even if the previous step fails
+      with:
+        report_paths: 'test_report.xml'
+
+    - name: Test CLI
+      run: cd testing/test_cli && ./run.sh
+
+    - name: Test Tutorials
+      run: cd testing/test_tutorial && ./run.sh
+
+    - name: Test Consistency
+      run: cd testing/test_consistency && ./run.sh
diff --git a/.github/workflows/pre_merge_ci.yml b/.github/workflows/pre_merge_ci.yml
@@ -24,35 +24,53 @@ jobs:
         python-version: '3.10'
 
     - name: Cache Python dependencies
-      uses: actions/cache@v4
+      uses: actions/cache@v3
       with:
         path: ~/.cache/pip
         key: ${{ runner.os }}-pip-${{ hashFiles('**/pyproject.toml') }}
         restore-keys: |
           ${{ runner.os }}-pip-
 
-    - name: Install Clustal Omega and MAFFT
+    - name: Install Clustal Omega, MAFFT & add to PATH
       run: |
+        export HOME_DIR=$PWD
+        echo "HOME_DIR=$PWD" >> $GITHUB_ENV # used later in the workflow
         wget http://www.clustal.org/omega/clustalo-1.2.4-Ubuntu-x86_64 -O clustalo && chmod +x clustalo
         wget https://mafft.cbrc.jp/alignment/software/mafft-7.520-linux.tgz -O mafft.tgz && tar -xzvf mafft.tgz && chmod +x mafft-linux64/mafftdir/bin/mafft
-        export PATH=$PATH:$PWD/mafft-linux64/mafftdir/bin/:$PWD
-        export MAFFT_BINARIES=$PWD/mafft-linux64/mafftdir/libexec/
-        clustalo --version  # For debugging clustalo version
-        mafft --version  # For debugging mafft version
+        echo "MAFFT_BINARIES=$PWD/mafft-linux64/mafftdir/libexec/" >> $GITHUB_ENV
+        echo "$HOME_DIR/mafft-linux64/mafftdir/bin/" >> $GITHUB_PATH
+        echo "$HOME_DIR" >> $GITHUB_PATH # make clustalo available in the next steps
 
+    - name: Print Clustal Omega & MAFFT versions
+      run: |
+          clustalo --version  # For debugging clustalo version
+          mafft --version  # For debugging mafft version
+  
     - name: Install dependencies
       run: |
-        python --version
-        python -m pip install ".[full]" 
+        python -c "print('Python version: ' + '$(python --version)')"
+        python -c "import platform; print('System info: ', platform.system(), platform.release())" # For debugging OS version
+        python -m pip install ".[full]"
         python -c "import qsprpred; print(qsprpred.__version__)" # For debugging package version
         python -m pip install pytest
         python -m pip install jupyterlab
         python -m pip freeze # For debugging environment
     
-    - name: Run tests
-      run: pytest qsprpred
+    - name: Run pytest
+      run: pytest -xv qsprpred --junitxml=$HOME_DIR/test_report.xml # -x: stop on first failure, -v: verbose
 
-    - name: Run notebooks
-      run: |
-        cd clitest && ./run.sh
-        cd ../tutorial && ./run_all.sh
+    - name: Publish Test Report
+      uses: mikepenz/action-junit-report@v4
+      if: success() || failure() # always run even if the previous step fails
+      with:
+        report_paths: 'test_report.xml'
+
+    - name: Test CLI
+      run: cd testing/test_cli && ./run.sh
+
+    - name: Test Tutorials
+      run: cd testing/test_tutorial && ./run.sh
+
+    - name: Test Consistency
+      run: cd testing/test_consistency && ./run.sh
diff --git a/qsprpred/data/sources/papyrus/papyrus_class.py b/qsprpred/data/sources/papyrus/papyrus_class.py
@@ -21,6 +21,7 @@ class Papyrus(DataSource):
     Attributes:
         DEFAULT_DIR (str): default directory for Papyrus database and the extracted data
         dataDir (str): storage directory for Papyrus database and the extracted data
+        _papyrusDir (str): directory where the Papyrus database is located, os.path.join(dataDir, "papyrus")
         version (list): Papyrus database version
         descriptors (list, str, None): descriptors to download if not already present
         stereo (bool): use version with stereochemistry
@@ -60,6 +61,7 @@ def __init__(
                 use only plusplus version, only high quality data
         """
         self.dataDir = data_dir
+        self._papyrusDir = os.path.join(self.dataDir, "papyrus")
         self.version = version
         self.descriptors = descriptors
         self.stereo = stereo
@@ -73,8 +75,8 @@ def download(self):
         Only newly requested data is downloaded. Remove the files if you want to
         reload the data completely.
         """
-        os.makedirs(self.dataDir, exist_ok=True)
-        if not os.path.exists(os.path.join(self.dataDir, "papyrus")):
+        if not os.path.exists(self._papyrusDir):
+            os.makedirs(self.dataDir, exist_ok=True)
             logger.info("Downloading Papyrus database...")
             download_papyrus(
                 outdir=self.dataDir,
@@ -87,10 +89,8 @@ def download(self):
             )
         else:
             logger.info(
-                f"Papyrus database already"
-                f" downloaded. Using existing data. "
-                f"Delete the following folder to reload the data: "
-                f"{os.path.join(self.dataDir, 'papyrus')}"
+                "Papyrus database already downloaded. Using existing data. "
+                f"Delete the following folder to reload the data: {self._papyrusDir}"
             )
 
     def getData(
@@ -121,7 +121,7 @@ def getData(
         Returns:
             MoleculeTable: the filtered data set
         """
-        logger.debug(f"Getting data from Papyrus data source...")
+        logger.debug("Getting data from Papyrus data source...")
         assert acc_keys is not None, "Please provide a list of accession keys."
         name = name or "papyrus"
         self.download()
@@ -143,7 +143,7 @@ def getData(
             plusplus=self.plusplus,
             papyrus_dir=self.dataDir,
         )
-        logger.debug(f"Finished filtering Papyrus data set.")
+        logger.debug("Finished filtering Papyrus data set.")
         logger.debug(f"Creating MoleculeTable from '{path}'.")
         ret = MoleculeTable.fromTableFile(name, path, store_dir=output_dir, **kwargs)
         logger.debug(f"Finished creating MoleculeTable from '{path}'.")
@@ -175,7 +175,9 @@ def getProteinData(
         if os.path.exists(path) and use_existing:
             return pd.read_table(path)
         else:
-            protein_data = papyrus_scripts.read_protein_set(version=self.version)
+            protein_data = papyrus_scripts.read_protein_set(
+                source_path=self.dataDir, version=self.version
+            )
             protein_data["accession"] = protein_data["target_id"].apply(
                 lambda x: x.split("_")[0]
             )