Skip to content

Commit

Permalink
Merge branch 'dev' of github.com:CDDLeiden/QSPRPred into dev
Browse files Browse the repository at this point in the history
  • Loading branch information
martin-sicho committed Jan 10, 2024
2 parents 5217a87 + 47bb906 commit bd7c032
Show file tree
Hide file tree
Showing 4 changed files with 147 additions and 107 deletions.
52 changes: 36 additions & 16 deletions .github/workflows/dev_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,35 +28,55 @@ jobs:
with:
python-version: ${{ matrix.python-version }}

- name: Install Clustal Omega and MAFFT
- name: Install Clustal Omega, MAFFT & add to PATH
run: |
export HOME_DIR=$PWD
echo "HOME_DIR=$PWD" >> $GITHUB_ENV # used later in the workflow
if [ "${{ runner.os }}" = "macOS" ]; then
wget http://www.clustal.org/omega/clustal-omega-1.2.3-macosx -O clustalo && chmod +x clustalo
echo "$PWD" >> $GITHUB_PATH
brew install mafft && unset MAFFT_BINARIES
wget http://www.clustal.org/omega/clustal-omega-1.2.3-macosx -O clustalo && chmod +x clustalo
brew install mafft && unset MAFFT_BINARIES
echo "$HOME_DIR" >> $GITHUB_PATH # make clustalo available in the next steps
elif [ "${{ runner.os }}" = "Linux" ]; then
wget http://www.clustal.org/omega/clustalo-1.2.4-Ubuntu-x86_64 -O clustalo && chmod +x clustalo
wget https://mafft.cbrc.jp/alignment/software/mafft-7.520-linux.tgz -O mafft.tgz && tar -xzvf mafft.tgz && chmod +x mafft-linux64/mafftdir/bin/mafft
export PATH=$PATH:$PWD/mafft-linux64/mafftdir/bin/:$PWD
export MAFFT_BINARIES=$PWD/mafft-linux64/mafftdir/libexec/
wget http://www.clustal.org/omega/clustalo-1.2.4-Ubuntu-x86_64 -O clustalo && chmod +x clustalo
wget https://mafft.cbrc.jp/alignment/software/mafft-7.520-linux.tgz -O mafft.tgz && tar -xzvf mafft.tgz && chmod +x mafft-linux64/mafftdir/bin/mafft
echo "MAFFT_BINARIES=$PWD/mafft-linux64/mafftdir/libexec/" >> $GITHUB_ENV
echo "$HOME_DIR/mafft-linux64/mafftdir/bin/" >> $GITHUB_PATH
echo "$HOME_DIR" >> $GITHUB_PATH # make clustalo available in the next steps
elif [ "${{ runner.os }}" = "Windows" ]; then
choco install clustal-omega mafft
echo "::add-path::$env:ProgramFiles\Clustal Omega"
echo "::add-path::$env:ProgramFiles\MAFFT"
fi
echo "$PWD" >> $GITHUB_PATH
- name: Print Clustal Omega & MAFFT versions
run: |
clustalo --version # For debugging clustalo version
mafft --version # For debugging mafft version
- name: Install dependencies
run: |
python --version
python -c "print('Python version: ' + '$(python --version)')"
python -c "import platform; print('System info: ', platform.system(), platform.release())" # For debugging OS version
python -m pip install ".[full]" --no-cache-dir
python -c "import qsprpred; print(qsprpred.__version__)" # For debugging package version
python -m pip install pytest
python -m pip install jupyterlab
python -m pip freeze # For debugging environment
- name: Run tests
run: pytest qsprpred
- name: Run pytest
run: pytest -xv qsprpred --junitxml=$HOME_DIR/test_report.xml # -x: stop on first failure, -v: verbose

- name: Run notebooks
run: |
cd clitest && ./run.sh
cd ../tutorial && ./run_all.sh
- name: Publish Test Report
uses: mikepenz/action-junit-report@v4
if: success() || failure() # always run even if the previous step fails
with:
report_paths: 'test_report.xml'

- name: Test CLI
run: cd testing/test_cli && ./run.sh

- name: Test Tutorials
run: cd testing/test_tutorial && ./run.sh

- name: Test Consistency
run: cd testing/test_consistency && ./run.sh
46 changes: 32 additions & 14 deletions .github/workflows/pre_merge_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,35 +24,53 @@ jobs:
python-version: '3.10'

- name: Cache Python dependencies
uses: actions/cache@v4
uses: actions/cache@v3
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pip-${{ hashFiles('**/pyproject.toml') }}
restore-keys: |
${{ runner.os }}-pip-
- name: Install Clustal Omega and MAFFT
- name: Install Clustal Omega, MAFFT & add to PATH
run: |
export HOME_DIR=$PWD
echo "HOME_DIR=$PWD" >> $GITHUB_ENV # used later in the workflow
wget http://www.clustal.org/omega/clustalo-1.2.4-Ubuntu-x86_64 -O clustalo && chmod +x clustalo
wget https://mafft.cbrc.jp/alignment/software/mafft-7.520-linux.tgz -O mafft.tgz && tar -xzvf mafft.tgz && chmod +x mafft-linux64/mafftdir/bin/mafft
export PATH=$PATH:$PWD/mafft-linux64/mafftdir/bin/:$PWD
export MAFFT_BINARIES=$PWD/mafft-linux64/mafftdir/libexec/
clustalo --version # For debugging clustalo version
mafft --version # For debugging mafft version
echo "MAFFT_BINARIES=$PWD/mafft-linux64/mafftdir/libexec/" >> $GITHUB_ENV
echo "$HOME_DIR/mafft-linux64/mafftdir/bin/" >> $GITHUB_PATH
echo "$HOME_DIR" >> $GITHUB_PATH # make clustalo available in the next steps
- name: Print Clustal Omega & MAFFT versions
run: |
clustalo --version # For debugging clustalo version
mafft --version # For debugging mafft version
- name: Install dependencies
run: |
python --version
python -m pip install ".[full]"
python -c "print('Python version: ' + '$(python --version)')"
python -c "import platform; print('System info: ', platform.system(), platform.release())" # For debugging OS version
python -c "import platform; print(platform.system(), platform.release())" # For debugging OS version
python -m pip install ".[full]"
python -c "import qsprpred; print(qsprpred.__version__)" # For debugging package version
python -m pip install pytest
python -m pip install jupyterlab
python -m pip freeze # For debugging environment
- name: Run tests
run: pytest qsprpred
- name: Run pytest
run: pytest -xv qsprpred --junitxml=$HOME_DIR/test_report.xml # -x: stop on first failure, -v: verbose

- name: Run notebooks
run: |
cd clitest && ./run.sh
cd ../tutorial && ./run_all.sh
- name: Publish Test Report
uses: mikepenz/action-junit-report@v4
if: success() || failure() # always run even if the previous step fails
with:
report_paths: 'test_report.xml'

- name: Test CLI
run: cd testing/test_cli && ./run.sh

- name: Test Tutorials
run: cd testing/test_tutorial && ./run.sh

- name: Test Consistency
run: cd testing/test_consistency && ./run.sh
20 changes: 11 additions & 9 deletions qsprpred/data/sources/papyrus/papyrus_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ class Papyrus(DataSource):
Attributes:
DEFAULT_DIR (str): default directory for Papyrus database and the extracted data
dataDir (str): storage directory for Papyrus database and the extracted data
_papyrusDir (str): directory where the Papyrus database is located, os.path.join(dataDir, "papyrus")
version (list): Papyrus database version
descriptors (list, str, None): descriptors to download if not already present
stereo (bool): use version with stereochemistry
Expand Down Expand Up @@ -60,6 +61,7 @@ def __init__(
use only plusplus version, only high quality data
"""
self.dataDir = data_dir
self._papyrusDir = os.path.join(self.dataDir, "papyrus")
self.version = version
self.descriptors = descriptors
self.stereo = stereo
Expand All @@ -73,8 +75,8 @@ def download(self):
Only newly requested data is downloaded. Remove the files if you want to
reload the data completely.
"""
os.makedirs(self.dataDir, exist_ok=True)
if not os.path.exists(os.path.join(self.dataDir, "papyrus")):
if not os.path.exists(self._papyrusDir):
os.makedirs(self.dataDir, exist_ok=True)
logger.info("Downloading Papyrus database...")
download_papyrus(
outdir=self.dataDir,
Expand All @@ -87,10 +89,8 @@ def download(self):
)
else:
logger.info(
f"Papyrus database already"
f" downloaded. Using existing data. "
f"Delete the following folder to reload the data: "
f"{os.path.join(self.dataDir, 'papyrus')}"
"Papyrus database already downloaded. Using existing data. "
f"Delete the following folder to reload the data: {self._papyrusDir}"
)

def getData(
Expand Down Expand Up @@ -121,7 +121,7 @@ def getData(
Returns:
MoleculeTable: the filtered data set
"""
logger.debug(f"Getting data from Papyrus data source...")
logger.debug("Getting data from Papyrus data source...")
assert acc_keys is not None, "Please provide a list of accession keys."
name = name or "papyrus"
self.download()
Expand All @@ -143,7 +143,7 @@ def getData(
plusplus=self.plusplus,
papyrus_dir=self.dataDir,
)
logger.debug(f"Finished filtering Papyrus data set.")
logger.debug("Finished filtering Papyrus data set.")
logger.debug(f"Creating MoleculeTable from '{path}'.")
ret = MoleculeTable.fromTableFile(name, path, store_dir=output_dir, **kwargs)
logger.debug(f"Finished creating MoleculeTable from '{path}'.")
Expand Down Expand Up @@ -175,7 +175,9 @@ def getProteinData(
if os.path.exists(path) and use_existing:
return pd.read_table(path)
else:
protein_data = papyrus_scripts.read_protein_set(version=self.version)
protein_data = papyrus_scripts.read_protein_set(
source_path=self.dataDir, version=self.version
)
protein_data["accession"] = protein_data["target_id"].apply(
lambda x: x.split("_")[0]
)
Expand Down
Loading

0 comments on commit bd7c032

Please sign in to comment.