diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000..147c6da --- /dev/null +++ b/.coveragerc @@ -0,0 +1,10 @@ +[run] +source = + ChemSpaceAL/InitializeWorkspace.py + +omit = + ChemSpaceAL/Model.py + ChemSpaceAL/Dataset.py + ChemSpaceAL/Docking.py + ChemSpaceAL/Generation.py + ChemSpaceAL/Training.py diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 88e67d5..d8342b9 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -2,7 +2,7 @@ name: Python application test with pytest on: push: - branches: [ main ] + branches: [ main, sandbox ] jobs: build: @@ -10,29 +10,23 @@ jobs: runs-on: ubuntu-latest steps: - - name: Checkout code - uses: actions/checkout@v2 + - name: Checkout + uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v2 + - name: Set up Python 3.10 + uses: actions/setup-python@v4 with: - python-version: '3.x' + python-version: '3.10' - name: Install dependencies run: | python -m pip install --upgrade pip - pip install pytest + pip install pandas numpy torch rdkit + pip install pytest pytest-cov - - name: Install package - run: | - python setup.py install - - - name: Run pytest - run: | - pytest --cov tests/ - - - name: Upload coverage reports to Codecov - uses: codecov/codecov-action@v3 + - name: Run tests and collect coverage + run: pytest --cov=ChemSpaceAL + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v4-beta env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} - diff --git a/ChemSpaceAL/ALConstruction.py b/ChemSpaceAL/ALConstruction.py index 0bcc3cf..7b6a1da 100644 --- a/ChemSpaceAL/ALConstruction.py +++ b/ChemSpaceAL/ALConstruction.py @@ -1,7 +1,6 @@ import pandas as pd import numpy as np import pickle -import plotly.graph_objects as go from ChemSpaceAL.Configuration import Config from typing import Union, Dict, List, Callable, Optional, cast @@ -67,7 +66,6 @@ def _preprocess_scores_linearly( normalized = {k: v / total for k, v in 
negated.items()} return normalized - def _preprocess_scores_softmax( scores: Dict[int, Number], do_negation: bool = False, @@ -90,7 +88,6 @@ def _preprocess_scores_softmax( softmax = {k: v / total for k, v in exponentiate.items()} return softmax - def balance_cluster_to_n( cluster_to_n: Dict[int, int], cluster_to_len: Dict[int, int] ) -> Dict[int, int]: diff --git a/ChemSpaceAL/requirements.txt b/ChemSpaceAL/requirements.txt index 9700d27..be8fa07 100644 --- a/ChemSpaceAL/requirements.txt +++ b/ChemSpaceAL/requirements.txt @@ -1,9 +1,9 @@ numpy==1.24.3 pandas==1.5.3 prolif==2.0.1 +scikit_learn==1.3.2 PyYAML rdkit==2023.3.1 -scikit_learn==1.3.2 torch tqdm==4.64.1 -wandb==0.15.4 +wandb==0.15.4 \ No newline at end of file diff --git a/ChemSpaceAL/tests/__init__.py b/ChemSpaceAL/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ChemSpaceAL/tests/test_al_construction.py b/ChemSpaceAL/tests/test_al_construction.py new file mode 100644 index 0000000..a872a6f --- /dev/null +++ b/ChemSpaceAL/tests/test_al_construction.py @@ -0,0 +1,75 @@ +from ..ALConstruction import _preprocess_scores_linearly, _preprocess_scores_softmax +import pytest + + +@pytest.mark.parametrize( + "scores, do_negation, expected", + [ + ( + {1: 2, 2: 2, 3: 2}, + False, + {1: 1 / 3, 2: 1 / 3, 3: 1 / 3}, + ), + ( + {1: 1, 2: 0, 3: 0}, + False, + {1: 1, 2: 0, 3: 0}, + ), + ({}, False, {}), + ( + {1: 0, 2: -2, 3: -2}, + True, + {1: 0, 2: 0.5, 3: 0.5}, + ), + ], +) +def test_preprocess_scores_linearly(scores, do_negation, expected): + result = _preprocess_scores_linearly(scores, do_negation) + for k, v in expected.items(): + assert result[k] == pytest.approx(v) + + +@pytest.mark.parametrize( + "scores, do_negation, divide, divide_factor, expected", + [ + ( + {1: 2, 2: 2, 3: 2}, + False, + False, + None, + {1: 1 / 3, 2: 1 / 3, 3: 1 / 3}, + ), + ( + {1: 1, 2: 0, 3: 0}, + False, + False, + None, + {1: 0.5761168847658291, 2: 0.21194155761708544, 3: 0.21194155761708544}, + ), + ( + {1: 1, 
2: 0, 3: 0}, + False, + True, + 0.5, + {1: 0.7869860421615985, 2: 0.10650697891920075, 3: 0.10650697891920075}, + ), + ( + {1: -1, 2: -1, 3: 0}, + True, + True, + 0.5, + {1: 0.4683105308334812, 2: 0.4683105308334812, 3: 0.06337893833303762}, + ), + ], +) +def test_preprocess_scores_softmax( + scores, do_negation, divide, divide_factor, expected +): + result = _preprocess_scores_softmax(scores, do_negation, divide, divide_factor) + for k, v in expected.items(): + assert result[k] == pytest.approx(v) + + +def test_preprocess_scores_softmax_exception(): + with pytest.raises(AssertionError): + _preprocess_scores_softmax({1: 2, 2: 3, 3: 4}, False, True, None) diff --git a/ChemSpaceAL/tests/test_initialize_workspace.py b/ChemSpaceAL/tests/test_initialize_workspace.py new file mode 100644 index 0000000..1a2476b --- /dev/null +++ b/ChemSpaceAL/tests/test_initialize_workspace.py @@ -0,0 +1,26 @@ +import os +import tempfile +from unittest.mock import patch +from .. import InitializeWorkspace as iw + + +def test_create_default_folders(): + # Using a temporary directory for testing + with tempfile.TemporaryDirectory() as tempdir: + iw.create_folders(base_path=tempdir) + + # Check if the main folders are created + assert os.path.exists(os.path.join(tempdir, "1_Pretraining")) + assert os.path.exists(os.path.join(tempdir, "2_Generation")) + + # Check if subfolders are created + assert os.path.exists(os.path.join(tempdir, "1_Pretraining", "datasets")) + # ... 
(add more assertions for other subfolders) + assert isinstance(iw.FOLDER_STRUCTURE, dict) + + +@patch("builtins.input", return_value="Y") +def test_create_folders_without_base_path(mock_input, tmp_path, monkeypatch): + # monkeypatch.chdir undoes the chdir at teardown, so later tests never inherit a deleted cwd + monkeypatch.chdir(tmp_path) + iw.create_folders() diff --git a/README.md b/README.md index 65f35b2..156487d 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,8 @@ -# ChemSpaceAL +ChemSpaceAL: An Efficient Active Learning Methodology Applied to Protein-Specific Molecular Generation +======================================= -## An Efficient Active Learning Methodology Applied to Protein- Specific Molecular Generation - -

-Code style: black -

+[![Checked with mypy](https://www.mypy-lang.org/static/mypy_badge.svg)](https://mypy-lang.org/) +[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) +[![codecov](https://codecov.io/gh/batistagroup/ChemSpaceAL/graph/badge.svg?token=ROJSISYJWC)](https://codecov.io/gh/batistagroup/ChemSpaceAL) ![A description of the active learning methodology](media/toc_figure.jpg) diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 0000000..32bbacd --- /dev/null +++ b/codecov.yml @@ -0,0 +1,27 @@ +coverage: + precision: 2 + round: down + range: "0...90" + + status: + project: yes + patch: yes + changes: no + +comment: + layout: "header, diff, components" + +component_management: + default_rules: + statuses: + - type: patch + target: auto + individual_components: + - component_id: module_iw + name: InitializeWorkspace + paths: + - "ChemSpaceAL/InitializeWorkspace.py" + - component_id: module_al_construct + name: AL Training Set Construction + paths: + - "ChemSpaceAL/ALConstruction.py" diff --git a/setup.py b/setup.py index 269ec78..e9afa75 100644 --- a/setup.py +++ b/setup.py @@ -31,6 +31,9 @@ "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", ], - install_requires=required, + # install_requires=required, + # install_requires=[ + # # "pytest" + # ] python_requires=">=3.10", ) diff --git a/tests/test_config.py b/tests/test_config.py deleted file mode 100644 index 74b1c20..0000000 --- a/tests/test_config.py +++ /dev/null @@ -1,16 +0,0 @@ -from ChemSpaceAL.Configuration import Config -import os - -def test_config(): - base_path = os.getcwd() + "/PaperRuns/" - config = Config( - base_path=base_path, - cycle_prefix="model0", - al_iteration=0, - cycle_suffix="ch1", - training_fname="moses_train.csv.gz", - validation_fname="moses_test.csv.gz", - slice_data=1_000, - verbose=True, - ) - assert True