Merge pull request #78 from QuantumChemist/main

Docstrings, type-hints, documentation fixes
autoatml · Jul 5, 2024 · c5c7f27 · c5c7f27
2 parents 0b8105c + 9c349d6
commit c5c7f27
Show file tree

Hide file tree

Showing 15 changed files with 647 additions and 252 deletions.
diff --git a/README.md b/README.md
@@ -7,33 +7,6 @@
 
 `autoplex` is an evolving project and **contributions are very welcome**! To ensure that the code remains of high quality, please raise a pull request for any contributions, which will be reviewed before integration into the main branch of the code. In the beginning, Janine will take care of the reviews.
 
-# General code structure
-- We are currently aiming to follow the code structure below for each submodule (This is an initial idea; of course, this could change depending on the needs in the future)
-  - autoplex/submodule/job.py (any jobs defined will be inside this module)
-  - autoplex/submodule/flows.py (workflows defined will be hosted in this module)
-  - autoplex/submodule/utils.py (all functions that act as utilities for defining flow or job, for example, a small subtask to calculate some metric or plotting, will be hosted in this module)
-
-# Guidelines for contributions
-- Please write unit tests; this is a requirement for any added code to be accepted. (Automated testing will be performed using `pytest`; you can look into the `tests` folder for examples).
-- Please ensure high coverage of the code based on the tests (you can test this with `coverage`).
-- Please use numpy docstrings (use an IDE and switch on this docstring type; you can check examples in our code base; the docstring should be useful for other people)
-- Please ensure that type hints are added for each variable, function, class, and method (this helps code readability, especially if someone else wants to build on your code).
-- Please write the code in a way that gives users the option to change parameters (this is mainly applicable, for example, fitting protocols/flows). In other words, please avoid hardcoding settings or physical properties. Reasonable default values should be set, but the user needs to have the opportunity to modify them if they wish.
-
-# Formatting requirements
-- Variable names should be descriptive and should use snake case (`variable_name`, not `VariableName`).
-- If you define a `Maker`, please use python class naming convention (e.g., `PhononMaker`, `RssMaker`).
-
-# Commit guidelines
-1. `pip install pre-commit`.
-2. Next, run `pre-commit install` (this will install all the hooks from pre-commit-config.yaml)
-3. Steps 1 and 2 need to be done only once in the local repository
-4. Proceed with modifying the code and adding commits as usual. This should automatically run the linters.
-5. To manually run the pre-commit hooks on all files, just use `pre-commit run --all-files`
-6. To run pre-commit on a specific file, use `pre-commit run --files path/to/your/modified/module/`
-
-Please check out atomate2 for example code (https://github.com/materialsproject/atomate2)
-
 # Setup
 
 In order to set up the mandatory prerequisites to be able to use `autoplex`, please follow the [installation guide of atomate2](https://materialsproject.github.io/atomate2/user/install.html).
@@ -71,6 +44,10 @@ Pkg.add("DataFrames")
 Pkg.add("CSV")
 ```
 
+# Contributing guidelines
+
+Please follow the [contributing guidelines](docs/dev/contributing.md)!
+
 # Workflow overview
 
 The following [Mermaid](https://mermaid.live/) diagram will give you an overview of the flows and jobs in the default autoplex workflow:

diff --git a/autoplex/benchmark/phonons/utils.py b/autoplex/benchmark/phonons/utils.py
@@ -1,16 +1,23 @@
 """Utility functions for benchmarking jobs."""
 
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
 import matplotlib.pyplot as plt
 import numpy as np
-from pymatgen.phonon.bandstructure import PhononBandStructureSymmLine
+
+if TYPE_CHECKING:
+    from matplotlib.figure import Figure
+    from pymatgen.phonon.bandstructure import PhononBandStructureSymmLine
 from pymatgen.phonon.plotter import PhononBSPlotter
 
 
 def get_rmse(
     ml_bs: PhononBandStructureSymmLine,
     dft_bs: PhononBandStructureSymmLine,
     q_dependent_rmse: bool = False,
-):
+) -> float | list[float]:
     """
     Compute root mean squared error (rmse) between DFT and ML phonon band-structure.
 
@@ -44,7 +51,7 @@ def rmse_qdep_plot(
     which_q_path=1,
     file_name="rms.pdf",
     img_format="pdf",
-):
+) -> plt:
     """
     Save q dependent root mean squared error plot between DFT and ML phonon band-structure.
 
@@ -94,7 +101,7 @@ def compare_plot(
     ml_bs: PhononBandStructureSymmLine,
     dft_bs: PhononBandStructureSymmLine,
     file_name: str = "band_comparison.pdf",
-):
+) -> Figure:
     """
     Save DFT and ML phonon band-structure overlay plot for visual comparison.
 

diff --git a/autoplex/data/common/utils.py b/autoplex/data/common/utils.py
@@ -85,7 +85,7 @@ def scale_cell(
     volume_scale_factor_range: list[float] | None = None,
     n_structures: int = 10,
     volume_custom_scale_factors: list[float] | None = None,
-):
+) -> list[Structure]:
     """
     Take in a pymatgen Structure object and generate stretched or compressed structures.
 
@@ -154,9 +154,9 @@ def scale_cell(
     return distorted_cells
 
 
-def check_distances(structure: Structure, min_distance: float = 1.5):
+def check_distances(structure: Structure, min_distance: float = 1.5) -> bool:
     """
-    Take in a pymatgen Structure object and checks distances between atoms using minimum image convention.
+    Take in a pymatgen Structure object and check minimum distances between atoms using minimum image convention.
 
     Useful after distorting cell angles and rattling to check atoms aren't too close.
 
@@ -191,7 +191,7 @@ def random_vary_angle(
     w_angle: list[float] | None = None,
     n_structures: int = 8,
     angle_max_attempts: int = 1000,
-):
+) -> list[Structure]:
     """
     Take in a pymatgen Structure object and generate angle-distorted structures.
 
@@ -237,10 +237,10 @@ def random_vary_angle(
             volume_custom_scale_factors=[1.03],
         )
 
-        distorted_cells = AseAtomsAdaptor.get_atoms(distorted_cells[0])
+        distorted_supercells: Atoms = AseAtomsAdaptor.get_atoms(distorted_cells[0])
 
-        # getting stretched cell out of array
-        newcell = distorted_cells.cell.cellpar()
+        # getting stretched supercell out of array
+        newcell = distorted_supercells.cell.cellpar()
 
         # current angles
         alpha = atoms_copy.cell.cellpar()[3]
@@ -287,7 +287,7 @@ def std_rattle(
     n_structures: int = 5,
     rattle_std: float = 0.01,
     rattle_seed: int = 42,
-):
+) -> list[Structure]:
     """
     Take in a pymatgen Structure object and generate rattled structures.
 
@@ -331,7 +331,7 @@ def mc_rattle(
     min_distance: float = 1.5,
     rattle_seed: int = 42,
     rattle_mc_n_iter: int = 10,
-):
+) -> list[Structure]:
     """
     Take in a pymatgen Structure object and generate rattled structures.
 
@@ -375,7 +375,7 @@ def mc_rattle(
     return [AseAtomsAdaptor.get_structure(xtal) for xtal in mc_rattle]
 
 
-def extract_base_name(filename, is_out=False):
+def extract_base_name(filename, is_out=False) -> str:
     """
     Extract the base of a file name to more easily manipulate other file names.
 
@@ -401,7 +401,7 @@ def extract_base_name(filename, is_out=False):
     return "A problem with the files occurred."
 
 
-def filter_outlier_energy(in_file, out_file, criteria: float = 0.0005):
+def filter_outlier_energy(in_file, out_file, criteria: float = 0.0005) -> None:
     """
     Filter data outliers per energy criteria and write them into files.
 
@@ -457,7 +457,9 @@ def filter_outlier_energy(in_file, out_file, criteria: float = 0.0005):
     )
 
 
-def filter_outlier_forces(in_file, out_file, symbol="Si", criteria: float = 0.1):
+def filter_outlier_forces(
+    in_file, out_file, symbol="Si", criteria: float = 0.1
+) -> None:
     """
     Filter data outliers per force criteria and write them into files.
 
@@ -526,13 +528,14 @@ def filter_outlier_forces(in_file, out_file, symbol="Si", criteria: float = 0.1)
     )
 
 
-# copied from libatoms GAP tutorial page and adjusted
 def energy_plot(
     in_file, out_file, ax, title: str = "Plot of energy", label: str = "energy"
-):
+) -> None:
     """
     Plot the distribution of energy per atom on the output vs the input.
 
+    Adapted and adjusted from libatoms GAP tutorial page https://libatoms.github.io/GAP/gap_fitting_tutorial.html.
+
     Parameters
     ----------
     in_file:
@@ -610,7 +613,7 @@ def force_plot(
     symbol: str = "Si",
     title: str = "Plot of force",
     label: str = "force for ",
-):
+) -> float:
     """
     Plot the distribution of force components per atom on the output vs the input.
 
@@ -700,7 +703,7 @@ def plot_energy_forces(
     species_list: list | None = None,
     train_name: str = "train.extxyz",
     test_name: str = "test.extxyz",
-):
+) -> None:
     """
     Plot energy and forces of the data.
 

diff --git a/autoplex/data/phonons/utils.py b/autoplex/data/phonons/utils.py
@@ -18,7 +18,11 @@ def ml_phonon_maker_preparation(
     bulk_relax_maker: ForceFieldRelaxMaker,
     phonon_displacement_maker: ForceFieldStaticMaker,
     static_energy_maker: ForceFieldStaticMaker,
-):
+) -> tuple[
+    ForceFieldRelaxMaker | None,
+    ForceFieldStaticMaker | None,
+    ForceFieldStaticMaker | None,
+]:
     """
     Prepare the MLPhononMaker for the respective MLIP model.
 

diff --git a/autoplex/fitting/common/jobs.py b/autoplex/fitting/common/jobs.py
@@ -1,4 +1,4 @@
-"""fitting using GAP."""
+"""General fitting jobs using several MLIPs available."""
 from __future__ import annotations
 
 from pathlib import Path
@@ -33,7 +33,7 @@ def machine_learning_fit(
     **kwargs,
 ):
     """
-    Maker for fitting potential(s).
+    Job for fitting potential(s).
 
     Parameters
     ----------