Skip to content

Commit

Permalink
Merge pull request #56 from YuanbinLiu/test_atomate2_install
Browse files — browse the repository at this point in the history
Fix MGLDataset issue with file saving
  • Loading branch information
naik-aakash authored May 23, 2024
2 parents ad8f6cf + b091e5b commit 322a817
Show file tree
Hide file tree
Showing 15 changed files with 2,822 additions and 72 deletions.
6 changes: 4 additions & 2 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ name: Testing Linux

on:
push:
branches: [ main ]
branches:
- '*'
pull_request:
branches: [ main ]

Expand All @@ -31,7 +32,8 @@ jobs:
python -m pip install --upgrade pip
python -m pip install flake8 pytest pytest-mock pytest-split pytest-cov
python -m pip install types-setuptools
pip install .[strict]
python -m pip install .[strict]
python -m pip install ase@git+https://gitlab.com/ase/ase.git@aae51d57721847624cea569f3a2d4bb6aa5032b4
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
Expand Down
2 changes: 1 addition & 1 deletion autoplex/auto/phonons/flows.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ def make(

if pre_xyz_files is None:
fit_input.update(
{"isolated_atom": {"iso_atoms_dir": [isoatoms.output["dirs"]]}}
{"IsolatedAtom": {"iso_atoms_dir": [isoatoms.output["dirs"]]}}
)

for ml_model in self.ml_models:
Expand Down
16 changes: 13 additions & 3 deletions autoplex/auto/phonons/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,23 +162,33 @@ def make_from_ml_model(self, structure, ml_model, **make_kwargs):
br = self.bulk_relax_maker
self.bulk_relax_maker = br.update_kwargs(
update={
"potential_param_file_name": ml_model,
"calculator_kwargs": {
"args_str": "IP GAP",
"param_filename": str(ml_model),
},
**self.relax_maker_kwargs,
}
)
print(self.bulk_relax_maker.calculator_kwargs)
if self.phonon_displacement_maker is not None:
ph_disp = self.phonon_displacement_maker
self.phonon_displacement_maker = ph_disp.update_kwargs(
update={
"potential_param_file_name": ml_model,
"calculator_kwargs": {
"args_str": "IP GAP",
"param_filename": str(ml_model),
},
**self.static_maker_kwargs,
}
)
if self.static_energy_maker is not None:
stat_en = self.static_energy_maker
self.static_energy_maker = stat_en.update_kwargs(
update={
"potential_param_file_name": ml_model,
"calculator_kwargs": {
"args_str": "IP GAP",
"param_filename": str(ml_model),
},
**self.static_maker_kwargs,
}
)
Expand Down
10 changes: 8 additions & 2 deletions autoplex/data/common/flows.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,10 @@ def make(
for structure in train_structure_list:
if self.bulk_relax_maker is None:
self.bulk_relax_maker = GAPRelaxMaker(
potential_param_file_name=potential_filename,
calculator_kwargs={
"args_str": "IP GAP",
"param_filename": str(potential_filename),
},
relax_cell=relax_cell,
steps=steps,
)
Expand Down Expand Up @@ -187,7 +190,10 @@ def static_run_and_convert(
}
if self.static_energy_maker is None:
self.static_energy_maker = GAPRelaxMaker(
potential_param_file_name=potential_filename,
calculator_kwargs={
"args_str": "IP GAP",
"param_filename": str(potential_filename),
},
relax_cell=False,
relax_kwargs=relax_kwargs,
steps=1,
Expand Down
32 changes: 23 additions & 9 deletions autoplex/fitting/common/flows.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from autoplex.fitting.common.utils import (
data_distillation,
get_list_of_vasp_calc_dirs,
split_dataset,
stratified_dataset_split,
vaspoutput_2_extended_xyz,
)

Expand All @@ -39,17 +39,19 @@ class MLIPFitMaker(Maker):
Name of the flows produced by this maker.
mlip_type: str
Choose one specific MLIP type:
'GAP' | 'SNAP' | 'ACE' | 'Nequip' | 'Allegro' | 'MACE'
'GAP' | 'ACE' | 'Nequip' | 'M3GNet' | 'MACE'
"""

name: str = "MLpotentialFit"
mlip_type: str = "GAP"
mlip_hyper: dict | None = None

def make(
self,
species_list: list,
isolated_atoms_energy: list,
fit_input: dict,
species_list: list | None = None,
isolated_atoms_energy: list | None = None,
isol_es: dict | None = None,
split_ratio: float = 0.4,
f_max: float = 40.0,
regularization: bool = True,
Expand All @@ -60,6 +62,7 @@ def make(
atom_wise_regularization: bool = True,
auto_delta: bool = True,
glue_xml: bool = False,
num_processes: int = 32,
**fit_kwargs,
):
"""
Expand Down Expand Up @@ -110,19 +113,28 @@ def make(
atom_wise_regularization=atom_wise_regularization,
)
jobs.append(data_prep_job)
gap_fit_job = machine_learning_fit(

if self.mlip_type not in ["GAP", "J-ACE", "P-ACE", "NEQUIP", "M3GNET", "MACE"]:
raise ValueError(
"Please correct the MLIP name!"
"The current version ONLY supports the following models: GAP, J-ACE, P-ACE, NEQUIP, M3GNET, and MACE."
)

mlip_fit_job = machine_learning_fit(
database_dir=data_prep_job.output,
isol_es=None,
isol_es=isol_es,
auto_delta=auto_delta,
glue_xml=glue_xml,
mlip_type=self.mlip_type,
mlip_hyper=self.mlip_hyper,
num_processes=num_processes,
regularization=regularization,
**fit_kwargs,
)
jobs.append(gap_fit_job) # type: ignore
jobs.append(mlip_fit_job) # type: ignore

# create a flow including all jobs
return Flow(jobs, gap_fit_job.output)
return Flow(jobs, mlip_fit_job.output)


@dataclass
Expand Down Expand Up @@ -233,7 +245,9 @@ def make(
)

# split dataset into training and testing datasets with a ratio of 9:1
(train_structures, test_structures) = split_dataset(atoms, self.split_ratio)
(train_structures, test_structures) = stratified_dataset_split(
atoms, self.split_ratio
)

# Merging database
if pre_database_dir and os.path.exists(pre_database_dir):
Expand Down
146 changes: 132 additions & 14 deletions autoplex/fitting/common/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,14 @@

from jobflow import job

from autoplex.fitting.common.utils import check_convergence, gap_fitting
from autoplex.fitting.common.utils import (
ace_fitting,
check_convergence,
gap_fitting,
m3gnet_fitting,
mace_fitting,
nequip_fitting,
)

current_dir = Path(__file__).absolute().parent
GAP_DEFAULTS_FILE_PATH = current_dir / "gap-defaults.json"
Expand All @@ -14,14 +21,14 @@
@job
def machine_learning_fit(
database_dir: str,
gap_para=None,
isol_es: None = None,
isol_es: dict | None = None,
num_processes: int = 32,
auto_delta: bool = True,
glue_xml: bool = False,
mlip_type: str | None = None,
regularization: bool = True,
HPO: bool = False,
mlip_hyper: dict | None = None,
**kwargs,
):
"""
Expand All @@ -45,34 +52,145 @@ def machine_learning_fit(
optional dictionary with parameters for gap fitting.
mlip_type: str
Choose one specific MLIP type:
'GAP' | 'SNAP' | 'ACE' | 'Nequip' | 'Allegro' | 'MACE'
'GAP' | 'ACE' | 'Nequip' | 'M3GNet' | 'MACE'
regularization: bool
For using sigma regularization.
HPO: bool
call hyperparameter optimization (HPO) or not
"""
if gap_para is None:
gap_para = {"two_body": True, "three_body": False, "soap": True}
if mlip_hyper is None:
if mlip_type == "GAP":
mlip_hyper = {"two_body": True, "three_body": False, "soap": True}

if mlip_type is None:
raise ValueError(
"MLIP type is not defined! "
"The current version supports the fitting of GAP, SNAP, ACE, Nequip, Allegro, or MACE."
)
elif mlip_type == "J-ACE":
mlip_hyper = {"order": 3, "totaldegree": 6, "cutoff": 2.0, "solver": "BLR"}

elif mlip_type == "NEQUIP":
mlip_hyper = {
"r_max": 4.0,
"num_layers": 4,
"l_max": 2,
"num_features": 32,
"num_basis": 8,
"invariant_layers": 2,
"invariant_neurons": 64,
"batch_size": 5,
"learning_rate": 0.005,
"max_epochs": 10000,
"default_dtype": "float32",
"device": "cuda",
}

elif mlip_type == "M3GNET":
mlip_hyper = {
"exp_name": "training",
"results_dir": "m3gnet_results",
"cutoff": 5.0,
"threebody_cutoff": 4.0,
"batch_size": 10,
"max_epochs": 1000,
"include_stresses": True,
"hidden_dim": 128,
"num_units": 128,
"max_l": 4,
"max_n": 4,
"device": "cuda",
"test_equal_to_val": True,
}

else:
mlip_hyper = {
"model": "MACE",
"config_type_weights": '{"Default":1.0}',
"hidden_irreps": "128x0e + 128x1o",
"r_max": 5.0,
"batch_size": 10,
"max_num_epochs": 1500,
"start_swa": 1200,
"ema_decay": 0.99,
"correlation": 3,
"loss": "huber",
"default_dtype": "float32",
"device": "cuda",
}

if mlip_type == "GAP":
train_test_error = gap_fitting(
db_dir=database_dir,
include_two_body=gap_para["two_body"],
include_three_body=gap_para["three_body"],
include_soap=gap_para["soap"],
include_two_body=mlip_hyper["two_body"],
include_three_body=mlip_hyper["three_body"],
include_soap=mlip_hyper["soap"],
num_processes=num_processes,
auto_delta=auto_delta,
glue_xml=glue_xml,
regularization=regularization,
fit_kwargs=kwargs,
)

elif mlip_type == "J-ACE":
train_test_error = ace_fitting(
db_dir=database_dir,
order=mlip_hyper["order"],
totaldegree=mlip_hyper["totaldegree"],
cutoff=mlip_hyper["cutoff"],
solver=mlip_hyper["solver"],
isol_es=isol_es,
num_processes=num_processes,
)

elif mlip_type == "NEQUIP":
train_test_error = nequip_fitting(
db_dir=database_dir,
r_max=mlip_hyper["r_max"],
num_layers=mlip_hyper["num_layers"],
l_max=mlip_hyper["l_max"],
num_features=mlip_hyper["num_features"],
num_basis=mlip_hyper["num_basis"],
invariant_layers=mlip_hyper["invariant_layers"],
invariant_neurons=mlip_hyper["invariant_neurons"],
batch_size=mlip_hyper["batch_size"],
learning_rate=mlip_hyper["learning_rate"],
max_epochs=mlip_hyper["max_epochs"],
isol_es=isol_es,
default_dtype=mlip_hyper["default_dtype"],
device=mlip_hyper["device"],
)

elif mlip_type == "M3GNET":
train_test_error = m3gnet_fitting(
db_dir=database_dir,
exp_name=mlip_hyper["exp_name"],
results_dir=mlip_hyper["results_dir"],
cutoff=mlip_hyper["cutoff"],
threebody_cutoff=mlip_hyper["threebody_cutoff"],
batch_size=mlip_hyper["batch_size"],
max_epochs=mlip_hyper["max_epochs"],
include_stresses=mlip_hyper["include_stresses"],
hidden_dim=mlip_hyper["hidden_dim"],
num_units=mlip_hyper["num_units"],
max_l=mlip_hyper["max_l"],
max_n=mlip_hyper["max_n"],
device=mlip_hyper["device"],
test_equal_to_val=mlip_hyper["test_equal_to_val"],
)

elif mlip_type == "MACE":
train_test_error = mace_fitting(
db_dir=database_dir,
model=mlip_hyper["model"],
config_type_weights=mlip_hyper["config_type_weights"],
hidden_irreps=mlip_hyper["hidden_irreps"],
r_max=mlip_hyper["r_max"],
batch_size=mlip_hyper["batch_size"],
max_num_epochs=mlip_hyper["max_num_epochs"],
start_swa=mlip_hyper["start_swa"],
ema_decay=mlip_hyper["ema_decay"],
correlation=mlip_hyper["correlation"],
loss=mlip_hyper["loss"],
default_dtype=mlip_hyper["default_dtype"],
device=mlip_hyper["device"],
)

check_conv = check_convergence(train_test_error["test_error"])

return {
Expand Down
Loading

0 comments on commit 322a817

Please sign in to comment.