This repository has been archived by the owner on Dec 20, 2024. It is now read-only.

Merge branch 'develop' into fix/74-imputer-inference-mode
JesperDramsch authored Dec 19, 2024
2 parents 72be86f + 489a241 commit f1516ed
Showing 27 changed files with 1,401 additions and 372 deletions.
14 changes: 4 additions & 10 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -27,7 +27,7 @@ repos:
- id: python-check-blanket-noqa # Check for # noqa: all
- id: python-no-log-warn # Check for log.warn
- repo: https://github.com/psf/black-pre-commit-mirror
rev: 24.8.0
rev: 24.10.0
hooks:
- id: black
args: [--line-length=120]
@@ -40,7 +40,7 @@ repos:
- --force-single-line-imports
- --profile black
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.6.9
rev: v0.8.1
hooks:
- id: ruff
args:
@@ -59,17 +59,12 @@
hooks:
- id: rstfmt
exclude: 'cli/.*' # Because we use argparse
- repo: https://github.com/b8raoult/pre-commit-docconvert
rev: "0.1.5"
hooks:
- id: docconvert
args: ["numpy"]
- repo: https://github.com/tox-dev/pyproject-fmt
rev: "2.2.4"
rev: "v2.5.0"
hooks:
- id: pyproject-fmt
- repo: https://github.com/jshwi/docsig # Check docstrings against function sig
rev: v0.64.0
rev: v0.65.0
hooks:
- id: docsig
args:
@@ -79,6 +74,5 @@
- --check-protected # Check protected methods
- --check-class # Check class docstrings
- --disable=E113 # Disable empty docstrings
- --summary # Print a summary
ci:
autoupdate_schedule: monthly
17 changes: 15 additions & 2 deletions CHANGELOG.md
@@ -8,12 +8,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
Please add your functional changes to the appropriate section in the PR.
Keep it human-readable, your future self will thank you!

## [Unreleased](https://github.com/ecmwf/anemoi-models/compare/0.3.0...HEAD)
## [Unreleased](https://github.com/ecmwf/anemoi-models/compare/0.4.0...HEAD)

- Add synchronisation workflow
### Added

- New AnemoiModelEncProcDecHierarchical class available in models [#37](https://github.com/ecmwf/anemoi-models/pull/37)
- Mask NaN values in training loss function [#56](https://github.com/ecmwf/anemoi-models/pull/56)
- Added dynamic NaN masking for the imputer class with two new classes: DynamicInputImputer, DynamicConstantImputer [#89](https://github.com/ecmwf/anemoi-models/pull/89)
- Reduced memory usage when using chunking in the mapper [#84](https://github.com/ecmwf/anemoi-models/pull/84)
- Added `supporting_arrays` argument, which contains arrays to store in checkpoints. [#97](https://github.com/ecmwf/anemoi-models/pull/97)

## [0.4.0](https://github.com/ecmwf/anemoi-models/compare/0.3.0...0.4.0) - Improvements to Model Design

### Added

- Add synchronisation workflow [#60](https://github.com/ecmwf/anemoi-models/pull/60)
- Add anemoi-transform link to documentation
- Codeowners file
- Pygrep precommit hooks
@@ -22,7 +31,10 @@ Keep it human-readable, your future self will thank you!
- configurability of the dropout probability in the MultiHeadSelfAttention module
- Variable Bounding as configurable model layers [#13](https://github.com/ecmwf/anemoi-models/issues/13)
- GraphTransformerMapperBlock chunking to reduce memory usage during inference [#46](https://github.com/ecmwf/anemoi-models/pull/46)
- New `NamedNodesAttributes` class to handle node attributes in a more flexible way [#64](https://github.com/ecmwf/anemoi-models/pull/64)
- Contributors file [#69](https://github.com/ecmwf/anemoi-models/pull/69)
- Add remappers, e.g. link functions to apply during training to facilitate learning of variables with a difficult distribution [#88]
- Added `supporting_arrays` argument, which contains arrays to store in checkpoints. [#97](https://github.com/ecmwf/anemoi-models/pull/97)

### Changed
- Bugfixes for CI
@@ -33,6 +45,7 @@ Keep it human-readable, your future self will thank you!
- ci: extended python versions to include 3.11 and 3.12 [#66](https://github.com/ecmwf/anemoi-models/pull/66)
- Update copyright notice
- Fix `__version__` import in init
- Fix missing copyrights [#71](https://github.com/ecmwf/anemoi-models/pull/71)

### Removed

4 changes: 2 additions & 2 deletions docs/conf.py
@@ -29,15 +29,15 @@

project = "Anemoi Models"

author = "ECMWF"
author = "Anemoi contributors"

year = datetime.datetime.now().year
if year == 2024:
years = "2024"
else:
years = "2024-%s" % (year,)

copyright = "%s, ECMWF" % (years,)
copyright = "%s, Anemoi contributors" % (years,)

try:
from anemoi.models._version import __version__
26 changes: 26 additions & 0 deletions docs/modules/models.rst
@@ -13,3 +13,29 @@ encoder, processor, and decoder.
:members:
:no-undoc-members:
:show-inheritance:

**********************************************
Encoder Hierarchical Processor Decoder Model
**********************************************

This model extends the standard encoder-processor-decoder architecture
by introducing a **hierarchical processor**.

Compared to the AnemoiModelEncProcDec model, this architecture requires
a predefined list of hidden nodes, `[hidden_1, ..., hidden_n]`. These
nodes must be sorted to match the expected flow of information `data ->
hidden_1 -> ... -> hidden_n -> ... -> hidden_1 -> data`.

A new argument is added to the configuration file:
`enable_hierarchical_level_processing`. This argument determines whether
a processor is added at each hierarchy level or only at the final level.

By default, the number of channels for the mappers is defined as `2^n *
config.num_channels`, where `n` represents the hierarchy level. This
scaling ensures that the processing capacity grows proportionally with
the depth of the hierarchy, enabling efficient handling of data.

.. automodule:: anemoi.models.models.hierarchical
:members:
:no-undoc-members:
:show-inheritance:
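The channel-scaling rule described in the hierarchical model docs above (`2^n * config.num_channels` at hierarchy level `n`) can be sketched as follows. `hierarchy_channels` is an illustrative helper, not part of the anemoi-models API:

```python
# Hypothetical sketch of the channel-scaling rule quoted above:
# mappers at hierarchy level n operate on 2^n * num_channels channels.
# The helper name and signature are illustrative, not the actual API.

def hierarchy_channels(num_channels: int, num_levels: int) -> list[int]:
    """Return the mapper channel width for each hierarchy level 0..num_levels-1."""
    return [(2**n) * num_channels for n in range(num_levels)]

# Example: base width 64 with three hidden levels doubles the width per level.
print(hierarchy_channels(64, 3))  # → [64, 128, 256]
```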
6 changes: 3 additions & 3 deletions pyproject.toml
@@ -1,13 +1,12 @@
# (C) Copyright 2024 ECMWF.
# (C) Copyright 2024 Anemoi contributors.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
#
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.

# https://packaging.python.org/en/latest/guides/writing-pyproject-toml/

[build-system]
build-backend = "setuptools.build_meta"

@@ -36,6 +35,7 @@ classifiers = [
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
]
4 changes: 3 additions & 1 deletion src/anemoi/models/__init__.py
@@ -1,6 +1,8 @@
# (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
# (C) Copyright 2024 Anemoi contributors.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
#
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.
5 changes: 2 additions & 3 deletions src/anemoi/models/__main__.py
@@ -1,12 +1,11 @@
#!/usr/bin/env python
# (C) Copyright 2024 ECMWF.
# (C) Copyright 2024 Anemoi contributors.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
#
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.
#

from anemoi.utils.cli import cli_main
from anemoi.utils.cli import make_parser
5 changes: 2 additions & 3 deletions src/anemoi/models/commands/__init__.py
@@ -1,12 +1,11 @@
#!/usr/bin/env python
# (C) Copyright 2024 ECMWF.
# (C) Copyright 2024 Anemoi contributors.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
#
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.
#

import os

8 changes: 8 additions & 0 deletions src/anemoi/models/data_indices/__init__.py
@@ -0,0 +1,8 @@
# (C) Copyright 2024 Anemoi contributors.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
#
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.
8 changes: 8 additions & 0 deletions src/anemoi/models/distributed/__init__.py
@@ -0,0 +1,8 @@
# (C) Copyright 2024 Anemoi contributors.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
#
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.
16 changes: 13 additions & 3 deletions src/anemoi/models/interface/__init__.py
@@ -1,11 +1,11 @@
# (C) Copyright 2024 ECMWF.
# (C) Copyright 2024 Anemoi contributors.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
#
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.
#

import uuid

@@ -37,6 +37,8 @@ class AnemoiModelInterface(torch.nn.Module):
Statistics for the data.
metadata : dict
Metadata for the model.
supporting_arrays : dict
Numpy arrays to store in the checkpoint.
data_indices : dict
Indices for the data.
pre_processors : Processors
@@ -48,7 +50,14 @@
"""

def __init__(
self, *, config: DotDict, graph_data: HeteroData, statistics: dict, data_indices: dict, metadata: dict
self,
*,
config: DotDict,
graph_data: HeteroData,
statistics: dict,
data_indices: dict,
metadata: dict,
supporting_arrays: dict = None,
) -> None:
super().__init__()
self.config = config
@@ -57,6 +66,7 @@ def __init__(
self.graph_data = graph_data
self.statistics = statistics
self.metadata = metadata
self.supporting_arrays = supporting_arrays if supporting_arrays is not None else {}
self.data_indices = data_indices
self._build_model()

8 changes: 8 additions & 0 deletions src/anemoi/models/layers/__init__.py
@@ -0,0 +1,8 @@
# (C) Copyright 2024 Anemoi contributors.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
#
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.
6 changes: 2 additions & 4 deletions src/anemoi/models/layers/block.py
@@ -512,18 +512,16 @@ def forward(
edge_attr_list, edge_index_list = sort_edges_1hop_chunks(
num_nodes=size, edge_attr=edges, edge_index=edge_index, num_chunks=num_chunks
)
out = torch.zeros((x[1].shape[0], self.num_heads, self.out_channels_conv), device=x[1].device)
for i in range(num_chunks):
out1 = self.conv(
out += self.conv(
query=query,
key=key,
value=value,
edge_attr=edge_attr_list[i],
edge_index=edge_index_list[i],
size=size,
)
if i == 0:
out = torch.zeros_like(out1, device=out1.device)
out = out + out1
else:
out = self.conv(query=query, key=key, value=value, edge_attr=edges, edge_index=edge_index, size=size)

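The block.py change above pre-allocates the output tensor and accumulates per-chunk contributions with `out +=`, instead of allocating a fresh tensor on the first iteration. A pure-Python sketch (with a sum standing in for `self.conv`) shows that chunked accumulation reproduces the single-pass result:

```python
# Illustrative sketch of the chunked-accumulation change above; `conv` is a
# toy stand-in for the attention convolution, not the real layer.

def conv(edges: list[int]) -> int:
    # Stand-in for self.conv: aggregate contributions from a set of edges.
    return sum(edges)

def chunked(edges: list[int], num_chunks: int) -> int:
    chunk_size = (len(edges) + num_chunks - 1) // num_chunks
    out = 0  # pre-allocated accumulator, as in the diff's torch.zeros(...)
    for i in range(num_chunks):
        out += conv(edges[i * chunk_size : (i + 1) * chunk_size])
    return out

edges = list(range(10))
assert chunked(edges, 4) == conv(edges)  # chunking does not change the result
```

The real motivation in the diff is memory: each chunk's intermediate activations can be freed before the next chunk runs, while the accumulated sum stays identical.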
72 changes: 71 additions & 1 deletion src/anemoi/models/layers/graph.py
@@ -12,6 +12,7 @@
import torch
from torch import Tensor
from torch import nn
from torch_geometric.data import HeteroData


class TrainableTensor(nn.Module):
@@ -36,8 +37,77 @@ def __init__(self, tensor_size: int, trainable_size: int) -> None:
def forward(self, x: Tensor, batch_size: int) -> Tensor:
latent = [einops.repeat(x, "e f -> (repeat e) f", repeat=batch_size)]
if self.trainable is not None:
latent.append(einops.repeat(self.trainable, "e f -> (repeat e) f", repeat=batch_size))
latent.append(einops.repeat(self.trainable.to(x.device), "e f -> (repeat e) f", repeat=batch_size))
return torch.cat(
latent,
dim=-1, # feature dimension
)


class NamedNodesAttributes(nn.Module):
"""Named Nodes Attributes information.

Attributes
----------
num_nodes : dict[str, int]
Number of nodes for each group of nodes.
attr_ndims : dict[str, int]
Total dimension of node attributes (non-trainable + trainable) for each group of nodes.
trainable_tensors : nn.ModuleDict
Dictionary of trainable tensors for each group of nodes.

Methods
-------
get_coordinates(self, name: str) -> Tensor
Get the coordinates of a set of nodes.
forward(self, name: str, batch_size: int) -> Tensor
Get the node attributes to be passed through the graph neural network.
"""

num_nodes: dict[str, int]
attr_ndims: dict[str, int]
trainable_tensors: dict[str, TrainableTensor]

def __init__(self, num_trainable_params: int, graph_data: HeteroData) -> None:
"""Initialize NamedNodesAttributes."""
super().__init__()

self.define_fixed_attributes(graph_data, num_trainable_params)

self.trainable_tensors = nn.ModuleDict()
for nodes_name, nodes in graph_data.node_items():
self.register_coordinates(nodes_name, nodes.x)
self.register_tensor(nodes_name, num_trainable_params)

def define_fixed_attributes(self, graph_data: HeteroData, num_trainable_params: int) -> None:
"""Define fixed attributes."""
nodes_names = list(graph_data.node_types)
self.num_nodes = {nodes_name: graph_data[nodes_name].num_nodes for nodes_name in nodes_names}
self.attr_ndims = {
nodes_name: 2 * graph_data[nodes_name].x.shape[1] + num_trainable_params for nodes_name in nodes_names
}

def register_coordinates(self, name: str, node_coords: Tensor) -> None:
"""Register coordinates."""
sin_cos_coords = torch.cat([torch.sin(node_coords), torch.cos(node_coords)], dim=-1)
self.register_buffer(f"latlons_{name}", sin_cos_coords, persistent=True)

def get_coordinates(self, name: str) -> Tensor:
"""Return original coordinates."""
sin_cos_coords = getattr(self, f"latlons_{name}")
ndim = sin_cos_coords.shape[1] // 2
sin_values = sin_cos_coords[:, :ndim]
cos_values = sin_cos_coords[:, ndim:]
return torch.atan2(sin_values, cos_values)

def register_tensor(self, name: str, num_trainable_params: int) -> None:
"""Register a trainable tensor."""
self.trainable_tensors[name] = TrainableTensor(self.num_nodes[name], num_trainable_params)

def forward(self, name: str, batch_size: int) -> Tensor:
"""Return the node attributes to be passed through the graph neural network.

It includes both the coordinates and the trainable parameters.
"""
latlons = getattr(self, f"latlons_{name}")
return self.trainable_tensors[name](latlons, batch_size)
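The coordinate round-trip in `NamedNodesAttributes` above — `register_coordinates` stores `(sin θ, cos θ)` and `get_coordinates` recovers `θ` via `atan2` — can be sketched with a pure-Python stand-in for the torch version:

```python
# Sketch of the sin/cos round-trip used by NamedNodesAttributes above.
# Pure-Python stand-in for the torch tensors in the diff; function names
# here are illustrative, not the class's actual methods.
import math

def register(coord: float) -> tuple[float, float]:
    # Mirror of register_coordinates: store the angle as (sin, cos).
    return math.sin(coord), math.cos(coord)

def get_coordinate(sin_val: float, cos_val: float) -> float:
    # Mirror of get_coordinates: atan2 recovers the angle in (-pi, pi],
    # matching torch.atan2(sin_values, cos_values) in the diff.
    return math.atan2(sin_val, cos_val)

theta = 1.25
assert abs(get_coordinate(*register(theta)) - theta) < 1e-12
```

Storing sin/cos instead of raw angles gives the network a continuous, periodicity-aware encoding, while `atan2` lets the original coordinates be recovered exactly (up to wrapping into `(-pi, pi]`).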
1 change: 1 addition & 0 deletions src/anemoi/models/layers/processor.py
@@ -323,6 +323,7 @@ def forward(
*args,
**kwargs,
) -> Tensor:

shape_nodes = change_channels_in_shape(shard_shapes, self.num_channels)
edge_attr = self.trainable(self.edge_attr, batch_size)
