From 2f150e6b20531292bd0d1e6fea0ff0042e645bad Mon Sep 17 00:00:00 2001
From: juacrumar <juacrumar@lairen.eu>
Date: Fri, 22 Nov 2024 03:59:08 +0100
Subject: [PATCH] make it more keras only, working also with sum rules

update keras limits

test with newer tf
---
 conda-recipe/meta.yaml                        |  5 +-
 .../n3fit/backends/keras_backend/MetaModel.py | 11 +--
 .../backends/keras_backend/constraints.py     |  5 +-
 .../backends/keras_backend/operations.py      | 80 ++++++++++++++-----
 n3fit/src/n3fit/model_gen.py                  | 10 +--
 n3fit/src/n3fit/model_trainer.py              | 26 +++---
 n3fit/src/n3fit/performfit.py                 |  3 -
 n3fit/src/n3fit/tests/test_backend.py         | 51 ++++++++----
 n3fit/src/n3fit/tests/test_layers.py          |  4 +-
 pyproject.toml                                |  1 +
 10 files changed, 124 insertions(+), 72 deletions(-)

diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml
index cb93a7be0b..39df92d148 100644
--- a/conda-recipe/meta.yaml
+++ b/conda-recipe/meta.yaml
@@ -19,7 +19,8 @@ requirements:
         - pip
     run:
         - python >=3.9,<3.13
-        - tensorflow >=2.10,<2.17 # 2.17 works ok but the conda-forge package for macos doesn't
+        - tensorflow >=2.10
+        - keras >=3.1
         - psutil # to ensure n3fit affinity is with the right processors
         - hyperopt
         - mongodb
@@ -29,7 +30,7 @@ requirements:
         - numpy
         - pkg-config
         - reportengine
-        - matplotlib >=3.3.0,<3.8  # see https://github.com/NNPDF/nnpdf/pull/1809
+        - matplotlib >=3.3.0 
         - blessings >=1.7
         - scipy >=0.19.1
         - pandas
diff --git a/n3fit/src/n3fit/backends/keras_backend/MetaModel.py b/n3fit/src/n3fit/backends/keras_backend/MetaModel.py
index 82ac7216b5..f1cdbc418a 100644
--- a/n3fit/src/n3fit/backends/keras_backend/MetaModel.py
+++ b/n3fit/src/n3fit/backends/keras_backend/MetaModel.py
@@ -17,12 +17,6 @@
 
 import n3fit.backends.keras_backend.operations as op
 
-# We need a function to transform tensors to numpy/python primitives
-if K.backend() == "torch":
-    _to_numpy_or_python_type = lambda ret: {k: i.detach().numpy() for k, i in ret.items()}
-else:
-    _to_numpy_or_python_type = lambda ret: {k: i.numpy() for k, i in ret.items()}
-
 # Starting with TF 2.16, a memory leak in TF https://github.com/tensorflow/tensorflow/issues/64170
 # makes jit compilation unusable in GPU.
 # Before TF 2.16 it was set to `False` by default. From 2.16 onwards, it is set to `True`
@@ -119,6 +113,7 @@ def __init__(self, input_tensors, output_tensors, scaler=None, input_values=None
         self.compute_losses_function = None
         self._scaler = scaler
 
+    # @tf.autograph.experimental.do_not_convert
     def _parse_input(self, extra_input=None):
         """Returns the input data the model was compiled with.
         Introduces the extra_input in the places asigned to the placeholders.
@@ -225,7 +220,7 @@ def compute_losses(self):
                 inputs[k] = v[:1]
 
             # Compile a evaluation function
-
+            @op.decorator_compiler
             def losses_fun():
                 predictions = self(inputs)
                 # If we only have one dataset the output changes
@@ -241,7 +236,7 @@ def losses_fun():
 
         # The output of this function is to be used by python (and numpy)
         # so we need to convert the tensors
-        return _to_numpy_or_python_type(ret)
+        return op.dict_to_numpy_or_python(ret)
 
     def compile(
         self,
diff --git a/n3fit/src/n3fit/backends/keras_backend/constraints.py b/n3fit/src/n3fit/backends/keras_backend/constraints.py
index 57d588716b..bb6d85ff4b 100644
--- a/n3fit/src/n3fit/backends/keras_backend/constraints.py
+++ b/n3fit/src/n3fit/backends/keras_backend/constraints.py
@@ -3,6 +3,7 @@
 """
 
 from keras import backend as K
+from keras import ops as Kops
 from keras.constraints import MinMaxNorm
 
 
@@ -16,8 +17,8 @@ def __init__(self, min_value, max_value, **kwargs):
         super().__init__(min_value=min_value, max_value=max_value, axis=1, **kwargs)
 
     def __call__(self, w):
-        norms = K.sum(w, axis=self.axis, keepdims=True)
+        norms = Kops.sum(w, axis=self.axis, keepdims=True)
         desired = (
-            self.rate * K.clip(norms, self.min_value, self.max_value) + (1 - self.rate) * norms
+            self.rate * Kops.clip(norms, self.min_value, self.max_value) + (1 - self.rate) * norms
         )
         return w * desired / (K.epsilon() + norms)
diff --git a/n3fit/src/n3fit/backends/keras_backend/operations.py b/n3fit/src/n3fit/backends/keras_backend/operations.py
index 006888f923..f521b0536e 100644
--- a/n3fit/src/n3fit/backends/keras_backend/operations.py
+++ b/n3fit/src/n3fit/backends/keras_backend/operations.py
@@ -32,14 +32,21 @@
 from keras.layers import multiply as keras_multiply
 from keras.layers import subtract as keras_subtract
 import numpy as np
-import tensorflow as tf
 
 from validphys.convolution import OP
 
+# Backend dependent functions and operations
+if K.backend() == "torch":
+    tensor_to_numpy_or_python = lambda x: x.detach().cpu().numpy()
+    decorator_compiler = lambda f: f
+else:
+    tensor_to_numpy_or_python = lambda x: x.numpy()
+    # tensorflow is imported lazily so that the torch backend does not need it
+    import tensorflow as tf
 
-def evaluate(tensor):
-    """Evaluate input tensor using the backend"""
-    return K.eval(tensor)
+    decorator_compiler = tf.function
+
+dict_to_numpy_or_python = lambda ret: {k: tensor_to_numpy_or_python(i) for k, i in ret.items()}
 
 
 def as_layer(operation, op_args=None, op_kwargs=None, **kwargs):
@@ -269,21 +276,6 @@ def pow(tensor, power):
     return Kops.power(tensor, power)
 
 
-def op_log(o_tensor, **kwargs):
-    """
-    Computes the logarithm of the input
-    """
-    return Kops.log(o_tensor)
-
-
-def sum(*args, **kwargs):
-    """
-    Computes the sum of the elements of the tensor
-    see full `docs <https://www.tensorflow.org/api_docs/python/tf/keras/backend/sum>`_
-    """
-    return Kops.sum(*args, **kwargs)
-
-
 def scatter_to_one(values, indices, output_shape):
     """
     Like scatter_nd initialized to one instead of zero
@@ -332,6 +324,54 @@ def backend_function(fun_name, *args, **kwargs):
     return fun(*args, **kwargs)
 
 
+def tensor_splitter(ishape, split_sizes, axis=2, name="splitter"):
+    """
+    Build a Lambda layer that splits a tensor of known shape into consecutive chunks.
+    Splitting along the batch index (axis=0) is not supported by this wrapper.
+
+    Parameters
+    ----------
+        ishape: list(int)
+            shape of the tensor to be split, batch dimension included
+        split_sizes: list(int)
+            size of each chunk along ``axis``, they must add up to ``ishape[axis]``
+        axis: int
+            axis along which the split is applied, must be >= 1
+        name: str
+            name given to the layer
+    Returns
+    -------
+        sp_layer: layer
+            a keras layer that applies the split operation upon call
+    Raises
+    ------
+        ValueError
+            if ``axis < 1`` or the chunk sizes do not add up to ``ishape[axis]``
+    """
+    if axis < 1:
+        raise ValueError("tensor_splitter wrapper can only split along non-batch dimensions")
+
+    # The chunks must cover the split axis exactly
+    if ishape[axis] != np.sum(split_sizes):
+        raise ValueError(
+            f"Cannot split tensor of shape {ishape} along axis {axis} in chunks of {split_sizes}"
+        )
+
+    # Dimensions found before and after the split axis, the batch one is dropped
+    head, tail = ishape[1:axis], ishape[axis + 1 :]
+    # Shape of each chunk as declared to the Lambda layer
+    oshapes = [(*head, xsize, *tail) for xsize in split_sizes]
+
+    # keras' split takes the positions at which to cut, not the chunk sizes
+    # NOTE(review): the last position equals the axis size, this may add a trailing empty chunk
+    indices = []
+    for xsize in split_sizes:
+        indices.append(xsize + (indices[-1] if indices else 0))
+
+    split_fun = lambda x: Kops.split(x, indices, axis=axis)
+    return keras_Lambda(split_fun, output_shape=oshapes, name=name)
+
+
 expand_dims = Kops.expand_dims
 absolute = Kops.absolute
 tanh = Kops.tanh
@@ -339,3 +379,5 @@ def backend_function(fun_name, *args, **kwargs):
 split = Kops.split
 gather = Kops.take
 take = Kops.take
+sum = Kops.sum
+op_log = Kops.log
diff --git a/n3fit/src/n3fit/model_gen.py b/n3fit/src/n3fit/model_gen.py
index 852f93caf3..5c7113bdf7 100644
--- a/n3fit/src/n3fit/model_gen.py
+++ b/n3fit/src/n3fit/model_gen.py
@@ -99,13 +99,11 @@ def _generate_experimental_layer(self, pdf):
         the input PDF is evaluated in all points that the experiment needs and needs to be split
         """
         if len(self.dataset_xsizes) > 1:
-            splitting_layer = op.as_layer(
-                op.split,
-                op_args=[self.dataset_xsizes],
-                op_kwargs={"axis": 2},
-                name=f"{self.name}_split",
+
+            sp_layer = op.tensor_splitter(
+                pdf.shape, self.dataset_xsizes, axis=2, name=f"{self.name}_split"
             )
-            sp_pdf = splitting_layer(pdf)
+            sp_pdf = sp_layer(pdf)
             output_layers = [obs(p) for obs, p in zip(self.observables, sp_pdf)]
         else:
             output_layers = [obs(pdf) for obs in self.observables]
diff --git a/n3fit/src/n3fit/model_trainer.py b/n3fit/src/n3fit/model_trainer.py
index 018995b98b..d864d2c6e5 100644
--- a/n3fit/src/n3fit/model_trainer.py
+++ b/n3fit/src/n3fit/model_trainer.py
@@ -40,6 +40,9 @@
 # Each how many epochs do we increase the integrability Lagrange Multiplier
 PUSH_INTEGRABILITY_EACH = 100
 
+# Final number of flavours
+FLAVOURS = 14
+
 # See ModelTrainer::_xgrid_generation for the definition of each field and how they are generated
 InputInfo = namedtuple("InputInfo", ["input", "split", "idx"])
 
@@ -357,19 +360,10 @@ def _xgrid_generation(self):
         # The PDF model is called with a concatenation of all inputs
         # however, each output layer might require a different subset, this is achieved by
         # splitting back the output
-
-        output_shape = []
-        indices = []
-        current_idx = 0
-        for itensor in inputs_unique:
-            isize = itensor.shape[1]
-            current_idx += isize
-            # Tell keras where to split the tensor
-            indices.append(current_idx)
-            # (number of replica, xgrid size, flavours)
-            output_shape.append((1, isize, 14))
-
-        sp_layer = Lambda(lambda x: op.split(x, indices, axis=2), output_shape=output_shape)
+        # Input shape: (batch size, replicas, input array, flavours)
+        ishape = (1, len(self.replicas), input_arr.shape[0], FLAVOURS)
+        xsizes = [i.shape[1] for i in inputs_unique]
+        sp_layer = op.tensor_splitter(ishape, xsizes, axis=2, name="splitter")
 
         return InputInfo(input_layer, sp_layer, inputs_idx)
 
@@ -947,8 +941,10 @@ def hyperparametrizable(self, params):
             )
 
             if photons:
-                if self._scaler: # select only the non-scaled input
-                    pdf_model.get_layer("add_photon").register_photon(xinput.input.tensor_content[:,:,1:])
+                if self._scaler:  # select only the non-scaled input
+                    pdf_model.get_layer("add_photon").register_photon(
+                        xinput.input.tensor_content[:, :, 1:]
+                    )
                 else:
                     pdf_model.get_layer("add_photon").register_photon(xinput.input.tensor_content)
 
diff --git a/n3fit/src/n3fit/performfit.py b/n3fit/src/n3fit/performfit.py
index 04703ef924..7e91c1b5ca 100644
--- a/n3fit/src/n3fit/performfit.py
+++ b/n3fit/src/n3fit/performfit.py
@@ -3,11 +3,8 @@
 """
 
 # Backend-independent imports
-import copy
 import logging
 
-import numpy as np
-
 import n3fit.checks
 from n3fit.vpinterface import N3PDF
 
diff --git a/n3fit/src/n3fit/tests/test_backend.py b/n3fit/src/n3fit/tests/test_backend.py
index eaae5667c8..e464ae2384 100644
--- a/n3fit/src/n3fit/tests/test_backend.py
+++ b/n3fit/src/n3fit/tests/test_backend.py
@@ -2,8 +2,11 @@
     This module tests the mathematical functions in the n3fit backend
     and ensures they do the same thing as their numpy counterparts
 """
+
 import operator
+
 import numpy as np
+
 from n3fit.backends import operations as op
 
 # General parameters
@@ -24,14 +27,14 @@
 
 
 def are_equal(result, reference, threshold=THRESHOLD):
-    """ checks the difference between array `reference` and tensor `result` is
-    below `threshold` for all elements """
-    res = op.evaluate(result)
+    """checks the difference between array `reference` and tensor `result` is
+    below `threshold` for all elements"""
+    res = op.tensor_to_numpy_or_python(result)
     assert np.allclose(res, reference, atol=threshold)
 
 
 def numpy_check(backend_op, python_op, mode="same"):
-    """ Receives a backend operation (`backend_op`) and a python operation
+    """Receives a backend operation (`backend_op`) and a python operation
     `python_op` and asserts that, applied to two random arrays, the result
     is the same.
     The option `mode` selects the two arrays to be tested and accepts the following
@@ -53,7 +56,28 @@ def numpy_check(backend_op, python_op, mode="same"):
         arrays = [ARR1, ARR2, ARR1, ARR1]
     elif mode == "twenty":
         tensors = [T1, T2, T1, T1, T1, T1, T1, T1, T1, T1, T1, T2, T1, T1, T1, T1, T1, T1, T1, T1]
-        arrays = [ARR1, ARR2, ARR1, ARR1, ARR1, ARR1, ARR1, ARR1, ARR1, ARR1, ARR1, ARR2, ARR1, ARR1, ARR1, ARR1, ARR1, ARR1, ARR1, ARR1]
+        arrays = [
+            ARR1,
+            ARR2,
+            ARR1,
+            ARR1,
+            ARR1,
+            ARR1,
+            ARR1,
+            ARR1,
+            ARR1,
+            ARR1,
+            ARR1,
+            ARR2,
+            ARR1,
+            ARR1,
+            ARR1,
+            ARR1,
+            ARR1,
+            ARR1,
+            ARR1,
+            ARR1,
+        ]
     elif mode == "ten":
         tensors = [T1, T2, T1, T1, T1, T1, T1, T1, T1, T1]
         arrays = [ARR1, ARR2, ARR1, ARR1, ARR1, ARR1, ARR1, ARR1, ARR1, ARR1]
@@ -98,13 +122,16 @@ def test_c_to_py_fun():
     numpy_check(op_smp, reference, "four")
     # COM
     op_com = op.c_to_py_fun("COM")
-    reference = lambda x, y, z, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t : (x + y + z + d + e + f + g + h + i + j) / (k + l + m + n + o + p + q + r + s + t)
+    reference = lambda x, y, z, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t: (
+        x + y + z + d + e + f + g + h + i + j
+    ) / (k + l + m + n + o + p + q + r + s + t)
     numpy_check(op_com, reference, "twenty")
     # SMT
     op_smt = op.c_to_py_fun("SMT")
-    reference = lambda x, y, z, d, e, f, g, h, i, j : (x + y + z + d + e + f + g + h + i + j)
+    reference = lambda x, y, z, d, e, f, g, h, i, j: (x + y + z + d + e + f + g + h + i + j)
     numpy_check(op_smt, reference, "ten")
 
+
 # Tests operations
 def test_op_multiply():
     numpy_check(op.op_multiply, operator.mul)
@@ -122,17 +149,11 @@ def test_flatten():
     numpy_check(op.flatten, np.ndarray.flatten, mode=(T3, [ARR3]))
 
 
-def test_boolean_mask():
-    bools = np.random.randint(0, 2, DIM, dtype=bool)
-    np_result = ARR1[bools]
-    tf_bools = op.numpy_to_tensor(bools)
-    tf_result = op.boolean_mask(T1, tf_bools, axis=0)
-    are_equal(np_result, tf_result)
-
 def test_tensor_product():
     np_result = np.tensordot(ARR3, ARR1, axes=1)
     tf_result = op.tensor_product(T3, T1, axes=1)
-    are_equal(np_result, tf_result)
+    are_equal(tf_result, np_result)
+
 
 def test_sum():
     numpy_check(op.sum, np.sum, mode='single')
diff --git a/n3fit/src/n3fit/tests/test_layers.py b/n3fit/src/n3fit/tests/test_layers.py
index 8615414c2f..84ef8c8eaf 100644
--- a/n3fit/src/n3fit/tests/test_layers.py
+++ b/n3fit/src/n3fit/tests/test_layers.py
@@ -169,7 +169,7 @@ def test_DIS():
         kp = op.numpy_to_tensor([[pdf]])  # add batch and replica dimension
         # generate the n3fit results
         result_tensor = obs_layer(kp)
-        result = op.evaluate(result_tensor)
+        result = op.tensor_to_numpy_or_python(result_tensor)
         # Compute the numpy version of this layer
         all_masks = obs_layer.all_masks
         if len(all_masks) < nfk:
@@ -195,7 +195,7 @@ def test_DY():
         kp = op.numpy_to_tensor([[pdf]])  # add batch and replica dimension
         # generate the n3fit results
         result_tensor = obs_layer(kp)
-        result = op.evaluate(result_tensor)
+        result = op.tensor_to_numpy_or_python(result_tensor)
         # Compute the numpy version of this layer
         all_masks = obs_layer.all_masks
         if len(all_masks) < nfk:
diff --git a/pyproject.toml b/pyproject.toml
index 3b659e35a5..28b19d67cb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -72,6 +72,7 @@ reportengine = { git = "https://github.com/NNPDF/reportengine" }
 # Fit
 psutil = "*"
 tensorflow = "*"
+keras = "^3.1"
 eko = "^0.14.1"
 joblib = "*"
 # Hyperopt