From 2f150e6b20531292bd0d1e6fea0ff0042e645bad Mon Sep 17 00:00:00 2001 From: juacrumar Date: Fri, 22 Nov 2024 03:59:08 +0100 Subject: [PATCH] make it more keras only, working also with sum rules update keras limits test with newer tf --- conda-recipe/meta.yaml | 5 +- .../n3fit/backends/keras_backend/MetaModel.py | 11 +-- .../backends/keras_backend/constraints.py | 5 +- .../backends/keras_backend/operations.py | 80 ++++++++++++++----- n3fit/src/n3fit/model_gen.py | 10 +-- n3fit/src/n3fit/model_trainer.py | 26 +++--- n3fit/src/n3fit/performfit.py | 3 - n3fit/src/n3fit/tests/test_backend.py | 51 ++++++++---- n3fit/src/n3fit/tests/test_layers.py | 4 +- pyproject.toml | 1 + 10 files changed, 124 insertions(+), 72 deletions(-) diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml index cb93a7be0b..39df92d148 100644 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -19,7 +19,8 @@ requirements: - pip run: - python >=3.9,<3.13 - - tensorflow >=2.10,<2.17 # 2.17 works ok but the conda-forge package for macos doesn't + - tensorflow >=2.10 + - keras >=3.1 - psutil # to ensure n3fit affinity is with the right processors - hyperopt - mongodb @@ -29,7 +30,7 @@ requirements: - numpy - pkg-config - reportengine - - matplotlib >=3.3.0,<3.8 # see https://github.com/NNPDF/nnpdf/pull/1809 + - matplotlib >=3.3.0 - blessings >=1.7 - scipy >=0.19.1 - pandas diff --git a/n3fit/src/n3fit/backends/keras_backend/MetaModel.py b/n3fit/src/n3fit/backends/keras_backend/MetaModel.py index 82ac7216b5..f1cdbc418a 100644 --- a/n3fit/src/n3fit/backends/keras_backend/MetaModel.py +++ b/n3fit/src/n3fit/backends/keras_backend/MetaModel.py @@ -17,12 +17,6 @@ import n3fit.backends.keras_backend.operations as op -# We need a function to transform tensors to numpy/python primitives -if K.backend() == "torch": - _to_numpy_or_python_type = lambda ret: {k: i.detach().numpy() for k, i in ret.items()} -else: - _to_numpy_or_python_type = lambda ret: {k: i.numpy() for k, i in ret.items()} - # 
Starting with TF 2.16, a memory leak in TF https://github.com/tensorflow/tensorflow/issues/64170 # makes jit compilation unusable in GPU. # Before TF 2.16 it was set to `False` by default. From 2.16 onwards, it is set to `True` @@ -119,6 +113,7 @@ def __init__(self, input_tensors, output_tensors, scaler=None, input_values=None self.compute_losses_function = None self._scaler = scaler + # @tf.autograph.experimental.do_not_convert def _parse_input(self, extra_input=None): """Returns the input data the model was compiled with. Introduces the extra_input in the places asigned to the placeholders. @@ -225,7 +220,7 @@ def compute_losses(self): inputs[k] = v[:1] # Compile a evaluation function - + @op.decorator_compiler def losses_fun(): predictions = self(inputs) # If we only have one dataset the output changes @@ -241,7 +236,7 @@ def losses_fun(): # The output of this function is to be used by python (and numpy) # so we need to convert the tensors - return _to_numpy_or_python_type(ret) + return op.dict_to_numpy_or_python(ret) def compile( self, diff --git a/n3fit/src/n3fit/backends/keras_backend/constraints.py b/n3fit/src/n3fit/backends/keras_backend/constraints.py index 57d588716b..bb6d85ff4b 100644 --- a/n3fit/src/n3fit/backends/keras_backend/constraints.py +++ b/n3fit/src/n3fit/backends/keras_backend/constraints.py @@ -3,6 +3,7 @@ """ from keras import backend as K +from keras import ops as Kops from keras.constraints import MinMaxNorm @@ -16,8 +17,8 @@ def __init__(self, min_value, max_value, **kwargs): super().__init__(min_value=min_value, max_value=max_value, axis=1, **kwargs) def __call__(self, w): - norms = K.sum(w, axis=self.axis, keepdims=True) + norms = Kops.sum(w, axis=self.axis, keepdims=True) desired = ( - self.rate * K.clip(norms, self.min_value, self.max_value) + (1 - self.rate) * norms + self.rate * Kops.clip(norms, self.min_value, self.max_value) + (1 - self.rate) * norms ) return w * desired / (K.epsilon() + norms) diff --git 
a/n3fit/src/n3fit/backends/keras_backend/operations.py b/n3fit/src/n3fit/backends/keras_backend/operations.py index 006888f923..f521b0536e 100644 --- a/n3fit/src/n3fit/backends/keras_backend/operations.py +++ b/n3fit/src/n3fit/backends/keras_backend/operations.py @@ -32,14 +32,21 @@ from keras.layers import multiply as keras_multiply from keras.layers import subtract as keras_subtract import numpy as np -import tensorflow as tf from validphys.convolution import OP +# Backend dependent functions and operations +if K.backend() == "torch": + tensor_to_numpy_or_python = lambda x: x.detach().numpy() + decorator_compiler = lambda f: f +else: + tensor_to_numpy_or_python = lambda x: x.numpy() + # TensorFlow is only imported (and required) when the backend is not torch + import tensorflow as tf -def evaluate(tensor): - """Evaluate input tensor using the backend""" - return K.eval(tensor) + decorator_compiler = tf.function + +dict_to_numpy_or_python = lambda ret: {k: tensor_to_numpy_or_python(i) for k, i in ret.items()} def as_layer(operation, op_args=None, op_kwargs=None, **kwargs): @@ -269,21 +276,6 @@ def pow(tensor, power): return Kops.power(tensor, power) -def op_log(o_tensor, **kwargs): - """ - Computes the logarithm of the input - """ - return Kops.log(o_tensor) - - -def sum(*args, **kwargs): - """ - Computes the sum of the elements of the tensor - see full `docs `_ - """ - return Kops.sum(*args, **kwargs) - - def scatter_to_one(values, indices, output_shape): """ Like scatter_nd initialized to one instead of zero @@ -332,6 +324,54 @@ def backend_function(fun_name, *args, **kwargs): return fun(*args, **kwargs) +def tensor_splitter(ishape, split_sizes, axis=2, name="splitter"): + """ + Generates a Lambda layer to apply the split operation to a given tensor shape. + This wrapper cannot split along the batch index (axis=0). 
+ + Parameters + ---------- + ishape: list(int) + input shape of the tensor that will be split + split_sizes: list(int) + size of each chunk + axis: int + axis along which the split will be applied + name: str + name of the layer + Returns + ------- + sp_layer: layer + a keras layer that applies the split operation upon call + """ + if axis < 1: + raise ValueError("tensor_splitter wrapper can only split along non-batch dimensions") + + # Check that we can indeed split this + if ishape[axis] != np.sum(split_sizes): + raise ValueError( + f"Cannot split tensor of shape {ishape} along axis {axis} in chunks of {split_sizes}" + ) + + # Output shape of each split + oshapes = [] + # Indices at which to put the splits + # NB: tensorflow's split takes the chunk sizes directly, keras takes the split indices; + # the last index equals the axis size, which makes keras append an empty trailing chunk + indices = [] + current_idx = 0 + + for xsize in split_sizes: + current_idx += xsize + indices.append(current_idx) + oshapes.append((*ishape[1:axis], xsize, *ishape[axis + 1 :])) + + sp_layer = keras_Lambda( + lambda x: Kops.split(x, indices, axis=axis)[: len(oshapes)], output_shape=oshapes, name=name + ) + return sp_layer + + expand_dims = Kops.expand_dims absolute = Kops.absolute tanh = Kops.tanh @@ -339,3 +379,5 @@ def backend_function(fun_name, *args, **kwargs): split = Kops.split gather = Kops.take take = Kops.take +sum = Kops.sum +op_log = Kops.log diff --git a/n3fit/src/n3fit/model_gen.py b/n3fit/src/n3fit/model_gen.py index 852f93caf3..5c7113bdf7 100644 --- a/n3fit/src/n3fit/model_gen.py +++ b/n3fit/src/n3fit/model_gen.py @@ -99,13 +99,11 @@ def _generate_experimental_layer(self, pdf): the input PDF is evaluated in all points that the experiment needs and needs to be split """ if len(self.dataset_xsizes) > 1: - splitting_layer = op.as_layer( - op.split, - op_args=[self.dataset_xsizes], - op_kwargs={"axis": 2}, - name=f"{self.name}_split", + + sp_layer = op.tensor_splitter( + pdf.shape, self.dataset_xsizes, axis=2, name=f"{self.name}_split" ) 
- sp_pdf = splitting_layer(pdf) + sp_pdf = sp_layer(pdf) output_layers = [obs(p) for obs, p in zip(self.observables, sp_pdf)] else: output_layers = [obs(pdf) for obs in self.observables] diff --git a/n3fit/src/n3fit/model_trainer.py b/n3fit/src/n3fit/model_trainer.py index 018995b98b..d864d2c6e5 100644 --- a/n3fit/src/n3fit/model_trainer.py +++ b/n3fit/src/n3fit/model_trainer.py @@ -40,6 +40,9 @@ # Each how many epochs do we increase the integrability Lagrange Multiplier PUSH_INTEGRABILITY_EACH = 100 +# Final number of flavours +FLAVOURS = 14 + # See ModelTrainer::_xgrid_generation for the definition of each field and how they are generated InputInfo = namedtuple("InputInfo", ["input", "split", "idx"]) @@ -357,19 +360,10 @@ def _xgrid_generation(self): # The PDF model is called with a concatenation of all inputs # however, each output layer might require a different subset, this is achieved by # splitting back the output - - output_shape = [] - indices = [] - current_idx = 0 - for itensor in inputs_unique: - isize = itensor.shape[1] - current_idx += isize - # Tell keras where to split the tensor - indices.append(current_idx) - # (number of replica, xgrid size, flavours) - output_shape.append((1, isize, 14)) - - sp_layer = Lambda(lambda x: op.split(x, indices, axis=2), output_shape=output_shape) + # Input shape: (batch size, replicas, input array, flavours) + ishape = (1, len(self.replicas), input_arr.shape[0], FLAVOURS) + xsizes = [i.shape[1] for i in inputs_unique] + sp_layer = op.tensor_splitter(ishape, xsizes, axis=2, name="splitter") return InputInfo(input_layer, sp_layer, inputs_idx) @@ -947,8 +941,10 @@ def hyperparametrizable(self, params): ) if photons: - if self._scaler: # select only the non-scaled input - pdf_model.get_layer("add_photon").register_photon(xinput.input.tensor_content[:,:,1:]) + if self._scaler: # select only the non-scaled input + pdf_model.get_layer("add_photon").register_photon( + xinput.input.tensor_content[:, :, 1:] + ) else: 
pdf_model.get_layer("add_photon").register_photon(xinput.input.tensor_content) diff --git a/n3fit/src/n3fit/performfit.py b/n3fit/src/n3fit/performfit.py index 04703ef924..7e91c1b5ca 100644 --- a/n3fit/src/n3fit/performfit.py +++ b/n3fit/src/n3fit/performfit.py @@ -3,11 +3,8 @@ """ # Backend-independent imports -import copy import logging -import numpy as np - import n3fit.checks from n3fit.vpinterface import N3PDF diff --git a/n3fit/src/n3fit/tests/test_backend.py b/n3fit/src/n3fit/tests/test_backend.py index eaae5667c8..e464ae2384 100644 --- a/n3fit/src/n3fit/tests/test_backend.py +++ b/n3fit/src/n3fit/tests/test_backend.py @@ -2,8 +2,11 @@ This module tests the mathematical functions in the n3fit backend and ensures they do the same thing as their numpy counterparts """ + import operator + import numpy as np + from n3fit.backends import operations as op # General parameters @@ -24,14 +27,14 @@ def are_equal(result, reference, threshold=THRESHOLD): - """ checks the difference between array `reference` and tensor `result` is - below `threshold` for all elements """ - res = op.evaluate(result) + """checks the difference between array `reference` and tensor `result` is + below `threshold` for all elements""" + res = op.tensor_to_numpy_or_python(result) assert np.allclose(res, reference, atol=threshold) def numpy_check(backend_op, python_op, mode="same"): - """ Receives a backend operation (`backend_op`) and a python operation + """Receives a backend operation (`backend_op`) and a python operation `python_op` and asserts that, applied to two random arrays, the result is the same. 
The option `mode` selects the two arrays to be tested and accepts the following @@ -53,7 +56,28 @@ def numpy_check(backend_op, python_op, mode="same"): arrays = [ARR1, ARR2, ARR1, ARR1] elif mode == "twenty": tensors = [T1, T2, T1, T1, T1, T1, T1, T1, T1, T1, T1, T2, T1, T1, T1, T1, T1, T1, T1, T1] - arrays = [ARR1, ARR2, ARR1, ARR1, ARR1, ARR1, ARR1, ARR1, ARR1, ARR1, ARR1, ARR2, ARR1, ARR1, ARR1, ARR1, ARR1, ARR1, ARR1, ARR1] + arrays = [ + ARR1, + ARR2, + ARR1, + ARR1, + ARR1, + ARR1, + ARR1, + ARR1, + ARR1, + ARR1, + ARR1, + ARR2, + ARR1, + ARR1, + ARR1, + ARR1, + ARR1, + ARR1, + ARR1, + ARR1, + ] elif mode == "ten": tensors = [T1, T2, T1, T1, T1, T1, T1, T1, T1, T1] arrays = [ARR1, ARR2, ARR1, ARR1, ARR1, ARR1, ARR1, ARR1, ARR1, ARR1] @@ -98,13 +122,16 @@ def test_c_to_py_fun(): numpy_check(op_smp, reference, "four") # COM op_com = op.c_to_py_fun("COM") - reference = lambda x, y, z, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t : (x + y + z + d + e + f + g + h + i + j) / (k + l + m + n + o + p + q + r + s + t) + reference = lambda x, y, z, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t: ( + x + y + z + d + e + f + g + h + i + j + ) / (k + l + m + n + o + p + q + r + s + t) numpy_check(op_com, reference, "twenty") # SMT op_smt = op.c_to_py_fun("SMT") - reference = lambda x, y, z, d, e, f, g, h, i, j : (x + y + z + d + e + f + g + h + i + j) + reference = lambda x, y, z, d, e, f, g, h, i, j: (x + y + z + d + e + f + g + h + i + j) numpy_check(op_smt, reference, "ten") + # Tests operations def test_op_multiply(): numpy_check(op.op_multiply, operator.mul) @@ -122,17 +149,11 @@ def test_flatten(): numpy_check(op.flatten, np.ndarray.flatten, mode=(T3, [ARR3])) -def test_boolean_mask(): - bools = np.random.randint(0, 2, DIM, dtype=bool) - np_result = ARR1[bools] - tf_bools = op.numpy_to_tensor(bools) - tf_result = op.boolean_mask(T1, tf_bools, axis=0) - are_equal(np_result, tf_result) - def test_tensor_product(): np_result = np.tensordot(ARR3, ARR1, axes=1) 
tf_result = op.tensor_product(T3, T1, axes=1) - are_equal(np_result, tf_result) + are_equal(tf_result, np_result) + def test_sum(): numpy_check(op.sum, np.sum, mode='single') diff --git a/n3fit/src/n3fit/tests/test_layers.py b/n3fit/src/n3fit/tests/test_layers.py index 8615414c2f..84ef8c8eaf 100644 --- a/n3fit/src/n3fit/tests/test_layers.py +++ b/n3fit/src/n3fit/tests/test_layers.py @@ -169,7 +169,7 @@ def test_DIS(): kp = op.numpy_to_tensor([[pdf]]) # add batch and replica dimension # generate the n3fit results result_tensor = obs_layer(kp) - result = op.evaluate(result_tensor) + result = op.tensor_to_numpy_or_python(result_tensor) # Compute the numpy version of this layer all_masks = obs_layer.all_masks if len(all_masks) < nfk: @@ -195,7 +195,7 @@ def test_DY(): kp = op.numpy_to_tensor([[pdf]]) # add batch and replica dimension # generate the n3fit results result_tensor = obs_layer(kp) - result = op.evaluate(result_tensor) + result = op.tensor_to_numpy_or_python(result_tensor) # Compute the numpy version of this layer all_masks = obs_layer.all_masks if len(all_masks) < nfk: diff --git a/pyproject.toml b/pyproject.toml index 3b659e35a5..28b19d67cb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,6 +72,7 @@ reportengine = { git = "https://github.com/NNPDF/reportengine" } # Fit psutil = "*" tensorflow = "*" +keras = "^3.1" eko = "^0.14.1" joblib = "*" # Hyperopt