diff --git a/n3fit/src/n3fit/backends/keras_backend/internal_state.py b/n3fit/src/n3fit/backends/keras_backend/internal_state.py index 3aa940fd5c..e43f8c1863 100644 --- a/n3fit/src/n3fit/backends/keras_backend/internal_state.py +++ b/n3fit/src/n3fit/backends/keras_backend/internal_state.py @@ -2,6 +2,7 @@ Library of functions that modify the internal state of Keras/Tensorflow """ import os + import psutil # Despite the current default being tf-eigen, the option below seems to have a positive impact @@ -9,14 +10,14 @@ # Reduce tensorflow verbosity os.environ.setdefault("TF_CPP_MIN_LOG_LEVEL", "1") -import random as rn import logging +import random as rn + import numpy as np import tensorflow as tf from tensorflow import keras from tensorflow.keras import backend as K - log = logging.getLogger(__name__) @@ -28,7 +29,7 @@ def set_eager(flag=True): tf.config.run_functions_eagerly(flag) -def set_number_of_cores(max_cores=None): +def set_number_of_cores(max_cores=None, max_threads=None): """ Set the maximum number of cores and threads per core to be used by TF. It defaults to the number of physical cores @@ -56,9 +57,21 @@ def set_number_of_cores(max_cores=None): # In any case, we never want to get above the number provided by the user if max_cores is not None: cores = min(cores, max_cores) + + threads = tpc * 2 + if max_threads is not None: + threads = min(max_threads, threads) + log.info("Setting the number of cores to: %d", cores) - tf.config.threading.set_inter_op_parallelism_threads(tpc * 2) - tf.config.threading.set_intra_op_parallelism_threads(cores) + try: + tf.config.threading.set_inter_op_parallelism_threads(threads) + tf.config.threading.set_intra_op_parallelism_threads(cores) + except RuntimeError: + # If pdfflow is being used, tensorflow will already be initialized by pdfflow + # maybe it would be good to drop completely pdfflow before starting the fit? (TODO ?) 
+ log.warning( + "Could not set tensorflow parallelism settings from n3fit, maybe has already been initialized?" + ) def clear_backend_state(): @@ -115,13 +128,12 @@ def set_initial_state(debug=False, external_seed=None, max_cores=None): # Set the number of cores depending on the user choice of max_cores # if debug mode and no number of cores set by the user, set to 1 + threads = None # auto if debug and max_cores is None: keras.utils.set_random_seed(7331) + threads = 1 tf.config.experimental.enable_op_determinism() - tf.config.threading.set_inter_op_parallelism_threads(1) - tf.config.threading.set_intra_op_parallelism_threads(1) - else: - set_number_of_cores(max_cores=max_cores) + set_number_of_cores(max_cores=max_cores, max_threads=threads) # Once again, if in debug mode or external_seed set, set also the TF seed if debug or external_seed: diff --git a/validphys2/src/validphys/app.py b/validphys2/src/validphys/app.py index 19f9f1d41f..45cbc008bf 100644 --- a/validphys2/src/validphys/app.py +++ b/validphys2/src/validphys/app.py @@ -14,11 +14,10 @@ import os import sys -import lhapdf - from reportengine import app from validphys import mplstyles, uploadutils from validphys.config import Config, Environment +from validphys.lhapdf_compatibility import lhapdf providers = [ "validphys.results", diff --git a/validphys2/src/validphys/checks.py b/validphys2/src/validphys/checks.py index 1e4a33f103..8ccecdec43 100644 --- a/validphys2/src/validphys/checks.py +++ b/validphys2/src/validphys/checks.py @@ -9,7 +9,6 @@ import platform import tempfile -import lhapdf from matplotlib import scale as mscale from reportengine.checks import CheckError, check, make_argcheck, make_check @@ -71,7 +70,7 @@ def check_can_save_grid(ns, **kwags): if not ns['installgrid']: return - write_path = lhapdf.paths()[-1] + write_path = lhaindex.get_lha_datapath() try: tempfile.TemporaryFile(dir=write_path) except OSError as e: diff --git a/validphys2/src/validphys/lhaindex.py
b/validphys2/src/validphys/lhaindex.py index cd8df6401e..ff8943f4c7 100644 --- a/validphys2/src/validphys/lhaindex.py +++ b/validphys2/src/validphys/lhaindex.py @@ -12,9 +12,8 @@ import os.path as osp import re -import lhapdf - from reportengine.compat import yaml +from validphys.lhapdf_compatibility import lhapdf _indexes_to_names = None _names_to_indexes = None @@ -25,7 +24,7 @@ def expand_index_names(globstr): def expand_local_names(globstr): - paths = get_lha_paths() + paths = lhapdf.paths() return [ name for path in paths @@ -51,7 +50,7 @@ def get_indexes_to_names(): def finddir(name): - for path in get_lha_paths(): + for path in lhapdf.paths(): d = osp.join(path, name) if osp.isdir(d): return d @@ -60,7 +59,7 @@ def finddir(name): def isinstalled(name): """Check that name exists in LHAPDF dir""" - return name and any(osp.isdir(osp.join(path, name)) for path in get_lha_paths()) + return name and any(osp.isdir(osp.join(path, name)) for path in lhapdf.paths()) def get_names_to_indexes(): @@ -88,7 +87,7 @@ def get_pdf_name(index): def parse_index(index_file): d = {} - name_re = '(\d+)\s+(\S+)' + name_re = r'(\d+)\s+(\S+)' with open(index_file) as localfile: for line in localfile.readlines(): m = re.match(name_re, line) @@ -116,7 +115,7 @@ def as_from_name(name): def infofilename(name): - for path in get_lha_paths(): + for path in lhapdf.paths(): info = osp.join(path, name, name + '.info') if osp.exists(info): return info @@ -130,12 +129,8 @@ def parse_info(name): return result -def get_lha_paths(): - return lhapdf.paths() - - def get_lha_datapath(): - return get_lha_paths()[-1] + return lhapdf.paths()[-1] def get_index_path(folder=None): diff --git a/validphys2/src/validphys/lhapdf_compatibility.py b/validphys2/src/validphys/lhapdf_compatibility.py new file mode 100644 index 0000000000..5a04b3902d --- /dev/null +++ b/validphys2/src/validphys/lhapdf_compatibility.py @@ -0,0 +1,125 @@ +""" + Module for LHAPDF compatibility backends + + If LHAPDF is installed, the 
module will transparently hand over everything to LHAPDF + if LHAPDF is not available, it will try to use a combination of the packages + `lhapdf-management` and `pdfflow` + which cover all the features of LHAPDF used during the fit (and likely most of validphys) +""" +from functools import cached_property + +import numpy as np + +try: + import lhapdf + + USING_LHAPDF = True +except ModuleNotFoundError: + import logging + + import lhapdf_management as lhapdf + + log = logging.getLogger(__name__) + log.warning("LHAPDF was not found, using an alternative backend") + + USING_LHAPDF = False + + +class _PDFFlowPDF: + """Wrapper around the PDFFlow PDF so that it can be used as an LHAPDF + set by validphys + Takes as input a pdf_meta object (which is a PDFset from lhapdf_management + and which knows where the PDF needs to be loaded from) and a single member + + Loading the PDF is done in a lazy manner since most of the time only a few members are needed. + + Since PDFFlow is only utilized to load the PDF for interpolation, the import is delayed until + the first call to `mkPDF`. This allows the usage of most of validphys without tensorflow. 
+ """ + + def __init__(self, pdf_meta, member): + if USING_LHAPDF: + raise ValueError("PDFFlow should not be instantiated when using LHAPDF") + + self._pdf_meta = pdf_meta + self._m = member + self._pdf = None + self._flavors = self._pdf_meta.info["Flavors"] + + @cached_property + def pdf(self): + # Don't import PDF Flow until you really needed it + import pdfflow + + if self._pdf is None: + pdf_def = f"{self._pdf_meta.name}/{self._m}" + self._pdf = pdfflow.mkPDF(pdf_def, self._pdf_meta.path.parent) + return self._pdf + + def flavors(self): + return self._flavors + + def _xfxQ_all_pid(self, x, q): + x = np.atleast_1d(x) + q = np.atleast_1d(q) + + res = self.pdf.py_xfxQ2_allpid(x, q**2).numpy() + return dict(zip(self._flavors, res.T)) + + def xfxQ(self, a, b, c=None): + """Wrapper for the LHAPDF xfxQ function + This is an overloaded function in LHAPDF so depending + on the number of arguments we will do: + xfxQ(flavours, x, Q) + or + xfxQ(x, q) + + All of x/q/flavours can be either a scalar or an array + """ + if c is None: + return self._xfxQ_all_pid(a, b) + + # PDFFlow doesn't allow to ask for flavours that do not exist + # so let us retrieve all and return 0s for non existing flavs + ret_dict = self.xfxQ(b, c) + zeros = np.zeros_like(b) + + if isinstance(a, int): + return ret_dict.get(a, zeros) + return [ret_dict.get(i, zeros) for i in a] + + def xfxQ2(self, a, b, c=None): + """Wrapper for LHAPDF xfxQ2 function, like xfxQ for Q2""" + if c is None: + return self.xfxQ(a, np.sqrt(b)) + return self.xfxQ(a, b, np.sqrt(c)) + + +def make_pdf(pdf_name, member=None): + """Load a PDF + if member is given, load the single member otherwise, load the entire set as a list + + if LHAPDF is provided, it returns LHAPDF PDF instances + otherwise it returns and object which is _compatible_ with LHAPDF + for lhapdf functions for the selected backend + + Parameters: + ---------- + pdf_name: str + name of the PDF to load + member: int + index of the member of the PDF to load + + 
Returns: + ------- + list(pdf_sets) + """ + if USING_LHAPDF: + if member is None: + return lhapdf.mkPDFs(pdf_name) + return [lhapdf.mkPDF(pdf_name, member)] + + pdf_meta = lhapdf.load_pdf_meta(pdf_name) + if member is None: + return [_PDFFlowPDF(pdf_meta, m) for m in range(len(pdf_meta))] + return [_PDFFlowPDF(pdf_meta, member)] diff --git a/validphys2/src/validphys/lhapdfset.py b/validphys2/src/validphys/lhapdfset.py index 860a4fcf39..20da8ddc76 100644 --- a/validphys2/src/validphys/lhapdfset.py +++ b/validphys2/src/validphys/lhapdfset.py @@ -28,9 +28,10 @@ """ import logging -import lhapdf import numpy as np +from validphys.lhapdf_compatibility import make_pdf + log = logging.getLogger(__name__) @@ -46,9 +47,9 @@ def __init__(self, name, error_type): self._error_type = error_type if self.is_t0: # If at this point we already know this is a T0 set, load only the CV - self._lhapdf_set = [lhapdf.mkPDF(name)] + self._lhapdf_set = make_pdf(name, 0) else: - self._lhapdf_set = lhapdf.mkPDFs(name) + self._lhapdf_set = make_pdf(name) self._flavors = None @property diff --git a/validphys2/src/validphys/lhio.py b/validphys2/src/validphys/lhio.py index 6be9187318..92c767cccc 100644 --- a/validphys2/src/validphys/lhio.py +++ b/validphys2/src/validphys/lhio.py @@ -8,7 +8,6 @@ import pathlib import shutil -import lhapdf import numpy as np import pandas as pd @@ -137,9 +136,7 @@ def big_matrix(gridlist): and the central value""" central_value = gridlist[0] X = pd.concat( - gridlist[1:], - axis=1, - keys=range(1, len(gridlist) + 1), # avoid confusion with rep0 + gridlist[1:], axis=1, keys=range(1, len(gridlist) + 1) # avoid confusion with rep0 ).subtract(central_value, axis=0) if np.any(X.isnull()) or X.shape[0] != len(central_value): raise ValueError("Incompatible grid specifications") @@ -148,11 +145,7 @@ def big_matrix(gridlist): def rep_matrix(gridlist): """Return a properly indexes matrix of all the members""" - X = pd.concat( - gridlist, - axis=1, - keys=range(1, 
len(gridlist) + 1), # avoid confusion with rep0 - ) + X = pd.concat(gridlist, axis=1, keys=range(1, len(gridlist) + 1)) # avoid confusion with rep0 if np.ravel(pd.isnull(X)).any(): raise ValueError("Found null values in grid") return X @@ -239,6 +232,7 @@ def new_pdf_from_indexes( files directly. It is slower and will call LHAPDF to fill the grids, but works for sets where the replicas have different grids. """ + import lhapdf if extra_fields is not None: raise NotImplementedError() @@ -303,7 +297,7 @@ def hessian_from_lincomb(pdf, V, set_name=None, folder=None, extra_fields=None): # preparing output folder neig = V.shape[1] - base = pathlib.Path(lhapdf.paths()[-1]) / pdf.name + base = pathlib.Path(lhaindex.get_lha_datapath()) / pdf.name if set_name is None: set_name = pdf.name + "_hessian_" + str(neig) if folder is None: @@ -314,8 +308,7 @@ def hessian_from_lincomb(pdf, V, set_name=None, folder=None, extra_fields=None): if os.path.exists(set_root): shutil.rmtree(set_root) log.warning( - "Target directory for new PDF, %s, already exists. Removing contents.", - set_root, + "Target directory for new PDF, %s, already exists. Removing contents.", set_root ) os.makedirs(os.path.join(set_root)) @@ -336,10 +329,7 @@ def hessian_from_lincomb(pdf, V, set_name=None, folder=None, extra_fields=None): yaml.dump(extra_fields, out, default_flow_style=False) _headers, grids = load_all_replicas(pdf) - result = (big_matrix(grids).dot(V)).add( - grids[0], - axis=0, - ) + result = (big_matrix(grids).dot(V)).add(grids[0], axis=0) hess_header = b"PdfType: error\nFormat: lhagrid1\n" for column in result.columns: write_replica(column + 1, set_root, hess_header, result[column])