From 0d2601d5980e8579e27f53b1c9432deaa50f9870 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Mon, 27 Nov 2023 10:22:23 +0100 Subject: [PATCH] rebase this branch on top of the pyproject.toml --- n3fit/requirements.txt | 23 ------------ n3fit/runcards/examples/Basic_runcard.yml | 4 ++- .../backends/keras_backend/internal_state.py | 30 +++++++++------- .../src/validphys/lhapdf_compatibility.py | 36 +++++++++++++------ validphys2/src/validphys/lhio.py | 22 ++++-------- validphys2/src/validphys/photon/compute.py | 1 - 6 files changed, 52 insertions(+), 64 deletions(-) delete mode 100644 n3fit/requirements.txt diff --git a/n3fit/requirements.txt b/n3fit/requirements.txt deleted file mode 100644 index a1abbddde0..0000000000 --- a/n3fit/requirements.txt +++ /dev/null @@ -1,23 +0,0 @@ -# n3fit -tensorflow -psutil -# evolven3fit -eko -# validphys -pineappl -reportengine -validobj -prompt_toolkit -## hyperopt -hyperopt -seaborn - -# not available from pypi -# lhapdf -# instead install: -pdfflow -lhapdf_management - -# if lhapdf_management needs to be initialized: -# LHAPDF_DATA_PATH=$(python -c 'from pathlib import Path ; from sys import prefix ; print(Path(prefix) / "share" / "LHAPDF")' ; lhapdf-management update - diff --git a/n3fit/runcards/examples/Basic_runcard.yml b/n3fit/runcards/examples/Basic_runcard.yml index aaf7a313aa..ab14cdeb71 100644 --- a/n3fit/runcards/examples/Basic_runcard.yml +++ b/n3fit/runcards/examples/Basic_runcard.yml @@ -10,6 +10,8 @@ description: Basic runcard # ewk: apply ewk k-factors # sys: systematics treatment (see systypes) dataset_inputs: +- { dataset: SLACP_dwsh, frac: 0.5} +- { dataset: NMCPD_dw, frac: 0.5 } - { dataset: ATLASZPT8TEVMDIST, frac: 0.75, sys: 10, cfac: [QCD] } ############################################################ @@ -29,7 +31,7 @@ datacuts: ############################################################ theory: - theoryid: 400 # database id + theoryid: 200 # database id sampling: separate_multiplicative: true diff --git 
a/n3fit/src/n3fit/backends/keras_backend/internal_state.py b/n3fit/src/n3fit/backends/keras_backend/internal_state.py index c1d1ee66d0..f946674072 100644 --- a/n3fit/src/n3fit/backends/keras_backend/internal_state.py +++ b/n3fit/src/n3fit/backends/keras_backend/internal_state.py @@ -27,7 +27,7 @@ def set_eager(flag=True): tf.config.run_functions_eagerly(flag) -def set_number_of_cores(max_cores=None): +def set_number_of_cores(max_cores=None, max_threads=None): """ Set the maximum number of cores and threads per core to be used by TF. It defaults to the number of physical cores @@ -38,11 +38,6 @@ def set_number_of_cores(max_cores=None): max_cores: int Maximum number of cores to be used """ - try: - import lhapdf - except ModuleNotFoundError: - # If LHAPDF is not working then that means we already have initialized tensorflow at this point - return # Find how many cores we have and how many threads per core cores = psutil.cpu_count(logical=False) logical = psutil.cpu_count(logical=True) @@ -60,9 +55,21 @@ def set_number_of_cores(max_cores=None): # In any case, we never want to get above the number provided by the user if max_cores is not None: cores = min(cores, max_cores) + + threads = tpc * 2 + if max_threads is not None: + threads = min(max_threads, threads) + log.info("Setting the number of cores to: %d", cores) - tf.config.threading.set_inter_op_parallelism_threads(tpc * 2) - tf.config.threading.set_intra_op_parallelism_threads(cores) + try: + tf.config.threading.set_inter_op_parallelism_threads(threads) + tf.config.threading.set_intra_op_parallelism_threads(cores) + except RuntimeError: + # If pdfflow is being used, TF will already have been initialized by pdfflow + # maybe it would be good to drop pdfflow completely before starting the fit? (TODO ?) + log.warning( + "Could not set TF parallelism settings from n3fit, maybe has already been initialized?" 
+ ) def clear_backend_state(): @@ -119,11 +126,10 @@ def set_initial_state(debug=False, external_seed=None, max_cores=None): # Set the number of cores depending on the user choice of max_cores # if debug mode and no number of cores set by the user, set to 1 + threads = None # auto if debug and max_cores is None: - tf.config.threading.set_inter_op_parallelism_threads(1) - tf.config.threading.set_intra_op_parallelism_threads(1) - else: - set_number_of_cores(max_cores=max_cores) + threads = 1 + set_number_of_cores(max_cores=max_cores, max_threads=threads) # Once again, if in debug mode or external_seed set, set also the TF seed if debug or external_seed: diff --git a/validphys2/src/validphys/lhapdf_compatibility.py b/validphys2/src/validphys/lhapdf_compatibility.py index fae39ec2ff..e7deae564a 100644 --- a/validphys2/src/validphys/lhapdf_compatibility.py +++ b/validphys2/src/validphys/lhapdf_compatibility.py @@ -9,6 +9,7 @@ Eventually this module will allow us to transition to an under-development python/rust PDF interpolation library. """ +from functools import cached_property import numpy as np try: @@ -19,7 +20,6 @@ import logging import lhapdf_management as lhapdf - import pdfflow log = logging.getLogger(__name__) log.warning("LHAPDF was not found, using an alternative backend") @@ -34,6 +34,9 @@ class _PDFFlowPDF: and which knows _where_ the PDF needs to be loaded from) and a single member Loading the PDF is done in a lazy manner since most of the time only a few members are needed. + + Since PDFFlow is only utilized to load the PDF for interpolation, the import is delayed until + the first call to `mkPDF`. This allows the usage of most of validphys without tensorflow. 
""" def __init__(self, pdf_meta, member): @@ -43,26 +46,27 @@ def __init__(self, pdf_meta, member): self._pdf_meta = pdf_meta self._m = member self._pdf = None + self._flavors = self._pdf_meta.info["Flavors"] - @property + @cached_property def pdf(self): + # Don't import PDFFlow until it is really needed + import pdfflow + if self._pdf is None: pdf_def = f"{self._pdf_meta.name}/{self._m}" self._pdf = pdfflow.mkPDF(pdf_def, self._pdf_meta.path.parent) return self._pdf - @property def flavors(self): - return self._pdf_meta.info["Flavors"] + return self._flavors def _xfxQ_all_pid(self, x, q): - if isinstance(x, float): - x = np.array([x]) - if isinstance(q, float): - q = np.array([q]) + x = np.atleast_1d(x) + q = np.atleast_1d(q) res = self.pdf.py_xfxQ2_allpid(x, q**2).numpy() - return dict(zip(self.flavors, res.T)) + return dict(zip(self._flavors, res.T)) def xfxQ(self, a, b, c=None): """Wrapper for the LHAPDF xfxQ function @@ -72,16 +76,26 @@ def xfxQ(self, a, b, c=None): or xfxQ(x, q) - And x/q/flavours can be either an scalar or an array + All of x/q/flavours can be either a scalar or an array """ if c is None: return self._xfxQ_all_pid(a, b) # PDFFlow doesn't allow to ask for flavours that do not exist + # so let us retrieve all and return 0s for non-existent flavours ret_dict = self.xfxQ(b, c) zeros = np.zeros_like(b) + + if isinstance(a, int): + return ret_dict.get(a, zeros) return [ret_dict.get(i, zeros) for i in a] + def xfxQ2(self, a, b, c=None): + """Wrapper for LHAPDF xfxQ2 function, like xfxQ for Q2""" + if c is None: + return self.xfxQ(a, np.sqrt(b)) + return self.xfxQ(a, b, np.sqrt(c)) + def make_pdf(pdf_name, member=None): """Load a PDF @@ -109,5 +123,5 @@ def make_pdf(pdf_name, member=None): pdf_meta = lhapdf.load_pdf_meta(pdf_name) if member is None: - return [_PDFFlowPDF(pdf_meta, m) for m in len(pdf_meta)] + return [_PDFFlowPDF(pdf_meta, m) for m in range(len(pdf_meta))] return [_PDFFlowPDF(pdf_meta, member)] diff --git 
a/validphys2/src/validphys/lhio.py b/validphys2/src/validphys/lhio.py index 6be9187318..9fc1804df3 100644 --- a/validphys2/src/validphys/lhio.py +++ b/validphys2/src/validphys/lhio.py @@ -8,7 +8,6 @@ import pathlib import shutil -import lhapdf import numpy as np import pandas as pd @@ -137,9 +136,7 @@ def big_matrix(gridlist): and the central value""" central_value = gridlist[0] X = pd.concat( - gridlist[1:], - axis=1, - keys=range(1, len(gridlist) + 1), # avoid confusion with rep0 + gridlist[1:], axis=1, keys=range(1, len(gridlist) + 1) # avoid confusion with rep0 ).subtract(central_value, axis=0) if np.any(X.isnull()) or X.shape[0] != len(central_value): raise ValueError("Incompatible grid specifications") @@ -148,11 +145,7 @@ def big_matrix(gridlist): def rep_matrix(gridlist): """Return a properly indexes matrix of all the members""" - X = pd.concat( - gridlist, - axis=1, - keys=range(1, len(gridlist) + 1), # avoid confusion with rep0 - ) + X = pd.concat(gridlist, axis=1, keys=range(1, len(gridlist) + 1)) # avoid confusion with rep0 if np.ravel(pd.isnull(X)).any(): raise ValueError("Found null values in grid") return X @@ -239,6 +232,7 @@ def new_pdf_from_indexes( files directly. It is slower and will call LHAPDF to fill the grids, but works for sets where the replicas have different grids. """ + import lhapdf if extra_fields is not None: raise NotImplementedError() @@ -303,7 +297,7 @@ def hessian_from_lincomb(pdf, V, set_name=None, folder=None, extra_fields=None): # preparing output folder neig = V.shape[1] - base = pathlib.Path(lhapdf.paths()[-1]) / pdf.name + base = pathlib.Path(lhaindex.get_lha_paths()[-1]) / pdf.name if set_name is None: set_name = pdf.name + "_hessian_" + str(neig) if folder is None: @@ -314,8 +308,7 @@ def hessian_from_lincomb(pdf, V, set_name=None, folder=None, extra_fields=None): if os.path.exists(set_root): shutil.rmtree(set_root) log.warning( - "Target directory for new PDF, %s, already exists. 
Removing contents.", - set_root, + "Target directory for new PDF, %s, already exists. Removing contents.", set_root ) os.makedirs(os.path.join(set_root)) @@ -336,10 +329,7 @@ def hessian_from_lincomb(pdf, V, set_name=None, folder=None, extra_fields=None): yaml.dump(extra_fields, out, default_flow_style=False) _headers, grids = load_all_replicas(pdf) - result = (big_matrix(grids).dot(V)).add( - grids[0], - axis=0, - ) + result = (big_matrix(grids).dot(V)).add(grids[0], axis=0) hess_header = b"PdfType: error\nFormat: lhagrid1\n" for column in result.columns: write_replica(column + 1, set_root, hess_header, result[column]) diff --git a/validphys2/src/validphys/photon/compute.py b/validphys2/src/validphys/photon/compute.py index 4d67301908..fdc18e2f95 100644 --- a/validphys2/src/validphys/photon/compute.py +++ b/validphys2/src/validphys/photon/compute.py @@ -50,7 +50,6 @@ class Photon: """Photon class computing the photon array with the LuxQED approach.""" def __init__(self, theoryid, lux_params, replicas): - import fiatlux theory = theoryid.get_description() fiatlux_runcard = FIATLUX_DEFAULT fiatlux_runcard["qed_running"] = bool(np.isclose(theory["Qedref"], theory["Qref"]))