From 0d2601d5980e8579e27f53b1c9432deaa50f9870 Mon Sep 17 00:00:00 2001
From: juacrumar <juacrumar@lairen.eu>
Date: Mon, 27 Nov 2023 10:22:23 +0100
Subject: [PATCH] rebase this branch on top of the pyproject.toml

---
 n3fit/requirements.txt                        | 23 ------------
 n3fit/runcards/examples/Basic_runcard.yml     |  4 ++-
 .../backends/keras_backend/internal_state.py  | 30 +++++++++-------
 .../src/validphys/lhapdf_compatibility.py     | 36 +++++++++++++------
 validphys2/src/validphys/lhio.py              | 22 ++++--------
 validphys2/src/validphys/photon/compute.py    |  1 -
 6 files changed, 52 insertions(+), 64 deletions(-)
 delete mode 100644 n3fit/requirements.txt

diff --git a/n3fit/requirements.txt b/n3fit/requirements.txt
deleted file mode 100644
index a1abbddde0..0000000000
--- a/n3fit/requirements.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-# n3fit
-tensorflow
-psutil
-# evolven3fit
-eko
-# validphys
-pineappl
-reportengine
-validobj
-prompt_toolkit
-## hyperopt
-hyperopt
-seaborn
-
-# not available from pypi
-# lhapdf
-# instead install:
-pdfflow
-lhapdf_management
-
-# if lhapdf_management needs to be initialized:
-# LHAPDF_DATA_PATH=$(python -c 'from pathlib import Path ; from sys import prefix ; print(Path(prefix) / "share" / "LHAPDF")' ; lhapdf-management update
-
diff --git a/n3fit/runcards/examples/Basic_runcard.yml b/n3fit/runcards/examples/Basic_runcard.yml
index aaf7a313aa..ab14cdeb71 100644
--- a/n3fit/runcards/examples/Basic_runcard.yml
+++ b/n3fit/runcards/examples/Basic_runcard.yml
@@ -10,6 +10,8 @@ description: Basic runcard
 # ewk: apply ewk k-factors
 # sys: systematics treatment (see systypes)
 dataset_inputs:
+- { dataset: SLACP_dwsh, frac: 0.5}
+- { dataset: NMCPD_dw, frac: 0.5 }
 - { dataset: ATLASZPT8TEVMDIST, frac: 0.75, sys: 10, cfac: [QCD] }
 
 ############################################################
@@ -29,7 +31,7 @@ datacuts:
 
 ############################################################
 theory:
-  theoryid: 400       # database id
+  theoryid: 200       # database id
 
 sampling:
   separate_multiplicative: true
diff --git a/n3fit/src/n3fit/backends/keras_backend/internal_state.py b/n3fit/src/n3fit/backends/keras_backend/internal_state.py
index c1d1ee66d0..f946674072 100644
--- a/n3fit/src/n3fit/backends/keras_backend/internal_state.py
+++ b/n3fit/src/n3fit/backends/keras_backend/internal_state.py
@@ -27,7 +27,7 @@ def set_eager(flag=True):
     tf.config.run_functions_eagerly(flag)
 
 
-def set_number_of_cores(max_cores=None):
+def set_number_of_cores(max_cores=None, max_threads=None):
     """
     Set the maximum number of cores and threads per core to be used by TF.
     It defaults to the number of physical cores
@@ -38,11 +38,6 @@ def set_number_of_cores(max_cores=None):
         max_cores: int
             Maximum number of cores to be used
     """
-    try:
-        import lhapdf
-    except ModuleNotFoundError:
-        # If LHAPDF is not working then that means we already have initialized tensorflow at this point
-        return
     # Find how many cores we have and how many threads per core
     cores = psutil.cpu_count(logical=False)
     logical = psutil.cpu_count(logical=True)
@@ -60,9 +55,21 @@ def set_number_of_cores(max_cores=None):
     # In any case, we never want to get above the number provided by the user
     if max_cores is not None:
         cores = min(cores, max_cores)
+
+    threads = tpc * 2
+    if max_threads is not None:
+        threads = min(max_threads, threads)
+
     log.info("Setting the number of cores to: %d", cores)
-    tf.config.threading.set_inter_op_parallelism_threads(tpc * 2)
-    tf.config.threading.set_intra_op_parallelism_threads(cores)
+    try:
+        tf.config.threading.set_inter_op_parallelism_threads(threads)
+        tf.config.threading.set_intra_op_parallelism_threads(cores)
+    except RuntimeError:
+        # If pdfflow is being used, TF will already have been initialized by pdfflow
+        # maybe it would be good to drop pdfflow completely before starting the fit? (TODO ?)
+        log.warning(
+            "Could not set TF parallelism settings from n3fit, maybe has already been initialized?"
+        )
 
 
 def clear_backend_state():
@@ -119,11 +126,10 @@ def set_initial_state(debug=False, external_seed=None, max_cores=None):
 
     # Set the number of cores depending on the user choice of max_cores
     # if debug mode and no number of cores set by the user, set to 1
+    threads = None  # auto
     if debug and max_cores is None:
-        tf.config.threading.set_inter_op_parallelism_threads(1)
-        tf.config.threading.set_intra_op_parallelism_threads(1)
-    else:
-        set_number_of_cores(max_cores=max_cores)
+        threads = 1
+    set_number_of_cores(max_cores=max_cores, max_threads=threads)
 
     # Once again, if in debug mode or external_seed set, set also the TF seed
     if debug or external_seed:
diff --git a/validphys2/src/validphys/lhapdf_compatibility.py b/validphys2/src/validphys/lhapdf_compatibility.py
index fae39ec2ff..e7deae564a 100644
--- a/validphys2/src/validphys/lhapdf_compatibility.py
+++ b/validphys2/src/validphys/lhapdf_compatibility.py
@@ -9,6 +9,7 @@
     Eventually this module will allow us to transition to an under-development python/rust
     PDF interpolation library.
 """
+from functools import cached_property
 import numpy as np
 
 try:
@@ -19,7 +20,6 @@
     import logging
 
     import lhapdf_management as lhapdf
-    import pdfflow
 
     log = logging.getLogger(__name__)
     log.warning("LHAPDF was not found, using an alternative backend")
@@ -34,6 +34,9 @@ class _PDFFlowPDF:
     and which knows _where_ the PDF needs to be loaded from) and a single member
 
     Loading the PDF is done in a lazy manner since most of the time only a few members are needed.
+
+    Since PDFFlow is only utilized to load the PDF for interpolation, the import is delayed until
+    the first call to `mkPDF`. This allows the usage of most of validphys without tensorflow.
     """
 
     def __init__(self, pdf_meta, member):
@@ -43,26 +46,27 @@ def __init__(self, pdf_meta, member):
         self._pdf_meta = pdf_meta
         self._m = member
         self._pdf = None
+        self._flavors = self._pdf_meta.info["Flavors"]
 
-    @property
+    @cached_property
     def pdf(self):
+        # Don't import PDFFlow until it is really needed
+        import pdfflow
+
         if self._pdf is None:
             pdf_def = f"{self._pdf_meta.name}/{self._m}"
             self._pdf = pdfflow.mkPDF(pdf_def, self._pdf_meta.path.parent)
         return self._pdf
 
-    @property
     def flavors(self):
-        return self._pdf_meta.info["Flavors"]
+        return self._flavors
 
     def _xfxQ_all_pid(self, x, q):
-        if isinstance(x, float):
-            x = np.array([x])
-        if isinstance(q, float):
-            q = np.array([q])
+        x = np.atleast_1d(x)
+        q = np.atleast_1d(q)
 
         res = self.pdf.py_xfxQ2_allpid(x, q**2).numpy()
-        return dict(zip(self.flavors, res.T))
+        return dict(zip(self._flavors, res.T))
 
     def xfxQ(self, a, b, c=None):
         """Wrapper for the LHAPDF xfxQ function
@@ -72,16 +76,26 @@ def xfxQ(self, a, b, c=None):
         or
             xfxQ(x, q)
 
-        And x/q/flavours can be either an scalar or an array
+        All of x/q/flavours can be either a scalar or an array
         """
         if c is None:
             return self._xfxQ_all_pid(a, b)
 
         # PDFFlow doesn't allow to ask for flavours that do not exist
+        # so let us retrieve all of them and return zeros for non-existing flavours
         ret_dict = self.xfxQ(b, c)
         zeros = np.zeros_like(b)
+
+        if isinstance(a, int):
+            return ret_dict.get(a, zeros)
         return [ret_dict.get(i, zeros) for i in a]
 
+    def xfxQ2(self, a, b, c=None):
+        """Wrapper for LHAPDF xfxQ2 function, like xfxQ for Q2"""
+        if c is None:
+            return self.xfxQ(a, np.sqrt(b))
+        return self.xfxQ(a, b, np.sqrt(c))
+
 
 def make_pdf(pdf_name, member=None):
     """Load a PDF
@@ -109,5 +123,5 @@ def make_pdf(pdf_name, member=None):
 
     pdf_meta = lhapdf.load_pdf_meta(pdf_name)
     if member is None:
-        return [_PDFFlowPDF(pdf_meta, m) for m in len(pdf_meta)]
+        return [_PDFFlowPDF(pdf_meta, m) for m in range(len(pdf_meta))]
     return [_PDFFlowPDF(pdf_meta, member)]
diff --git a/validphys2/src/validphys/lhio.py b/validphys2/src/validphys/lhio.py
index 6be9187318..9fc1804df3 100644
--- a/validphys2/src/validphys/lhio.py
+++ b/validphys2/src/validphys/lhio.py
@@ -8,7 +8,6 @@
 import pathlib
 import shutil
 
-import lhapdf
 import numpy as np
 import pandas as pd
 
@@ -137,9 +136,7 @@ def big_matrix(gridlist):
     and the central value"""
     central_value = gridlist[0]
     X = pd.concat(
-        gridlist[1:],
-        axis=1,
-        keys=range(1, len(gridlist) + 1),  # avoid confusion with rep0
+        gridlist[1:], axis=1, keys=range(1, len(gridlist) + 1)  # avoid confusion with rep0
     ).subtract(central_value, axis=0)
     if np.any(X.isnull()) or X.shape[0] != len(central_value):
         raise ValueError("Incompatible grid specifications")
@@ -148,11 +145,7 @@ def big_matrix(gridlist):
 
 def rep_matrix(gridlist):
     """Return a properly indexes matrix of all the members"""
-    X = pd.concat(
-        gridlist,
-        axis=1,
-        keys=range(1, len(gridlist) + 1),  # avoid confusion with rep0
-    )
+    X = pd.concat(gridlist, axis=1, keys=range(1, len(gridlist) + 1))  # avoid confusion with rep0
     if np.ravel(pd.isnull(X)).any():
         raise ValueError("Found null values in grid")
     return X
@@ -239,6 +232,7 @@ def new_pdf_from_indexes(
         files directly. It is slower and will call LHAPDF to fill the grids,
         but works for sets where the replicas have different grids.
     """
+    import lhapdf
 
     if extra_fields is not None:
         raise NotImplementedError()
@@ -303,7 +297,7 @@ def hessian_from_lincomb(pdf, V, set_name=None, folder=None, extra_fields=None):
     # preparing output folder
     neig = V.shape[1]
 
-    base = pathlib.Path(lhapdf.paths()[-1]) / pdf.name
+    base = pathlib.Path(lhaindex.get_lha_paths()[-1]) / pdf.name
     if set_name is None:
         set_name = pdf.name + "_hessian_" + str(neig)
     if folder is None:
@@ -314,8 +308,7 @@ def hessian_from_lincomb(pdf, V, set_name=None, folder=None, extra_fields=None):
     if os.path.exists(set_root):
         shutil.rmtree(set_root)
         log.warning(
-            "Target directory for new PDF, %s, already exists. Removing contents.",
-            set_root,
+            "Target directory for new PDF, %s, already exists. Removing contents.", set_root
         )
     os.makedirs(os.path.join(set_root))
 
@@ -336,10 +329,7 @@ def hessian_from_lincomb(pdf, V, set_name=None, folder=None, extra_fields=None):
             yaml.dump(extra_fields, out, default_flow_style=False)
 
     _headers, grids = load_all_replicas(pdf)
-    result = (big_matrix(grids).dot(V)).add(
-        grids[0],
-        axis=0,
-    )
+    result = (big_matrix(grids).dot(V)).add(grids[0], axis=0)
     hess_header = b"PdfType: error\nFormat: lhagrid1\n"
     for column in result.columns:
         write_replica(column + 1, set_root, hess_header, result[column])
diff --git a/validphys2/src/validphys/photon/compute.py b/validphys2/src/validphys/photon/compute.py
index 4d67301908..fdc18e2f95 100644
--- a/validphys2/src/validphys/photon/compute.py
+++ b/validphys2/src/validphys/photon/compute.py
@@ -50,7 +50,6 @@ class Photon:
     """Photon class computing the photon array with the LuxQED approach."""
 
     def __init__(self, theoryid, lux_params, replicas):
-        import fiatlux
         theory = theoryid.get_description()
         fiatlux_runcard = FIATLUX_DEFAULT
         fiatlux_runcard["qed_running"] = bool(np.isclose(theory["Qedref"], theory["Qref"]))