Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add an lhapdf_compatibility module for LHAPDF #1799

Merged
merged 6 commits into from
Jan 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 21 additions & 9 deletions n3fit/src/n3fit/backends/keras_backend/internal_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,22 @@
Library of functions that modify the internal state of Keras/Tensorflow
"""
import os

import psutil

# Despite the current default being tf-eigen, the option below seems to have a positive impact
os.environ.setdefault("KMP_BLOCKTIME", "0")

# Reduce tensorflow verbosity
os.environ.setdefault("TF_CPP_MIN_LOG_LEVEL", "1")
import random as rn
import logging
import random as rn

import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as K


log = logging.getLogger(__name__)


Expand All @@ -28,7 +29,7 @@ def set_eager(flag=True):
tf.config.run_functions_eagerly(flag)


def set_number_of_cores(max_cores=None):
def set_number_of_cores(max_cores=None, max_threads=None):
"""
Set the maximum number of cores and threads per core to be used by TF.
It defaults to the number of physical cores
Expand Down Expand Up @@ -56,9 +57,21 @@ def set_number_of_cores(max_cores=None):
# In any case, we never want to get above the number provided by the user
if max_cores is not None:
cores = min(cores, max_cores)

threads = tpc * 2
if max_threads is not None:
threads = min(max_threads, threads)

log.info("Setting the number of cores to: %d", cores)
tf.config.threading.set_inter_op_parallelism_threads(tpc * 2)
tf.config.threading.set_intra_op_parallelism_threads(cores)
try:
tf.config.threading.set_inter_op_parallelism_threads(threads)
tf.config.threading.set_intra_op_parallelism_threads(cores)
except RuntimeError:
# If pdfflow is being used, tensorflow will already be initialized by pdfflow
# maybe it would be good to drop completely pdfflow before starting the fit? (TODO ?)
log.warning(
"Could not set tensorflow parallelism settings from n3fit, maybe has already been initialized?"
)


def clear_backend_state():
Expand Down Expand Up @@ -115,13 +128,12 @@ def set_initial_state(debug=False, external_seed=None, max_cores=None):

# Set the number of cores depending on the user choice of max_cores
# if debug mode and no number of cores set by the user, set to 1
threads = None # auto
if debug and max_cores is None:
keras.utils.set_random_seed(7331)
threads = 1
tf.config.experimental.enable_op_determinism()
tf.config.threading.set_inter_op_parallelism_threads(1)
tf.config.threading.set_intra_op_parallelism_threads(1)
else:
set_number_of_cores(max_cores=max_cores)
set_number_of_cores(max_cores=max_cores, max_threads=threads)

# Once again, if in debug mode or external_seed set, set also the TF seed
if debug or external_seed:
Expand Down
3 changes: 1 addition & 2 deletions validphys2/src/validphys/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,10 @@
import os
import sys

import lhapdf

from reportengine import app
from validphys import mplstyles, uploadutils
from validphys.config import Config, Environment
from validphys.lhapdf_compatibility import lhapdf

providers = [
"validphys.results",
Expand Down
3 changes: 1 addition & 2 deletions validphys2/src/validphys/checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import platform
import tempfile

import lhapdf
from matplotlib import scale as mscale

from reportengine.checks import CheckError, check, make_argcheck, make_check
Expand Down Expand Up @@ -71,7 +70,7 @@ def check_can_save_grid(ns, **kwags):
if not ns['installgrid']:
return

write_path = lhapdf.paths()[-1]
write_path = lhaindex.get_lha_datapaths()
try:
tempfile.TemporaryFile(dir=write_path)
except OSError as e:
Expand Down
19 changes: 7 additions & 12 deletions validphys2/src/validphys/lhaindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,8 @@
import os.path as osp
import re

import lhapdf

from reportengine.compat import yaml
from validphys.lhapdf_compatibility import lhapdf

_indexes_to_names = None
_names_to_indexes = None
Expand All @@ -25,7 +24,7 @@ def expand_index_names(globstr):


def expand_local_names(globstr):
paths = get_lha_paths()
paths = lhapdf.paths()
return [
name
for path in paths
Expand All @@ -51,7 +50,7 @@ def get_indexes_to_names():


def finddir(name):
for path in get_lha_paths():
for path in lhapdf.paths():
d = osp.join(path, name)
if osp.isdir(d):
return d
Expand All @@ -60,7 +59,7 @@ def finddir(name):

def isinstalled(name):
    """Check whether a directory called ``name`` exists in any LHAPDF path.

    A falsy ``name`` (empty string, ``None``) is handed back unchanged and
    the LHAPDF paths are not inspected at all.
    """
    if not name:
        return name
    candidates = (osp.join(path, name) for path in lhapdf.paths())
    return any(map(osp.isdir, candidates))


def get_names_to_indexes():
Expand Down Expand Up @@ -88,7 +87,7 @@ def get_pdf_name(index):

def parse_index(index_file):
d = {}
name_re = '(\d+)\s+(\S+)'
name_re = r'(\d+)\s+(\S+)'
with open(index_file) as localfile:
for line in localfile.readlines():
m = re.match(name_re, line)
Expand Down Expand Up @@ -116,7 +115,7 @@ def as_from_name(name):


def infofilename(name):
for path in get_lha_paths():
for path in lhapdf.paths():
info = osp.join(path, name, name + '.info')
if osp.exists(info):
return info
Expand All @@ -130,12 +129,8 @@ def parse_info(name):
return result


def get_lha_paths():
return lhapdf.paths()


def get_lha_datapath():
    """Return the last entry of the path list reported by ``lhapdf.paths()``."""
    search_paths = lhapdf.paths()
    return search_paths[-1]


def get_index_path(folder=None):
Expand Down
125 changes: 125 additions & 0 deletions validphys2/src/validphys/lhapdf_compatibility.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
"""
Module for LHAPDF compatibility backends

If LHAPDF is installed, the module transparently hands everything over to LHAPDF.
If LHAPDF is not available, it falls back to a combination of the packages
`lhapdf-management` and `pdfflow`,
which cover all the features of LHAPDF used during the fit (and likely most of validphys).
"""
from functools import cached_property

import numpy as np

try:
import lhapdf

USING_LHAPDF = True
except ModuleNotFoundError:
import logging

import lhapdf_management as lhapdf

log = logging.getLogger(__name__)
log.warning("LHAPDF was not found, using an alternative backend")

USING_LHAPDF = False


class _PDFFlowPDF:
"""Wrapper around the PDFFlow PDF so that it can be used as an LHAPDF
set by validphys
Takes as input a pdf_meta object (which is a PDFset from lhapdf_management
and which knows where the PDF needs to be loaded from) and a single member

Loading the PDF is done in a lazy manner since most of the time only a few members are needed.

Since PDFFlow is only utilized to load the PDF for interpolation, the import is delayed until
the first call to `mkPDF`. This allows the usage of most of validphys without tensorflow.
"""

def __init__(self, pdf_meta, member):
if USING_LHAPDF:
raise ValueError("PDFFlow should not be instantiated when using LHAPDF")

self._pdf_meta = pdf_meta
self._m = member
self._pdf = None
self._flavors = self._pdf_meta.info["Flavors"]

@cached_property
def pdf(self):
# Don't import PDF Flow until you really needed it
import pdfflow

if self._pdf is None:
pdf_def = f"{self._pdf_meta.name}/{self._m}"
self._pdf = pdfflow.mkPDF(pdf_def, self._pdf_meta.path.parent)
return self._pdf

def flavors(self):
return self._flavors

def _xfxQ_all_pid(self, x, q):
x = np.atleast_1d(x)
q = np.atleast_1d(q)

res = self.pdf.py_xfxQ2_allpid(x, q**2).numpy()
return dict(zip(self._flavors, res.T))

def xfxQ(self, a, b, c=None):
"""Wrapper for the LHAPDF xfxQ function
This is an overloaded function in LHAPDF so depending
on the number of arguments we will do:
xfxQ(flavours, x, Q)
or
xfxQ(x, q)
RoyStegeman marked this conversation as resolved.
Show resolved Hide resolved

All of x/q/flavours can be either a scalar or an array
"""
if c is None:
return self._xfxQ_all_pid(a, b)

# PDFFlow doesn't allow to ask for flavours that do not exist
# so let us retrieve all and return 0s for non existing flavs
ret_dict = self.xfxQ(b, c)
zeros = np.zeros_like(b)

if isinstance(a, int):
return ret_dict.get(a, zeros)
return [ret_dict.get(i, zeros) for i in a]

def xfxQ2(self, a, b, c=None):
"""Wrapper for LHAPDF xfxQ2 function, like xfxQ for Q2"""
if c is None:
return self.xfxQ(a, np.sqrt(b))
return self.xfxQ(a, b, np.sqrt(c))


def make_pdf(pdf_name, member=None):
    """Load a PDF set, or a single member of it, as a list of PDF objects.

    With the LHAPDF backend the list holds LHAPDF PDF instances; with the
    alternative backend it holds ``_PDFFlowPDF`` wrappers, which expose an
    LHAPDF-compatible interface.

    Parameters
    ----------
    pdf_name: str
        name of the PDF to load
    member: int
        index of the member of the PDF to load; if ``None`` (default)
        all members of the set are loaded

    Returns
    -------
    list(pdf_sets)
        one entry per loaded member (a single-element list when ``member`` is given)
    """
    whole_set = member is None

    if not USING_LHAPDF:
        # Alternative backend: wrap each requested member lazily
        pdf_meta = lhapdf.load_pdf_meta(pdf_name)
        indices = range(len(pdf_meta)) if whole_set else (member,)
        return [_PDFFlowPDF(pdf_meta, idx) for idx in indices]

    if whole_set:
        return lhapdf.mkPDFs(pdf_name)
    return [lhapdf.mkPDF(pdf_name, member)]
7 changes: 4 additions & 3 deletions validphys2/src/validphys/lhapdfset.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,10 @@
"""
import logging

import lhapdf
import numpy as np

from validphys.lhapdf_compatibility import make_pdf

log = logging.getLogger(__name__)


Expand All @@ -46,9 +47,9 @@ def __init__(self, name, error_type):
self._error_type = error_type
if self.is_t0:
# If at this point we already know this is a T0 set, load only the CV
self._lhapdf_set = [lhapdf.mkPDF(name)]
self._lhapdf_set = make_pdf(name, 0)
else:
self._lhapdf_set = lhapdf.mkPDFs(name)
self._lhapdf_set = make_pdf(name)
self._flavors = None

@property
Expand Down
22 changes: 6 additions & 16 deletions validphys2/src/validphys/lhio.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import pathlib
import shutil

import lhapdf
import numpy as np
import pandas as pd

Expand Down Expand Up @@ -137,9 +136,7 @@ def big_matrix(gridlist):
and the central value"""
central_value = gridlist[0]
X = pd.concat(
gridlist[1:],
axis=1,
keys=range(1, len(gridlist) + 1), # avoid confusion with rep0
gridlist[1:], axis=1, keys=range(1, len(gridlist) + 1) # avoid confusion with rep0
).subtract(central_value, axis=0)
if np.any(X.isnull()) or X.shape[0] != len(central_value):
raise ValueError("Incompatible grid specifications")
Expand All @@ -148,11 +145,7 @@ def big_matrix(gridlist):

def rep_matrix(gridlist):
    """Return a properly indexed matrix of all the members.

    The grids in ``gridlist`` are concatenated column-wise and labelled
    1..len(gridlist).

    Raises
    ------
    ValueError
        if the concatenated matrix contains any null value
    """
    # Labels start at 1 to avoid confusion with rep0
    member_labels = range(1, len(gridlist) + 1)
    X = pd.concat(gridlist, axis=1, keys=member_labels)
    has_nulls = np.ravel(pd.isnull(X)).any()
    if has_nulls:
        raise ValueError("Found null values in grid")
    return X
Expand Down Expand Up @@ -239,6 +232,7 @@ def new_pdf_from_indexes(
files directly. It is slower and will call LHAPDF to fill the grids,
but works for sets where the replicas have different grids.
"""
import lhapdf
RoyStegeman marked this conversation as resolved.
Show resolved Hide resolved

if extra_fields is not None:
raise NotImplementedError()
Expand Down Expand Up @@ -303,7 +297,7 @@ def hessian_from_lincomb(pdf, V, set_name=None, folder=None, extra_fields=None):
# preparing output folder
neig = V.shape[1]

base = pathlib.Path(lhapdf.paths()[-1]) / pdf.name
base = pathlib.Path(lhaindex.get_lha_datapath()) / pdf.name
if set_name is None:
set_name = pdf.name + "_hessian_" + str(neig)
if folder is None:
Expand All @@ -314,8 +308,7 @@ def hessian_from_lincomb(pdf, V, set_name=None, folder=None, extra_fields=None):
if os.path.exists(set_root):
shutil.rmtree(set_root)
log.warning(
"Target directory for new PDF, %s, already exists. Removing contents.",
set_root,
"Target directory for new PDF, %s, already exists. Removing contents.", set_root
)
os.makedirs(os.path.join(set_root))

Expand All @@ -336,10 +329,7 @@ def hessian_from_lincomb(pdf, V, set_name=None, folder=None, extra_fields=None):
yaml.dump(extra_fields, out, default_flow_style=False)

_headers, grids = load_all_replicas(pdf)
result = (big_matrix(grids).dot(V)).add(
grids[0],
axis=0,
)
result = (big_matrix(grids).dot(V)).add(grids[0], axis=0)
hess_header = b"PdfType: error\nFormat: lhagrid1\n"
for column in result.columns:
write_replica(column + 1, set_root, hess_header, result[column])
Expand Down
Loading