Commit 8349c67
add a test for all commondata files
scarlehoff committed Mar 8, 2024
1 parent d093c07 commit 8349c67
Showing 5 changed files with 126 additions and 11 deletions.
11 changes: 8 additions & 3 deletions validphys2/src/validphys/commondataparser.py
@@ -35,6 +35,7 @@
 by modifying the CommonMetaData using one of the loaded Variants one can change the resulting
 :py:class:`validphys.coredata.CommonData` object.
 """
+
 import dataclasses
 from functools import cached_property, lru_cache
 import logging
@@ -588,7 +589,6 @@ def load_kinematics(self, fill_to_three=True, drop_minmax=True):
             d["mid"] = 0.5 * (d["max"] + d["min"])

             if drop_minmax:
-                # TODO: for now we are dropping min/max information since it didn't exist in the past
                 d["min"] = None
                 d["max"] = None
             else:
@@ -786,6 +786,11 @@ def cm_energy(self):
             return None
         return float(energy_string[:-3].replace("P", ".")) * factor

+    @cached_property
+    def allowed_datasets(self):
+        """Return the implemented datasets as a list of <setname>_<observable>"""
+        return [f"{self.setname}_{i.observable_name}" for i in self.implemented_observables]
+
     @cached_property
     def allowed_observables(self):
         """
@@ -809,7 +814,7 @@ def select_observable(self, obs_name_raw):


 @lru_cache
-def _parse_entire_set_metadata(metadata_file):
+def parse_set_metadata(metadata_file):
     """Read the metadata file"""
     return parse_yaml_inp(metadata_file, SetMetaData)

@@ -822,7 +827,7 @@ def parse_new_metadata(metadata_file, observable_name, variant=None):
     The triplet (metadata_file, observable_name, variant) defines unequivocally the information
     to be parsed from the commondata library
     """
-    set_metadata = _parse_entire_set_metadata(metadata_file)
+    set_metadata = parse_set_metadata(metadata_file)

     # Select one observable from the entire metadata
     metadata = set_metadata.select_observable(observable_name)
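The rename of ``_parse_entire_set_metadata`` to ``parse_set_metadata`` promotes the cached set-level parser to the public interface, and ``allowed_datasets`` lets callers enumerate every <setname>_<observable> pair of a set. A minimal sketch of how the two combine (the metadata path and the printed names are hypothetical):

from validphys.commondataparser import parse_set_metadata

# Hypothetical path: any <set folder>/metadata.yaml under the commondata tree works
set_meta = parse_set_metadata("commondata/ATLAS_TTBAR_13TEV_HADR_DIF/metadata.yaml")
# One entry per implemented observable, e.g. "ATLAS_TTBAR_13TEV_HADR_DIF_MTTBAR" (illustrative)
print(set_meta.allowed_datasets)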
15 changes: 14 additions & 1 deletion validphys2/src/validphys/loader.py
@@ -2,6 +2,7 @@
 Resolve paths to useful objects, and query the existence of different resources
 within the specified paths.
 """
+
 import functools
 from functools import cached_property
 import logging
@@ -22,7 +23,7 @@
 from reportengine import filefinder
 from reportengine.compat import yaml
 from validphys import lhaindex
-from validphys.commondataparser import load_commondata_old, parse_new_metadata
+from validphys.commondataparser import load_commondata_old, parse_new_metadata, parse_set_metadata
 from validphys.core import (
     PDF,
     CommonDataSpec,
@@ -326,6 +327,18 @@ def available_datasets(self):
         old_datasets = [i for i in legacy_to_new_mapping.keys() if not i.startswith(skip)]
         return set(old_datasets)

+    @property
+    @functools.lru_cache()
+    def implemented_datasets(self):
+        """Provide all implemented datasets that can be found in the datafiles folder
+        regardless of whether they can be used for fits (i.e., whether they include a theory),
+        are "fake" (integrability/positivity) or are missing some information.
+        """
+        datasets = []
+        for metadata_file in self.commondata_folder.glob("*/metadata.yaml"):
+            datasets += parse_set_metadata(metadata_file).allowed_datasets
+        return datasets
+
     @property
     @functools.lru_cache()
     def available_pdfs(self):
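``implemented_datasets`` deliberately globs every */metadata.yaml under the commondata folder, so it also picks up sets without a theory or with missing pieces; this is exactly what the new test further down parametrizes over. A sketch of the intended use, mirroring the test module below:

from validphys.loader import FallbackLoader

l = FallbackLoader()
# All <setname>_<observable> names found in the datafiles folder,
# whether or not they are usable in a fit; each becomes one test case
print(len(l.implemented_datasets))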
5 changes: 3 additions & 2 deletions validphys2/src/validphys/plotoptions/kintransforms.py
@@ -98,8 +98,9 @@ def new_labels(self, k1, k2, k3):
         return k1, k2, k3

     def xq2map(self, k1, k2, k3, **extra_labels):
-        # This is going to be a problem
-        return k1, k2
+        raise NotImplementedError(
+            "xq2map is not implemented for this dataset (kin_override set to identity and process_options not implemented)"
+        )


 class Kintransform(metaclass=abc.ABCMeta):
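With this change, a dataset whose kinematics override is the identity transform and whose process has no xq2map fails loudly instead of returning a bogus (k1, k2) pair. A small sketch of the new behaviour, assuming ``identity`` can be instantiated with no arguments and using placeholder kinematic values:

from validphys.plotoptions.kintransforms import identity

try:
    # 0.1, 10.0, 91.2 are placeholder values for k1, k2, k3
    identity().xq2map(0.1, 10.0, 91.2)
except NotImplementedError as err:
    print(err)  # previously this silently returned (k1, k2)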
12 changes: 7 additions & 5 deletions validphys2/src/validphys/process_options.py
@@ -3,6 +3,7 @@
 Only variables included in the `_Vars` enum and processes included in the ``Processes`` dictionary are allowed.
 """
+
 import dataclasses
 from typing import Callable, Optional, Tuple, Union

@@ -66,7 +67,7 @@ class _Process:
     def __hash__(self):
         return hash(self.name)

-    def same_kin_variables(self, kin_cov):
+    def are_accepted_variables(self, kin_cov):
         """Check if the kinematic variables from the kinematic coverage are the same
         as the accepted variables."""
         # Accepting in any case the legacy variables
@@ -88,7 +89,7 @@ def xq2map(self, kin_df, metadata):

         # Check if the kinematic variables defined in metadata correspond to the
         # accepted variables
-        if not self.same_kin_variables(metadata.kinematic_coverage):
+        if not self.are_accepted_variables(metadata.kinematic_coverage):
             raise NotImplementedError(
                 f"kinematic variables are not supported for process {self.name}. You are using {metadata.kinematic_coverage}, please use {self.accepted_variables} ({metadata.name})"
             )
@@ -159,7 +160,8 @@ def _hqp_yq_xq2map(kin_dict):

 def _hqp_yqq_xq2map(kin_dict):
     # Compute x, Q2
-    ratio = np.sqrt(kin_dict[_Vars.m_t2]) / kin_dict[_Vars.sqrts]
+    mass2 = _get_or_fail(kin_dict, [_Vars.m_t2, _Vars.m_ttBar])
+    ratio = np.sqrt(mass2) / kin_dict[_Vars.sqrts]
     x1 = ratio * np.exp(kin_dict[_Vars.y_ttBar])
     x2 = ratio * np.exp(-kin_dict[_Vars.y_ttBar])
     q2 = kin_dict[_Vars.m_t2]
@@ -218,14 +220,14 @@ def _displusjet_xq2map(kin_dict):

 HQP_YQQ = _Process(
     "HQP_YQQ",
     "Differential cross section w.r.t. absolute rapidity of ttBar",
-    accepted_variables=(_Vars.y_ttBar, _Vars.m_t2, _Vars.sqrts),
+    accepted_variables=(_Vars.y_ttBar, _Vars.m_t2, _Vars.sqrts, _Vars.m_ttBar),
     xq2map_function=_hqp_yqq_xq2map,
 )

 HQP_PTQ = _Process(
     "HQP_PTQ",
     "Normalized double differential cross section w.r.t. absolute rapidity and transverse momentum of t",
-    accepted_variables=(_Vars.pT_t, _Vars.y_t, _Vars.sqrts),
+    accepted_variables=(_Vars.pT_t, _Vars.y_t, _Vars.sqrts, _Vars.m_t2),
     xq2map_function=_hqp_ptq_xq2map,
 )
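``_get_or_fail`` is not shown in this diff; from its call site a plausible reading (an assumption, not the committed code) is that it returns the value of the first listed variable present in the kinematics dictionary and fails otherwise:

def _get_or_fail(kin_dict, keys):
    """Assumed behaviour: return the first of ``keys`` found in ``kin_dict``,
    raising if none is present (m_t2 is preferred over m_ttBar at the call site)."""
    for key in keys:
        if key in kin_dict:
            return kin_dict[key]
    raise KeyError(f"None of {keys} found among the kinematic variables")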
94 changes: 94 additions & 0 deletions validphys2/src/validphys/tests/test_datafiles.py
@@ -0,0 +1,94 @@
"""
Test all datafiles
All tests are under ``test_all_datasets`` ran with all datasets so that one gets one failure per dataset in case of problems
"""

import pytest

from validphys.covmats import INTRA_DATASET_SYS_NAME
from validphys.loader import FallbackLoader
from validphys.plotoptions.kintransforms import identity as kintransform_identity

l = FallbackLoader()
all_datasets = sorted(l.implemented_datasets)


def _load_main_and_variants(dataset_name):
    """Given a dataset name, returns a list with the default load and all variants"""
    cds = [l.check_commondata(dataset_name)]
    for variant_name in cds[0].metadata.variants:
        cds.append(l.check_commondata(dataset_name, variant=variant_name))
    return cds


@pytest.mark.parametrize("dataset_name", all_datasets)
def test_all_datasets(dataset_name):
    """checks that a dataset can be loaded (together with its variants),
    that the kinematics, uncertainties and data can be read
    """
    # Load the data and all its variants
    cds = _load_main_and_variants(dataset_name)
    main_cd = cds[0]

    # Check ndata
    ndata = main_cd.ndata

    # kinematics check
    _ = main_cd.metadata.load_kinematics(drop_minmax=False)
    kin_df = main_cd.metadata.load_kinematics()

    # Check that the kinematic coverage is contained in the kinematics dataframe
    kin_cov = main_cd.metadata.kinematic_coverage
    assert set(kin_cov) <= set(kin_df.columns.get_level_values(0))

    process_type = main_cd.metadata.process_type

    # check whether the kin override is set to the identity
    # and if so, check that the process_type is not simply a string
    kin_override = main_cd.metadata.plotting.kinematics_override
    if isinstance(kin_override, kintransform_identity) and isinstance(process_type, str):
        # Skip for the time being the processes for which there is no implementation but have been
        # merged to master: issue #1991
        if process_type not in ("HQP_MQQ", "INC"):
            raise NotImplementedError(f"The {process_type=} is not implemented in process_options")

    elif not isinstance(process_type, str):
        if not process_type.are_accepted_variables(kin_cov):
            raise ValueError(
                f"The dataset {dataset_name} uses {kin_cov} while accepted variables for {process_type} are {process_type.accepted_variables}"
            )

    # load the central data for every variant
    all_dc = [cd.metadata.load_data_central() for cd in cds]
    # and check they have the same length (it should've been tested internally already)
    assert all(len(i) == ndata for i in all_dc)

    # check the uncertainties can be loaded
    # note that due to legacy data there might be datasets without data_uncertainties
    # but that would only happen for the non-variant case (member 0 of the list)
    all_unc = [cd.metadata.load_uncertainties() for cd in cds[1:]]
    if main_cd.metadata.data_uncertainties:
        all_unc.insert(0, main_cd.metadata.load_uncertainties())

    for unc in all_unc:
        # Check that, if present, the special `stat` key is ADD and UNCORR
        if "stat" in unc:
            stat = unc["stat"]
            assert stat.columns[0][0] == "ADD"
            assert stat.columns[0][1] == "UNCORR"

        intra_dataset = unc.columns.get_level_values("type").isin(
            list(INTRA_DATASET_SYS_NAME) + ["SKIP", "SCALEVAR"]
        )

        # Check that inter dataset correlations are unique
        inter_dataset_corrs = unc.loc[:, ~intra_dataset]
        inter_types = inter_dataset_corrs.columns.get_level_values("type")
        if not inter_types.is_unique:
            raise ValueError(
                f"The inter-dataset uncertainties for {dataset_name} are not unique: {inter_types.value_counts()}"
            )

        # Check that all treatments are either MULT or ADD
        assert set(unc.columns.get_level_values("treatment").unique()) <= {"MULT", "ADD"}
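
Because the test is parametrized over every implemented dataset, each dataset appears as its own test case and problems are reported one failure per dataset. A typical invocation restricted to a single experiment (the ``-k`` expression filter is standard pytest):

pytest validphys2/src/validphys/tests/test_datafiles.py -k ATLAS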
