diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_13TEV/data_legacy_TCHANNEL-XSEC.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_13TEV/data_TCHANNEL-XSEC.yaml similarity index 100% rename from nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_13TEV/data_legacy_TCHANNEL-XSEC.yaml rename to nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_13TEV/data_TCHANNEL-XSEC.yaml diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_13TEV/kinematics_TCHANNEL-XSEC.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_13TEV/kinematics_TCHANNEL-XSEC.yaml index 3980a84f4b..d73682d60a 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_13TEV/kinematics_TCHANNEL-XSEC.yaml +++ b/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_13TEV/kinematics_TCHANNEL-XSEC.yaml @@ -1,13 +1,5 @@ bins: -- k1: +- m_t2: min: null - mid: 0.0 - max: null - k2: - min: null - mid: 30032.89 - max: null - k3: - min: null - mid: 13000.0 + mid: 29756.25 max: null diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_13TEV/metadata.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_13TEV/metadata.yaml index ce1d0c588f..d57dd2de55 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_13TEV/metadata.yaml +++ b/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_13TEV/metadata.yaml @@ -1,6 +1,6 @@ setname: CMS_SINGLETOP_13TEV -version: 1 -version_comment: Port of old commondata +version: 2 +version_comment: Implementation of new data nnpdf_metadata: nnpdf31_process: SINGLETOP experiment: CMS @@ -8,7 +8,7 @@ arXiv: url: https://arxiv.org/abs/1610.00678 journal: Phys. Lett. 
B 772 (2017) 752 iNSPIRE: - url: '' + url: https://inspirehep.net/literature/1489193 hepdata: url: '' version: -1 @@ -23,27 +23,17 @@ implemented_observables: npoints: [] ndata: 1 plotting: - kinematics_override: inc_sqrt_scale + kinematics_override: identity dataset_label: CMS single top $R_{t}$ 13 TeV plot_x: idat kinematic_coverage: - - k1 - - k2 - - k3 + - m_t2 kinematics: variables: - k1: - description: Variable k1 - label: k1 - units: '' - k2: - description: Variable k2 - label: k2 - units: '' - k3: - description: Variable k3 - label: k3 - units: '' + m_t2: + description: "top mass squared" + label: $M^2$ + units: $GeV^2$ file: kinematics_TCHANNEL-XSEC.yaml theory: conversion_factor: 1.0 @@ -51,10 +41,10 @@ implemented_observables: FK_tables: - - CMS_SINGLETOP_TCH_R_13TEV-CMS_SINGLETOP_TCH_R_13TEV_T - - CMS_SINGLETOP_TCH_R_13TEV-CMS_SINGLETOP_TCH_R_13TEV_TB - data_uncertainties: [] + data_uncertainties: [uncertainties_TCHANNEL-XSEC.yaml] variants: legacy: data_uncertainties: - - uncertainties_legacy_TCHANNEL-XSEC.yaml - data_central: data_legacy_TCHANNEL-XSEC.yaml + - uncertainties_TCHANNEL-XSEC.yaml + data_central: data_TCHANNEL-XSEC.yaml ported_from: CMS_SINGLETOP_TCH_R_13TEV diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_13TEV/uncertainties_legacy_TCHANNEL-XSEC.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_13TEV/uncertainties_TCHANNEL-XSEC.yaml similarity index 100% rename from nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_13TEV/uncertainties_legacy_TCHANNEL-XSEC.yaml rename to nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_13TEV/uncertainties_TCHANNEL-XSEC.yaml diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_7TEV/data_legacy_TCHANNEL-XSEC.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_7TEV/data_TCHANNEL-XSEC.yaml similarity index 100% rename from nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_7TEV/data_legacy_TCHANNEL-XSEC.yaml rename to nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_7TEV/data_TCHANNEL-XSEC.yaml 
diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_7TEV/kinematics_TCHANNEL-XSEC.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_7TEV/kinematics_TCHANNEL-XSEC.yaml index 49b5ce05d2..d73682d60a 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_7TEV/kinematics_TCHANNEL-XSEC.yaml +++ b/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_7TEV/kinematics_TCHANNEL-XSEC.yaml @@ -1,13 +1,5 @@ bins: -- k1: +- m_t2: min: null - mid: 0.0 - max: null - k2: - min: null - mid: 30032.89 - max: null - k3: - min: null - mid: 7000.0 + mid: 29756.25 max: null diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_7TEV/metadata.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_7TEV/metadata.yaml index 127f47b0df..0df08ee0a1 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_7TEV/metadata.yaml +++ b/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_7TEV/metadata.yaml @@ -1,6 +1,6 @@ setname: CMS_SINGLETOP_7TEV -version: 1 -version_comment: Port of old commondata +version: 2 +version_comment: Implementation of new data nnpdf_metadata: nnpdf31_process: SINGLETOP experiment: CMS @@ -8,7 +8,7 @@ arXiv: url: https://arxiv.org/abs/1209.4533 journal: JHEP 12 (2012) 035 iNSPIRE: - url: '' + url: https://inspirehep.net/literature/1186734 hepdata: url: '' version: -1 @@ -23,27 +23,17 @@ implemented_observables: npoints: [] ndata: 1 plotting: - kinematics_override: inc_sqrt_scale + kinematics_override: identity dataset_label: CMS single top $\sigma_{t}+\sigma_{\bar{t}}$ 7 TeV plot_x: idat kinematic_coverage: - - k1 - - k2 - - k3 + - m_t2 kinematics: variables: - k1: - description: Variable k1 - label: k1 - units: '' - k2: - description: Variable k2 - label: k2 - units: '' - k3: - description: Variable k3 - label: k3 - units: '' + m_t2: + description: "top mass squared" + label: $M^2$ + units: $GeV^2$ file: kinematics_TCHANNEL-XSEC.yaml theory: conversion_factor: 1.0 @@ -51,10 +41,10 @@ implemented_observables: FK_tables: - - 
CMS_SINGLETOP_TCH_TOT_7TEV-CMS_SINGLETOP_TCH_R_7TEV_T - - CMS_SINGLETOP_TCH_TOT_7TEV-CMS_SINGLETOP_TCH_R_7TEV_TB - data_uncertainties: [] + data_uncertainties: [uncertainties_TCHANNEL-XSEC.yaml] variants: legacy: data_uncertainties: - - uncertainties_legacy_TCHANNEL-XSEC.yaml - data_central: data_legacy_TCHANNEL-XSEC.yaml + - uncertainties_TCHANNEL-XSEC.yaml + data_central: data_TCHANNEL-XSEC.yaml ported_from: CMS_SINGLETOP_TCH_TOT_7TEV diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_7TEV/uncertainties_legacy_TCHANNEL-XSEC.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_7TEV/uncertainties_TCHANNEL-XSEC.yaml similarity index 100% rename from nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_7TEV/uncertainties_legacy_TCHANNEL-XSEC.yaml rename to nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_7TEV/uncertainties_TCHANNEL-XSEC.yaml diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_8TEV/data_legacy_TCHANNEL-XSEC.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_8TEV/data_TCHANNEL-XSEC.yaml similarity index 100% rename from nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_8TEV/data_legacy_TCHANNEL-XSEC.yaml rename to nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_8TEV/data_TCHANNEL-XSEC.yaml diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_8TEV/filter.py b/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_8TEV/filter.py new file mode 100644 index 0000000000..2a1d71e761 --- /dev/null +++ b/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_8TEV/filter.py @@ -0,0 +1,239 @@ +import pathlib + +import numpy as np +import pandas as pd +import yaml + +from nnpdf_data.filter_utils.utils import prettify_float + +yaml.add_representer(float, prettify_float) + +NB_POINTS = 1 +MT_VALUE = 172.5 +SQRT_S = 8_000.0 + +from nnpdf_data.filter_utils.utils import cormat_to_covmat, covmat_to_artunc +from nnpdf_data.filter_utils.utils import symmetrize_errors as se + + +def load_yaml(table_id: int, version: int = 1) -> dict: + """Load the HEP data table in yaml 
format. + + Parameters + ---------- + table_id: int + table ID number + + Returns + ------- + dict: + dictionary containing the table contents + + """ + filename = f"HEPData-ins1287736-v{version}-Table_{table_id}" + table = pathlib.Path(f"./rawdata/{filename}.yaml") + + return yaml.safe_load(table.read_text()) + + +def get_kinematics(hepdata: dict, bin_index: list = [], indx: int = 0) -> list: + """Build the list of kinematic bins from the HepData table. + + Parameters + ---------- + hepdata: dict + dictionary containing all data info + bin_index: list + list of non-empty bin indices + indx: int + Column index from which to read, default=0 + + Returns + ------- + kinematics: list + kinematic info + + """ + if len(hepdata["independent_variables"]) > 0: + bins = hepdata["independent_variables"][indx]["values"] + if len(bin_index) > 0: + bins = [bins[i] for i in bin_index] + else: + bins = [] + + kinematics = [] + if len(bins) > 1: # differential case + for i in bin_index: + ymin, ymax = [float(value) for value in bins[i]["value"].split('-')] + kin_value = { + "y_t": {"min": ymin, "mid": (ymin + ymax) / 2, "max": ymax}, + "m_t2": {"min": None, "mid": MT_VALUE**2, "max": None}, + "sqrts": {"min": None, "mid": SQRT_S, "max": None}, + } + kinematics.append(kin_value) + else: # inclusive case + kin_value = {"m_t2": {"min": None, "mid": MT_VALUE**2, "max": None}} + kinematics.append(kin_value) + + return kinematics + + +def get_data_values(hepdata: dict, bin_index: list, indx: int = 0) -> list: + """Extract the central values from the HepData yaml file. 
+ + Parameters + ---------- + hepdata: dict + dictionary containing all data info + bin_index: list + Bin indices that must be parsed + indx: int + Column index from which to read the central value, default=0 + + Returns + ------- + list: + list of dictionaries whose contents are the central values + + """ + central = hepdata["dependent_variables"][indx]["values"] + return np.array([central[i]["value"] for i in bin_index]) + + +def get_errors(hepdata: dict, bin_index: list) -> dict: + """ + Extract the uncertainties from hepdata and compute the shift of the central value in case of + asymmetric uncertainties + + Parameters + ---------- + hepdata: dict + Hepdata yaml file loaded as dictionary + bin_index: list + Bin indices that must be parsed + + Returns + ------- + dict: + Dictionary containing the errors (as pandas DataFrame) and shifts of central values + """ + # parse the systematics + errors = {} + for error in hepdata["dependent_variables"][0]["values"][0]["errors"]: + errors[error["label"]] = [error["symerror"]] + + # get the description of the uncertainty from hepdata + errors = pd.DataFrame(errors, index=["bin 0"]) + + return {"errors": errors} + + +def format_uncertainties(uncs: dict) -> list: + """Format the uncertainties to be dumped into the yaml file. 
+ + Parameters + ---------- + uncs: dict + Dictionary containing the various sources of uncertainties + + Returns + ------- + list: + list of dictionaries whose elements are the various errors + + """ + + combined_errors = [] + n_bins = uncs["systematics"].index.str.startswith("bin").sum() + for i in range(n_bins): + errors = {} + if "statistics" in uncs: + errors["stat"] = float(uncs["statistics"].iloc[i, 0]) + for j, unc in enumerate(uncs["systematics"].loc[f"bin {i}"].values): + errors[f"sys_corr_{j + 1}"] = float(unc) + + combined_errors.append(errors) + + return combined_errors + + +def dump_commondata(kinematics: list, data: list, errors: dict, obs: str) -> None: + """Function that generates and writes the commondata files. + + Parameters + ---------- + kinematics: list + list containing the kinematic values + data: list + list containing the central values + errors: dict + Dictionary containing the different errors + obs: str + Name to append to the file names + """ + + if "statistics" in errors: + error_definition = { + "stat": { + "description": "Uncorrelated statistical uncertainties", + "treatment": "ADD", + "type": "UNCORR", + } + } + else: + error_definition = {} + + n_sys = errors["systematics"].shape[1] + for i in range(n_sys): + + error_definition[f"sys_corr_{i + 1}"] = { + "description": errors["systematics"].columns[i], + "treatment": errors["systematics"].loc["treatment"].iloc[i], + "type": errors["systematics"].loc["type"].iloc[i], + } + + errors_formatted = format_uncertainties(errors) + with open(f"data_{obs}.yaml", "w") as file: + yaml.dump({"data_central": data.tolist()}, file, sort_keys=False) + + with open(f"kinematics_{obs}.yaml", "w") as file: + yaml.dump({"bins": kinematics}, file, sort_keys=False) + + with open(f"uncertainties_{obs}.yaml", "w") as file: + yaml.dump( + {"definitions": error_definition, "bins": errors_formatted}, file, sort_keys=False + ) + + +def main_filter() -> None: + """ + This filter produces the commondata for the 
following three observables: + 1) T-Y-NORM + 2) TBAR-Y-NORM + 3) TCHANNEL-XSEC (ratio tq/tqbar) + 1) and 2) follow the same procedure. + """ + + # TCHANNEL-XSEC RATIO + yaml_content_data = load_yaml(table_id=3, version=1) + + data_central = get_data_values(yaml_content_data, bin_index=[0], indx=0) + kinematics = get_kinematics(yaml_content_data) + uncertainties = get_errors(yaml_content_data, bin_index=[0]) + + stat_unc = uncertainties["errors"][["stat"]] + sys_unc = uncertainties["errors"][["sys"]] + sys_unc.columns = ["Total systematic uncertainty"] + n_sys = sys_unc.shape[1] + sys_types = {"treatment": ["MULT"] * n_sys, "type": ["UNCORR"] * n_sys} + sys_types_df = pd.DataFrame(sys_types, index=sys_unc.columns).T + sys_unc = pd.concat([sys_types_df, sys_unc]) + + errors = {"statistics": stat_unc, "systematics": sys_unc} + dump_commondata(kinematics, data_central, errors, obs="TCHANNEL-XSEC") + + return + + +if __name__ == "__main__": + main_filter() diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_8TEV/kinematics_TCHANNEL-XSEC.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_8TEV/kinematics_TCHANNEL-XSEC.yaml index 04c429a38a..d73682d60a 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_8TEV/kinematics_TCHANNEL-XSEC.yaml +++ b/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_8TEV/kinematics_TCHANNEL-XSEC.yaml @@ -1,13 +1,5 @@ bins: -- k1: +- m_t2: min: null - mid: 0.0 - max: null - k2: - min: null - mid: 30032.89 - max: null - k3: - min: null - mid: 8000.0 + mid: 29756.25 max: null diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_8TEV/metadata.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_8TEV/metadata.yaml index c371f2cbce..405b8796c9 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_8TEV/metadata.yaml +++ b/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_8TEV/metadata.yaml @@ -1,6 +1,6 @@ setname: CMS_SINGLETOP_8TEV -version: 1 -version_comment: Port of old commondata +version: 2 +version_comment: 
Implementation of new data nnpdf_metadata: nnpdf31_process: SINGLETOP experiment: CMS @@ -8,10 +8,10 @@ arXiv: url: https://arxiv.org/abs/1403.7366 journal: JHEP 06 (2014) 090 iNSPIRE: - url: '' + url: https://inspirehep.net/literature/1287736 hepdata: - url: '' - version: -1 + url: https://www.hepdata.net/record/ins1287736 + version: 1 implemented_observables: - observable_name: TCHANNEL-XSEC observable: @@ -19,31 +19,21 @@ implemented_observables: label: CMS single top $R_{t}$ 8 TeV units: '' process_type: INC - tables: [] + tables: [3] npoints: [] ndata: 1 plotting: - kinematics_override: inc_sqrt_scale + kinematics_override: identity dataset_label: CMS single top $R_{t}$ 8 TeV plot_x: idat kinematic_coverage: - - k1 - - k2 - - k3 + - m_t2 kinematics: variables: - k1: - description: Variable k1 - label: k1 - units: '' - k2: - description: Variable k2 - label: k2 - units: '' - k3: - description: Variable k3 - label: k3 - units: '' + m_t2: + description: "top mass squared" + label: $M^2$ + units: $GeV^2$ file: kinematics_TCHANNEL-XSEC.yaml theory: conversion_factor: 1.0 @@ -51,10 +41,10 @@ implemented_observables: FK_tables: - - CMS_SINGLETOP_TCH_R_8TEV-CMS_SINGLETOP_TCH_R_8TEV_T - - CMS_SINGLETOP_TCH_R_8TEV-CMS_SINGLETOP_TCH_R_8TEV_TB - data_uncertainties: [] + data_uncertainties: [uncertainties_TCHANNEL-XSEC.yaml] variants: legacy: data_uncertainties: - - uncertainties_legacy_TCHANNEL-XSEC.yaml - data_central: data_legacy_TCHANNEL-XSEC.yaml + - uncertainties_TCHANNEL-XSEC.yaml + data_central: data_TCHANNEL-XSEC.yaml ported_from: CMS_SINGLETOP_TCH_R_8TEV diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_8TEV/rawdata/HEPData-ins1287736-v1-Table_3.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_8TEV/rawdata/HEPData-ins1287736-v1-Table_3.yaml new file mode 100644 index 0000000000..1de5e463eb --- /dev/null +++ b/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_8TEV/rawdata/HEPData-ins1287736-v1-Table_3.yaml @@ -0,0 +1,14 @@ +dependent_variables: +- 
header: {name: R(TOP/TOPBAR) = SIG(TOP) / SIG(TOPBAR)} + qualifiers: + - {name: RE, value: P P --> TOP < (MU+ NUMU + E+ NUE) BOTTOM > + TOPBAR < (MU- NUMUBAR + + E- NUEBAR) BOTTOMBAR > X} + values: + - errors: + - {label: stat, symerror: 0.1} + - {label: sys, symerror: 0.19} + value: 1.95 +independent_variables: +- header: {name: SQRT(S), units: GEV} + values: + - {value: 8000.0} diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_8TEV/uncertainties_legacy_TCHANNEL-XSEC.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_8TEV/uncertainties_TCHANNEL-XSEC.yaml similarity index 81% rename from nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_8TEV/uncertainties_legacy_TCHANNEL-XSEC.yaml rename to nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_8TEV/uncertainties_TCHANNEL-XSEC.yaml index c3b500d05b..90aaf014d2 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_8TEV/uncertainties_legacy_TCHANNEL-XSEC.yaml +++ b/nnpdf_data/nnpdf_data/commondata/CMS_SINGLETOP_8TEV/uncertainties_TCHANNEL-XSEC.yaml @@ -4,7 +4,7 @@ definitions: treatment: ADD type: UNCORR sys_corr_1: - description: 'Sys uncertainty idx: 1' + description: Total systematic uncertainty treatment: MULT type: UNCORR bins: