Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reimplement ATLAS Z0 7TEV 46FB Dataset #2237

Open
wants to merge 14 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions nnpdf_data/nnpdf_data/commondata/ATLAS_Z0_7TEV_46FB/data_cc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
data_central:
- 3524.0
- 3549.0
- 3411.0
- 3423.0
- 2942.0
- 1541.0
- 135220.0
- 134740.0
- 134240.0
- 133080.0
- 132480.0
- 129060.0
- 119920.0
- 107320.0
- 89870.0
- 68800.0
- 45620.0
- 22230.0
- 1510.0
- 1458.0
- 1350.0
- 1183.0
- 770.5
- 328.7
16 changes: 16 additions & 0 deletions nnpdf_data/nnpdf_data/commondata/ATLAS_Z0_7TEV_46FB/data_cf.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
data_central:
- 7710.0
- 17930.0
- 32520.0
- 50550.0
- 68880.0
- 86590.0
- 86210.0
- 40690.0
- 10950.0
- 300.0
- 548.0
- 925.0
- 937.0
- 437.0
- 70.4
187 changes: 187 additions & 0 deletions nnpdf_data/nnpdf_data/commondata/ATLAS_Z0_7TEV_46FB/filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
"""
When running `python filter.py` the relevant data yaml
file will be created in the `nnpdf_data/commondata/ATLAS_Z0_7TEV_46FB` directory.
"""

import yaml
from filter_utils import get_data_values, get_kinematics, get_systematics

from nnpdf_data.filter_utils.utils import prettify_float

yaml.add_representer(float, prettify_float)


def filter_ATLAS_Z0_7TEV_46FB_data_kinematic():
    """
    Write the central data values and the kinematic bins for both the
    central-central (cc) and central-forward (cf) selections to yaml files
    in the current directory.
    """

    central_values_cc, central_values_cf = get_data_values()

    kin_cc, kin_cf = get_kinematics()

    # map each output filename to its yaml payload, then dump them in turn
    outputs = {
        "data_cc.yaml": {"data_central": central_values_cc},
        "data_cf.yaml": {"data_central": central_values_cf},
        "kinematics_cc.yaml": {"bins": kin_cc},
        "kinematics_cf.yaml": {"bins": kin_cf},
    }

    for filename, payload in outputs.items():
        with open(filename, "w") as stream:
            yaml.dump(payload, stream, sort_keys=False)


def _build_error_definitions(systematics):
    """Build the `definitions` dict of an uncertainties yaml file.

    Reproduces the original per-source naming scheme:
      - 'stat' is kept under its own name (ADD / UNCORR),
      - 'AtlasLumi2011' is kept under its own name (MULT / ATLASLUMI11),
      - the first 'uncor'-type source becomes 'sys_corr_1' (UNCORR),
        the second keeps its raw name (UNCORR),
      - every other source becomes 'sys_corr_<k>' with its raw name as type.

    NOTE(review): the 'AtlasLumi2011' and second-'uncor' definition keys do
    not match the 'sys_corr_<k>' keys used for those positions in the bins
    (see _build_error_bins) — confirm this is intentional.

    Parameters
    ----------
    systematics : list
        List of [{"name": ..., "values": [...]}] entries, one per source,
        as returned by filter_utils.get_systematics.

    Returns
    -------
    dict
        Mapping of error name to its description/treatment/type definition.
    """
    definitions = {}
    counter = 1
    n_uncor_seen = 0
    for sys in systematics:

        name = sys[0]['name']

        if name == 'stat':
            definitions[name] = {
                "description": "Uncorrelated statistical uncertainties",
                "treatment": "ADD",
                "type": "UNCORR",
            }

        elif name == 'AtlasLumi2011':
            definitions[name] = {
                "description": "'Sys uncertainty idx: 132'",
                "treatment": "MULT",
                "type": "ATLASLUMI11",
            }

        elif name == 'uncor' or name == 'uncor.1':
            n_uncor_seen += 1

            if n_uncor_seen == 1:
                counter += 1
                definitions['sys_corr_1'] = {
                    "description": "Sys uncertainty idx: 1",
                    "treatment": "MULT",
                    "type": "UNCORR",
                }
            elif n_uncor_seen == 2:
                definitions[name] = {
                    "description": "Sys uncertainty idx: 133",
                    "treatment": "MULT",
                    "type": "UNCORR",
                }

        else:
            definitions['sys_corr_' + str(counter)] = {
                "description": "Sys uncertainty idx: " + str(counter),
                "treatment": "MULT",
                "type": f"{name}",
            }
            counter += 1

    return definitions


def _build_error_bins(systematics, ndata):
    """Build the `bins` list of an uncertainties yaml file.

    One dict per data point; the first systematic source is keyed by its raw
    name, every following source by its positional 'sys_corr_<idx>' key.

    Parameters
    ----------
    systematics : list
        Same structure as in _build_error_definitions.
    ndata : int
        Number of data points of the observable.

    Returns
    -------
    list of dict
        Per-bin absolute uncertainty values.
    """
    bins = []
    for i in range(ndata):
        error_value = {}
        for idx, sys in enumerate(systematics):
            key = sys[0]['name'] if idx == 0 else 'sys_corr_' + str(idx)
            error_value[key] = float(sys[0]['values'][i])
        bins.append(error_value)
    return bins


def filter_ATLAS_Z0_7TEV_46FB_systematics():
    """
    Write the systematic uncertainties of the cc and cf selections to
    uncertainties_cc.yaml and uncertainties_cf.yaml respectively, using the
    observable sizes declared in metadata.yaml.
    """

    with open("metadata.yaml", "r") as file:
        metadata = yaml.safe_load(file)

    systematics_cc, systematics_cf = get_systematics()

    # (observable index in metadata, systematics list, output suffix)
    for observable_idx, systematics, suffix in (
        (0, systematics_cc, "cc"),
        (1, systematics_cf, "cf"),
    ):
        ndata = metadata['implemented_observables'][observable_idx]['ndata']

        uncertainties_yaml = {
            "definitions": _build_error_definitions(systematics),
            "bins": _build_error_bins(systematics, ndata),
        }

        # write uncertainties
        with open(f"uncertainties_{suffix}.yaml", 'w') as file:
            yaml.dump(uncertainties_yaml, file, sort_keys=False)


if __name__ == "__main__":
    # Generate the data/kinematics yaml files first, then the uncertainties.
    filter_ATLAS_Z0_7TEV_46FB_data_kinematic()
    filter_ATLAS_Z0_7TEV_46FB_systematics()
131 changes: 131 additions & 0 deletions nnpdf_data/nnpdf_data/commondata/ATLAS_Z0_7TEV_46FB/filter_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
"""
This module contains helper functions that are used to extract the data values
from the rawdata files.
"""

import yaml

import pandas as pd
import numpy as np


def _read_table_central_values(table):
    """Read the central values of one HEPData table and rescale them.

    Parameters
    ----------
    table : int
        HEPData table number (rawdata/HEPData-ins1502620-v1-Table_<n>.yaml).

    Returns
    -------
    list
        Central values multiplied by 1000 (unit conversion, presumably
        pb -> fb — confirm against the HEPData record).
    """
    hepdata_table = f"rawdata/HEPData-ins1502620-v1-Table_{table}.yaml"
    with open(hepdata_table, 'r') as stream:
        # avoid shadowing the builtin `input` with the parsed document
        table_data = yaml.safe_load(stream)
    return [value['value'] * 1000 for value in table_data['dependent_variables'][0]['values']]


def get_data_values():
    """
    Return the central data values of the central-central (cc) and
    central-forward (cf) selections.

    Returns
    -------
    tuple of (list, list)
        (data_central_cc, data_central_cf), concatenated over the cc tables
        (11, 12, 13) and cf tables (14, 15) respectively.
    """

    data_central_cc = []
    data_central_cf = []
    cc_tables = [11, 12, 13]
    cf_tables = [14, 15]

    for table in cc_tables:
        data_central_cc.extend(_read_table_central_values(table))

    for table in cf_tables:
        data_central_cf.extend(_read_table_central_values(table))

    return data_central_cc, data_central_cf


def get_kinematics():
    """
    Return the kinematic bins of the cc and cf selections.

    Returns
    -------
    tuple of (list, list)
        (kin_cc, kin_cf); each entry is a dict with 'abs_eta', 'm_ll2' and
        'sqrts' keys holding {'min', 'mid', 'max'} sub-dicts.
    """
    cc_tables = [11, 12, 13]
    cf_tables = [14, 15]

    # Define a mapping for table numbers to av_m_ll2 values
    av_m_ll2_mapping = {11: 56**2, 12: 91**2, 13: 133**2, 14: 91**2, 15: 133**2}

    def _table_kinematics(table):
        # One kinematic dict per bin of the given HEPData table.
        hepdata_table = f"rawdata/HEPData-ins1502620-v1-Table_{table}.yaml"
        av_m_ll2 = av_m_ll2_mapping[table]
        with open(hepdata_table, 'r') as stream:
            # avoid shadowing the builtin `input` with the parsed document
            table_data = yaml.safe_load(stream)
        return [
            {
                'abs_eta': {'min': None, 'mid': (0.5 * (M['low'] + M['high'])), 'max': None},
                'm_ll2': {'min': None, 'mid': av_m_ll2, 'max': None},
                'sqrts': {'min': None, 'mid': 7000.0, 'max': None},
            }
            for M in table_data["independent_variables"][0]['values']
        ]

    kin_cc = []
    for table in cc_tables:
        kin_cc.extend(_table_kinematics(table))

    kin_cf = []
    for table in cf_tables:
        kin_cf.extend(_table_kinematics(table))

    return kin_cc, kin_cf


def get_systematics_dataframe():
    """
    Return the absolute systematic uncertainties of the cc and cf
    selections as a pair of pandas dataframes.
    """
    data_central_cc, data_central_cf = get_data_values()

    # pair each rawdata csv with the central values of its selection
    sources = [
        ("rawdata/zy_cc.csv", data_central_cc),
        ("rawdata/zy_cf.csv", data_central_cf),
    ]

    absolute_dfs = []
    for csv_path, data_central in sources:
        raw_df = pd.read_csv(csv_path)
        # the first two rows of the transposed frame are skipped; the rest
        # are (MULT) percentage uncertainties, rescaled to absolute values
        absolute_dfs.append((raw_df.T[2:] * data_central).T / 100)

    abs_unc_df_cc, abs_unc_df_cf = absolute_dfs
    return abs_unc_df_cc, abs_unc_df_cf


def get_systematics():
    """
    Return the absolute systematic uncertainties of the cc and cf
    selections.

    Returns
    -------
    tuple of (list, list)
        (uncertainties_cc, uncertainties_cf); each entry is a one-element
        list [{"name": <column label>, "values": [<per-bin values>]}], one
        per uncertainty source (dataframe column).
    """
    abs_unc_df_cc, abs_unc_df_cf = get_systematics_dataframe()

    def _columns_as_uncertainties(df):
        # One entry per dataframe column: its label and per-bin values.
        return [
            [{"name": f"{df.columns[i]}", "values": list(column)}]
            for i, column in enumerate(df.values.T)
        ]

    return _columns_as_uncertainties(abs_unc_df_cc), _columns_as_uncertainties(abs_unc_df_cf)


if __name__ == "__main__":
    # Smoke-run the extraction helpers; results are discarded.
    get_data_values()
    get_kinematics()
    get_systematics()
Loading
Loading