Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reimplement ATLAS Z0 7TEV 46FB Dataset #2237

Open
wants to merge 14 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions nnpdf_data/nnpdf_data/commondata/ATLAS_Z0_7TEV_46FB/data_cc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
data_central:
- 3524.0
- 3549.0
- 3411.0
- 3423.0
- 2942.0
- 1541.0
- 135220.0
- 134740.0
- 134240.0
- 133080.0
- 132480.0
- 129060.0
- 119920.0
- 107320.0
- 89870.0
- 68800.0
- 45620.0
- 22230.0
- 1510.0
- 1458.0
- 1350.0
- 1183.0
- 770.5
- 328.7
16 changes: 16 additions & 0 deletions nnpdf_data/nnpdf_data/commondata/ATLAS_Z0_7TEV_46FB/data_cf.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
data_central:
- 7710.0
- 17930.0
- 32520.0
- 50550.0
- 68880.0
- 86590.0
- 86210.0
- 40690.0
- 10950.0
- 300.0
- 548.0
- 925.0
- 937.0
- 437.0
- 70.4
187 changes: 187 additions & 0 deletions nnpdf_data/nnpdf_data/commondata/ATLAS_Z0_7TEV_46FB/filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
"""
When running `python filter.py` the relevant data yaml
file will be created in the `nnpdf_data/commondata/ATLAS_Z0_7TEV_46FB` directory.
"""

import yaml
from filter_utils import get_data_values, get_kinematics, get_systematics

from nnpdf_data.filter_utils.utils import prettify_float

yaml.add_representer(float, prettify_float)


def filter_ATLAS_Z0_7TEV_46FB_data_kinematic():
    """
    Write the central data values and the kinematic bins for both the
    central-central (cc) and central-forward (cf) selections to yaml files
    in the current directory.
    """

    central_values_cc, central_values_cf = get_data_values()

    kin_cc, kin_cf = get_kinematics()

    # map each output filename to its yaml payload, then dump them in turn
    outputs = {
        "data_cc.yaml": {"data_central": central_values_cc},
        "data_cf.yaml": {"data_central": central_values_cf},
        "kinematics_cc.yaml": {"bins": kin_cc},
        "kinematics_cf.yaml": {"bins": kin_cf},
    }

    for filename, payload in outputs.items():
        with open(filename, "w") as stream:
            yaml.dump(payload, stream, sort_keys=False)


def _build_error_definitions(systematics):
    """Build the `definitions` dict of an uncertainties yaml file.

    Reproduces the original per-source naming scheme:
      - 'stat' is kept under its own name (ADD / UNCORR),
      - 'AtlasLumi2011' is kept under its own name (MULT / ATLASLUMI11),
      - the first 'uncor'-type source becomes 'sys_corr_1' (UNCORR),
        the second keeps its raw name (UNCORR),
      - every other source becomes 'sys_corr_<k>' with its raw name as type.

    NOTE(review): the 'AtlasLumi2011' and second-'uncor' definition keys do
    not match the 'sys_corr_<k>' keys used for those positions in the bins
    (see _build_error_bins) — confirm this is intentional.

    Parameters
    ----------
    systematics : list
        List of [{"name": ..., "values": [...]}] entries, one per source,
        as returned by filter_utils.get_systematics.

    Returns
    -------
    dict
        Mapping of error name to its description/treatment/type definition.
    """
    definitions = {}
    counter = 1
    n_uncor_seen = 0
    for sys in systematics:

        name = sys[0]['name']

        if name == 'stat':
            definitions[name] = {
                "description": "Uncorrelated statistical uncertainties",
                "treatment": "ADD",
                "type": "UNCORR",
            }

        elif name == 'AtlasLumi2011':
            definitions[name] = {
                "description": "'Sys uncertainty idx: 132'",
                "treatment": "MULT",
                "type": "ATLASLUMI11",
            }

        elif name == 'uncor' or name == 'uncor.1':
            n_uncor_seen += 1

            if n_uncor_seen == 1:
                counter += 1
                definitions['sys_corr_1'] = {
                    "description": "Sys uncertainty idx: 1",
                    "treatment": "MULT",
                    "type": "UNCORR",
                }
            elif n_uncor_seen == 2:
                definitions[name] = {
                    "description": "Sys uncertainty idx: 133",
                    "treatment": "MULT",
                    "type": "UNCORR",
                }

        else:
            definitions['sys_corr_' + str(counter)] = {
                "description": "Sys uncertainty idx: " + str(counter),
                "treatment": "MULT",
                "type": f"{name}",
            }
            counter += 1

    return definitions


def _build_error_bins(systematics, ndata):
    """Build the `bins` list of an uncertainties yaml file.

    One dict per data point; the first systematic source is keyed by its raw
    name, every following source by its positional 'sys_corr_<idx>' key.

    Parameters
    ----------
    systematics : list
        Same structure as in _build_error_definitions.
    ndata : int
        Number of data points of the observable.

    Returns
    -------
    list of dict
        Per-bin absolute uncertainty values.
    """
    bins = []
    for i in range(ndata):
        error_value = {}
        for idx, sys in enumerate(systematics):
            key = sys[0]['name'] if idx == 0 else 'sys_corr_' + str(idx)
            error_value[key] = float(sys[0]['values'][i])
        bins.append(error_value)
    return bins


def filter_ATLAS_Z0_7TEV_46FB_systematics():
    """
    Write the systematic uncertainties of the cc and cf selections to
    uncertainties_cc.yaml and uncertainties_cf.yaml respectively, using the
    observable sizes declared in metadata.yaml.
    """

    with open("metadata.yaml", "r") as file:
        metadata = yaml.safe_load(file)

    systematics_cc, systematics_cf = get_systematics()

    # (observable index in metadata, systematics list, output suffix)
    for observable_idx, systematics, suffix in (
        (0, systematics_cc, "cc"),
        (1, systematics_cf, "cf"),
    ):
        ndata = metadata['implemented_observables'][observable_idx]['ndata']

        uncertainties_yaml = {
            "definitions": _build_error_definitions(systematics),
            "bins": _build_error_bins(systematics, ndata),
        }

        # write uncertainties
        with open(f"uncertainties_{suffix}.yaml", 'w') as file:
            yaml.dump(uncertainties_yaml, file, sort_keys=False)


if __name__ == "__main__":
    # Generate the data/kinematics yaml files first, then the uncertainties.
    filter_ATLAS_Z0_7TEV_46FB_data_kinematic()
    filter_ATLAS_Z0_7TEV_46FB_systematics()
131 changes: 131 additions & 0 deletions nnpdf_data/nnpdf_data/commondata/ATLAS_Z0_7TEV_46FB/filter_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
"""
This module contains helper functions that are used to extract the data values
from the rawdata files.
"""

import yaml

import pandas as pd
import numpy as np


def _read_table_central_values(table):
    """Read the central values of one HEPData table and rescale them.

    Parameters
    ----------
    table : int
        HEPData table number (rawdata/HEPData-ins1502620-v1-Table_<n>.yaml).

    Returns
    -------
    list
        Central values multiplied by 1000 (unit conversion, presumably
        pb -> fb — confirm against the HEPData record).
    """
    hepdata_table = f"rawdata/HEPData-ins1502620-v1-Table_{table}.yaml"
    with open(hepdata_table, 'r') as stream:
        # avoid shadowing the builtin `input` with the parsed document
        table_data = yaml.safe_load(stream)
    return [value['value'] * 1000 for value in table_data['dependent_variables'][0]['values']]


def get_data_values():
    """
    Return the central data values of the central-central (cc) and
    central-forward (cf) selections.

    Returns
    -------
    tuple of (list, list)
        (data_central_cc, data_central_cf), concatenated over the cc tables
        (11, 12, 13) and cf tables (14, 15) respectively.
    """

    data_central_cc = []
    data_central_cf = []
    cc_tables = [11, 12, 13]
    cf_tables = [14, 15]

    for table in cc_tables:
        data_central_cc.extend(_read_table_central_values(table))

    for table in cf_tables:
        data_central_cf.extend(_read_table_central_values(table))

    return data_central_cc, data_central_cf


def get_kinematics():
    """
    Return the kinematic bins of the cc and cf selections.

    Returns
    -------
    tuple of (list, list)
        (kin_cc, kin_cf); each entry is a dict with 'abs_eta', 'm_ll2' and
        'sqrts' keys holding {'min', 'mid', 'max'} sub-dicts.
    """
    cc_tables = [11, 12, 13]
    cf_tables = [14, 15]

    # Define a mapping for table numbers to av_m_ll2 values
    av_m_ll2_mapping = {11: 56**2, 12: 91**2, 13: 133**2, 14: 91**2, 15: 133**2}

    def _table_kinematics(table):
        # One kinematic dict per bin of the given HEPData table.
        hepdata_table = f"rawdata/HEPData-ins1502620-v1-Table_{table}.yaml"
        av_m_ll2 = av_m_ll2_mapping[table]
        with open(hepdata_table, 'r') as stream:
            # avoid shadowing the builtin `input` with the parsed document
            table_data = yaml.safe_load(stream)
        return [
            {
                'abs_eta': {'min': None, 'mid': (0.5 * (M['low'] + M['high'])), 'max': None},
                'm_ll2': {'min': None, 'mid': av_m_ll2, 'max': None},
                'sqrts': {'min': None, 'mid': 7000.0, 'max': None},
            }
            for M in table_data["independent_variables"][0]['values']
        ]

    kin_cc = []
    for table in cc_tables:
        kin_cc.extend(_table_kinematics(table))

    kin_cf = []
    for table in cf_tables:
        kin_cf.extend(_table_kinematics(table))

    return kin_cc, kin_cf


def get_systematics_dataframe():
    """
    Return the absolute systematic uncertainties of the cc and cf
    selections as a pair of pandas dataframes.
    """
    data_central_cc, data_central_cf = get_data_values()

    # pair each rawdata csv with the central values of its selection
    sources = [
        ("rawdata/zy_cc.csv", data_central_cc),
        ("rawdata/zy_cf.csv", data_central_cf),
    ]

    absolute_dfs = []
    for csv_path, data_central in sources:
        raw_df = pd.read_csv(csv_path)
        # the first two rows of the transposed frame are skipped; the rest
        # are (MULT) percentage uncertainties, rescaled to absolute values
        absolute_dfs.append((raw_df.T[2:] * data_central).T / 100)

    abs_unc_df_cc, abs_unc_df_cf = absolute_dfs
    return abs_unc_df_cc, abs_unc_df_cf


def get_systematics():
    """
    Return the absolute systematic uncertainties of the cc and cf
    selections.

    Returns
    -------
    tuple of (list, list)
        (uncertainties_cc, uncertainties_cf); each entry is a one-element
        list [{"name": <column label>, "values": [<per-bin values>]}], one
        per uncertainty source (dataframe column).
    """
    abs_unc_df_cc, abs_unc_df_cf = get_systematics_dataframe()

    def _columns_as_uncertainties(df):
        # One entry per dataframe column: its label and per-bin values.
        return [
            [{"name": f"{df.columns[i]}", "values": list(column)}]
            for i, column in enumerate(df.values.T)
        ]

    return _columns_as_uncertainties(abs_unc_df_cc), _columns_as_uncertainties(abs_unc_df_cf)


if __name__ == "__main__":
    # Smoke-run the extraction helpers; results are discarded.
    get_data_values()
    get_kinematics()
    get_systematics()
Loading
Loading