Skip to content

Commit

Permalink
added athena utils file and removed repeated code in filter files
Browse files Browse the repository at this point in the history
  • Loading branch information
comane committed Apr 8, 2024
1 parent 858d246 commit b404044
Show file tree
Hide file tree
Showing 6 changed files with 117 additions and 639 deletions.
107 changes: 107 additions & 0 deletions nnpdf_data/nnpdf_data/athena_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
import pandas as pd
from pathlib import Path
from typing import Optional, Union

import yaml
import numpy as np


def read_excel(path_xlsx: Path, beams: tuple) -> pd.DataFrame:
"""Parse the xlsx file containing all the information regarding
Expand All @@ -22,3 +27,105 @@ def read_excel(path_xlsx: Path, beams: tuple) -> pd.DataFrame:
el, ep = beams
df_beam = xdf[(xdf["El"] == el) & (xdf["Eh"] == ep)]
return df_beam[df_beam["y"] <= 1]


def fluctuate_data(central: np.ndarray, abserr: np.ndarray) -> np.ndarray:
    """Fluctuate the central values according to the uncertainties.

    Parameters
    ----------
    central : np.ndarray
        array of central values
    abserr : np.ndarray
        array containing the absolute values of the errors, one per
        central value

    Returns
    -------
    np.ndarray
        fluctuated central values, each drawn from a normal distribution
        centred on the original value with the corresponding error as
        standard deviation

    Raises
    ------
    ValueError
        if `central` and `abserr` have different lengths
    """
    if len(central) != len(abserr):
        # `zip` would silently truncate to the shorter array, producing
        # the wrong number of datapoints downstream.
        raise ValueError(
            f"`central` ({len(central)}) and `abserr` ({len(abserr)}) must have the same length."
        )
    # Draw one scalar at a time (not a single vectorized call) so that the
    # RNG consumption — and thus results under a fixed seed — is unchanged.
    shifted_cv = [np.random.normal(c, e) for c, e in zip(central, abserr)]
    return np.array(shifted_cv)


def write_data(
    df: pd.DataFrame, abserr: Optional[np.ndarray] = None, add_fluctuate: bool = False
) -> None:
    """Write the input kinematics, central values, and uncertainties
    into the new commondata format.

    Dumps three YAML files into the current working directory:
    ``data.yaml``, ``kinematics.yaml``, and ``uncertainties.yaml``.

    Parameters
    ----------
    df : pd.DataFrame
        a pandas table containing the information required to generate
        the commondata; expected columns include "x", "Q2", "y" and,
        when ``add_fluctuate`` is True, "unpol_stat_percent",
        "ptpt_percent", "shift_uncer", and "norm_percent"
    abserr : Optional[np.ndarray]
        if not None, contains the fluctuated central values; required
        when ``add_fluctuate`` is True
    add_fluctuate : bool
        whether or not the central values were fluctuated; when False,
        central values and uncertainties are written as ``None``

    Raises
    ------
    ValueError
        if ``add_fluctuate`` is True but ``abserr`` is None
    """
    # -----------------------------------------------------------------
    # Dump the Central values
    if not add_fluctuate:
        data_central = [None for _ in range(len(df))]
    else:
        if abserr is None:
            # Fail loudly instead of hitting AttributeError on None.tolist().
            raise ValueError("`abserr` must be provided when `add_fluctuate` is True.")
        data_central = abserr.tolist()
    print(f"The dataset has {len(data_central)} datapoints!")

    data_central_yaml = {"data_central": data_central}
    with open("data.yaml", "w") as file:
        yaml.dump(data_central_yaml, file, sort_keys=False)

    # -----------------------------------------------------------------
    # Dump the kinematics (one bin per table row; only mid values are known)
    kins = [
        {
            "x": {"min": None, "mid": float(d["x"]), "max": None},
            "Q2": {"min": None, "mid": float(d["Q2"]), "max": None},
            "y": {"min": None, "mid": float(d["y"]), "max": None},
        }
        for _, d in df.iterrows()
    ]

    kinematics_yaml = {"bins": kins}
    with open("kinematics.yaml", "w") as file:
        yaml.dump(kinematics_yaml, file, sort_keys=False)

    # -----------------------------------------------------------------
    # Dump the uncertainty values; percent columns are converted into
    # absolute uncertainties relative to the central value.
    errors = []
    for idx, (_, d) in enumerate(df.iterrows()):
        if not add_fluctuate:
            errors.append({"stat": None, "sys": None, "shift_lumi": None, "norm": None})
        else:
            errors.append(
                {
                    "stat": float(data_central[idx] * d["unpol_stat_percent"] * 1e-2),
                    "sys": float(data_central[idx] * d["ptpt_percent"] * 1e-2),
                    "shift_lumi": float(d["shift_uncer"]),
                    "norm": float(data_central[idx] * d["norm_percent"] * 1e-2),
                }
            )

    error_definition = {
        "stat": {"description": "statistical uncertainty", "treatment": "ADD", "type": "UNCORR"},
        "sys": {
            "description": "systematic uncertainty",
            "treatment": "MULT",  # TODO: to check
            "type": "UNCORR",
        },
        "shift_lumi": {
            "description": "uncertainty on the precision of the relative luminosity",
            "treatment": "ADD",
            "type": "UNCORR",  # TODO: to check
        },
        "norm": {
            "description": "relative (percent) normalization uncertainty (beam pol)",
            "treatment": "MULT",  # TODO: to check
            "type": "CORR",  # TODO: to check
        },
    }

    uncertainties_yaml = {"definitions": error_definition, "bins": errors}
    with open("uncertainties.yaml", "w") as file:
        yaml.dump(uncertainties_yaml, file, sort_keys=False)
129 changes: 2 additions & 127 deletions nnpdf_data/nnpdf_data/new_commondata/ATHENA_NC_105GEV_EP/filter.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,11 @@
from pathlib import Path
from typing import Optional, Union

import numpy as np
import pandas as pd
import yaml

np.random.seed(1234567890)


def read_excel(path_xlsx: Path, beams: tuple) -> pd.DataFrame:
"""Parse the xlsx file containing all the information regarding
the projections and returns the ones corresponding to the chosen
beam energies.
from nnpdf_data.athena_utils import read_excel, fluctuate_data, write_data

Parameters
----------
path_xlsx : Path
path to the xlsx file
beams: tuple
tuple specifying the beam energies of the lepton & proton
Returns
-------
pd.DataFrame
returns a panda table corresponding to the chosen beams
"""
xdf = pd.read_excel(path_xlsx)
el, ep = beams
df_beam = xdf[(xdf["El"] == el) & (xdf["Eh"] == ep)]
return df_beam[df_beam["y"] <= 1]
np.random.seed(1234567890)


def read_cvs() -> np.ndarray:
Expand All @@ -37,108 +14,6 @@ def read_cvs() -> np.ndarray:
return np.array(cv_yaml["predictions_central"])


def fluctuate_data(central: np.ndarray, abserr: np.ndarray) -> np.ndarray:
    """Add Gaussian noise to the central values.

    Each central value is replaced by a draw from a normal distribution
    centred on it, with the corresponding absolute error as its width.

    Parameters
    ----------
    central : np.ndarray
        array of central values
    abserr : np.ndarray
        array of absolute uncertainties, one per central value

    Returns
    -------
    np.ndarray
        the fluctuated central values
    """
    fluctuated = []
    for mean, width in zip(central, abserr):
        fluctuated.append(np.random.normal(mean, width))
    return np.array(fluctuated)


def write_data(
    df: pd.DataFrame, abserr: Optional[Union[np.ndarray, None]] = None, add_fluctuate: bool = False
) -> None:
    """Dump the kinematics, central values, and uncertainties into the
    new commondata format: three YAML files (``data.yaml``,
    ``kinematics.yaml``, ``uncertainties.yaml``) written to the current
    working directory.

    Parameters
    ----------
    df : pd.DataFrame
        a pandas table containing the information required to generate
        the commondata
    abserr: Optional[Union[np.ndarray, None]]
        if not None, contains the fluctuated central values
    add_fluctuate: bool
        whether or not the central values were fluctuated; when False,
        placeholders (``None``) are written instead
    """
    # Central values: placeholders unless fluctuated predictions are given.
    if add_fluctuate:
        data_central = abserr.tolist()
    else:
        data_central = [None] * len(df)
    print(f"The dataset has {len(data_central)} datapoints!")

    with open("data.yaml", "w") as file:
        yaml.dump({"data_central": data_central}, file, sort_keys=False)

    # Kinematic bins: only the mid value of each variable is available.
    kins = []
    for _, row in df.iterrows():
        kins.append(
            {
                "x": {"min": None, "mid": float(row["x"]), "max": None},
                "Q2": {"min": None, "mid": float(row["Q2"]), "max": None},
                "y": {"min": None, "mid": float(row["y"]), "max": None},
            }
        )

    with open("kinematics.yaml", "w") as file:
        yaml.dump({"bins": kins}, file, sort_keys=False)

    # Uncertainties: percent columns become absolute values w.r.t. the CV.
    errors = []
    for idx, (_, row) in enumerate(df.iterrows()):
        if add_fluctuate:
            cv = data_central[idx]
            errors.append(
                {
                    "stat": float(cv * row["unpol_stat_percent"] * 1e-2),
                    "sys": float(cv * row["ptpt_percent"] * 1e-2),
                    "shift_lumi": float(row["shift_uncer"]),
                    "norm": float(cv * row["norm_percent"] * 1e-2),
                }
            )
        else:
            errors.append({"stat": None, "sys": None, "shift_lumi": None, "norm": None})

    error_definition = {
        "stat": {"description": "statistical uncertainty", "treatment": "ADD", "type": "UNCORR"},
        "sys": {
            "description": "systematic uncertainty",
            "treatment": "MULT",  # TODO: to check
            "type": "UNCORR",
        },
        "shift_lumi": {
            "description": "uncertainty on the precision of the relative luminosity",
            "treatment": "ADD",
            "type": "UNCORR",  # TODO: to check
        },
        "norm": {
            "description": "relative (percent) normalization uncertainty (beam pol)",
            "treatment": "MULT",  # TODO: to check
            "type": "CORR",  # TODO: to check
        },
    }

    with open("uncertainties.yaml", "w") as file:
        yaml.dump({"definitions": error_definition, "bins": errors}, file, sort_keys=False)


if __name__ == "__main__":
BEAMS = (10, 275)
input_xlsx = Path("./rawdata/ATHENA_ALL_EP.xlsx")
Expand Down
Loading

0 comments on commit b404044

Please sign in to comment.