Skip to content

Commit

Permalink
Merge pull request #2175 from NNPDF/reimplement-HERA-datasets
Browse files Browse the repository at this point in the history
Reimplement hera datasets
  • Loading branch information
t7phy authored Dec 23, 2024
2 parents d303d3f + b33f457 commit 1453465
Show file tree
Hide file tree
Showing 49 changed files with 249,233 additions and 4,420 deletions.
68 changes: 68 additions & 0 deletions nnpdf_data/nnpdf_data/commondata/HERA_CC_318GEV/filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from nnpdf_data.filter_utils.hera_utils import commondata #, covmat_is_close
from pathlib import Path
from dataclasses import dataclass
import typing
from typing import List
import numpy as np
import pandas as pd
from os import PathLike
import yaml

@dataclass
class hera_commondata(commondata):
    """Commondata built from a whitespace-separated HERA raw data table.

    The constructor reads the table and sets the attributes
    ``central_values``, ``kinematics``, ``kinematic_quantities``,
    ``statistical_uncertainties``, ``systematic_uncertainties``,
    ``systypes``, ``process`` and ``dataset_name`` on the instance.
    """

    def __init__(self, filename: str | PathLike, dataset_name: str,
                 process: str):
        """Read ``filename`` and populate the commondata attributes.

        Parameters
        ----------
        filename
            Path to the raw data table. The table must contain the columns
            ``Sigma``, ``x``, ``Q2``, ``y``, ``stat`` and ``tot_noproc``.
            After ``tot_noproc`` is dropped, every column from the sixth
            one onwards is taken as a systematic uncertainty.
        dataset_name
            Stored unchanged as ``self.dataset_name``.
        process
            Stored unchanged as ``self.process``.

        Raises
        ------
        KeyError
            If one of the required columns is missing from the table.
        """
        # Read the data.
        df = pd.read_table(Path(filename), sep=r"\s+")

        # Central values and kinematic quantities.
        central_values = df["Sigma"].to_numpy()
        self.central_values = central_values
        self.kinematics = df[["x", "Q2", "y"]].to_numpy()
        self.kinematic_quantities = ["x", "Q2", "y"]

        # Statistical uncertainties: the table stores them as a percentage
        # of the central value, so convert them to absolute values.
        # The vectorised expression allocates a new float array instead of
        # writing into the array returned by ``to_numpy()``; that array may
        # be a (read-only) view of the DataFrame and may have an integer
        # dtype, in which case an in-place write would fail or truncate.
        self.statistical_uncertainties = (
            central_values * df["stat"].to_numpy() / 100
        )

        # Systematic uncertainties.
        # Remove the column containing the total uncertainty excluding
        # procedural uncertainties, then keep everything after the first
        # five columns.
        df = df.drop(columns=["tot_noproc"])
        sys_uncert_col_names = list(df.columns.values)[5:]
        # Same percent -> absolute conversion, applied row by row through
        # broadcasting: row i is scaled by central value i.
        self.systematic_uncertainties = (
            central_values[:, np.newaxis]
            * df[sys_uncert_col_names].to_numpy()
            / 100
        )

        # All uncertainties are treated as multiplicative. The ``uncor``
        # column is tagged as uncorrelated; every other column gets its own
        # ``HC_<column name>`` label.
        self.systypes = [
            ("MULT", "UNCORR") if name == "uncor" else ("MULT", f"HC_{name}")
            for name in sys_uncert_col_names
        ]
        self.process = process
        self.dataset_name = dataset_name

def main():
    """Build and write the commondata files for the CCem and CCep tables.

    For each raw table a ``hera_commondata`` object is created and its
    ``write_new_commondata`` method is called with the data, kinematics
    and uncertainties output paths (in that order).
    """
    # (raw table, dataset name, data file, kinematics file, uncertainties file)
    jobs = (
        (
            "./rawdata/HERA1+2_CCem.dat",
            "HERACOMBCCEM",
            "data_EM-SIGMARED.yaml",
            "kinematics_EM-SIGMARED.yaml",
            "uncertainties_EM-SIGMARED.yaml",
        ),
        (
            "./rawdata/HERA1+2_CCep.dat",
            "HERACOMBCCEP",
            "data_EP-SIGMARED.yaml",
            "kinematics_EP-SIGMARED.yaml",
            "uncertainties_EP-SIGMARED.yaml",
        ),
    )
    for raw_table, set_name, data_out, kin_out, unc_out in jobs:
        dataset = hera_commondata(raw_table, set_name, "DIS_CC")
        dataset.write_new_commondata(
            Path(data_out), Path(kin_out), Path(unc_out)
        )

# Run the filter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()



Loading

0 comments on commit 1453465

Please sign in to comment.