Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reimplement hera datasets #2175

Merged
merged 22 commits into from
Dec 23, 2024
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
3298bb0
initial commit for HERA dataset reimplementation
Oct 15, 2024
8430f56
initial commit for HERA dataset reimplementation
Oct 15, 2024
c32641b
add reimplementation of HERA beauty and charm QCD analysis and combin…
Oct 16, 2024
66d5101
small improvements in filter script.
Oct 16, 2024
386d497
Add check for covmat, remove total uncertainty before procedural unce…
Oct 30, 2024
127822f
Change process type, fix typo in metadata
Oct 30, 2024
3bad329
change names of kinematic variables from k1, k2, k3 to x, Q2, y.
Oct 30, 2024
96311ee
Add files containing the reimplemented variant of the uncertainties t…
Oct 30, 2024
79a0990
replace Q2bins6 by k2bins6.
Oct 30, 2024
720e8b2
add reimplemented uncertainties for HERA_NC300GEV.
Oct 30, 2024
ca03af7
fix labels in metadata, remove legacy variants.
peterkrack Dec 8, 2024
89adf40
Merge branch 'master' into reimplement-HERA-datasets
peterkrack Dec 8, 2024
f05a592
fix typo in metadata.
peterkrack Dec 8, 2024
a3a958b
fix missing labels
peterkrack Dec 8, 2024
87108f8
commondata test fail when importing validphys
peterkrack Dec 8, 2024
fcd4cda
remove import of covmat_is_close from filter scripts
peterkrack Dec 8, 2024
4e7e907
remove covmat_is_close import from filter script.
peterkrack Dec 8, 2024
63c3f5b
fix in metadata
peterkrack Dec 8, 2024
0e6736a
clean up some files
peterkrack Dec 9, 2024
895ce1c
set kinematics_override back to dis_sqrt_scale to fix plots.
peterkrack Dec 11, 2024
c778061
Merge branch 'master' into reimplement-HERA-datasets
peterkrack Dec 11, 2024
b33f457
fix metadata file for HERA_NC_318GEV.
peterkrack Dec 19, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 82 additions & 0 deletions nnpdf_data/nnpdf_data/commondata/HERA_CC_318GEV/filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
from nnpdf_data.filter_utils.hera_utils import commondata, covmat_is_close
from pathlib import Path
from dataclasses import dataclass
import typing
from typing import List
import numpy as np
import pandas as pd
from os import PathLike
from fortranformat import FortranRecordWriter
import yaml

@dataclass
class hera_commondata(commondata):
    """Commondata object filled from a whitespace-separated HERA data table.

    The constructor sets the attributes ``central_values``, ``kinematics``,
    ``kinematic_quantities``, ``statistical_uncertainties``,
    ``systematic_uncertainties``, ``systypes``, ``process`` and
    ``dataset_name``.
    NOTE(review): how the ``commondata`` base class consumes these attributes
    is not visible in this file -- confirm against ``hera_utils``.
    """

    def __init__(self, filename: str | PathLike, dataset_name: str,
                 process: str):
        """Read the table at ``filename`` and convert its uncertainties.

        Every uncertainty in the table is scaled as
        ``central_value * value / 100``, i.e. the tabulated numbers are
        treated as percentages of the central value in the same row.

        Parameters
        ----------
        filename
            Path of the table. It must contain the columns ``Sigma``, ``x``,
            ``Q2``, ``y``, ``stat`` and ``tot_noproc``.
        dataset_name
            Stored unchanged in ``self.dataset_name``.
        process
            Stored unchanged in ``self.process``.
        """
        # Read the data.
        df = pd.read_table(Path(filename), sep=r"\s+")

        # Central values and kinematic quantities.
        self.central_values = df["Sigma"].to_numpy()
        self.kinematics = df[["x", "Q2", "y"]].to_numpy()
        self.kinematic_quantities = ["x", "Q2", "y"]

        # Statistical uncertainties.
        # Build a new array instead of writing into the result of
        # ``to_numpy()``: that array may be a view of the DataFrame (read-only
        # under pandas Copy-on-Write) and it keeps the column dtype, so an
        # integer column would truncate the scaled values.
        self.statistical_uncertainties = (
            self.central_values * df["stat"].to_numpy() / 100
        )

        # Systematic uncertainties.
        # Remove the column containing the total uncertainty excluding
        # procedural uncertainties, then take every column from position 5
        # onwards as a source of systematic uncertainty.
        df = df.drop(columns=["tot_noproc"])
        sys_uncert_col_names = list(df.columns)[5:]
        relative_systematics = df[sys_uncert_col_names].to_numpy()
        # Broadcast the central values over the columns so that each row is
        # scaled by its own central value.
        self.systematic_uncertainties = (
            self.central_values[:, np.newaxis] * relative_systematics / 100
        )

        # All uncertainties are treated as multiplicative. The ``uncor``
        # column is labelled UNCORR; every other column gets a label built
        # from its column name.
        self.systypes = [
            ("MULT", "UNCORR" if name == "uncor" else f"HC_{name}")
            for name in sys_uncert_col_names
        ]
        self.process = process
        self.dataset_name = dataset_name

def main():
    """Write the reimplemented commondata files and check the covmats.

    For the two raw tables (``CCem`` and ``CCep``) a ``hera_commondata``
    object is built and its data, kinematics and uncertainties files are
    written. Afterwards ``covmat_is_close`` is called for each observable to
    compare the "reimplemented" variant with the "legacy" one, and the
    outcome is printed.
    """
    print("Reimplementing the HERA commondata")

    # (raw table, dataset name, observable suffix) for each channel.
    channels = (
        ("./rawdata/HERA1+2_CCem.dat", "HERACOMBCCEM", "EM-SIGMARED"),
        ("./rawdata/HERA1+2_CCep.dat", "HERACOMBCCEP", "EP-SIGMARED"),
    )

    # First write all output files ...
    for raw_table, set_name, observable in channels:
        dataset = hera_commondata(raw_table, set_name, "DIS_CC")
        dataset.write_new_commondata(
            Path(f"data_reimplemented_{observable}.yaml"),
            Path(f"kinematics_reimplemented_{observable}.yaml"),
            Path(f"uncertainties_reimplemented_{observable}.yaml"),
        )

    # ... then check if the covariance matrix of the reimplemented data is
    # close to the legacy implementation.
    for _, _, observable in channels:
        full_name = f"HERA_CC_318GEV_{observable}"
        print(f"Check covariance matrix for {full_name}:")
        if covmat_is_close(full_name, "reimplemented", "legacy"):
            verdict = "Covmat is close."
        else:
            verdict = "Covmat is different."
        print(verdict)

# Run the filter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()



Loading
Loading