Skip to content

Commit

Permalink
Merge pull request #2139 from NNPDF/nnpdfpol-hyperopt
Browse files Browse the repository at this point in the history
Add NNPDFpol2.0 hyperopt card
  • Loading branch information
scarlehoff authored Aug 14, 2024
2 parents d853fdc + d075989 commit 9584708
Show file tree
Hide file tree
Showing 4 changed files with 260 additions and 9 deletions.
241 changes: 241 additions & 0 deletions n3fit/runcards/examples/nnpdfpol20_hyperopt.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,241 @@
############################################################
# Configuration file for N3FIT Polarized Fits hyperopt scan
############################################################
description: NNPDFpol2.0 NNLO Hyperoptimization Card
############################################################

############################################################
dataset_inputs:
# Polarized DIS datasets
- {dataset: COMPASS15_NC_NOTFIXED_MUP_G1, frac: 0.60, cfac: [NRM]}
- {dataset: COMPASS15_NC_NOTFIXED_MUD_G1, frac: 0.60, cfac: [NRM]}
- {dataset: E142_NC_NOTFIXED_EN_G1, frac: 0.60, cfac: [NRM]}
- {dataset: E143_NC_NOTFIXED_EP_G1, frac: 0.60, cfac: [NRM]}
- {dataset: E143_NC_NOTFIXED_ED_G1, frac: 0.60, cfac: [NRM]}
- {dataset: E154_NC_9GEV_EN_G1, frac: 0.60, cfac: [NRM]}
- {dataset: E155_NC_9GEV_EN_G1F1RATIO, frac: 0.60, cfac: []}
- {dataset: E155_NC_9GEV_EP_G1F1RATIO, frac: 0.60, cfac: []}
- {dataset: EMC_NC_NOTFIXED_MUP_G1, frac: 0.60, cfac: [NRM]}
- {dataset: HERMES_NC_7GEV_ED_G1, frac: 0.60, cfac: [NRM]}
- {dataset: HERMES_NC_7GEV_EP_G1, frac: 0.60, cfac: [NRM]}
- {dataset: HERMES97_NC_7GEV_EN_G1, frac: 0.60, cfac: [NRM]}
- {dataset: JLABE06_NC_3GEV_EN_G1F1RATIO, frac: 0.60, cfac: []}
- {dataset: JLABEG1B_NC_NOTFIXED_EP_G1F1RATIO, frac: 0.60, cfac: []}
- {dataset: JLABEG1B_NC_NOTFIXED_ED_G1F1RATIO, frac: 0.60, cfac: []}
- {dataset: JLABE97_NC_NOTFIXED_EN_G1, frac: 0.60, cfac: [NRM]}
- {dataset: JLABE99_NC_3GEV_EN_G1F1RATIO, frac: 0.60, cfac: []}
- {dataset: JLABEG1DVCS_NC_5GEV_ED_G1F1RATIO, frac: 0.60, cfac: []}
- {dataset: JLABEG1DVCS_NC_3GEV_EP_G1F1RATIO, frac: 0.60, cfac: []}
- {dataset: SMC_NC_NOTFIXED_MUD_G1, frac: 0.60, cfac: [NRM]}
- {dataset: SMC_NC_NOTFIXED_MUP_G1, frac: 0.60, cfac: [NRM]}
- {dataset: SMCSX_NC_24GEV_MUD_G1F1RATIO, frac: 0.60, cfac: []}
- {dataset: SMCSX_NC_17GEV_MUP_G1F1RATIO, frac: 0.60, cfac: []}
# Polarized DY datasets
- {dataset: STAR_WMWP_510GEV_WM-AL, frac: 0.60, cfac: []}
- {dataset: STAR_WMWP_510GEV_WP-AL, frac: 0.60, cfac: []}
# Polarized Jet datasets
- {dataset: STAR_2005_1JET_200GEV_ALL, frac: 0.60, cfac: []}
- {dataset: STAR_2006_1JET_200GEV_ALL, frac: 0.60, cfac: []}
- {dataset: STAR_2009_1JET_200GEV_CC-ALL, frac: 0.60, cfac: []}
- {dataset: STAR_2009_1JET_200GEV_CF-ALL, frac: 0.60, cfac: []}
- {dataset: STAR_2012_1JET_510GEV_ALL, frac: 0.60, cfac: []}
- {dataset: STAR_2013_1JET_510GEV_ALL, frac: 0.60, cfac: []}
- {dataset: STAR_2015_1JET_200GEV_CC-ALL, frac: 0.60, cfac: []}
- {dataset: STAR_2015_1JET_200GEV_CF-ALL, frac: 0.60, cfac: []}
- {dataset: PHENIX_1JET_200GEV_ALL, frac: 0.60, cfac: []}
# Polarized Dijet datasets
- {dataset: STAR_2009_2JET_200GEV_A-ALL, frac: 0.60, cfac: []}
- {dataset: STAR_2009_2JET_200GEV_B-ALL, frac: 0.60, cfac: []}
- {dataset: STAR_2009_2JET_200GEV_C-ALL, frac: 0.60, cfac: []}
- {dataset: STAR_2009_2JET_MIDRAP_200GEV_OS-ALL, frac: 0.60, cfac: []}
- {dataset: STAR_2009_2JET_MIDRAP_200GEV_SS-ALL, frac: 0.60, cfac: []}
- {dataset: STAR_2012_2JET_510GEV_A-ALL, frac: 0.60, cfac: []}
- {dataset: STAR_2012_2JET_510GEV_B-ALL, frac: 0.60, cfac: []}
- {dataset: STAR_2012_2JET_510GEV_C-ALL, frac: 0.60, cfac: []}
- {dataset: STAR_2012_2JET_510GEV_D-ALL, frac: 0.60, cfac: []}
- {dataset: STAR_2013_2JET_510GEV_A-ALL, frac: 0.60, cfac: []}
- {dataset: STAR_2013_2JET_510GEV_B-ALL, frac: 0.60, cfac: []}
- {dataset: STAR_2013_2JET_510GEV_C-ALL, frac: 0.60, cfac: []}
- {dataset: STAR_2013_2JET_510GEV_D-ALL, frac: 0.60, cfac: []}
- {dataset: STAR_2015_2JET_MIDRAP_200GEV_OS-ALL, frac: 0.60, cfac: []}
- {dataset: STAR_2015_2JET_MIDRAP_200GEV_SS-ALL, frac: 0.60, cfac: []}

############################################################
datacuts:
t0pdfset: NNPDFpol11_100 # PDF set to generate t0 covmat
unpolarized_bc: NNPDF40_nnlo_pch_as_01180
q2min: 1.00 # Q2 minimum
w2min: 4.00 # W2 minimum

############################################################
# Define the unpolarized PDF set to be used as BC for POS
positivity_bound:
unpolarized_bc: NNPDF40_nnlo_pch_as_01180
n_std: 1.00 # Standard Deviation to be added as Error

############################################################
theory:
theoryid: 41_100_010

############################################################
hyperscan_config:
architecture:
initializers: ['glorot_normal', 'glorot_uniform']
n_layers: [3]
min_units: 10
max_units: 45
activations: ['tanh', 'sigmoid']
optimizer:
- optimizer_name: 'Nadam'
learning_rate:
sampling: log
min: 1e-4
max: 1e-2
clipnorm:
sampling: log
min: 1e-7
max: 1e-4
- optimizer_name: 'Adam'
learning_rate:
sampling: log
min: 1e-4
max: 1e-2
clipnorm:
sampling: log
min: 1e-7
max: 1e-4
stopping:
min_epochs: 15e3
max_epochs: 45e3
min_patience: 0.1
max_patience: 0.3
positivity:
min_initial: 1
max_initial: 100
integrability:
min_initial: 2
max_initial: 50

############################################################
kfold:
loss_type: chi2
replica_statistic: average_best
fold_statistic: average
penalties_in_loss: True
penalties:
- saturation
- patience
- integrability
threshold: 10
partitions:
- datasets:
- COMPASS15_NC_NOTFIXED_MUP_G1
- E142_NC_NOTFIXED_EN_G1
- E155_NC_9GEV_EN_G1F1RATIO
- HERMES_NC_7GEV_ED_G1
- STAR_2005_1JET_200GEV_ALL
- STAR_2012_1JET_510GEV_ALL
- STAR_2009_2JET_200GEV_A-ALL
- STAR_2009_2JET_MIDRAP_200GEV_SS-ALL
- STAR_2012_2JET_510GEV_A-ALL
- STAR_2013_2JET_510GEV_D-ALL
- STAR_WMWP_510GEV_WM-AL # W-
- datasets:
- COMPASS15_NC_NOTFIXED_MUD_G1
- E155_NC_9GEV_EP_G1F1RATIO
- EMC_NC_NOTFIXED_MUP_G1
- HERMES_NC_7GEV_EP_G1
- STAR_2006_1JET_200GEV_ALL
- STAR_2013_1JET_510GEV_ALL
- STAR_2009_2JET_200GEV_B-ALL
- STAR_2012_2JET_510GEV_B-ALL
- STAR_2013_2JET_510GEV_C-ALL
- STAR_2015_2JET_MIDRAP_200GEV_OS-ALL
- STAR_WMWP_510GEV_WP-AL # W+
- datasets:
- E143_NC_NOTFIXED_EP_G1
- HERMES97_NC_7GEV_EN_G1
- SMC_NC_NOTFIXED_MUD_G1
- SMCSX_NC_17GEV_MUP_G1F1RATIO
- STAR_2009_1JET_200GEV_CC-ALL
- STAR_2015_1JET_200GEV_CC-ALL
- STAR_2009_2JET_200GEV_C-ALL
- STAR_2012_2JET_510GEV_C-ALL
- STAR_2013_2JET_510GEV_B-ALL
- STAR_2015_2JET_MIDRAP_200GEV_SS-ALL
- STAR_WMWP_510GEV_WM-AL # W-
- datasets:
- E143_NC_NOTFIXED_ED_G1
- E154_NC_9GEV_EN_G1
- SMC_NC_NOTFIXED_MUP_G1
- SMCSX_NC_24GEV_MUD_G1F1RATIO
- STAR_2009_1JET_200GEV_CF-ALL
- STAR_2015_1JET_200GEV_CF-ALL
- STAR_2009_2JET_MIDRAP_200GEV_OS-ALL
- STAR_2012_2JET_510GEV_D-ALL
- STAR_2013_2JET_510GEV_A-ALL
- PHENIX_1JET_200GEV_ALL
- STAR_WMWP_510GEV_WP-AL # W+

############################################################
trvlseed: 14613416
nnseed: 1865683875
mcseed: 746898422
save: false
genrep: true

parameters:
nodes_per_layer: [25, 20, 6]
activation_per_layer: [tanh, tanh, linear]
initializer: glorot_normal
optimizer:
clipnorm: 6.073e-6
learning_rate: 2.621e-3
optimizer_name: Nadam
epochs: 20000
positivity:
initial: 1
multiplier:
threshold: 1e-7
integrability:
initial: 1
multiplier:
threshold: 1e-2
stopping_patience: 0.1
layer_type: dense
dropout: 0.0
threshold_chi2: 3.5

fitting:
fitbasis: POLARIZED_EVOL_CMP
sum_rules: TSR
savepseudodata: false
basis:
- {fl: sng, trainable: false, smallx: [1.094, 1.118], largex: [1.46, 3.003]}
- {fl: g, trainable: false, smallx: [0.8189, 1.844], largex: [2.591, 5.697]}
- {fl: t3, trainable: false, smallx: [-0.4401, 0.9163], largex: [1.773, 3.333]}
- {fl: t8, trainable: false, smallx: [0.5852, 0.8537], largex: [1.533, 3.436]}
- {fl: v, trainable: false, smallx: [0.472, 0.9576], largex: [1.571, 3.559]}
- {fl: v3, trainable: false, smallx: [0.07483, 0.9501], largex: [1.514, 3.467]}

###########################################################
positivity:
posdatasets:
- {dataset: NNPDF_POS_2P24GEV_XGL-POLARIZED, maxlambda: 1e8}
- {dataset: NNPDF_POS_2P24GEV_XDQ-POLARIZED, maxlambda: 1e8}
- {dataset: NNPDF_POS_2P24GEV_XDB-POLARIZED, maxlambda: 1e8}
- {dataset: NNPDF_POS_2P24GEV_XUQ-POLARIZED, maxlambda: 1e8}
- {dataset: NNPDF_POS_2P24GEV_XUB-POLARIZED, maxlambda: 1e8}
- {dataset: NNPDF_POS_2P24GEV_XSQSUM-POLARIZED, maxlambda: 1e8}

############################################################
integrability:
integdatasets:
- {dataset: NNPDF_INTEG_1GEV_XGL-POLARIZED, maxlambda: 1e2}
- {dataset: NNPDF_INTEG_1GEV_XSIGMA-POLARIZED, maxlambda: 1e2}
- {dataset: NNPDF_INTEG_1GEV_XV-POLARIZED, maxlambda: 1e2}
- {dataset: NNPDF_INTEG_1GEV_XV3-POLARIZED, maxlambda: 1e2}

############################################################
debug: false
parallel_models: true
14 changes: 8 additions & 6 deletions n3fit/src/n3fit/hyper_optimization/rewards.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ def compute_loss(
self,
penalties: dict[str, np.ndarray],
experimental_loss: np.ndarray,
pdf_model: MetaModel,
pdf_object: N3PDF,
experimental_data: list[DataGroupSpec],
fold_idx: int = 0,
) -> float:
Expand All @@ -214,8 +214,8 @@ def compute_loss(
as defined in 'penalties.py' and instantiated within :class:`~n3fit.model_trainer.ModelTrainer`.
experimental_loss: NDArray(replicas)
Experimental loss for each replica.
pdf_model: :class:`n3fit.backends.MetaModel`
N3fitted meta-model.
pdf_object: :class:`n3fit.vpinterface.N3PDF`
N3fitted PDF
experimental_data: List[validphys.core.DataGroupSpec]
List of tuples containing `validphys.core.DataGroupSpec` instances for each group data set
fold_idx: int
Expand All @@ -233,18 +233,20 @@ def compute_loss(
>>> import numpy as np
>>> from n3fit.hyper_optimization.rewards import HyperLoss
>>> from n3fit.model_gen import generate_pdf_model
>>> from n3fit.vpinterface import N3PDF
>>> from validphys.loader import Loader
>>> hyper = HyperLoss(loss_type="chi2", replica_statistic="average", fold_statistic="average")
>>> penalties = {'saturation': np.array([1.0, 2.0]), 'patience': np.array([3.0, 4.0]), 'integrability': np.array([5.0, 6.0]),}
>>> experimental_loss = np.array([0.1, 0.2])
>>> ds = Loader().check_dataset("NMC_NC_NOTFIXED_P_EM-SIGMARED", theoryid=399, cuts="internal")
>>> ds = Loader().check_dataset("NMC_NC_NOTFIXED_P_EM-SIGMARED", variant="legacy", theoryid=399, cuts="internal")
>>> experimental_data = [Loader().check_experiment("My DataGroupSpec", [ds])]
>>> fake_fl = [{'fl' : i, 'largex' : [0,1], 'smallx': [1,2]} for i in ['u', 'ubar', 'd', 'dbar', 'c', 'g', 's', 'sbar']]
>>> pdf_model = generate_pdf_model(nodes=[8], activations=['linear'], seed=0, num_replicas=2, flav_info=fake_fl, fitbasis="FLAVOUR")
>>> loss = hyper.compute_loss(penalties, experimental_loss, pdf_model, experimental_data)
>>> pdf = N3PDF(pdf_model.split_replicas())
>>> loss = hyper.compute_loss(penalties, experimental_loss, pdf, experimental_data)
"""
# calculate phi for a given k-fold using vpinterface and validphys
phi_per_fold = compute_phi(N3PDF(pdf_model.split_replicas()), experimental_data)
phi_per_fold = compute_phi(pdf_object, experimental_data)

# update hyperopt metrics
# these are saved in the phi_vector and chi2_matrix attributes, excluding penalties
Expand Down
8 changes: 6 additions & 2 deletions n3fit/src/n3fit/model_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1007,11 +1007,15 @@ def hyperparametrizable(self, params):
# containing only exp datasets within the held out fold
experimental_data = self._filter_datagroupspec(partition["datasets"])

vplike_pdf = N3PDF(pdf_model.split_replicas())
if self.boundary_condition is not None:
vplike_pdf.register_boundary(self.boundary_condition["unpolarized_bc"])

# Compute per replica hyper losses
hyper_loss = self._hyper_loss.compute_loss(
penalties=penalties,
experimental_loss=experimental_loss,
pdf_model=pdf_model,
pdf_object=vplike_pdf,
experimental_data=experimental_data,
fold_idx=k,
)
Expand All @@ -1025,7 +1029,7 @@ def hyperparametrizable(self, params):
]
trvl_data = self._filter_datagroupspec(trvl_exp_names)
# evaluate phi on training/validation exp set
trvl_phi = compute_phi(N3PDF(pdf_model.split_replicas()), trvl_data)
trvl_phi = compute_phi(vplike_pdf, trvl_data)

# Now save all information from this fold
l_hyper.append(hyper_loss)
Expand Down
6 changes: 5 additions & 1 deletion n3fit/src/n3fit/tests/test_hyperopt.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Test hyperoptimization features
"""

import json
import pathlib
import shutil
Expand All @@ -14,6 +15,7 @@

from n3fit.hyper_optimization.rewards import HyperLoss
from n3fit.model_gen import generate_pdf_model
from n3fit.vpinterface import N3PDF
from validphys.loader import Loader


Expand Down Expand Up @@ -75,8 +77,9 @@ def test_compute_per_fold_loss(loss_type, replica_statistic, expected_per_fold_l
loss = HyperLoss(loss_type=loss_type, replica_statistic=replica_statistic)

# calculate statistic loss for one specific fold
pdf_object = N3PDF(pdf_model.split_replicas())
predicted_per_fold_loss = loss.compute_loss(
penalties, experimental_loss, pdf_model, experimental_data
penalties, experimental_loss, pdf_object, experimental_data
)

# Assert
Expand Down Expand Up @@ -173,6 +176,7 @@ def test_restart_from_pickle(tmp_path):
assert restart_json[i]['misc']['idxs'] == direct_json[i]['misc']['idxs']
# Note that it doesn't check the final loss of the second trial


@pytest.mark.linux
def test_parallel_hyperopt(tmp_path):
"""Ensure that the parallel implementation of hyperopt with MongoDB works as expected."""
Expand Down

0 comments on commit 9584708

Please sign in to comment.