Skip to content

Commit

Permalink
add also [1] to selected_points + include endpoint if 1
Browse files Browse the repository at this point in the history
  • Loading branch information
RoyStegeman committed Oct 4, 2024
1 parent a89d1c7 commit 01b26a0
Showing 1 changed file with 30 additions and 19 deletions.
49 changes: 30 additions & 19 deletions n3fit/src/n3fit/scaler.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from typing import Callable, List, Optional
from typing import Callable, Optional

import numpy as np
import numpy.typing as npt
from scipy.interpolate import PchipInterpolator


def generate_scaler(
input_list: List[npt.NDArray], interpolation_points: Optional[int] = None
input_list: list[npt.NDArray], interpolation_points: Optional[int] = None
) -> Callable:
"""
Generate the scaler function that applies feature scaling to the input data.
Expand All @@ -27,43 +27,54 @@ def generate_scaler(
input_arr_size = input_arr.size

# Define an evenly spaced grid in the domain [0,1]
# force_set_smallest is used to make sure the smallest point included in the scaling is
# 1e-9, to prevent trouble when saving it to the LHAPDF grid
# force_set_smallest is used to make sure the smallest point included in the scaling is 1e-9, to
# prevent trouble when saving it to the LHAPDF grid
force_set_smallest = input_arr.min() > 1e-9
include_endpoint = (
1.0 in input_arr
) # if 1.0 is in the xgrid it should also be 1.0 in the output xgrid
if force_set_smallest:
new_xgrid = np.linspace(
start=1 / input_arr_size, stop=1.0, endpoint=False, num=input_arr_size
start=1 / input_arr_size, stop=1.0, endpoint=include_endpoint, num=input_arr_size
)
else:
new_xgrid = np.linspace(start=0, stop=1.0, endpoint=False, num=input_arr_size)
new_xgrid = np.linspace(start=0, stop=1.0, endpoint=include_endpoint, num=input_arr_size)

# When mapping the FK xgrids onto our new grid, we need to consider degeneracies among
# the x-values in the FK grids
# When mapping the FK xgrids onto our new grid, we need to consider degeneracies among the x-values
# in the FK grids
unique, counts = np.unique(input_arr, return_counts=True)
map_to_complete = []
map_to = []
for cumsum_ in np.cumsum(counts):
# Make sure to include the smallest new_xgrid value, such that we have a point at
# x<=1e-9
map_to_complete.append(new_xgrid[cumsum_ - counts[0]])
map_to_complete = np.array(map_to_complete)
map_from_complete = unique
map_to.append(new_xgrid[cumsum_ - counts[0]])
map_to = np.array(map_to)
map_from = unique

# If needed, set feature_scaling(x=1e-9)=0
if force_set_smallest:
map_from_complete = np.insert(map_from_complete, 0, 1e-9)
map_to_complete = np.insert(map_to_complete, 0, 0.0)
map_from = np.insert(map_from, 0, 1e-9)
map_to = np.insert(map_to, 0, 0.0)

# Select the indices of the points that will be used by the interpolator
onein = map_from_complete.size / (int(interpolation_points) - 1)
onein = map_from.size / (int(interpolation_points - 1))
selected_points = [round(i * onein - 1) for i in range(1, int(interpolation_points))]
if selected_points[0] != 0:
selected_points = [0] + selected_points
map_from = map_from_complete[selected_points]
map_from = np.log(map_from)
map_to = map_to_complete[selected_points]
selected_points += [1] # add also this one since 1e-9 is just an outlier

# make a mask of which points to keep
mask = np.zeros(len(map_from), dtype=bool)
mask[selected_points] = True

# apply the mask and log the input
masked_map_from = map_from[mask]
log_masked_map_from = np.log(masked_map_from)
masked_map_to = map_to[mask]

# construct the scaler
try:
scaler = PchipInterpolator(map_from, map_to)
scaler = PchipInterpolator(log_masked_map_from, masked_map_to)
except ValueError as e:
raise ValueError(
"interpolation_points is larger than the number of unique input x-values"
Expand Down

0 comments on commit 01b26a0

Please sign in to comment.