Skip to content

Commit

Permalink
add also [1] to selected_points + include endpoint if 1
Browse files Browse the repository at this point in the history
  • Loading branch information
RoyStegeman committed Oct 4, 2024
1 parent a89d1c7 commit 01b26a0
Showing 1 changed file with 30 additions and 19 deletions.
49 changes: 30 additions & 19 deletions n3fit/src/n3fit/scaler.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from typing import Callable, List, Optional
from typing import Callable, Optional

import numpy as np
import numpy.typing as npt
from scipy.interpolate import PchipInterpolator


def generate_scaler(
input_list: List[npt.NDArray], interpolation_points: Optional[int] = None
input_list: list[npt.NDArray], interpolation_points: Optional[int] = None
) -> Callable:
"""
Generate the scaler function that applies feature scaling to the input data.
Expand All @@ -27,43 +27,54 @@ def generate_scaler(
input_arr_size = input_arr.size

# Define an evenly spaced grid in the domain [0,1]
# force_set_smallest is used to make sure the smallest point included in the scaling is
# 1e-9, to prevent trouble when saving it to the LHAPDF grid
# force_set_smallest is used to make sure the smallest point included in the scaling is 1e-9, to
# prevent trouble when saving it to the LHAPDF grid
force_set_smallest = input_arr.min() > 1e-9
include_endpoint = (
1.0 in input_arr
) # if 1.0 is in the xgrid it should also be 1.0 in the output xgrid
if force_set_smallest:
new_xgrid = np.linspace(
start=1 / input_arr_size, stop=1.0, endpoint=False, num=input_arr_size
start=1 / input_arr_size, stop=1.0, endpoint=include_endpoint, num=input_arr_size
)
else:
new_xgrid = np.linspace(start=0, stop=1.0, endpoint=False, num=input_arr_size)
new_xgrid = np.linspace(start=0, stop=1.0, endpoint=include_endpoint, num=input_arr_size)

# When mapping the FK xgrids onto our new grid, we need to consider degeneracies among
# the x-values in the FK grids
# When mapping the FK xgrids onto our new grid, we need to consider degeneracies among the x-values
# in the FK grids
unique, counts = np.unique(input_arr, return_counts=True)
map_to_complete = []
map_to = []
for cumsum_ in np.cumsum(counts):
# Make sure to include the smallest new_xgrid value, such that we have a point at
# x<=1e-9
map_to_complete.append(new_xgrid[cumsum_ - counts[0]])
map_to_complete = np.array(map_to_complete)
map_from_complete = unique
map_to.append(new_xgrid[cumsum_ - counts[0]])
map_to = np.array(map_to)
map_from = unique

# If needed, set feature_scaling(x=1e-9)=0
if force_set_smallest:
map_from_complete = np.insert(map_from_complete, 0, 1e-9)
map_to_complete = np.insert(map_to_complete, 0, 0.0)
map_from = np.insert(map_from, 0, 1e-9)
map_to = np.insert(map_to, 0, 0.0)

# Select the indices of the points that will be used by the interpolator
onein = map_from_complete.size / (int(interpolation_points) - 1)
onein = map_from.size / (int(interpolation_points - 1))
selected_points = [round(i * onein - 1) for i in range(1, int(interpolation_points))]
if selected_points[0] != 0:
selected_points = [0] + selected_points
map_from = map_from_complete[selected_points]
map_from = np.log(map_from)
map_to = map_to_complete[selected_points]
selected_points += [1] # add also this one since 1e-9 is just an outlier

# make a mask of which points to keep
mask = np.zeros(len(map_from), dtype=bool)
mask[selected_points] = True

# apply the mask and log the input
masked_map_from = map_from[mask]
log_masked_map_from = np.log(masked_map_from)
masked_map_to = map_to[mask]

# construct the scaler
try:
scaler = PchipInterpolator(map_from, map_to)
scaler = PchipInterpolator(log_masked_map_from, masked_map_to)
except ValueError as e:
raise ValueError(
"interpolation_points is larger than the number of unique input x-values"
Expand Down

0 comments on commit 01b26a0

Please sign in to comment.