diff --git a/validphys2/src/validphys/hyperoptplot.py b/validphys2/src/validphys/hyperoptplot.py
index c09b42f041..86e2cb5fea 100644
--- a/validphys2/src/validphys/hyperoptplot.py
+++ b/validphys2/src/validphys/hyperoptplot.py
@@ -4,8 +4,8 @@
 """
 
 # Within this file you can find the "more modern" vp-integrated hyperopt stuff
-# and the older pre-vp hyperopt stuff, which can be considered deprecated but it is
-# still used for the plotting script
+# and the older pre-vp hyperopt stuff, which can be considered deprecated but
+# it is still used for the plotting script
 
 
 import glob
@@ -30,9 +30,40 @@
 regex_not_op = re.compile(r"[\w\.]+")
 
 
-class HyperoptTrial:
+def convert_string_to_numpy(matrix_string: str) -> np.ndarray:
+    """Process arrays given as strings and transform them into numpy arrays.
+
+    Args:
+        matrix_string (str): array as string.
+
+    Returns:
+        numpy array.
     """
-    Hyperopt trial class.
+    # Step 1: Remove newline characters and extra spaces
+    matrix_string = matrix_string.replace('\n', ' ')
+    matrix_string = ' '.join(matrix_string.split())
+
+    # Step 2: Split the string into rows
+    rows = matrix_string.split('] [')
+
+    # Prepare an empty list to store the parsed numbers
+    matrix_list = []
+
+    # Step 3: Process each row
+    for row in rows:
+        # Remove any brackets and split the row into numbers
+        cleaned_row = row.replace('[', '').replace(']', '')
+        row_numbers = cleaned_row.split(' ')
+        # Convert numbers to floats ('nan' becomes 0.0) and append to matrix_list
+        matrix_list.append([float(0) if num == 'nan' else float(num) for num in row_numbers if num])
+
+    # Step 4: Convert the list of lists to a NumPy array
+    return np.array(matrix_list)
+
+
+class HyperoptTrial:
+    """Hyperopt trial class.
+
 
     Makes the dictionary-like output of ``hyperopt`` into an object
     that can be easily managed
@@ -200,11 +231,27 @@ def link_trials(self, list_of_trials):
 }
 
 
+PRINTOUT_KEYS = [
+    "optimizer",
+    "learning_rate",
+    "clipnorm",
+    "nodes_per_layer",
+    "activation_per_layer",
+    "initializer",
+    "diff_losses",
+    "loss",
+    "loss_inverse_phi2",
+]
+
+
+# Number of standard deviations used to select models
+N_STD = 1.0
+
+
 # Parse the different sections of the hyperoptimization
 # this should talk with hyper_scan somehow?
 def parse_optimizer(trial):
-    """
-    This function parses the parameters that affect the optimization
+    """This function parses the parameters that affect the optimization.
 
     optimizer
     learning_rate (if it exists)
@@ -227,9 +274,16 @@
     return dict_out
 
 
-def parse_stopping(trial):
+def get_std(df, threshold, n_std=1.0):
+    """Given a dataframe of models, select the ones that pass the threshold
+    loss and return `n_std` times the standard deviation of their losses.
     """
-    This function parses the parameters that affect the stopping
+    valid_models = df.loc[df.loss <= threshold].loss.values
+    return n_std * np.std(valid_models)
+
+
+def parse_stopping(trial):
+    """This function parses the parameters that affect the stopping.
 
     epochs
     stopping_patience
@@ -252,8 +306,8 @@
 
 
 def parse_architecture(trial):
-    """
-    This function parses the family of parameters which regards the architecture of the NN
+    """This function parses the family of parameters which regards the architecture
+    of the NN.
 
     number_of_layers
     activation_per_layer
@@ -294,8 +348,7 @@
 
 
 def parse_statistics(trial):
-    """
-    Parse the statistical information of the trial
+    """Parse the statistical information of the trial.
 
     validation loss
     testing loss
@@ -312,20 +365,15 @@ def parse_statistics(trial):
     dict_out[KEYWORDS["vl"]] = validation_loss
     dict_out[KEYWORDS["tl"]] = testing_loss
-    # Kfolding information
-    # average = results["kfold_meta"]["hyper_avg"]
-    # std = results["kfold_meta"]["hyper_std"]
-    # dict_out["avg"] = average
-    # dict_out["std"] = std
     dict_out["hlosses"] = results["kfold_meta"]["hyper_losses"]
     dict_out["vlosses"] = results["kfold_meta"]["validation_losses"]
+    dict_out["hlosses_phi"] = results["kfold_meta"]["hyper_losses_phi"]
 
     return dict_out
 
 
 def parse_trial(trial):
-    """
-    Trials are very convoluted object, very branched inside
-    The goal of this function is to separate said branching so we can create hierarchies
+    """Trials are very convoluted objects, very branched inside. The goal of this
+    function is to separate said branching so we can create hierarchies.
     """
     # Is this a true trial?
     if trial["state"] != 2:
@@ -341,18 +389,11 @@
 
 
 def evaluate_trial(trial_dict, validation_multiplier, fail_threshold, loss_target):
+    """Read a trial dictionary, compute the true loss and decide whether the
+    run passes or not.
     """
-    Read a trial dictionary and compute the true loss and decide whether the run passes or not
-    """
-    test_f = 1.0 - validation_multiplier
     val_loss = float(trial_dict[KEYWORDS["vl"]])
 
-    if loss_target == "average":
-        test_loss = np.array(trial_dict["hlosses"]).mean()
-    elif loss_target == "best_worst":
-        test_loss = np.array(trial_dict["hlosses"]).max()
-    elif loss_target == "std":
-        test_loss = np.array(trial_dict["hlosses"]).std()
-    loss = val_loss * validation_multiplier + test_loss * test_f
+    test_loss = loss = trial_dict["loss"]
 
     if (
@@ -360,11 +401,21 @@ def evaluate_trial(trial_dict, validation_multiplier, fail_threshold, loss_targe
         or test_loss > fail_threshold
         or np.isnan(loss)
     ):
-        trial_dict["good"] = False
-        # Set the loss an order of magnitude above the result so it shows obviously on the plots
-        loss *= 10
+        # trial_dict["good"] = False
+        # # Set the loss an order of magnitude above the result so it shows obviously on the plots
+        # loss *= 10
+        pass
 
     trial_dict["loss"] = loss
+    trial_dict["diff_losses"] = abs(loss - val_loss)
+    if loss_target == "min_chi2_max_phi2":
+        phi_per_fold = np.array(trial_dict["hlosses_phi"])
+        phi2 = np.square(phi_per_fold).mean()
+        trial_dict["loss_phi2"] = phi2
+        trial_dict["loss_inverse_phi2"] = np.reciprocal(phi2)
+    else:
+        trial_dict["loss_phi2"] = pd.NA
+        trial_dict["loss_inverse_phi2"] = pd.NA
 
 
 def generate_dictionary(
@@ -375,15 +426,15 @@
     val_multiplier=0.5,
     fail_threshold=10.0,
 ):
-    """
-    Reads a json file and returns a list of dictionaries
+    """Reads a json file and returns a list of dictionaries.
 
     # Arguments:
         - `replica_path`: folder in which the tries.json file can be found
         - `starting_index`: if the trials are to be added to an already existing
                             set, make sure the id has the correct index!
        - `val_multiplier`: validation multipler
-       - `fail_threhsold`: threshold for the loss to consider a configuration as a failure
+       - `fail_threshold`: threshold for the loss to consider a configuration
+                           as a failure
     """
 
     filename = "{0}/{1}".format(replica_path, json_name)
@@ -407,9 +458,8 @@
 
 
 def filter_by_string(filter_string):
-    """
-    Receives a data_dict (a parsed trial) and a filter string,
-    returns True if the trial passes the filter
+    """Receives a data_dict (a parsed trial) and a filter string,
+    returns True if the trial passes the filter.
 
     filter string must have the format: key<operator>string
     where <operator> can be any of !=, =, >, <
@@ -536,26 +586,42 @@ def hyperopt_dataframe(commandline_args):
         dataframe = dataframe_raw
     else:
         dataframe = dataframe_raw[dataframe_raw["good"]]
+    dataframe = dataframe_raw
+    dataframe.dropna(subset=["loss_inverse_phi2"], inplace=True)
 
     # Now select the best one
     best_idx = dataframe.loss.idxmin()
-    best_trial_series = dataframe.loc[best_idx]
-    # Make into a dataframe and transpose or the plotting code will complain
-    best_trial = best_trial_series.to_frame().T
-
-    log.info("Best setup:")
-    with pd.option_context("display.max_rows", None, "display.max_columns", None):
-        log.info(best_trial)
+    best_models = pd.DataFrame()
+
+    if args.loss_target == "min_chi2_max_phi2":
+        minimum = dataframe.loss[best_idx]
+        # set std to the spread of chi2 among the replicas of the best fit
+        std = get_std(dataframe, args.threshold, n_std=N_STD)
+        lim_max = dataframe.loss[best_idx] + N_STD * std
+        # select rows with chi2 losses between the best point and lim_max
+        selected_chi2 = dataframe[(dataframe.loss >= minimum) & (dataframe.loss <= lim_max)]
+        # among the selected points, take the n lowest in 1/phi^2
+        selected_phi2 = selected_chi2.loss_inverse_phi2.nsmallest(args.max_phi2_n_models)
+        # find the location of these points in the dataframe
+        indices = dataframe[dataframe['loss_inverse_phi2'].isin(selected_phi2)].index
+        # save the best models
+        best_models = dataframe.loc[indices]
+        best_inverse_phi2_idx = best_models.loss_inverse_phi2.idxmin()
+        best_trial = best_models.loc[best_inverse_phi2_idx].to_frame().T
+    else:
+        best_trial_series = dataframe.loc[best_idx]
+        # Make into a dataframe and transpose
+        best_trial = best_trial_series.to_frame().T
 
-    return dataframe, best_trial
+    return dataframe, best_trial, best_models
 
 
 @table
 def best_setup(hyperopt_dataframe, hyperscan_config, commandline_args):
+    """Generates a clean table with information on the hyperparameter settings
+    of the best setup.
     """
-    Generates a clean table with information on the hyperparameter settings of the best setup.
- """ - _, best_trial = hyperopt_dataframe + _, best_trial, _ = hyperopt_dataframe best_idx = best_trial.index[0] best_trial = best_trial.rename(index={best_idx: "parameter settings"}) best_trial = best_trial[ @@ -572,20 +638,123 @@ def best_setup(hyperopt_dataframe, hyperscan_config, commandline_args): "initializer", "dropout", "loss", + "loss_inverse_phi2", ] ] - best_trial.insert(11, "loss type", commandline_args["loss_target"]) + best_trial.insert(12, "loss type", commandline_args["loss_target"]) best_trial = best_trial.T return best_trial +@figure +def plot_models_for_min_chi2_max_phi2(hyperopt_dataframe, commandline_args): + """Generates plot of the model index as a function of loss and 1/phi2.""" + if commandline_args["loss_target"] != 'min_chi2_max_phi2': + fig, _ = plotutils.subplots() + return fig + + dataframe, _, best_models = hyperopt_dataframe + fig = plot_models(dataframe, best_models, commandline_args) + return fig + + +def plot_models(dataframe, best_models, commandline_args): + """Model plot called by `plot_models_for_min_chi2_max_phi2`.""" + # Reset the index of the dataframe + dataframe = dataframe.reset_index(drop=True) + # dataframe.sort_values(by=["loss"], inplace=True) + + figs, ax = plotutils.subplots() + + # define x and y + x = dataframe.index + y = dataframe['loss'] + z = dataframe['loss_inverse_phi2'] + + indices = [] + # Iterate over each value in best_models['iteration'] + for value in best_models['iteration']: + # Find the index of this value in dataframe['iteration'] + index = dataframe[dataframe['iteration'] == value].index + # Append the index to the list + indices.extend(index) + + x_best_chi2_worst_phi = indices + y_best_chi2_worst_phi = best_models['loss'] + + # minimum chi2 point + min_idx = dataframe.loss.idxmin() + y_min = dataframe.loss[min_idx] + x_min = x[min_idx] + + # Print out the selected models + print(f"Selected {commandline_args['max_phi2_n_models']} Models:") + print(best_models[PRINTOUT_KEYS]) + + # some color definitions + color_phi = "C0" + color_min = "C2" + color_chi2 = "C1" + + ax.plot(x, y, marker="o", label='', linewidth=0, markersize=8, color=color_chi2) + + # highlight the min chi2 point + ax.scatter( + x_min, y_min, color=color_min, s=200, marker='o', alpha=0.5, label=r'Lowest $L(\theta)$' + ) + + # highlight all selected points from the best_chi2_worst_phi algorithm + ax.scatter( + x_best_chi2_worst_phi, + y_best_chi2_worst_phi, + color=color_phi, + s=200, + marker='o', + alpha=0.5, + label=r'10 Selected Models', + ) + + # highlight area for all x and y up to std + nstd = get_std(dataframe, commandline_args["threshold"], n_std=N_STD) + ax.axhspan( + y_min, + y_min + nstd, + color='yellow', + alpha=0.25, + label=r'Lowest $L(\theta)$ + 1$\sigma$ band', + ) + + ax.set_ylim(bottom=1.0, top=5.0) + ax.set_xlabel('trial') + ax.set_ylabel(r'Loss', color=color_chi2) + ax.tick_params(axis='y', colors=color_chi2) + ax.set_title('') + # add a legend with a solid white background + ax.legend(loc="upper left") + ax.grid(True) + + # PLOTTING OF PHI2 + ax2 = ax.twinx() + ax2.plot( + x, z, marker="s", label='', linewidth=0, markersize=6, color=color_phi, fillstyle='none' + ) + ax2.set_ylabel(r'$1/\left<\varphi^{2}\right>$', color=color_phi) + ax2.tick_params(axis='y', colors=color_phi) + # ax2.legend() + ax2.grid(False) + ax2.spines['right'].set_visible(True) + + figs.tight_layout() + return figs + + @table def hyperopt_table(hyperopt_dataframe): """ Generates a table containing complete information on all the tested setups that passed the 
     filters set in the commandline arguments.
     """
-    dataframe, _ = hyperopt_dataframe
+    dataframe, _, _ = hyperopt_dataframe
     dataframe.sort_values(by=["loss"], inplace=True)
     return dataframe
@@ -595,7 +764,7 @@ def plot_iterations(hyperopt_dataframe):
     """
     Generates a scatter plot of the loss as a function of the iteration index.
     """
-    dataframe, best_trial = hyperopt_dataframe
+    dataframe, best_trial, _ = hyperopt_dataframe
     fig = plot_scans(dataframe, best_trial, "iteration")
     return fig
 
@@ -605,7 +774,7 @@ def plot_optimizers(hyperopt_dataframe):
     """
     Generates a violin plot of the loss per optimizer.
     """
-    dataframe, best_trial = hyperopt_dataframe
+    dataframe, best_trial, _ = hyperopt_dataframe
     fig = plot_scans(dataframe, best_trial, "optimizer")
     return fig
 
@@ -615,7 +784,7 @@ def plot_clipnorm(hyperopt_dataframe, optimizer_name):
     """
     Generates a scatter plot of the loss as a function of the clipnorm for a given optimizer.
     """
-    dataframe, best_trial = hyperopt_dataframe
+    dataframe, best_trial, _ = hyperopt_dataframe
     filtered_dataframe = dataframe[dataframe.optimizer == optimizer_name]
     best_filtered_idx = filtered_dataframe.loss.idxmin()
     best_idx = best_trial.iteration.iloc[0]
@@ -632,7 +801,7 @@ def plot_learning_rate(hyperopt_dataframe, optimizer_name):
     """
     Generates a scatter plot of the loss as a function of the learning rate for a given optimizer.
     """
-    dataframe, best_trial = hyperopt_dataframe
+    dataframe, best_trial, _ = hyperopt_dataframe
     filtered_dataframe = dataframe[dataframe.optimizer == optimizer_name]
     best_filtered_idx = filtered_dataframe.loss.idxmin()
     best_idx = best_trial.iteration.iloc[0]
@@ -649,7 +818,7 @@ def plot_initializer(hyperopt_dataframe):
     """
     Generates a violin plot of the loss per initializer.
     """
-    dataframe, best_trial = hyperopt_dataframe
+    dataframe, best_trial, _ = hyperopt_dataframe
     fig = plot_scans(dataframe, best_trial, "initializer")
     return fig
 
@@ -659,7 +828,7 @@ def plot_epochs(hyperopt_dataframe):
     """
     Generates a scatter plot of the loss as a function the number of epochs.
     """
-    dataframe, best_trial = hyperopt_dataframe
+    dataframe, best_trial, _ = hyperopt_dataframe
     fig = plot_scans(dataframe, best_trial, "epochs")
     return fig
 
@@ -669,7 +838,7 @@ def plot_number_of_layers(hyperopt_dataframe):
     """
     Generates a violin plot of the loss as a function of the number of layers of the model.
     """
-    dataframe, best_trial = hyperopt_dataframe
+    dataframe, best_trial, _ = hyperopt_dataframe
     fig = plot_scans(dataframe, best_trial, "number_of_layers")
     return fig
 
@@ -679,7 +848,7 @@ def plot_activation_per_layer(hyperopt_dataframe):
     """
     Generates a violin plot of the loss per activation function.
""" - dataframe, best_trial = hyperopt_dataframe + dataframe, best_trial, _ = hyperopt_dataframe fig = plot_scans(dataframe, best_trial, "activation_per_layer") return fig @@ -727,22 +896,10 @@ def plot_scans(df, best_df, plotting_parameter, include_best=True): best_df[key] = original_best.apply(lambda x: x[0]) ordering_true, best_x = order_axis(df, best_df, key=key) ax = sns.violinplot( - x=key, - y=loss_k, - data=df, - ax=ax, - palette="Set2", - cut=0.0, - order=ordering_true, + x=key, y=loss_k, data=df, ax=ax, palette="Set2", cut=0.0, order=ordering_true ) ax = sns.stripplot( - x=key, - y=loss_k, - data=df, - ax=ax, - color="gray", - order=ordering_true, - alpha=0.4, + x=key, y=loss_k, data=df, ax=ax, color="gray", order=ordering_true, alpha=0.4 ) # Finally plot the "best" one, which will be first diff --git a/validphys2/src/validphys/hyperplottemplates/report.md b/validphys2/src/validphys/hyperplottemplates/report.md index c0f5a76ada..9d9db48d4d 100644 --- a/validphys2/src/validphys/hyperplottemplates/report.md +++ b/validphys2/src/validphys/hyperplottemplates/report.md @@ -3,6 +3,9 @@ ## Best Setup {@ best_setup @} +## Models +{@ plot_models_for_min_chi2_max_phi2 @} + ## Optimizers {@ plot_optimizers @} diff --git a/validphys2/src/validphys/scripts/vp_hyperoptplot.py b/validphys2/src/validphys/scripts/vp_hyperoptplot.py index 1faa875070..b55738e2e9 100644 --- a/validphys2/src/validphys/scripts/vp_hyperoptplot.py +++ b/validphys2/src/validphys/scripts/vp_hyperoptplot.py @@ -1,30 +1,35 @@ -from validphys.app import App -from validphys.loader import Loader, HyperscanNotFound -from validphys import hyperplottemplates -from reportengine.compat import yaml -import pwd +import logging import os +import pwd -import logging +from reportengine.compat import yaml +from validphys import hyperplottemplates +from validphys.app import App +from validphys.loader import HyperscanNotFound, Loader log = logging.getLogger(__name__) class HyperoptPlotApp(App): def add_positional_arguments(self, parser): - """ Wrapper around argumentparser """ + """Wrapper around argumentparser""" # Hyperopt settings parser.add_argument( - "hyperopt_name", - help="Folder of the hyperopt fit to generate the report for", + "hyperopt_name", help="Folder of the hyperopt fit to generate the report for" ) parser.add_argument( "-l", "--loss_target", help="Choice for the definition of target loss", - choices=['average', 'best_worst', 'std'], + choices=['average', 'best_worst', 'std', 'min_chi2_max_phi2'], default='average', ) + parser.add_argument( + "--max_phi2_n_models", + help="If --loss_target=min_chi2_max_phi2, outputs n models with the highest phi2.", + type=int, + default=1, + ) parser.add_argument( "-v", "--val_multiplier", @@ -73,16 +78,12 @@ def add_positional_arguments(self, parser): type=str, default=pwd.getpwuid(os.getuid())[4].replace(",", ""), ) - parser.add_argument( - "--title", - help="Add custom title to the report's meta data", - type=str, - ) + parser.add_argument("--title", help="Add custom title to the report's meta data", type=str) parser.add_argument( "--keywords", help="Add keywords to the report's meta data. The keywords must be provided as a list", type=list, - default=[] + default=[], ) args = parser.parse_args() @@ -127,7 +128,8 @@ def complete_mapping(self): "combine": args["combine"], "autofilter": args["autofilter"], "debug": args["debug"], - "loss_target": args["loss_target"] + "loss_target": args["loss_target"], + "max_phi2_n_models": args["max_phi2_n_models"], } try: