diff --git a/src/mitim_modules/freegsu/FREEGSUmain.py b/src/mitim_modules/freegsu/FREEGSUmain.py index 0830a019..adb281a9 100644 --- a/src/mitim_modules/freegsu/FREEGSUmain.py +++ b/src/mitim_modules/freegsu/FREEGSUmain.py @@ -33,7 +33,7 @@ def default_namelist(optimization_options): class freegsu(STRATEGYtools.opt_evaluator): - def __init__(self, folder, namelist=None, function_parameters={}): + def __init__(self, folder, namelist=None, function_parameters={}, **kwargs): print( "\n-----------------------------------------------------------------------------------------" ) @@ -49,6 +49,7 @@ def __init__(self, folder, namelist=None, function_parameters={}): folder, namelist=namelist, default_namelist_function=default_namelist if (namelist is None) else None, + **kwargs ) def prep( diff --git a/src/mitim_modules/maestro/utils/PORTALSbeat.py b/src/mitim_modules/maestro/utils/PORTALSbeat.py index 57b14aeb..217fc5d8 100644 --- a/src/mitim_modules/maestro/utils/PORTALSbeat.py +++ b/src/mitim_modules/maestro/utils/PORTALSbeat.py @@ -253,7 +253,7 @@ def _inform(self, use_previous_residual = True, use_previous_surrogate_data = Tr if use_previous_surrogate_data and ('portals_surrogate_data_file' in self.maestro_instance.parameters_trans_beat): if 'surrogateOptions' not in self.optimization_options: self.optimization_options['surrogateOptions'] = {} - self.optimization_options['surrogateOptions']["extrapointsFile"] = self.maestro_instance.parameters_trans_beat['portals_surrogate_data_file'] + self.optimization_options['surrogateOptions']["add_data_from_file"] = self.maestro_instance.parameters_trans_beat['portals_surrogate_data_file'] self.folder_starting_point = self.maestro_instance.parameters_trans_beat['portals_last_run_folder'] @@ -295,7 +295,7 @@ def _inform(self, use_previous_residual = True, use_previous_surrogate_data = Tr # In the situation where the last radial location moves, I cannot reuse that surrogate data if last_radial_location_moved and reusing_surrogate_data: print('\t\t- Last radial location was moved, so surrogate data will not be reused for that specific location') - self.optimization_options['surrogateOptions']["extrapointsModelsAvoidContent"] = ['Tar',f'_{len(self.MODELparameters[strKeys])}'] + self.optimization_options['surrogateOptions']["add_data_to_modelsAvoidContent"] = ['Tar',f'_{len(self.MODELparameters[strKeys])}'] self.try_flux_match_only_for_first_point = False def _inform_save(self): diff --git a/src/mitim_modules/portals/PORTALSmain.py b/src/mitim_modules/portals/PORTALSmain.py index 320befdd..2570d452 100644 --- a/src/mitim_modules/portals/PORTALSmain.py +++ b/src/mitim_modules/portals/PORTALSmain.py @@ -63,8 +63,8 @@ def default_namelist(optimization_options, CGYROrun=False): } # Surrogate - optimization_options["surrogateOptions"]["selectSurrogate"] = partial( - PORTALStools.selectSurrogate, CGYROrun=CGYROrun + optimization_options["surrogateOptions"]["selectSurrogates"] = partial( + PORTALStools.selectSurrogates, CGYROrun=CGYROrun ) optimization_options["surrogateOptions"]["ensure_within_bounds"] = True @@ -86,7 +86,10 @@ def __init__( self, folder, # Folder where the PORTALS workflow will be run namelist=None, # If None, default namelist will be used. 
If not None, it will be read and used - TensorsType=torch.double, # Type of tensors to be used (torch.float, torch.double) + tensor_opts = { + "dtype": torch.double, + "device": torch.device("cpu"), + }, CGYROrun=False, # If True, use CGYRO defaults for best optimization practices portals_transformation_variables = None, # If None, use defaults for both main and trace portals_transformation_variables_trace = None, @@ -109,7 +112,7 @@ def __init__( super().__init__( folder, namelist=namelist, - TensorsType=TensorsType, + tensor_opts=tensor_opts, default_namelist_function=( partial(default_namelist, CGYROrun=CGYROrun) if (namelist is None) @@ -186,8 +189,6 @@ def __init__( --------------------------------------------- """ - - ( portals_transformation_variables, portals_transformation_variables_trace, @@ -315,7 +316,7 @@ def prep( limitsAreRelative=limitsAreRelative, cold_start=cold_start, hardGradientLimits=hardGradientLimits, - dfT=self.dfT, + tensor_opts = self.tensor_opts, seedInitial=seedInitial, checkForSpecies=askQuestions, ModelOptions=ModelOptions, @@ -335,14 +336,14 @@ def prep( # Ignore targets in surrogate_data.csv # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - if 'extrapointsModels' not in self.optimization_options['surrogateOptions'] or \ - self.optimization_options['surrogateOptions']['extrapointsModels'] is None or \ - len(self.optimization_options['surrogateOptions']['extrapointsModels'])==0: + if 'add_data_to_models' not in self.optimization_options['surrogateOptions'] or \ + self.optimization_options['surrogateOptions']['add_data_to_models'] is None or \ + len(self.optimization_options['surrogateOptions']['add_data_to_models'])==0: self._define_reuse_models() else: - print("\t- extrapointsModels already defined, not changing") + print("\t- add_data_to_models already defined, not changing") def _define_reuse_models(self): ''' @@ -352,21 +353,21 @@ def _define_reuse_models(self): '_5' to avoid reusing position 5 ''' - self.optimization_options['surrogateOptions']['extrapointsModels'] = [] + self.optimization_options['surrogateOptions']['add_data_to_models'] = [] # Define avoiders - if self.optimization_options['surrogateOptions']['extrapointsModelsAvoidContent'] is None: - self.optimization_options['surrogateOptions']['extrapointsModelsAvoidContent'] = ['Tar'] + if self.optimization_options['surrogateOptions']['add_data_to_modelsAvoidContent'] is None: + self.optimization_options['surrogateOptions']['add_data_to_modelsAvoidContent'] = ['Tar'] - # Define extrapointsModels + # Define add_data_to_models for key in self.surrogate_parameters['surrogate_transformation_variables_lasttime'].keys(): add_key = True - for avoid in self.optimization_options['surrogateOptions']['extrapointsModelsAvoidContent']: + for avoid in self.optimization_options['surrogateOptions']['add_data_to_modelsAvoidContent']: if avoid in key: add_key = False break if add_key: - self.optimization_options['surrogateOptions']['extrapointsModels'].append(key) + self.optimization_options['surrogateOptions']['add_data_to_models'].append(key) def run(self, paramsfile, resultsfile): # Read what PORTALS sends diff --git a/src/mitim_modules/portals/PORTALStools.py b/src/mitim_modules/portals/PORTALStools.py index 0308649a..52662332 100644 --- a/src/mitim_modules/portals/PORTALStools.py +++ b/src/mitim_modules/portals/PORTALStools.py @@ -7,24 +7,48 @@ from mitim_tools.misc_tools.LOGtools import printMsg as print from IPython import embed -def selectSurrogate(output, 
surrogateOptions, CGYROrun=False): +def selectSurrogates(outputs, surrogateOptions, CGYROrun=False): + ''' + This divides potentially different outputs into different + surrogates to be joined with ModelList + ''' + + # Find transition to Targets + for iTar, output in enumerate(outputs): + if output[2:5] == "Tar": + break - print( - f'\t- Selecting surrogate options for "{output}" to be run' - ) + # Find transition to last location of transport + for iTra, output in enumerate(outputs): + if output.split('_')[-1] == outputs[iTar-1].split('_')[-1]: + break - if output is not None: - # If it's a target, just linear - if output[2:5] == "Tar": - surrogateOptions["TypeMean"] = 1 - surrogateOptions["TypeKernel"] = 2 # Constant kernel - # If it's not, stndard - else: - surrogateOptions["TypeMean"] = 2 # Linear in gradients, constant in rest - surrogateOptions["TypeKernel"] = 1 # RBF - # surrogateOptions['ExtraNoise'] = True + surrogateOptions_dict = {} + + for i in range(len(outputs)): + # Turbulent and Neoclassical at inner locations + surrogateOptions_dict[i+1] = copy.deepcopy(surrogateOptions) + surrogateOptions_dict[i+1]["TypeMean"] = 1 # Linear + surrogateOptions_dict[i+1]["TypeKernel"] = 1 # RBF + + + + # # Turbulent and Neoclassical at inner locations + # surrogateOptions_dict[iTra] = copy.deepcopy(surrogateOptions) + # surrogateOptions_dict[iTra]["TypeMean"] = 1 # Linear + # surrogateOptions_dict[iTra]["TypeKernel"] = 1 # RBF - return surrogateOptions + # # Turbulent and Neoclassical at outer location (generally less variables) + # surrogateOptions_dict[iTar] = copy.deepcopy(surrogateOptions) + # surrogateOptions_dict[iTar]["TypeMean"] = 1 # Linear + # surrogateOptions_dict[iTar]["TypeKernel"] = 1 # RBF + + # # Targets (If it's a target, just linear) + # surrogateOptions_dict[len(outputs)] = copy.deepcopy(surrogateOptions) + # surrogateOptions_dict[len(outputs)]["TypeMean"] = 1 # Linear + # surrogateOptions_dict[len(outputs)]["TypeKernel"] = 2 # Constant kernel + + return surrogateOptions_dict def default_portals_transformation_variables(additional_params = []): """ @@ -91,7 +115,7 @@ def default_portals_transformation_variables(additional_params = []): return portals_transformation_variables, portals_transformation_variables_trace -def produceNewInputs(Xorig, output, surrogate_parameters, surrogate_transformation_variables): +def input_transformation_portals(Xorig, output, surrogate_parameters, surrogate_transformation_variables): """ - Xorig will be a tensor (batch1...N,dim) unnormalized (with or without gradients). @@ -123,9 +147,7 @@ def produceNewInputs(Xorig, output, surrogate_parameters, surrogate_transformati """ _, num = output.split("_") - index = powerstate.indexes_simulation[ - int(num) - ] # num=1 -> pos=1, so that it takes the second value in vectors + index = powerstate.indexes_simulation[int(num)] # num=1 -> pos=1, so that it takes the second value in vectors xFit = torch.Tensor().to(X) for ikey in surrogate_transformation_variables[output]: @@ -151,12 +173,14 @@ def produceNewInputs(Xorig, output, surrogate_parameters, surrogate_transformati # ---------------------------------------------------------------------- -def transformPORTALS(X, surrogate_parameters, output): +def output_transformation_portals(X, surrogate_parameters, outputs): """ 1. 
Make sure all batches are squeezed into a single dimension ------------------------------------------------------------------ E.g.: (batch1,batch2,batch3,dim) -> (batch1*batch2*batch3,dim) + Output is a factor that account for all the outputs """ + shape_orig = np.array(X.shape) X = X.view(np.prod(shape_orig[:-1]), shape_orig[-1]) @@ -169,14 +193,16 @@ def transformPORTALS(X, surrogate_parameters, output): # Produce relevant quantities here (in particular, GB will be used) powerstate = constructEvaluationProfiles(X, surrogate_parameters) - # --- Original model output is in real units, transform to GB here b/c that's how GK codes work - factorGB = GBfromXnorm(X, output, powerstate) - # --- Ratio of fluxes (quasilinear) - factorRat = ratioFactor(X, surrogate_parameters, output, powerstate) - # --- Specific to output - factorImp = ImpurityGammaTrick(X, surrogate_parameters, output, powerstate) + compounded = torch.Tensor().to(X) + for output in outputs: + # --- Original model output is in real units, transform to GB here b/c that's how GK codes work + factorGB = GBfromXnorm(X, output, powerstate) + # --- Ratio of fluxes (quasilinear) + factorRat = ratioFactor(X, surrogate_parameters, output, powerstate) + # --- Specific to output + factorImp = ImpurityGammaTrick(X, surrogate_parameters, output, powerstate) - compounded = factorGB * factorRat * factorImp + compounded = torch.cat((compounded, factorGB * factorRat * factorImp), dim=-1) """ 3. Go back to the original batching system @@ -185,7 +211,7 @@ def transformPORTALS(X, surrogate_parameters, output): """ shape_orig[-1] = compounded.shape[-1] compounded = compounded.view(tuple(shape_orig)) - + return compounded @@ -226,7 +252,7 @@ def computeTurbExchangeIndividual(PexchTurb, powerstate): return PexchTurb_integrated -# def transformPORTALS(X,Y,Yvar,surrogate_parameters,output): +# def output_transformation_portals(X,Y,Yvar,surrogate_parameters,output): # ''' # Transform direct evaluation output to something that the model understands better. 
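A minimal sketch of the batching pattern used by output_transformation_portals above: flatten all batch dimensions, compute one scaling column per output, concatenate along the last dimension, and restore the original batch shape. The factor function here is a hypothetical stand-in for the gyro-Bohm / quasilinear / impurity factors, not the PORTALS implementation.

import torch

def dummy_factor(X, output):
    # hypothetical stand-in for factorGB * factorRat * factorImp: one column per output
    return X.abs().mean(dim=-1, keepdim=True) + 1.0

def compound_factors(X, outputs):
    shape_orig = list(X.shape)
    X_flat = X.reshape(-1, shape_orig[-1])                        # (batch1*...*batchN, dim)
    compounded = torch.cat([dummy_factor(X_flat, o) for o in outputs], dim=-1)
    return compounded.reshape(shape_orig[:-1] + [len(outputs)])   # (batch1, ..., batchN, num_outputs)

X = torch.rand(3, 5, 4)                                           # (batch1, batch2, dim)
print(compound_factors(X, ["QeTurb_1", "QiTurb_1"]).shape)        # torch.Size([3, 5, 2])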
@@ -245,14 +271,14 @@ def computeTurbExchangeIndividual(PexchTurb, powerstate): # return Ytr,Ytr_var -# def untransformPORTALS(X, mean, upper, lower, surrogate_parameters, output): +# def unoutput_transformation_portals(X, mean, upper, lower, surrogate_parameters, output): # ''' -# Transform direct model output to the actual evaluation output (must be the opposite to transformPORTALS) +# Transform direct model output to the actual evaluation output (must be the opposite to output_transformation_portals) # - Receives unnormalized X (batch1,...,dim) to construct QGB (batch1,...,1) corresponding to what output I'm looking at # - Transforms and produces Y and confidence bounds (batch1,...,) -# This untransforms whatever has happened in the transformPORTALS function +# This untransforms whatever has happened in the output_transformation_portals function # ''' # factor = factorProducer(X,surrogate_parameters,output).squeeze(-1) @@ -375,9 +401,11 @@ def constructEvaluationProfiles(X, surrogate_parameters, recalculateTargets=Fals if ("parameters_combined" in surrogate_parameters) and ( "powerstate" in surrogate_parameters["parameters_combined"] ): + powerstate = surrogate_parameters["parameters_combined"]["powerstate"] else: + powerstate = surrogate_parameters["powerstate"] if X.shape[0] > 0: diff --git a/src/mitim_modules/portals/utils/PORTALSanalysis.py b/src/mitim_modules/portals/utils/PORTALSanalysis.py index 69218a7f..901eaf76 100644 --- a/src/mitim_modules/portals/utils/PORTALSanalysis.py +++ b/src/mitim_modules/portals/utils/PORTALSanalysis.py @@ -730,15 +730,15 @@ def __init__(self, gpdict): self._output_variables.append(key) for key in self._models: if hasattr(self._models[key], 'gpmodel'): - if hasattr(self._models[key].gpmodel, 'train_X_usedToTrain'): - xtrain = self._models[key].gpmodel.train_X_usedToTrain.detach().cpu().numpy() + if hasattr(self._models[key].gpmodel, 'train_X_use'): + xtrain = self._models[key].gpmodel.train_X_use.detach().cpu().numpy() if len(xtrain.shape) < 2: xtrain = np.atleast_2d(xtrain) if xtrain.shape[1] != len(self._input_variables): xtrain = xtrain.T self._training_inputs[key] = pd.DataFrame(xtrain, columns=self._input_variables) - if hasattr(self._models[key].gpmodel, 'train_Y_usedToTrain'): - ytrain = self._models[key].gpmodel.train_Y_usedToTrain.detach().cpu().numpy() + if hasattr(self._models[key].gpmodel, 'train_Y_use'): + ytrain = self._models[key].gpmodel.train_Y_use.detach().cpu().numpy() if len(ytrain.shape) < 2: ytrain = np.atleast_2d(ytrain) if ytrain.shape[1] != 1: diff --git a/src/mitim_modules/portals/utils/PORTALSinit.py b/src/mitim_modules/portals/utils/PORTALSinit.py index 059022c2..4b94a156 100644 --- a/src/mitim_modules/portals/utils/PORTALSinit.py +++ b/src/mitim_modules/portals/utils/PORTALSinit.py @@ -26,10 +26,13 @@ def initializeProblem( dvs_fixed=None, start_from_folder=None, define_ranges_from_profiles=None, - dfT=torch.randn((2, 2), dtype=torch.double), ModelOptions=None, seedInitial=None, checkForSpecies=True, + tensor_opts = { + "dtype": torch.double, + "device": torch.device("cpu"), + } ): """ Notes: @@ -39,6 +42,8 @@ def initializeProblem( - define_ranges_from_profiles must be PROFILES class """ + dfT = torch.randn((2, 2), **tensor_opts) + if seedInitial is not None: torch.manual_seed(seed=seedInitial) @@ -177,6 +182,7 @@ def initializeProblem( "TypeTarget": portals_fun.MODELparameters["Physics_options"]["TypeTarget"], "TargetCalc": portals_fun.PORTALSparameters["TargetCalc"]}, }, + tensor_opts = tensor_opts ) # 
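The tensor_opts dictionary introduced in these hunks replaces the old TensorsType argument and the hard-coded dtype/device choices: it is expanded into a small template tensor (dfT) whose dtype and device downstream tensors inherit via .to(dfT). A minimal standalone sketch of that pattern (not the MITIM classes themselves):

import torch

tensor_opts = {
    "dtype": torch.double,
    "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
}

# template tensor carrying the requested dtype and device
dfT = torch.randn((2, 2), **tensor_opts)

# any downstream tensor picks up both dtype and device from the template
x = torch.zeros(10).to(dfT)
print(x.dtype, x.device)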
*************************************************************************************************** @@ -227,6 +233,7 @@ def initializeProblem( "TypeTarget": portals_fun.MODELparameters["Physics_options"]["TypeTarget"], "TargetCalc": portals_fun.PORTALSparameters["TargetCalc"]}, }, + tensor_opts = tensor_opts ) dictCPs_base_extra = {} @@ -270,33 +277,46 @@ def initializeProblem( dictDVs[name] = [dvs_fixed[name][0], base_gradient, dvs_fixed[name][1]] # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # Define output dictionaries + # Define output dictionaries (order is important, consistent with selectSurrogates) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ofs, name_objectives = [], [] - for ikey in dictCPs_base: - if ikey == "te": - var = "Qe" - elif ikey == "ti": - var = "Qi" - elif ikey == "ne": - var = "Ge" - elif ikey == "nZ": - var = "GZ" - elif ikey == "w0": - var = "Mt" - for i in range(len(portals_fun.MODELparameters["RhoLocations"])): - ofs.append(f"{var}Turb_{i+1}") - ofs.append(f"{var}Neo_{i+1}") + # Turb and neo, ending with last location + + for i in range(len(portals_fun.MODELparameters["RhoLocations"])): + for model in ["Turb", "Neo"]: + for ikey in dictCPs_base: + if ikey == "te": var = "Qe" + elif ikey == "ti": var = "Qi" + elif ikey == "ne": var = "Ge" + elif ikey == "nZ": var = "GZ" + elif ikey == "w0": var = "Mt" + + ofs.append(f"{var}{model}_{i+1}") + + + if f"{var}Res_{i+1}" not in name_objectives: + name_objectives.append(f"{var}Res_{i+1}") + + if portals_fun.PORTALSparameters["surrogateForTurbExch"]: + for i in range(len(portals_fun.MODELparameters["RhoLocations"])): + ofs.append(f"PexchTurb_{i+1}") + + # Tar + + for i in range(len(portals_fun.MODELparameters["RhoLocations"])): + model = "Tar" + for ikey in dictCPs_base: + if ikey == "te": var = "Qe" + elif ikey == "ti": var = "Qi" + elif ikey == "ne": var = "Ge" + elif ikey == "nZ": var = "GZ" + elif ikey == "w0": var = "Mt" - ofs.append(f"{var}Tar_{i+1}") + ofs.append(f"{var}{model}_{i+1}") - name_objectives.append(f"{var}Res_{i+1}") - if portals_fun.PORTALSparameters["surrogateForTurbExch"]: - for i in range(len(portals_fun.MODELparameters["RhoLocations"])): - ofs.append(f"PexchTurb_{i+1}") name_transformed_ofs = [] for of in ofs: @@ -333,8 +353,8 @@ def initializeProblem( Variables[ikey] = prepportals_transformation_variables(portals_fun, ikey) portals_fun.surrogate_parameters = { - "transformationInputs": PORTALStools.produceNewInputs, - "transformationOutputs": PORTALStools.transformPORTALS, + "transformationInputs": PORTALStools.input_transformation_portals, + "transformationOutputs": PORTALStools.output_transformation_portals, "powerstate": portals_fun.powerstate, "applyImpurityGammaTrick": portals_fun.PORTALSparameters[ "applyImpurityGammaTrick" diff --git a/src/mitim_modules/portals/utils/PORTALSoptimization.py b/src/mitim_modules/portals/utils/PORTALSoptimization.py index 61d66baa..5ac0c285 100644 --- a/src/mitim_modules/portals/utils/PORTALSoptimization.py +++ b/src/mitim_modules/portals/utils/PORTALSoptimization.py @@ -104,6 +104,7 @@ def flux_match_surrogate(step,profiles_new, plot_results=True, file_write_csv=No # Create powerstate with the same options as the original portals but with the new profiles + embed() powerstate = STATEtools.powerstate( profiles_new, EvolutionOptions={ @@ -115,6 +116,9 @@ def flux_match_surrogate(step,profiles_new, plot_results=True, file_write_csv=No }, 
TransportOptions=TransportOptions, TargetOptions=step.surrogate_parameters["powerstate"].TargetOptions, + tensor_opts = { + "dtype": step.surrogate_parameters["powerstate"].dfT.dtype, + "device": step.surrogate_parameters["powerstate"].dfT.device}, ) # Pass powerstate as part of the surrogate_parameters such that transformations now occur with the new profiles diff --git a/src/mitim_modules/powertorch/STATEtools.py b/src/mitim_modules/powertorch/STATEtools.py index e4df8976..c024faec 100644 --- a/src/mitim_modules/powertorch/STATEtools.py +++ b/src/mitim_modules/powertorch/STATEtools.py @@ -10,8 +10,6 @@ from mitim_tools.misc_tools.LOGtools import printMsg as print from IPython import embed -UseCUDAifAvailable = True - # ------------------------------------------------------------------ # POWERSTATE Class # ------------------------------------------------------------------ @@ -32,6 +30,10 @@ def __init__( "TargetCalc": "powerstate" }, }, + tensor_opts = { + "dtype": torch.double, + "device": torch.device("cpu"), + } ): ''' Inputs: @@ -69,15 +71,7 @@ def _ensure_ne_before_nz(lst): self.ProfilesPredicted = _ensure_ne_before_nz(self.ProfilesPredicted) # Default type and device tensor - self.dfT = torch.randn( - (2, 2), - dtype=torch.double, - device=torch.device( - "cpu" - if ((not UseCUDAifAvailable) or (not torch.cuda.is_available())) - else "cuda" - ), - ) + self.dfT = torch.randn((2, 2), **tensor_opts) ''' Potential profiles to evolve (aLX) and their corresponding flux matching diff --git a/src/mitim_modules/vitals/VITALSmain.py b/src/mitim_modules/vitals/VITALSmain.py index 1827e945..0f345fe2 100644 --- a/src/mitim_modules/vitals/VITALSmain.py +++ b/src/mitim_modules/vitals/VITALSmain.py @@ -31,7 +31,7 @@ def default_namelist(optimization_options): class vitals(STRATEGYtools.opt_evaluator): - def __init__(self, folder, namelist=None): + def __init__(self, folder, namelist=None, **kwargs): print( "\n-----------------------------------------------------------------------------------------" ) @@ -45,6 +45,7 @@ def __init__(self, folder, namelist=None): folder, namelist=namelist, default_namelist_function=default_namelist if (namelist is None) else None, + **kwargs ) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/src/mitim_tools/misc_tools/FARMINGtools.py b/src/mitim_tools/misc_tools/FARMINGtools.py index 2706bf8e..3f8b0f16 100644 --- a/src/mitim_tools/misc_tools/FARMINGtools.py +++ b/src/mitim_tools/misc_tools/FARMINGtools.py @@ -23,8 +23,6 @@ from mitim_tools.misc_tools.CONFIGread import read_verbose_level from IPython import embed -UseCUDAifAvailable = True - """ New handling of jobs in remote or local clusters. Example use: @@ -908,9 +906,6 @@ def ParallelProcedure( else: import multiprocessing - if UseCUDAifAvailable and torch.cuda.is_available(): - multiprocessing.set_start_method("spawn") - """ This way of pooling passes a lock when initializing every child class. 
It handles a global lock, and then every child can call lock.acquire() and lock.release() diff --git a/src/mitim_tools/misc_tools/IOtools.py b/src/mitim_tools/misc_tools/IOtools.py index e808bd15..f6430dd8 100644 --- a/src/mitim_tools/misc_tools/IOtools.py +++ b/src/mitim_tools/misc_tools/IOtools.py @@ -82,6 +82,7 @@ def __exit__(self, *args): def _get_time(self): self.timeDiff = getTimeDifference(self.timeBeginning, niceText=False) + self.timeDiff_txt = createTimeTXT(self.timeDiff) print(f'{self.name} took {createTimeTXT(self.timeDiff)}') diff --git a/src/mitim_tools/opt_tools/BOTORCHtools.py b/src/mitim_tools/opt_tools/BOTORCHtools.py index e72b75e6..2a982ed2 100644 --- a/src/mitim_tools/opt_tools/BOTORCHtools.py +++ b/src/mitim_tools/opt_tools/BOTORCHtools.py @@ -1,6 +1,6 @@ """ ************************************************************************************************************** -This set of tools are custom modifications to BOTORCH or GPYTORCH ones to satisfy my needs +This set of tools are custom modifications to BOTORCH or GPYTORCH ones to satisfy MITIM/PORTALS needs ************************************************************************************************************** """ @@ -10,12 +10,28 @@ from IPython import embed from mitim_tools.misc_tools.LOGtools import printMsg as print -# ---------------------------------------------------------------------------------------------------------------------------- -# SingleTaskGP needs to be modified because I want to input options and outcome transform taking X, otherwise it should be a copy -# ---------------------------------------------------------------------------------------------------------------------------- - - -class ExactGPcustom(botorch.models.gp_regression.SingleTaskGP): +from botorch.models.transforms.input import InputTransform +from botorch.models.transforms.outcome import OutcomeTransform +from botorch.models.utils import validate_input_scaling +from torch import Tensor +from linear_operator.operators import CholLinearOperator, DiagLinearOperator +from typing import Iterable +from torch.nn import ModuleDict +from botorch.posteriors.gpytorch import GPyTorchPosterior +from botorch.posteriors.posterior import Posterior +from linear_operator.operators import BlockDiagLinearOperator, CatLinearOperator, BlockInterleavedLinearOperator +from gpytorch.distributions import MultitaskMultivariateNormal +from gpytorch.distributions import MultivariateNormal + +''' +******************************************************************************* +SingleTaskGP needs to be custom in MITIM because: + - Training occurs in transformed space directly, to allow for _added points + - Outcome transform calls are modified to take X + - Options are provided (secondary) +******************************************************************************* +''' +class SingleTaskGP_MITIM(botorch.models.gp_regression.SingleTaskGP): def __init__( self, train_X, @@ -24,78 +40,117 @@ def __init__( input_transform=None, outcome_transform=None, surrogateOptions={}, - variables=None, + #variables=None, train_X_added=torch.Tensor([]), train_Y_added=torch.Tensor([]), train_Yvar_added=torch.Tensor([]), ): """ - _added refers to already-transformed variables that are added from table + Notes: + - train_X is raw untransformed, [batch, dx] + - train_Y is raw untransformed, [batch, dy] + - train_Yvar is raw untransformed, [batch, dy] + - _added refers to already-transformed variables (tf1) that are added from table: + train_X_added is raw transformed, 
[dytr, batch, dxtr] + train_Y_added is raw transformed, [batch, dytr] + train_Yvar_added is raw transformed,[batch, dytr] + - The original SingleTaskGP receives train_X and train_Y completely raw, + and inside of its __init__, it does transform train_Y and treats it like that + throughout. For train_X, it passes the original, but calculates dimensions + on the transformed version. """ + # ----------------------------------------------------------------------- + # Surrogate model options + # ----------------------------------------------------------------------- + TypeMean = surrogateOptions.get("TypeMean", 0) TypeKernel = surrogateOptions.get("TypeKernel", 0) FixedNoise = surrogateOptions.get("FixedNoise", False) ConstrainNoise = surrogateOptions.get("ConstrainNoise", -1e-4) learn_additional_noise = surrogateOptions.get("ExtraNoise", False) print("\t\t* Surrogate model options:") - print( - f"\t\t\t- FixedNoise: {FixedNoise} (extra noise: {learn_additional_noise}), TypeMean: {TypeMean}, TypeKernel: {TypeKernel}, ConstrainNoise: {ConstrainNoise:.1e}" - ) + print(f"\t\t\t- FixedNoise: {FixedNoise} (extra noise: {learn_additional_noise}), TypeMean: {TypeMean}, TypeKernel: {TypeKernel}, ConstrainNoise: {ConstrainNoise:.1e}") - self.store_training( - train_X, - train_X_added, - train_Y, - train_Y_added, - train_Yvar, - train_Yvar_added, - input_transform, - outcome_transform, - ) + # ** Store training data - """ - ---------------------------------------------------------------------------------------- - What set_dimensions did, and select things to train (already transformed and normalized) - ---------------------------------------------------------------------------------------- - """ + # x, y are raw untransformed, and I want raw transformed. xa, ya are raw transformed + #x_tr = input_transform["tf1"](train_X)if input_transform is not None else train_X + #y_tr, yv_tr = outcome_transform["tf1"](train_X, train_Y, train_Yvar) if outcome_transform is not None else train_Y, train_Yvar + #self.train_X_use = torch.cat((train_X_added, x_tr), axis=-2) + #self.train_Y_use = torch.cat((train_Y_added, y_tr), axis=-2) + #self.train_Yvar_use = torch.cat((train_Yvar_added, yv_tr), axis=-2) - # Grab num_outputs - self._num_outputs = train_Y.shape[-1] - # Grab ard_num_dims - if train_X.shape[0] > 0: - with torch.no_grad(): - transformed_X = self.transform_inputs( - X=train_X, input_transform=input_transform - ) - self.ard_num_dims = transformed_X.shape[-1] - else: - self.ard_num_dims = train_X_added.shape[-1] - transformed_X = torch.empty((0, self.ard_num_dims)).to(train_X) - - # Transform outcomes + self._validate_tensor_args(X=train_X, Y=train_Y, Yvar=train_Yvar) + with torch.no_grad(): + transformed_X = self.transform_inputs( + X=train_X, input_transform=input_transform + ) if outcome_transform is not None: train_Y, train_Yvar = outcome_transform(train_X, train_Y, train_Yvar) - - # Added points are raw transformed, so I need to normalize them - if train_X_added.shape[0] > 0: - train_X_added = input_transform["tf2"](train_X_added) - train_Y_added, train_Yvar_added = outcome_transform["tf2"]( - train_Y_added, train_Yvar_added - ) - # ----- - - train_X_usedToTrain = torch.cat((transformed_X, train_X_added), axis=0) - train_Y_usedToTrain = torch.cat((train_Y, train_Y_added), axis=0) - train_Yvar_usedToTrain = torch.cat((train_Yvar, train_Yvar_added), axis=0) - - self._input_batch_shape, self._aug_batch_shape = self.get_batch_dimensions( - train_X=train_X_usedToTrain, train_Y=train_Y_usedToTrain + # Validate 
again after applying the transforms + self._validate_tensor_args(X=transformed_X, Y=train_Y, Yvar=train_Yvar) + ignore_X_dims = getattr(self, "_ignore_X_dims_scaling_check", None) + validate_input_scaling( + train_X=transformed_X, + train_Y=train_Y, + train_Yvar=train_Yvar, + ignore_X_dims=ignore_X_dims, + ) + self._set_dimensions(train_X=train_X, train_Y=train_Y) + train_X, train_Y, train_Yvar = self._transform_tensor_args( + X=train_X, Y=train_Y, Yvar=train_Yvar ) - train_Y_usedToTrain = train_Y_usedToTrain.squeeze(-1) - train_Yvar_usedToTrain = train_Yvar_usedToTrain.squeeze(-1) + self._aug_batch_shape = transformed_X.shape[:-2] + train_X_use = train_X + train_Y_use = train_Y + train_Yvar_use = train_Yvar + self.ard_num_dims = transformed_X.shape[-1] + + + + # # Grab num_outputs + # self._num_outputs = train_Y.shape[-1] + + # # Grab ard_num_dims + # if train_X.shape[0] > 0: + # with torch.no_grad(): + # transformed_X = self.transform_inputs(X=train_X, input_transform=input_transform) + # self.ard_num_dims = transformed_X.shape[-1] + # else: + # self.ard_num_dims = train_X_added.shape[-1] + # transformed_X = torch.empty((0, self.ard_num_dims)).to(train_X) + + # # Transform outcomes + # if outcome_transform is not None: + # train_Y, train_Yvar = outcome_transform(train_X, train_Y, train_Yvar) + + # # Added points are raw transformed, so I need to normalize them + # if train_X_added.shape[0] > 0: + # train_X_added = input_transform["tf2"](train_X_added) + # train_Y_added, train_Yvar_added = outcome_transform["tf3"](*outcome_transform["tf2"](train_Y_added, train_Yvar_added)) + + # # Concatenate the added points + # train_X_use = torch.cat((transformed_X, train_X_added), axis=-2) + # train_Y_use = torch.cat((train_Y, train_Y_added), axis=-2) + # train_Yvar_use = torch.cat((train_Yvar, train_Yvar_added), axis=-2) + + # # Validate again after applying the transforms + # self._validate_tensor_args(X=train_X_use, Y=train_Y_use, Yvar=train_Yvar_use) + # ignore_X_dims = getattr(self, "_ignore_X_dims_scaling_check", None) + # validate_input_scaling( + # train_X=train_X_use, + # train_Y=train_Y_use, + # train_Yvar=train_Yvar_use, + # ignore_X_dims=ignore_X_dims, + # ) + # self._set_dimensions(train_X=train_X_use, train_Y=train_Y_use) + + # train_X_use, train_Y_use, train_Yvar_use = self._transform_tensor_args( + # X=train_X_use, Y=train_Y_use, Yvar=train_Yvar_use + # ) """ ----------------------------------------------------------------------- @@ -110,7 +165,7 @@ def __init__( likelihood = ( gpytorch.likelihoods.gaussian_likelihood.FixedNoiseGaussianLikelihood( - noise=train_Yvar_usedToTrain.clip(1e-6), # I clip the noise to avoid numerical issues (gpytorch would do it anyway, but this way it doesn't throw a warning) + noise=train_Yvar_use.clip(1e-6), # I clip the noise to avoid numerical issues (gpytorch would do it anyway, but this way it doesn't throw a warning) batch_shape=self._aug_batch_shape, learn_additional_noise=learn_additional_noise, ) @@ -145,12 +200,7 @@ def __init__( ----------------------------------------------------------------------- """ - gpytorch.models.exact_gp.ExactGP.__init__( - self, - train_inputs=train_X_usedToTrain, - train_targets=train_Y_usedToTrain, - likelihood=likelihood, - ) + gpytorch.models.exact_gp.ExactGP.__init__(self, train_inputs=train_X_use, train_targets=train_Y_use, likelihood=likelihood) """ ----------------------------------------------------------------------- @@ -160,20 +210,16 @@ def __init__( if TypeMean == 0: self.mean_module = 
gpytorch.means.constant_mean.ConstantMean( - batch_shape=self._aug_batch_shape - ) + batch_shape=self._aug_batch_shape ) elif TypeMean == 1: self.mean_module = gpytorch.means.linear_mean.LinearMean( - self.ard_num_dims, batch_shape=self._aug_batch_shape, bias=True - ) - elif TypeMean == 2: - self.mean_module = MITIM_LinearMeanGradients( - batch_shape=self._aug_batch_shape, variables=variables - ) - elif TypeMean == 3: - self.mean_module = MITIM_CriticalGradient( - batch_shape=self._aug_batch_shape, variables=variables - ) + self.ard_num_dims, batch_shape=self._aug_batch_shape, bias=True ) + # elif TypeMean == 2: + # self.mean_module = MITIM_LinearMeanGradients( + # batch_shape=self._aug_batch_shape, variables=variables ) + # elif TypeMean == 3: + # self.mean_module = MITIM_CriticalGradient( + # batch_shape=self._aug_batch_shape, variables=variables ) """ ----------------------------------------------------------------------- @@ -186,9 +232,7 @@ def __init__( outputscale_prior = gpytorch.priors.torch_priors.GammaPrior(2.0, 0.15) # Do not allow too small lengthscales? - lengthscale_constraint = ( - None # gpytorch.constraints.constraints.GreaterThan(0.05) - ) + lengthscale_constraint = None # gpytorch.constraints.constraints.GreaterThan(0.05) self._subset_batch_dict["covar_module.raw_outputscale"] = -1 self._subset_batch_dict["covar_module.base_kernel.raw_lengthscale"] = -3 @@ -239,31 +283,9 @@ def __init__( self.outcome_transform = outcome_transform if input_transform is not None: self.input_transform = input_transform - - self.to(train_X) - - def store_training(self, x, xa, y, ya, yv, yva, input_transform, outcome_transform): - - # x, y are raw untransformed, and I want raw transformed - if input_transform is not None: - x_tr = input_transform["tf1"](x) - else: - x_tr = x - if outcome_transform is not None: - y_tr, yv_tr = outcome_transform["tf1"](x, y, yv) - else: - y_tr, yv_tr = y, yv - - # xa, ya are raw transformed - xa_tr = xa - ya_tr, yva_tr = ya, yva - - self.train_X_usedToTrain = torch.cat((xa_tr, x_tr), axis=0) - self.train_Y_usedToTrain = torch.cat((ya_tr, y_tr), axis=0) - self.train_Yvar_usedToTrain = torch.cat((yva_tr, yv_tr), axis=0) + self.to(train_X_use) # Modify posterior call from BatchedMultiOutputGPyTorchModel to call posterior untransform with "X" - def posterior( self, X, @@ -272,6 +294,7 @@ def posterior( posterior_transform=None, **kwargs, ): + self.eval() # make sure model is in eval mode # input transforms are applied at `posterior` in `eval` mode, and at # `model.forward()` at the training time @@ -307,72 +330,13 @@ def posterior( return posterior_transform(posterior) return posterior -# ---------------------------------------------------------------------------------------------------------------------------- -# ModelListGP needs to be modified to allow me to have "common" parameters to models, to not run at every transformation again -# ---------------------------------------------------------------------------------------------------------------------------- - - -class ModifiedModelListGP(botorch.models.model_list_gp_regression.ModelListGP): - def __init__(self, *gp_models): - super().__init__(*gp_models) - - def prepareToGenerateCommons(self): - self.models[0].input_transform.tf1.flag_to_store = True - # Make sure that this ModelListGP evaluation is fresh - if ( - "parameters_combined" - in self.models[0].input_transform.tf1.surrogate_parameters - ): - del self.models[0].input_transform.tf1.surrogate_parameters[ - "parameters_combined" - ] - - def 
cold_startCommons(self): - self.models[0].input_transform.tf1.flag_to_store = False - if ( - "parameters_combined" - in self.models[0].input_transform.tf1.surrogate_parameters - ): - del self.models[0].input_transform.tf1.surrogate_parameters[ - "parameters_combined" - ] - - def transform_inputs(self, X): - self.prepareToGenerateCommons() - X_tr = super().transform_inputs(X) - self.cold_startCommons() - - return X_tr - - def posterior( - self, - X, - output_indices=None, - observation_noise=False, - posterior_transform=None, - **kwargs, - ): - self.prepareToGenerateCommons() - posterior = super().posterior( - X, - output_indices=output_indices, - observation_noise=observation_noise, - posterior_transform=posterior_transform, - **kwargs, - ) - self.cold_startCommons() - - return posterior - - -# ---------------------------------------------------------------------------------------------------------------------------- -# I need my own transformation based on physics -# ---------------------------------------------------------------------------------------------------------------------------- - - -class Transformation_Inputs( - botorch.models.transforms.input.ReversibleInputTransform, torch.nn.Module -): +''' +******************************************************************************* +Physics transformation for inputs +******************************************************************************* +''' +class input_physics_transform( + botorch.models.transforms.input.ReversibleInputTransform, torch.nn.Module): def __init__( self, output, @@ -403,9 +367,7 @@ def __init__( @botorch.models.transforms.utils.subset_transform def _transform(self, X): if (self.output is not None) and (self.flag_to_evaluate): - Xtr, parameters_combined = self.surrogate_parameters[ - "transformationInputs" - ]( + Xtr, parameters_combined = self.surrogate_parameters["transformationInputs"]( X, self.output, self.surrogate_parameters, @@ -425,62 +387,71 @@ def _transform(self, X): def _untransform(self, X): raise NotImplementedError("[MITIM] This situation has not been implemented yet") +''' +******************************************************************************* +Physics transformation for outputs. Notes: + - It needs to take "X" as well +******************************************************************************* +''' -# ---------------------------------------------------------------------------------------------------------------------------- -# I need my own outcome transformation based on physics and that takes "X" as well -# ---------------------------------------------------------------------------------------------------------------------------- - - -# Copy standardize but modify in untransform the "std" which is my factor! 
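The removed Transformation_Outcomes below reused Standardize by overwriting its learned std with a physics factor; its replacement, outcome_physics_transform, expresses the same idea directly as a purely multiplicative, X-dependent scaling (Y is divided by the factor, variances by its square, and untransform inverts both). A minimal standalone sketch with plain tensors, assuming a strictly positive factor; the factor function is a hypothetical stand-in:

import torch

def physics_factor(X):
    # hypothetical stand-in for the physics-based scaling (e.g. a gyro-Bohm-like factor), > 0
    return 1.0 + X.abs().sum(dim=-1, keepdim=True)

def transform(X, Y, Yvar=None):
    f = physics_factor(X)
    return Y / f, (Yvar / f.pow(2) if Yvar is not None else None)

def untransform(X, Y, Yvar=None):
    f = physics_factor(X)
    return Y * f, (Yvar * f.pow(2) if Yvar is not None else None)

X, Y = torch.rand(8, 3), torch.rand(8, 1)
Yt, _ = transform(X, Y)
Yb, _ = untransform(X, Yt)
print(torch.allclose(Y, Yb))  # True: untransform inverts transform for the same X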
-class Transformation_Outcomes(botorch.models.transforms.outcome.Standardize): - def __init__(self, m, output, surrogate_parameters): - super().__init__(m) - - self.output = output +class outcome_physics_transform(botorch.models.transforms.outcome.OutcomeTransform): + def __init__(self, m, outputs_names, surrogate_parameters): + super().__init__() + self.outputs_names = outputs_names self.surrogate_parameters = surrogate_parameters self.flag_to_evaluate = True - def forward(self, X, Y, Yvar): - if (self.output is not None) and (self.flag_to_evaluate): - factor = self.surrogate_parameters["transformationOutputs"]( - X, self.surrogate_parameters, self.output - ).to(X.device) - else: - factor = Y.mean(dim=-2, keepdim=True).to(Y.device) * 0.0 + 1.0 - - self.stdvs = factor - self.means = self.stdvs * 0.0 - self._stdvs_sq = self.stdvs.pow(2) - - # When calling the forward method of Standardize, do not recalculate mean and stdvs (never be on training) - self._is_trained = torch.tensor(True) - self.training = False - # ---------------------------------------- - - return super().forward(Y, Yvar) + def _is_linear(self): + return True - def untransform_posterior(self, X, posterior): - if (self.output is not None) and (self.flag_to_evaluate): + def grab_factor(self, X): + if (self.outputs_names is not None) and (self.flag_to_evaluate): factor = self.surrogate_parameters["transformationOutputs"]( - X, self.surrogate_parameters, self.output + X, self.surrogate_parameters, self.outputs_names ).to(X.device) - - self.stdvs = factor - self.means = self.stdvs * 0.0 - self._stdvs_sq = self.stdvs.pow(2) - return super().untransform_posterior(posterior) - else: - return posterior + factor = torch.ones_like(X) + return factor - def untransform(self, Y, Yvar): - raise NotImplementedError("[MITIM] This situation has not been implemented yet") + def forward(self, X, Y, Yvar): + factor = self.grab_factor(X) + return Y / factor, Yvar / factor.pow(2) if Yvar is not None else None + def untransform(self, X, Y, Yvar): + factor = self.grab_factor(X) + return Y * factor, Yvar * factor.pow(2) if Yvar is not None else None -# Because I need it to take X too (for physics only, which is always the first tf) + def untransform_posterior(self, X, posterior): + ''' + PRF: Please check, this is an attempt to + replicate the untransform_posterior method from Standardize + ''' + + # Grab linear factor + factor = self.grab_factor(X) + + # Grab the posterior distribution to modify + mvn = posterior.distribution + lcv = mvn.lazy_covariance_matrix + + # Calculate the new mean and covariance + mean_tf = factor * mvn.mean + scale_mat = DiagLinearOperator(factor.squeeze(-1)) + covar_tf = scale_mat @ lcv @ scale_mat + + # Recreate the untranformed posterior + kwargs = {"interleaved": mvn._interleaved} if posterior._is_mt else {} + mvn_tf = mvn.__class__(mean=mean_tf, covariance_matrix=covar_tf, **kwargs) + return botorch.posteriors.gpytorch.GPyTorchPosterior(mvn_tf) + +''' +******************************************************************************* +ChainedOutcomeTransform needs to be custom in MITIM because the first +transformation (tf1) is assumed to be the physics one and needs to take X +******************************************************************************* +''' class ChainedOutcomeTransform( - botorch.models.transforms.outcome.ChainedOutcomeTransform -): + botorch.models.transforms.outcome.ChainedOutcomeTransform): def __init__(self, **transforms): super().__init__(**transforms) @@ -496,14 +467,257 @@ def 
untransform_posterior(self, X, posterior): for i, tf in enumerate(reversed(self.values())): posterior = ( tf.untransform_posterior(X, posterior) - if i == 1 + if i == len(self.values())-1 else tf.untransform_posterior(posterior) ) # Only physics transformation (tf1) takes X - + return posterior def untransform(self, X, Y, Yvar): raise NotImplementedError("[MITIM] This situation has not been implemented yet") +''' +******************************************************************************* +BatchBroadcastedInputTransform needs to be custom in MITIM because of the no +repetition of expensive parameters +******************************************************************************* +''' +class BatchBroadcastedInputTransform(InputTransform, ModuleDict): + r"""An input transform representing a list of transforms to be broadcasted.""" + + def __init__( + self, + transforms: list[InputTransform], + broadcast_index: int = -3, + ) -> None: + r"""A transform list that is broadcasted across a batch dimension specified by + `broadcast_index`. This is allows using a batched Gaussian process model when + the input transforms are different for different batch dimensions. + + Args: + transforms: The transforms to broadcast across the first batch dimension. + The transform at position i in the list will be applied to `X[i]` for + a given input tensor `X` in the forward pass. + broadcast_index: The tensor index at which the transforms are broadcasted. + + Example: + >>> tf1 = Normalize(d=2) + >>> tf2 = InputStandardize(d=2) + >>> tf = BatchBroadcastedTransformList(transforms=[tf1, tf2]) + """ + super().__init__() + self.transform_on_train = False + self.transform_on_eval = False + self.transform_on_fantasize = False + self.transforms = transforms + if broadcast_index >= 0: + raise ValueError("A non-negative broadcast index is not supported yet.") + if broadcast_index in (-2, -1): + raise ValueError( + "The broadcast index cannot be -2 and -1, as these indices are reserved" + " for non-batch, data and input dimensions." + ) + self.broadcast_index = broadcast_index + self.is_one_to_many = self.transforms[0].is_one_to_many + if not all(tf.is_one_to_many == self.is_one_to_many for tf in self.transforms): + raise ValueError( # output shapes of transforms must be the same + "All transforms must have the same is_one_to_many property." + ) + for tf in self.transforms: + self.transform_on_train |= tf.transform_on_train + self.transform_on_eval |= tf.transform_on_eval + self.transform_on_fantasize |= tf.transform_on_fantasize + + def transform(self, X: Tensor) -> Tensor: + r"""Transform the inputs to a model. + + Individual transforms are applied in sequence and results are returned as + a batched tensor. + + Args: + X: A `batch_shape x n x d`-dim tensor of inputs. + + Returns: + A `batch_shape x n x d`-dim tensor of transformed inputs. 
+ """ + + self.prepare_expensive_parameters() + v = torch.stack( + [t.forward(Xi) for Xi, t in self._Xs_and_transforms(X)], + dim=self.broadcast_index, + ) + self.restart_expensive_parameters() + return v + + def prepare_expensive_parameters(self): + self.transforms[0].flag_to_store = True + # Make sure that this evaluation is fresh + if "parameters_combined" in self.transforms[0].surrogate_parameters: + del self.transforms[0].surrogate_parameters["parameters_combined"] + + def restart_expensive_parameters(self): + self.transforms[0].flag_to_store = False + if "parameters_combined" in self.transforms[0].surrogate_parameters: + del self.transforms[0].surrogate_parameters["parameters_combined"] + + def untransform(self, X: Tensor) -> Tensor: + r"""Un-transform the inputs to a model. + + Un-transforms of the individual transforms are applied in reverse sequence. + + Args: + X: A `batch_shape x n x d`-dim tensor of transformed inputs. + + Returns: + A `batch_shape x n x d`-dim tensor of un-transformed inputs. + """ + # return torch.stack( + # [t.untransform(Xi) for Xi, t in self._Xs_and_transforms(X)], + # dim=self.broadcast_index, + # ) + # + # return self.transforms[0].untransform(X) + self.prepare_expensive_parameters() + Xt = torch.stack( + [t.untransform(Xi) for Xi, t in self._Xs_and_transforms(X)], + dim=self.broadcast_index, + ) + self.restart_expensive_parameters() + Xt = Xt.unique(dim=self.broadcast_index) + # since we are assuming that this batch dimension was added solely + # because of different transforms, rather than different original inputs X. + assert Xt.shape[self.broadcast_index] == 1 + return Xt.squeeze(self.broadcast_index) + + def equals(self, other: InputTransform) -> bool: + r"""Check if another input transform is equivalent. + + Args: + other: Another input transform. + + Returns: + A boolean indicating if the other transform is equivalent. + """ + return ( + super().equals(other=other) + and all(t1.equals(t2) for t1, t2 in zip(self.transforms, other.transforms)) + and (self.broadcast_index == other.broadcast_index) + ) + + def preprocess_transform(self, X: Tensor) -> Tensor: + r"""Apply transforms for preprocessing inputs. + + The main use cases for this method are 1) to preprocess training data + before calling `set_train_data` and 2) preprocess `X_baseline` for noisy + acquisition functions so that `X_baseline` is "preprocessed" with the + same transformations as the cached training inputs. + + Args: + X: A `batch_shape x n x d`-dim tensor of inputs. + + Returns: + A `batch_shape x n x d`-dim tensor of (transformed) inputs. + """ + + + self.prepare_expensive_parameters() + v = torch.stack( + [t.preprocess_transform(Xi) for Xi, t in self._Xs_and_transforms(X)], + dim=self.broadcast_index, + ) + self.restart_expensive_parameters() + return v + + def _Xs_and_transforms(self, X: Tensor) -> Iterable[tuple[Tensor, InputTransform]]: + r"""Returns an iterable of sub-tensors of X and their associated transforms. + + Args: + X: A `batch_shape x n x d`-dim tensor of inputs. + + Returns: + An iterable containing tuples of sub-tensors of X and their transforms. + """ + # transform_shape = ( + # len(input_transform.transforms), + # *(1 for _ in range(abs(self.broadcast_index) - 1)), + # ) + # print(f"{transform_shape = }") + # print(f"{X.shape = }") + # TODO: Add dimension rather than broadcasting over the inputs. 
+ + # broadcast_shape = torch.broadcast_shapes(transform_shape, X.shape) + # X_expanded = X.expand(broadcast_shape) + # Xs = X_expanded.unbind(dim=self.broadcast_index) + # return zip(Xs, self.transforms) + return zip([X for _ in self.transforms], self.transforms) + +class OutcomeToBatchDimension(OutcomeTransform): + """Transform permuting dimensions in the outcome tensor.""" + + def __init__(self): + super().__init__() + + def forward( + self, Y: Tensor, Yvar: Tensor | None = None + ) -> tuple[Tensor, Tensor | None]: + return Y.unsqueeze(-3).transpose(-3, -1), ( + Yvar.unsqueeze(-3).transpose(-3, -1) #if Yvar else None + ) + + def untransform( + self, Y: Tensor, Yvar: Tensor | None = None + ) -> tuple[Tensor, Tensor | None]: + assert Y.shape[-1] == 1 + Y_perm = Y.transpose(-3, -1).squeeze(-3) + Yvar_perm = Yvar.transpose(-3, -1).squeeze(-3) if Yvar else None + return Y_perm, Yvar_perm + + @property + def _is_linear(self) -> bool: + return True + + def untransform_posterior(self, posterior: Posterior) -> Posterior: + mvn = posterior.mvn + mean = self.untransform(posterior.mean)[0] + covar = BlockDiagLinearOperator(base_linear_op=mvn._covar, block_dim=-3) + dis = MultitaskMultivariateNormal(mean=mean, covariance_matrix=covar) + return GPyTorchPosterior(distribution=dis) + +''' +******************************************************************************* +ModelListGP needs to be custom in MITIM because: + - I shouldn't run the full transformation at every posterior call, only + once per ModelList. This will allow me to have "common" parameters + to models, to not run at every transformation again +******************************************************************************* +''' +class ModelListGP_MITIM(botorch.models.model_list_gp_regression.ModelListGP): + def __init__(self, *gp_models): + super().__init__(*gp_models) + + def prepare_expensive_parameters(self): + self.models[0].input_transform.tf1.flag_to_store = True + # Make sure that this ModelListGP evaluation is fresh + if ("surrogate_parameters" in self.models[0].input_transform.tf1.__dict__) and \ + ("parameters_combined" in self.models[0].input_transform.tf1.surrogate_parameters): + del self.models[0].input_transform.tf1.surrogate_parameters["parameters_combined"] + + def restart_expensive_parameters(self): + self.models[0].input_transform.tf1.flag_to_store = False + if ("surrogate_parameters" in self.models[0].input_transform.tf1.__dict__) and \ + ("parameters_combined" in self.models[0].input_transform.tf1.surrogate_parameters): + del self.models[0].input_transform.tf1.surrogate_parameters["parameters_combined"] + + def transform_inputs(self, X): + self.prepare_expensive_parameters() + X_tr = super().transform_inputs(X) + self.restart_expensive_parameters() + return X_tr + + def posterior(self, *args, **kwargs): + self.prepare_expensive_parameters() + posterior = super().posterior(*args, **kwargs) + self.restart_expensive_parameters() + return posterior # ---------------------------------------------------------------------------------------------------------------------------- @@ -552,7 +766,7 @@ def forward(self, X): return acq # ---------------------------------------------------------------------------------------------------------------------------- -# Custom kernels +# Custom kernels and means # ---------------------------------------------------------------------------------------------------------------------------- class MITIM_NNKernel(gpytorch.kernels.Kernel): @@ -641,7 +855,6 @@ def forward( return val - class 
MITIM_ConstantKernel(gpytorch.kernels.Kernel): has_lengthscale = False @@ -672,17 +885,12 @@ def forward( return val - -# ---------------------------------------------------------------------------------------------------------------------------- -# Custom means -# ---------------------------------------------------------------------------------------------------------------------------- - -# mitim application: If a variable is a gradient, do linear, if not, do just bias class MITIM_LinearMeanGradients(gpytorch.means.mean.Mean): + # PORTALS application: If a variable is a gradient, do linear, if not, do just bias def __init__(self, batch_shape=torch.Size(), variables=None, **kwargs): super().__init__() - # Indeces of variables that are gradient, so subject to CG behavior + # Indeces of variables that are gradient, so subject to critical gradient behavior grad_vector = [] if variables is not None: for i, variable in enumerate(variables): @@ -705,7 +913,6 @@ def forward(self, x): res = x[..., self.indeces_grad].matmul(self.weights_lin).squeeze(-1) + self.bias return res - class MITIM_CriticalGradient(gpytorch.means.mean.Mean): def __init__(self, batch_shape=torch.Size(), variables=None, **kwargs): super().__init__() @@ -751,3 +958,5 @@ def forward(self, x): + self.bias ) return res + + diff --git a/src/mitim_tools/opt_tools/OPTtools.py b/src/mitim_tools/opt_tools/OPTtools.py index 63c0da47..40bd926f 100644 --- a/src/mitim_tools/opt_tools/OPTtools.py +++ b/src/mitim_tools/opt_tools/OPTtools.py @@ -834,7 +834,7 @@ def plotInfo( ) # ---------- Plot Residue GRAPHICStools.plotMultiVariate( - np.transpose(np.atleast_2d(infoOPT["y_res_start"])), + -np.transpose(np.atleast_2d(infoOPT["y_res_start"])), axs=axR, marker="s", markersize=ms, @@ -869,7 +869,7 @@ def plotInfo( ) # ---------- Plot Residue GRAPHICStools.plotMultiVariate( - np.transpose(np.atleast_2d(infoOPT["y_res"])), + -np.transpose(np.atleast_2d(infoOPT["y_res"])), axs=axR, marker="s", markersize=ms, @@ -894,17 +894,17 @@ def plotInfo( if not plotStart: y = ( - -infoOPT["acq_evaluated"] + infoOPT["acq_evaluated"] if "acq_evaluated" in infoOPT - else -infoOPT["y_res"] + else infoOPT["y_res"] ) else: - y = -infoOPT["y_res_start"] + y = infoOPT["y_res_start"] if not plotStart: - yo = -infoOPT["y_res"][0] + yo = infoOPT["y_res"][0] else: - yo = -infoOPT["y_res_start"][0] + yo = infoOPT["y_res_start"][0] x_origin = 0 + it_start x_last = len(y) - 1 + it_start diff --git a/src/mitim_tools/opt_tools/STEPtools.py b/src/mitim_tools/opt_tools/STEPtools.py index bffb5af4..77a0f67d 100644 --- a/src/mitim_tools/opt_tools/STEPtools.py +++ b/src/mitim_tools/opt_tools/STEPtools.py @@ -1,5 +1,4 @@ import copy -import datetime import torch import botorch import numpy as np @@ -30,151 +29,141 @@ def __init__( StrategyOptions={}, BOmetrics=None, currentIteration=1, - ): + ): """ - train_Ystd is in standard deviations (square root of the variance), absolute magnitude - Rule: X_Y are provided in absolute units. Normalization has to happen inside each surrogate_model, - and de-normalized before giving results to the outside of the function + Notes: + - train_Ystd is in standard deviations (square root of the variance), absolute magnitude + - X_Y are provided untransformed and unnormalized. 
Normalization has to happen inside each + surrogate_model, and de-normalized before giving results to the outside of the function """ self.train_X, self.train_Y, self.train_Ystd = train_X, train_Y, train_Ystd - - """ - Check dimensions - - train_X should be (num_train,dimX) - - train_Y should be (num_train,dimY) - - train_Ystd should be (num_train,dimY) or just one float representing all values - """ - - if len(self.train_X.shape) < 2: - print( - "--> train x only had 1 dimension, assuming that it has only 1 dimension" - ) - self.train_X = np.transpose(np.atleast_2d(self.train_X)) - - if len(self.train_Y.shape) < 2: - print( - "--> train y only had 1 dimension, assuming that it has only 1 dimension" - ) - self.train_Y = np.transpose(np.atleast_2d(self.train_Y)) - - if ( - isinstance(self.train_Ystd, float) - or isinstance(self.train_Ystd, int) - or len(self.train_Ystd.shape) < 2 - ): - print( - "--> train y noise only had 1 value only, assuming constant (std dev) for all samples in absolute terms" - ) - if self.train_Ystd > 0: - print( - "--> train y noise only had 1 value only, assuming constant (std dev) for all samples in absolute terms" - ) - self.train_Ystd = self.train_Y * 0.0 + self.train_Ystd - else: - print( - "--> train y noise only had 1 value only, assuming constant (std dev) for all samples in relative terms" - ) - self.train_Ystd = self.train_Y * np.abs(self.train_Ystd) - - if len(self.train_Ystd.shape) < 2: - print( - "--> train y noise only had 1 dimension, assuming that it has only 1 dimension" - ) - self.train_Ystd = np.transpose(np.atleast_2d(self.train_Ystd)) - - # **** Get argumnets into this class - self.bounds = bounds self.stepSettings = stepSettings self.BOmetrics = BOmetrics self.currentIteration = currentIteration self.StrategyOptions = StrategyOptions - # **** Step settings + # **** Step Settings self.surrogateOptions = self.stepSettings["optimization_options"]["surrogateOptions"] self.acquisition_type = self.stepSettings["optimization_options"]["acquisition_type"] self.acquisition_params = self.stepSettings["optimization_options"]["acquisition_params"] self.favor_proximity_type = self.stepSettings["optimization_options"]["favor_proximity_type"] self.optimizers = self.stepSettings["optimization_options"]["optimizers"] self.outputs = self.stepSettings["outputs"] + self.outputs_transformed = self.stepSettings["name_transformed_ofs"] self.dfT = self.stepSettings["dfT"] self.best_points_sequence = self.stepSettings["best_points_sequence"] self.fileOutputs = self.stepSettings["fileOutputs"] self.surrogate_parameters = surrogate_parameters + # **** Check dimensions + self._check_dimensions() + # **** From standard deviation to variance self.train_Yvar = self.train_Ystd**2 - def fit_step(self, avoidPoints=None, fitWithTrainingDataIfContains=None): + def fit_step(self, avoidPoints=None, fit_output_contains=None): """ Notes: - - Note that fitWithTrainingDataIfContains = 'Tar' would only use the train_X,Y,Yvar tensors - to fit those surrogate variables that contain 'Tar' in their names. This is useful when in - PORTALS I want to simply use the training in a file and not directly from train_X,Y,Yvar for - the fluxes but I do want *new* target calculation - """ - - if avoidPoints is None: - avoidPoints = [] - + - fit_output_contains = 'Tar' would only use the train_X,Y,Yvar tensors + to fit those surrogate variables that contain 'Tar' in their names. 
This is useful when in + PORTALS I want to simply use the training in a file and not directly from train_X,Y,Yvar for + the fluxes but I do want new target calculation """ - ********************************************************************************************************************* - Preparing for fit - ********************************************************************************************************************* - """ # Prepare case information. Copy because I'll be removing outliers - self.x, self.y, self.yvar = ( - copy.deepcopy(self.train_X), - copy.deepcopy(self.train_Y), - copy.deepcopy(self.train_Yvar), - ) + self.x = copy.deepcopy(self.train_X) + self.y = copy.deepcopy(self.train_Y) + self.yvar = copy.deepcopy(self.train_Yvar) # Add outliers to avoid points (it cannot happen inside of SURROGATEtools or it will fail at combining) - self.avoidPoints = copy.deepcopy(avoidPoints) - self.curate_outliers() + self.avoidPoints = copy.deepcopy(avoidPoints) if avoidPoints is not None else [] + self._curate_outliers() if self.fileOutputs is not None: with open(self.fileOutputs, "a") as f: f.write("\n\n-----------------------------------------------------") f.write("\n * Fitting GP models to training data...") - print( - f"\n~~~~~~~ Performing fitting with {len(self.train_X)-len(self.avoidPoints)} training points ({len(self.avoidPoints)} avoided from {len(self.train_X)} total) ~~~~~~~~~~\n" + + # Performing Fit + + print(f"\n~~~~~~~ Fitting with {len(self.train_X)-len(self.avoidPoints)} training points ({len(self.avoidPoints)} avoided from {len(self.train_X)} total) ~~~~~~~~~~\n") + + with IOtools.timer(name = "\n\t- Fitting", name_timer = '\t\t- Time: ') as t: + + self.GP = {} + self._fit_multioutput_model(); self.GP["combined_model"] = self.GP["mo_model"] + #self._fit_individual_models(fit_output_contains=fit_output_contains) + + if self.fileOutputs is not None: + with open(self.fileOutputs, "a") as f: + f.write(f" (took total of {t.timeDiff_txt})") + + def _fit_multioutput_model(self): + + # Base model + self.GP["mo_model"] = SURROGATEtools.surrogate_model.only_define( + self.x, + self.y, + self.yvar, + self.surrogate_parameters, + outputs=self.outputs, + outputs_transformed=self.outputs_transformed, + bounds=self.bounds, + dfT=self.dfT, + surrogateOptions=self.surrogateOptions, ) - """ - ********************************************************************************************************************* - Performing Fit - ********************************************************************************************************************* - """ + # - + surrogateOptions_dict = self.surrogateOptions["selectSurrogates"](self.outputs, self.surrogateOptions) + + gps, j = [], 0 + for i in surrogateOptions_dict: + gps.append( + SURROGATEtools.surrogate_model( + self.x, + self.y[:, j:i], + self.yvar[:, j:i], + self.surrogate_parameters, + outputs=self.outputs[j:i], + outputs_transformed=self.outputs_transformed[j:i], + bounds=self.bounds, + dfT=self.dfT, + surrogateOptions=surrogateOptions_dict[i], + ) + ) + j = i + + # Fitting + gpmodels = [] + for gp in gps: + gp.fit() + gpmodels.append(gp.gpmodel) + + # Joining + self.GP["mo_model"].gpmodel = BOTORCHtools.ModelListGP_MITIM(*gpmodels) + + def _fit_individual_models(self, fit_output_contains=None): - self.GP = {"individual_models": [None] * self.y.shape[-1]} fileTraining = IOtools.expandPath(self.stepSettings['folderOutputs']) / "surrogate_data.csv" fileBackup = fileTraining.parent / "surrogate_data.csv.bak" if 
fileTraining.exists(): fileTraining.replace(fileBackup) print("--> Fitting multiple single-output models and creating composite model") - time1 = datetime.datetime.now() + + self.GP["individual_models"] = [None] * self.y.shape[-1] for i in range(self.y.shape[-1]): - outi = self.outputs[i] if (self.outputs is not None) else None - # ----------------- specialTreatment is applied when I only want to use training data from a file, not from train_X - specialTreatment = ( - (outi is not None) - and (fitWithTrainingDataIfContains is not None) - and (fitWithTrainingDataIfContains not in outi) - ) - # ----------------------------------------------------------------------------------------------------------------------------------- - - outi_transformed = ( - self.stepSettings["name_transformed_ofs"][i] - if (self.stepSettings["name_transformed_ofs"] is not None) - else outi - ) + # Grab name of output (raw and transformed) + output_this = self.outputs[i] if (self.outputs is not None) else None + output_this_tr = self.outputs_transformed[i] if (self.outputs_transformed is not None) else None + # specialTreatment is applied when I only want to use training data from a file, not from train_X + specialTreatment = (output_this is not None) and (fit_output_contains is not None) and (fit_output_contains not in output_this) + # --------------------------------------------------------------------------------------------------- # Define model-specific functions for this output # --------------------------------------------------------------------------------------------------- @@ -182,41 +171,25 @@ def fit_step(self, avoidPoints=None, fitWithTrainingDataIfContains=None): surrogateOptions = copy.deepcopy(self.surrogateOptions) # Then, depending on application (e.g. targets in mitim are fitted differently) - if ( - "selectSurrogate" in surrogateOptions - and surrogateOptions["selectSurrogate"] is not None - ): - surrogateOptions = surrogateOptions["selectSurrogate"]( - outi, surrogateOptions - ) + if ("selectSurrogates" in surrogateOptions) and (surrogateOptions["selectSurrogates"] is not None): + surrogateOptions = surrogateOptions["selectSurrogates"](output_this, surrogateOptions) # --------------------------------------------------------------------------------------------------- # To avoid problems with fixed values (e.g. 
calibration terms that are fixed) # --------------------------------------------------------------------------------------------------- threshold_to_consider_fixed = 1e-6 - MaxRelativeDifference = np.abs(self.y.max() - self.y.min()) / np.abs( - self.y.mean() - ) + MaxRelativeDifference = np.abs(self.y.max() - self.y.min()) / np.abs(self.y.mean()) - if ( - np.isnan(MaxRelativeDifference) - or ( - (self.y.shape[0] > 1) - and ((MaxRelativeDifference < threshold_to_consider_fixed).all()) - ) - ) and (not specialTreatment): - print( - f"\t- Identified that outputs did not change, utilizing constant kernel for {outi}", - typeMsg="w", - ) + FixedValue = False + if (np.isnan(MaxRelativeDifference) or \ + ((self.y.shape[0] > 1) and ((MaxRelativeDifference < threshold_to_consider_fixed).all())) + ) and (not specialTreatment): + print(f"\t- Identified that outputs did not change, utilizing constant kernel for {output_this}",typeMsg="w",) FixedValue = True surrogateOptions["TypeMean"] = 0 surrogateOptions["TypeKernel"] = 6 # Constant kernel - - else: - FixedValue = False - + # --------------------------------------------------------------------------------------------------- # Fit individual output # --------------------------------------------------------------------------------------------------- @@ -227,15 +200,13 @@ def fit_step(self, avoidPoints=None, fitWithTrainingDataIfContains=None): yvar = np.expand_dims(self.yvar[:, i], axis=1) if specialTreatment: - x, y, yvar = ( - np.empty((0, x.shape[-1])), - np.empty((0, y.shape[-1])), - np.empty((0, y.shape[-1])), - ) + x = np.empty((0, x.shape[-1])) + y = np.empty((0, y.shape[-1])) + yvar = np.empty((0, y.shape[-1])) # Surrogate - print(f"~ Model for output: {outi}") + print(f"~ Model for output: {output_this}") GP = SURROGATEtools.surrogate_model( x, @@ -243,11 +214,11 @@ def fit_step(self, avoidPoints=None, fitWithTrainingDataIfContains=None): yvar, self.surrogate_parameters, bounds=self.bounds, - output=outi, - output_transformed=outi_transformed, - avoidPoints=self.avoidPoints, + outputs=[output_this], + outputs_transformed=[output_this_tr], dfT=self.dfT, surrogateOptions=surrogateOptions, + avoidPoints=self.avoidPoints, FixedValue=FixedValue, fileTraining=fileTraining, ) @@ -270,16 +241,17 @@ def fit_step(self, avoidPoints=None, fitWithTrainingDataIfContains=None): self.y, self.yvar, self.surrogate_parameters, - avoidPoints=self.avoidPoints, bounds=self.bounds, dfT=self.dfT, + outputs=self.outputs, surrogateOptions=self.surrogateOptions, + avoidPoints=self.avoidPoints, ) models = () for GP in self.GP["individual_models"]: models += (GP.gpmodel,) - self.GP["combined_model"].gpmodel = BOTORCHtools.ModifiedModelListGP(*models) + self.GP["combined_model"].gpmodel = BOTORCHtools.ModelListGP_MITIM(*models) # ------------------------------------------------------------------------------------------------------ # Make sure each model has the right surrogate_transformation_variables inside the combined model @@ -287,18 +259,16 @@ def fit_step(self, avoidPoints=None, fitWithTrainingDataIfContains=None): if self.GP["combined_model"].surrogate_transformation_variables is not None: for i in range(self.y.shape[-1]): - outi = self.outputs[i] if (self.outputs is not None) else None + output_this = self.outputs[i] if (self.outputs is not None) else None - if outi is not None: - self.GP["combined_model"].surrogate_transformation_variables[outi] = self.GP["individual_models"][i].surrogate_transformation_variables[outi] - - print(f"--> Fitting of all models took 
{IOtools.getTimeDifference(time1)}") + if output_this is not None: + self.GP["combined_model"].surrogate_transformation_variables[output_this] = self.GP["individual_models"][i].surrogate_transformation_variables[output_this] """ - ********************************************************************************************************************* - Postprocessing - ********************************************************************************************************************* - """ + ********************************************************************************************************************* + Postprocessing + ********************************************************************************************************************* + """ # Test (if test could not be launched is likely because a singular matrix for Choleski decomposition) print("--> Launching tests to assure batch evaluation accuracy") @@ -310,22 +280,14 @@ def fit_step(self, avoidPoints=None, fitWithTrainingDataIfContains=None): print("--> Launching tests evaluate accuracy on training set (absolute units)") self.GP["combined_model"].testTraining() - txt_time = IOtools.getTimeDifference(time1) - - print( - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" - ) - - if self.fileOutputs is not None: - with open(self.fileOutputs, "a") as f: - f.write(f" (took total of {txt_time})") + print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n") def defineFunctions(self, scalarized_objective): """ I create this so that, upon reading a pickle, I re-call it. Otherwise, it is very heavy to store lambdas """ - self.evaluators = {"GP": self.GP["combined_model"]} + self.evaluators = {"GP": self.GP["combined_model"]}#mo_model"]} # ************************************************************************************************** # Objective (multi-objective model -> single objective residual) @@ -335,9 +297,7 @@ def defineFunctions(self, scalarized_objective): def residual(Y, X = None): return scalarized_objective(Y)[2] - self.evaluators["objective"] = botorch.acquisition.objective.GenericMCObjective( - residual - ) + self.evaluators["objective"] = botorch.acquisition.objective.GenericMCObjective(residual) # ************************************************************************************************** # Acquisition functions (following BoTorch assumption of maximization) @@ -378,6 +338,16 @@ def residual(Y, X = None): ) ) + elif self.acquisition_type == "ei_mc": + self.evaluators["acq_function"] = ( + botorch.acquisition.monte_carlo.qExpectedImprovement( + self.evaluators["GP"].gpmodel, + objective=self.evaluators["objective"], + best_f=self.evaluators["objective"](self.evaluators["GP"].train_Y.unsqueeze(1)).max(), + sampler=sampler + ) + ) + elif self.acquisition_type == "logei_mc": self.evaluators["acq_function"] = ( botorch.acquisition.logei.qLogExpectedImprovement( @@ -402,6 +372,14 @@ def residual(Y, X = None): # around best, needs the raw one! 
(for noisy it is automatic) self.evaluators["acq_function"].X_baseline = self.evaluators["GP"].train_X + + embed() + x = torch.rand(64, self.train_X.shape[-1]).to(self.dfT) + with IOtools.speeder("/Users/pablorf/PROJECTS/project_2024_PORTALSdevelopment/speed/profiler_acq64.prof") as s: + self.evaluators["acq_function"](x) + + + # ************************************************************************************************** # Quick function to return components (I need this for ROOT too, since I need the components) # ************************************************************************************************** @@ -444,10 +422,10 @@ def optimize( self.defineFunctions(scalarized_objective) """ - *********************************************** - Peform optimization - *********************************************** - """ + *********************************************** + Peform optimization + *********************************************** + """ if self.fileOutputs is not None: with open(self.fileOutputs, "a") as f: @@ -482,7 +460,7 @@ def optimize( f"\n~~ Complete acquisition workflows found {self.x_next.shape[0]} points" ) - def curate_outliers(self): + def _curate_outliers(self): # Remove outliers self.outliers = removeOutliers( self.y, @@ -505,6 +483,47 @@ def curate_outliers(self): if len(self.avoidPoints) > 0: print(f"\t ~~ Avoiding {len(self.avoidPoints)} points: ", self.avoidPoints) + def _check_dimensions(self): + """ + Check dimensions + - train_X should be (num_train,dimX) + - train_Y should be (num_train,dimY) + - train_Ystd should be (num_train,dimY) or just one float representing all values + """ + + if len(self.train_X.shape) < 2: + print("--> train x only had 1 dimension, assuming that it has only 1 dimension") + self.train_X = np.transpose(np.atleast_2d(self.train_X)) + + if len(self.train_Y.shape) < 2: + print("--> train y only had 1 dimension, assuming that it has only 1 dimension") + self.train_Y = np.transpose(np.atleast_2d(self.train_Y)) + + if ( + isinstance(self.train_Ystd, float) + or isinstance(self.train_Ystd, int) + or len(self.train_Ystd.shape) < 2 + ): + print( + "--> train y noise only had 1 value only, assuming constant (std dev) for all samples in absolute terms" + ) + if self.train_Ystd > 0: + print( + "--> train y noise only had 1 value only, assuming constant (std dev) for all samples in absolute terms" + ) + self.train_Ystd = self.train_Y * 0.0 + self.train_Ystd + else: + print( + "--> train y noise only had 1 value only, assuming constant (std dev) for all samples in relative terms" + ) + self.train_Ystd = self.train_Y * np.abs(self.train_Ystd) + + if len(self.train_Ystd.shape) < 2: + print( + "--> train y noise only had 1 dimension, assuming that it has only 1 dimension" + ) + self.train_Ystd = np.transpose(np.atleast_2d(self.train_Ystd)) + def removeOutliers(y, stds_outside=5, stds_outside_checker=1, alreadyAvoided=[]): """ diff --git a/src/mitim_tools/opt_tools/STRATEGYtools.py b/src/mitim_tools/opt_tools/STRATEGYtools.py index 5fb31e74..bb527699 100644 --- a/src/mitim_tools/opt_tools/STRATEGYtools.py +++ b/src/mitim_tools/opt_tools/STRATEGYtools.py @@ -21,8 +21,6 @@ from mitim_tools.misc_tools.LOGtools import printMsg as print from mitim_tools import __mitimroot__ -UseCUDAifAvailable = True - """ Example usage (see tutorials for actual examples and parameter definitions): @@ -70,13 +68,18 @@ def __init__( self, folder, namelist=None, - TensorsType=torch.double, default_namelist_function=None, + tensor_opts = { + "dtype": torch.double, + 
"device": torch.device("cpu"), + } ): """ Namelist file can be provided and will be copied to the folder """ + self.tensor_opts = tensor_opts + print("- Parent opt_evaluator function initialized") self.folder = folder @@ -124,17 +127,9 @@ def __init__( # Determine type of tensors to work with torch.set_default_dtype( - TensorsType + self.tensor_opts["dtype"] ) # In case I forgot to specify a type explicitly, use as default (https://github.com/pytorch/botorch/discussions/1444) - self.dfT = torch.randn( - (2, 2), - dtype=TensorsType, - device=torch.device( - "cpu" - if ((not UseCUDAifAvailable) or (not torch.cuda.is_available())) - else "cuda" - ), - ) + self.dfT = torch.randn( (2, 2), **tensor_opts) # Name of calibrated objectives (e.g. QiRes1 to represent the objective from Qi1-QiT1) self.name_objectives = None @@ -1662,7 +1657,7 @@ def plotAcquisitionOptimization(self, fn=None, step_from=0, step_to=-1): fig = fn.add_figure(label='Acquisition Convergence') - axs = GRAPHICStools.producePlotsGrid(len(step_num), fig=fig, hspace=0.6, wspace=0.3, sharex=False, sharey=False) + axs = GRAPHICStools.producePlotsGrid(len(step_num), fig=fig, hspace=0.6, wspace=0.3) for step in step_num: @@ -1681,20 +1676,18 @@ def plotAcquisitionOptimization(self, fn=None, step_from=0, step_to=-1): for ix in range(self.steps[step].train_X.shape[0]): acq_trained[ix] = acq(torch.Tensor(self.steps[step].train_X[ix,:]).unsqueeze(0)).item() - # Plot - ax.plot(y_acq, c='b') - ax.axhline(y=acq_trained.max(), c='r', ls='--', lw=0.5, label='max acq of trained points') + ax.plot(y_acq,'-o', c='g', markersize=2, lw = 0.5, label='max of batch') + ax.axhline(y=acq_trained.max(), c='r', ls='--', lw=1.0, label='max trained') + ax.axhline(y=y_acq[0], c='b', ls='--', lw=1.0, label='max of guesses') ax.set_title(f'BO Step #{step}') - ax.set_ylabel('acquisition') - ax.set_xlabel('iteration') + ax.set_ylabel('$f_{acq}$ (to max)') + ax.set_xlabel('Evaluations') if step == step_num[0]: - ax.legend() + ax.legend(loc='best') GRAPHICStools.addDenseAxis(ax) - ax.set_ylim(top=0.0) - def plotModelStatus( self, fn=None, boStep=-1, plotsPerFigure=20, stds=2, tab_color=None @@ -1878,21 +1871,12 @@ def plotSurrogateOptimization(self, fig1=None, fig2=None, boStep=-1): xypair = np.array(xypair) - loga = True if xypair[:, 1].min() > 0 else False - axsDVs[0].legend(prop={"size": 5}) - if loga: - for p in range(len(axsOFs)): - axsOFs[p].set_xscale("log") - axsOFs[p].set_yscale("log") - ax1_r.set_ylabel("DV values") GRAPHICStools.addDenseAxis(ax1_r) GRAPHICStools.autoscale_y(ax1_r) - ax2_r.set_ylabel("Residual values") - if loga: - ax2_r.set_yscale("log") + ax2_r.set_ylabel("Acquisition values") GRAPHICStools.addDenseAxis(ax2_r) GRAPHICStools.autoscale_y(ax2_r) @@ -1901,20 +1885,18 @@ def plotSurrogateOptimization(self, fig1=None, fig2=None, boStep=-1): iinfo = info[-1]["info"] for i, y in enumerate(iinfo["y_res"]): ax0_r.axhline( - y=-y, + y=y, c=colors[ipost + 1], ls="--", lw=2, label=info[-1]["method"] if i == 0 else "", ) iinfo = info[0]["info"] - ax0_r.axhline(y=-iinfo["y_res_start"][0], c="k", ls="--", lw=2) + ax0_r.axhline(y=iinfo["y_res_start"][0], c="k", ls="--", lw=2) ax0_r.set_xlabel("Optimization iterations") - ax0_r.set_ylabel("$-f_{acq}$") + ax0_r.set_ylabel("$f_{acq}$") GRAPHICStools.addDenseAxis(ax0_r) - if loga: - ax0_r.set_yscale("log") ax0_r.legend(loc="best", prop={"size": 8}) ax0_r.set_title("Evolution of acquisition in optimization stages") diff --git a/src/mitim_tools/opt_tools/SURROGATEtools.py 
b/src/mitim_tools/opt_tools/SURROGATEtools.py index 52f40286..6679c8da 100644 --- a/src/mitim_tools/opt_tools/SURROGATEtools.py +++ b/src/mitim_tools/opt_tools/SURROGATEtools.py @@ -13,15 +13,9 @@ from mitim_tools.misc_tools.LOGtools import printMsg as print from IPython import embed - - -UseCUDAifAvailable = True - # --------------------------------------------------------------------------------- # Model Class # --------------------------------------------------------------------------------- - - class surrogate_model: """ This is where each of the fittings take place. @@ -32,66 +26,96 @@ class surrogate_model: """ - def __init__( - self, + @classmethod + def only_define(cls, *args, **kwargs): + # Create an instance of the class + instance = cls.__new__(cls) + # Initialize the parameters manually + instance._init_parameters(*args, **kwargs) + return instance + + def _init_parameters(self, Xor, Yor, Yvaror, surrogate_parameters, - output=None, - output_transformed=None, + outputs=None, + outputs_transformed=None, bounds=None, avoidPoints=None, dfT=None, surrogateOptions={}, FixedValue=False, fileTraining=None, + seed = 0 ): - """ - Noise is variance here (square of standard deviation). - """ - - if avoidPoints is None: - avoidPoints = [] - - torch.manual_seed(0) - - self.avoidPoints = avoidPoints - self.output = output - self.output_transformed = output_transformed + self.avoidPoints = avoidPoints if avoidPoints is not None else [] + self.outputs = outputs + self.outputs_transformed = outputs_transformed self.surrogateOptions = surrogateOptions self.dfT = dfT self.surrogate_parameters = surrogate_parameters self.bounds = bounds self.FixedValue = FixedValue self.fileTraining = fileTraining - - self.losses = None - if self.dfT is None: - self.dfT = torch.randn( - (2, 2), - dtype=torch.double, - device=torch.device( - "cpu" - if ((not UseCUDAifAvailable) or (not torch.cuda.is_available())) - else "cuda" - ), - ) - - self.train_X = torch.from_numpy(Xor).to(self.dfT) + self.dfT = torch.randn((2, 2),dtype=torch.double,device=torch.device("cpu")) self.train_Y = torch.from_numpy(Yor).to(self.dfT) + self.train_X = torch.from_numpy(Xor).to(self.dfT) # Extend noise if needed if isinstance(Yvaror, float) or len(Yvaror.shape) == 1: - print( - f"\t- Noise (variance) has one value only ({Yvaror}), assuming constant for all samples and outputs in absolute terms", - ) + print(f"\t- Noise (variance) has one value only ({Yvaror}), assuming constant for all samples and outputs in absolute terms") Yvaror = Yor * 0.0 + Yvaror - self.train_Yvar = torch.from_numpy(Yvaror).to(self.dfT) - # ---------- Print ---------- + self.losses = None + + + def __init__( + self, + Xor, + Yor, + Yvaror, + surrogate_parameters, + outputs=None, + outputs_transformed=None, + bounds=None, + avoidPoints=None, + dfT=None, + surrogateOptions={}, + FixedValue=False, + fileTraining=None, + seed = 0 + ): + """ + Note: + - noise is variance (square of standard deviation). 
+ """ + + torch.manual_seed(seed) + + # -------------------------------------------------------------------- + # Input parameters + # -------------------------------------------------------------------- + + self._init_parameters( + Xor, + Yor, + Yvaror, + surrogate_parameters, + outputs=outputs, + outputs_transformed=outputs_transformed, + bounds=bounds, + avoidPoints=avoidPoints, + dfT=dfT, + surrogateOptions=surrogateOptions, + FixedValue=FixedValue, + fileTraining=fileTraining, + seed=seed + ) + + # Print options print("\t- Surrogate options:") for i in self.surrogateOptions: print(f"\t\t{i:20} = {self.surrogateOptions[i]}") @@ -100,270 +124,319 @@ def __init__( # Eliminate points if needed (not from the "added" set) # -------------------------------------------------------------------- - if len(self.avoidPoints) > 0: - print( - f"\t- Fitting without considering points: {self.avoidPoints}", - typeMsg="w", - ) + self._remove_points() - self.train_X = torch.Tensor( - np.delete(self.train_X, self.avoidPoints, axis=0) - ).to(self.dfT) - self.train_Y = torch.Tensor( - np.delete(self.train_Y, self.avoidPoints, axis=0) - ).to(self.dfT) - self.train_Yvar = torch.Tensor( - np.delete(self.train_Yvar, self.avoidPoints, axis=0) - ).to(self.dfT) + # ------------------------------------------------------------------------------------------ + # Retrieve points from file -> Xtr[batch, dimXtr], Ytr[batch, dimYtr], Yvartr[batch, dimYtr] + # ------------------------------------------------------------------------------------------ + + train_X_added_full, train_Y_added, train_Yvar_added, dx_tr_full = self._add_points_from_file() # ------------------------------------------------------------------------------------- - # Add points from file + # Define transformations # ------------------------------------------------------------------------------------- - # Points to be added from file - if ("extrapointsFile" in self.surrogateOptions) and (self.surrogateOptions["extrapointsFile"] is not None) and (self.output is not None) and (self.output in self.surrogateOptions["extrapointsModels"]): + num_training_points = self.train_X.shape[0] + (train_X_added_full.shape[0] if train_X_added_full is not None else 0) - print( - f"\t* Requested extension of training set by points in file {self.surrogateOptions['extrapointsFile']}" - ) + input_transform, outcome_transform, dx_tr, dy_tr = self._define_MITIM_transformations(num_training_points = num_training_points) - df = pd.read_csv(self.surrogateOptions["extrapointsFile"]) - df_model = df[df['Model'] == self.output] + # For easy future use + input_transform_physics = input_transform['tf1'] + outcome_transform_physics = outcome_transform['tf1'] - if len(df_model) == 0: - print("\t- No points for this output in the file, nothing to add", typeMsg="i") - continueAdding = False - else: - continueAdding = True - else: - continueAdding = False - - if continueAdding: - - # Check 1: Do the points for this output share the same x_names? - if df_model['x_names'].nunique() > 1: - print("Different x_names for points in the file, prone to errors", typeMsg='q') - - # Check 2: Is it consistent with the x_names of this run? 
- x_names = df_model['x_names'].apply(ast.literal_eval).iloc[0] - x_names_check = self.surrogate_parameters['surrogate_transformation_variables_lasttime'][self.output] - if x_names != x_names_check: - print("x_names in file do not match the ones in this run, prone to errors", typeMsg='q') + # -------------------------------------------------------------------------------------------- + # Add points from file (provided as if tf1 was used -> I need to broadcast Xtr to all outputs) + # -------------------------------------------------------------------------------------------- - self.train_Y_added = torch.from_numpy(df_model['y'].to_numpy()).unsqueeze(-1).to(self.dfT) - self.train_Yvar_added = torch.from_numpy(df_model['yvar'].to_numpy()).unsqueeze(-1).to(self.dfT) - - x = [] - for i in range(len(x_names)): - x.append(df_model[f'x{i}'].to_numpy()) - self.train_X_added_full = torch.from_numpy(np.array(x).T).to(self.dfT) - - # ------------------------------------------------------------------------------------------------------------ - # Define transformation (here because I want to account for the added points) - # ------------------------------------------------------------------------------------------------------------ - self.num_training_points = self.train_X.shape[0] + self.train_X_added_full.shape[0] - input_transform_physics, outcome_transform_physics, dimTransformedDV_x, dimTransformedDV_y = self._define_physics_transformation() - # ------------------------------------------------------------------------------------------------------------ - - self.train_X_added = ( - self.train_X_added_full[:, :dimTransformedDV_x] if self.train_X_added_full.shape[-1] > dimTransformedDV_x else self.train_X_added_full - ).to(self.dfT) + if train_X_added_full is not None: + raise Exception("[PRF] This is not working, I need to broadcast the input transformation to all outputs") + self.train_X_added_full = train_X_added_full.to(self.dfT) + self.train_X_added = (self.train_X_added_full[:, :dx_tr] if self.train_X_added_full.shape[-1] > dx_tr else self.train_X_added_full).to(self.dfT) + self.train_Y_added = train_Y_added.to(self.dfT) + self.train_Yvar_added = train_Yvar_added.to(self.dfT) + else: - if self.fileTraining is not None: - train_X_Complete, _ = self.surrogate_parameters["transformationInputs"]( - self.train_X, - self.output, - self.surrogate_parameters, - self.surrogate_parameters["surrogate_transformation_variables_lasttime"], - ) - dimTransformedDV_x_full = train_X_Complete.shape[-1] - else: - dimTransformedDV_x_full = self.train_X.shape[-1] - # -------------------------------------------------------------------------------------- - # Define transformation (here because I want to account for the added points) - # -------------------------------------------------------------------------------------- - self.num_training_points = self.train_X.shape[0] - input_transform_physics, outcome_transform_physics, dimTransformedDV_x, dimTransformedDV_y = self._define_physics_transformation() - # ------------------------------------------------------------------------------------------------------------ + x_transformed = input_transform_physics(self.train_X) # [batch, dimX] -> [batch, dimXtr] -> [dimY, batch, dimXtr] + + shape_xtr = list(x_transformed.shape) + shape_xtr[-2] = 0 + shape_xtr[-1] = dx_tr_full + self.train_X_added_full = torch.empty(*shape_xtr).to(self.dfT) # [dimY, 0, dimXtr] + self.train_X_added = torch.empty(*shape_xtr).to(self.dfT) - self.train_X_added_full = torch.empty((0, 
dimTransformedDV_x_full)).to(self.dfT) - self.train_X_added = torch.empty((0, dimTransformedDV_x)).to(self.dfT) - self.train_Y_added = torch.empty((0, dimTransformedDV_y)).to(self.dfT) - self.train_Yvar_added = torch.empty((0, dimTransformedDV_y)).to(self.dfT) + y_transformed, yvar_transformed = outcome_transform_physics(self.train_X, self.train_Y, self.train_Yvar) + shape_ytr = list(y_transformed.shape) + shape_ytr[-2] = 0 + self.train_Y_added = torch.empty(*shape_ytr).to(self.dfT) + self.train_Yvar_added = torch.empty(*shape_ytr).to(self.dfT) # -------------------------------------------------------------------------------------- # Make sure that very small variations are not captured # -------------------------------------------------------------------------------------- - if (self.train_X_added.shape[0] > 0) and (self.train_X.shape[0] > 1): - self.ensureMinimalVariationSuppressed(input_transform_physics) + self._ensure_small_variation_suppressed(input_transform_physics) # -------------------------------------------------------------------------------------- # Make sure at least 2 points # -------------------------------------------------------------------------------------- - if self.train_X.shape[0] + self.train_X_added.shape[0] == 1: - factor = 1.2 - print( - f"\t- This objective had only one point, adding a point with linear interpolation (trick for mitim targets only), {factor}", - typeMsg="w", - ) - self.train_X = torch.cat((self.train_X, self.train_X * factor)) - self.train_Y = torch.cat((self.train_Y, self.train_Y * factor)) - self.train_Yvar = torch.cat((self.train_Yvar, self.train_Yvar * factor)) + self._ensure_minimum_dataset() # ------------------------------------------------------------------------------------- # Check minimum noises # ------------------------------------------------------------------------------------- - self.ensureMinimumNoise() + self._ensure_minimum_noise() # ------------------------------------------------------------------------------------- # Write file with surrogate if there are transformations # ------------------------------------------------------------------------------------- - if (self.fileTraining is not None) and ( - self.train_X.shape[0] + self.train_X_added.shape[0] > 0 - ): - self.writeFileTraining(input_transform_physics, outcome_transform_physics) + #self._write_datafile(input_transform_physics, outcome_transform_physics) # ------------------------------------------------------------------------------------- - # Input and Outcome transform (NORMALIZATIONS) - # ------------------------------------------------------------------------------------- - - input_transform_normalization = botorch.models.transforms.input.Normalize( - dimTransformedDV_x, bounds=None - ).to(self.dfT) - output_transformed_standardization = ( - botorch.models.transforms.outcome.Standardize((dimTransformedDV_y)) - ).to(self.dfT) - # Obtain normalization constants now (although during training this is messed up, so needed later too) - self.normalization_pass( - input_transform_physics, - input_transform_normalization, - outcome_transform_physics, - output_transformed_standardization, - ) - - # ------------------------------------------------------------------------------------ - # Combine transformations in chain of PHYSICS + NORMALIZATION - # ------------------------------------------------------------------------------------ + # ------------------------------------------------------------------------------------- - input_transform = 
botorch.models.transforms.input.ChainedInputTransform( - tf1=input_transform_physics, tf2=input_transform_normalization - ).to(self.dfT) + self.normalization_pass(input_transform, outcome_transform) - outcome_transform = BOTORCHtools.ChainedOutcomeTransform( - tf1=outcome_transform_physics, tf2=output_transformed_standardization - ).to(self.dfT) - - self.variables = ( - self.surrogate_transformation_variables[self.output] - if ( - (self.output is not None) - and ("surrogate_transformation_variables" in self.__dict__) - and (self.surrogate_transformation_variables is not None) - ) - else None - ) + # self.variables = ( + # self.surrogate_transformation_variables[self.outputs[0]] + # if ( + # (self.outputs is not None) + # and ("surrogate_transformation_variables" in self.__dict__) + # and (self.surrogate_transformation_variables is not None) + # ) + # else None + # ) # ************************************************************************************* # Model # ************************************************************************************* - print( - f'\t- Initializing model{" for "+self.output_transformed if (self.output_transformed is not None) else ""}', - ) + print(f'\t- Initializing model{" for "+self.outputs_transformed[0] if (self.outputs_transformed is not None and (len(self.outputs)==1)) else ""}',) - """ - self.train_X contains the untransformed of this specific run: (batch1, dimX) - self.train_X_added contains the transformed of the table: (batch2, dimXtr) - """ - self.gpmodel = BOTORCHtools.ExactGPcustom( + self.gpmodel = BOTORCHtools.SingleTaskGP_MITIM( self.train_X, self.train_Y, self.train_Yvar, input_transform=input_transform, outcome_transform=outcome_transform, surrogateOptions=self.surrogateOptions, - variables=self.variables, + #variables=self.variables, train_X_added=self.train_X_added, train_Y_added=self.train_Y_added, train_Yvar_added=self.train_Yvar_added, ) - def _define_physics_transformation(self): + def _ensure_minimum_dataset(self): - self._select_transition_physics_based_params() + if self.train_X.shape[0] + self.train_X_added.shape[0] == 1: + factor = 1.2 + print( + f"\t- This dataset had only one point, adding a point with linear interpolation (trick for PORTALS targets only), {factor}", + typeMsg="w", + ) + self.train_X = torch.cat((self.train_X, self.train_X * factor)) + self.train_Y = torch.cat((self.train_Y, self.train_Y * factor)) + self.train_Yvar = torch.cat((self.train_Yvar, self.train_Yvar * factor)) - # Input and Outcome transform (PHYSICS) - dimY = self.train_Y.shape[-1] - input_transform_physics = BOTORCHtools.Transformation_Inputs( - self.output, self.surrogate_parameters, self.surrogate_transformation_variables - ).to(self.dfT) - outcome_transform_physics = BOTORCHtools.Transformation_Outcomes( - dimY, self.output, self.surrogate_parameters - ).to(self.dfT) - dimTransformedDV_x = input_transform_physics(self.train_X).shape[-1] - dimTransformedDV_y = dimY + def _define_MITIM_transformations(self, num_training_points): - return input_transform_physics, outcome_transform_physics, dimTransformedDV_x, dimTransformedDV_y + ''' + ******************************************************************************** + Define individual output transformations and then put together + X is [batch, dimX] + Xtr is [batch, dimXtr] of each individual output + Xtr_full is [dimY, batch, dimXtr] of the broadcasted input transformation + + Y is [batch, dimY] + Ytr is [batch, dimY] + ******************************************************************************** 
+ ''' - def _select_transition_physics_based_params(self, ): self.surrogate_transformation_variables = None - if ("surrogate_transformation_variables_alltimes" in self.surrogate_parameters) and (self.surrogate_parameters["surrogate_transformation_variables_alltimes"] is not None): + if ("surrogate_transformation_variables_alltimes" in self.surrogate_parameters) and \ + (self.surrogate_parameters["surrogate_transformation_variables_alltimes"] is not None): transition_position = list(self.surrogate_parameters["surrogate_transformation_variables_alltimes"].keys())[ - np.where( - self.num_training_points - < np.array( - list( - self.surrogate_parameters[ - "surrogate_transformation_variables_alltimes" - ].keys() - ) - ) - )[0][0] - ] + np.where( + num_training_points < np.array(list(self.surrogate_parameters["surrogate_transformation_variables_alltimes"].keys())))[0][0] + ] self.surrogate_transformation_variables = self.surrogate_parameters["surrogate_transformation_variables_alltimes"][transition_position] - def normalization_pass( - self, - input_transform_physics, - input_transform_normalization, - outcome_transform_physics, - outcome_transform_normalization, - ): - input_transform_normalization.training = True - outcome_transform_normalization.training = True - outcome_transform_normalization._is_trained = torch.tensor(False) + input_transformations_physics = [] - train_X_transformed = torch.cat( - (input_transform_physics(self.train_X), self.train_X_added), axis=0 - ) - y, yvar = outcome_transform_physics(self.train_X, self.train_Y, self.train_Yvar) - train_Y_transformed = torch.cat((y, self.train_Y_added), axis=0) - train_Yvar_transformed = torch.cat((yvar, self.train_Yvar_added), axis=0) + for ind_out in range(self.train_Y.shape[-1]): + + input_transform_physics = BOTORCHtools.input_physics_transform( + self.outputs[ind_out], self.surrogate_parameters, self.surrogate_transformation_variables + ).to(self.dfT) + + input_transformations_physics.append(input_transform_physics) + + dimY = self.train_Y.shape[-1] + output_transformation_physics = BOTORCHtools.outcome_physics_transform( + dimY, self.outputs, self.surrogate_parameters + ).to(self.dfT) + + # ------------------------------------------------------------------------------------ + # Broadcast the input transformation to all outputs + # ------------------------------------------------------------------------------------ + + input_transformation_physics = BOTORCHtools.BatchBroadcastedInputTransform(input_transformations_physics) + + transformed_X = input_transformation_physics(self.train_X) + + dx_tr = transformed_X.shape[-1] + dy_tr = self.train_Y.shape[-1] + + # ------------------------------------------------------------------------------------ + # Normalizations + # ------------------------------------------------------------------------------------ + + input_transform_normalization = botorch.models.transforms.input.Normalize( + d = dx_tr, bounds=None, batch_shape=transformed_X.shape[:-2] + ).to(self.dfT) + output_transformed_standardization = botorch.models.transforms.outcome.Standardize( + m = dy_tr, batch_shape=self.train_Y.shape[:-2] + ).to(self.dfT) + + # ------------------------------------------------------------------------------------ + # Combine transformations in chain of PHYSICS + NORMALIZATION + BATCHING + # ------------------------------------------------------------------------------------ + + input_transform = botorch.models.transforms.input.ChainedInputTransform( + tf1=input_transformation_physics, 
tf2=input_transform_normalization ).to(self.dfT) + + outcome_transform = BOTORCHtools.ChainedOutcomeTransform( + tf1=output_transformation_physics, tf2=output_transformed_standardization, tf3=BOTORCHtools.OutcomeToBatchDimension() ).to(self.dfT) + + return input_transform, outcome_transform, dx_tr, dy_tr + + def _add_points_from_file(self): + + is_this_single_output = (self.outputs is not None) and (len(self.outputs) == 1) + potential_addition_of_points = ("add_data_from_file" in self.surrogateOptions) and (self.surrogateOptions["add_data_from_file"] is not None) + + if potential_addition_of_points: + if is_this_single_output: + addition_of_points = self.outputs[0] in self.surrogateOptions["add_data_to_models"] + else: + raise Exception("[MITIM] add_data_from_file can only be used for single output models as of now...") + else: + addition_of_points = False + + # Points to be added from file + continueAdding = False + if addition_of_points: + + print(f"\t* Extending training set by points in file {self.surrogateOptions['add_data_from_file']}") + + df = pd.read_csv(self.surrogateOptions["add_data_from_file"]) + df_model = df[df['Model'] == self.outputs[0]] + + if len(df_model) == 0: + print("\t- No points for this output in the file, nothing to add", typeMsg="i") + continueAdding = False + else: + continueAdding = True + + if continueAdding: + + # Check 1: Do the points for this output share the same x_names? + if df_model['x_names'].nunique() > 1: + print("Different x_names for points in the file, prone to errors", typeMsg='q') + + # Check 2: Is it consistent with the x_names of this run? + x_names = df_model['x_names'].apply(ast.literal_eval).iloc[0] + x_names_check = self.surrogate_parameters['surrogate_transformation_variables_lasttime'][self.outputs[0]] + if x_names != x_names_check: + print("x_names in file do not match the ones in this run, prone to errors", typeMsg='q') + + train_Y_added = torch.from_numpy(df_model['y'].to_numpy()).unsqueeze(-1).to(self.dfT) + train_Yvar_added = torch.from_numpy(df_model['yvar'].to_numpy()).unsqueeze(-1).to(self.dfT) + + x = [] + for i in range(len(x_names)): + x.append(df_model[f'x{i}'].to_numpy()) + train_X_added_full = torch.from_numpy(np.array(x).T).to(self.dfT) + + dx_tr_full = train_X_added_full.shape[-1] + + else: + + train_X_added_full = None + train_Y_added = None + train_Yvar_added = None + + train_X_Complete, _ = self.surrogate_parameters["transformationInputs"]( + self.train_X, + self.outputs[0], + self.surrogate_parameters, + self.surrogate_parameters["surrogate_transformation_variables_lasttime"], + ) + dx_tr_full = train_X_Complete.shape[-1] - train_X_transformed_norm = input_transform_normalization(train_X_transformed) - ( - train_Y_transformed_norm, - train_Yvar_transformed_norm, - ) = outcome_transform_normalization(train_Y_transformed, train_Yvar_transformed) + return train_X_added_full, train_Y_added, train_Yvar_added, dx_tr_full + def normalization_pass(self,input_transform, outcome_transform): + ''' + Notes: + - The goal of this is to capture NOW the normalization and standardization constants, + by accounting for both the actual data and the added data from file + ''' + + # ------------------------------------------------------------------------------------- + # Get input normalization and outcome standardization in training mode + # ------------------------------------------------------------------------------------- + + input_transform['tf2'].training = True + outcome_transform['tf2'].training = True + 
outcome_transform['tf2']._is_trained = torch.tensor(False) + + # ------------------------------------------------------------------------------------------------------- + # Get the input normalization constants by physics-transforming the train_x and adding the data from file + # ------------------------------------------------------------------------------------------------------- + + # Transform the data from file + train_X_transformed = input_transform['tf1'](self.train_X) + + # Concatenate the training data and the data from file + #train_X_transformed = torch.cat((train_X_transformed, self.train_X_added), axis=-2) + + # Get the normalization constants + _ = input_transform['tf2'](train_X_transformed) + + # ----------------------------------------------------------------------------------------------------------- + # Get the outcome standardization constants by physics-transforming the train_y and adding the data from file + # ----------------------------------------------------------------------------------------------------------- + + # Transform the data from file + train_Y_transformed, train_Yvar_transformed = outcome_transform['tf1'](self.train_X, self.train_Y, self.train_Yvar) + + # Concatenate the training data and the data from file + train_Y_transformed = torch.cat((train_Y_transformed, self.train_Y_added), axis=-2) + train_Yvar_transformed = torch.cat((train_Yvar_transformed, self.train_Yvar_added), axis=0) + + # Get the standardization constants + train_Y_transformed_norm, train_Yvar_transformed_norm = outcome_transform['tf2'](train_Y_transformed, train_Yvar_transformed) + + # ------------------------------------------------------------------------------------- # Make sure they are not on training mode - input_transform_normalization.training = False - outcome_transform_normalization.training = False - outcome_transform_normalization._is_trained = torch.tensor(True) + # ------------------------------------------------------------------------------------- + input_transform['tf2'].training = False + outcome_transform['tf2'].training = False + outcome_transform['tf2']._is_trained = torch.tensor(True) + def fit(self): print( - f"\t- Fitting model to {self.train_X.shape[0]+self.train_X_added.shape[0]} points" + f"\t- Fitting model to {self.train_X.shape[-2]+self.train_X_added.shape[-2]} points" ) # --------------------------------------------------------------------------------------------------- @@ -393,8 +466,8 @@ def fit(self): """ # Train always in physics-transformed space, to enable mitim re-use training from file - with fundamental_model_context(self): - track_fval = self.perform_model_fit(mll) + #with fundamental_model_context(self): + track_fval = self.perform_model_fit(mll) # --------------------------------------------------------------------------------------------------- # Asses optimization @@ -405,12 +478,7 @@ def fit(self): # Go back to definining the right normalizations, because the optimizer has to work on training mode... 
# --------------------------------------------------------------------------------------------------- - self.normalization_pass( - self.gpmodel.input_transform["tf1"], - self.gpmodel.input_transform["tf2"], - self.gpmodel.outcome_transform["tf1"], - self.gpmodel.outcome_transform["tf2"], - ) + self.normalization_pass(self.gpmodel.input_transform, self.gpmodel.outcome_transform) def perform_model_fit(self, mll): self.gpmodel.train() @@ -423,7 +491,7 @@ def perform_model_fit(self, mll): # Approx MLL --------------------------------------- (train_x,) = mll.model.train_inputs - approx_mll = len(train_x) > 2000 + approx_mll = False #len(train_x) > 2000 if approx_mll: print( f"\t* Using approximate MLL because x has {len(train_x)} elements", @@ -434,7 +502,6 @@ def perform_model_fit(self, mll): track_fval = [ -mll.forward(mll.model(*mll.model.train_inputs), mll.model.train_targets) .detach() - .item() ] def callback(x, y, mll=mll): @@ -457,7 +524,7 @@ def callback(x, y, mll=mll): mll.eval() print( - f"\n\t- Marginal log likelihood went from {track_fval[0]:.3f} to {track_fval[-1]:.3f}" + f"\n\t- Marginal log likelihood went from {track_fval[0]} to {track_fval[-1]:.3f}" ) return track_fval @@ -482,11 +549,7 @@ def predict(self, X, produceFundamental=False, nSamples=None): # with gpytorch.settings.fast_computations(log_prob=False, solves=False, covar_root_decomposition=False), \ # gpytorch.settings.eval_cg_tolerance(1E-6), gpytorch.settings.fast_pred_samples(state=False), gpytorch.settings.num_trace_samples(0): - with ( - fundamental_model_context(self) - if produceFundamental - else contextlib.nullcontext(self) - ) as surrogate_model: + with (fundamental_model_context(self) if produceFundamental else contextlib.nullcontext(self)) as surrogate_model: posterior = surrogate_model.gpmodel.posterior(X) mean = posterior.mean @@ -502,94 +565,97 @@ def predict(self, X, produceFundamental=False, nSamples=None): return mean, upper, lower, samples - def writeFileTraining(self, input_transform_physics, outcome_transform_physics): + def _write_datafile(self, input_transform_physics, outcome_transform_physics): """ -------------------------------------------------------------------- Write file with surrogate if there are transformations - Note: USE TRANSFORMATIONS AT COMPLETE NUMBER (AFTER TRANSITIONS) for those in this run, but - simply use the info that was in extra_points_file + Note: USE TRANSFORMATIONS AT COMPLETE NUMBER (AFTER TRANSITIONS) + for those in this run, but simply use the info that was in + extra_points_file -------------------------------------------------------------------- """ - # ------------------------------------------------------------------------------------------------------------------------ - # Transform the points without the added from file - # ------------------------------------------------------------------------------------------------------------------------ + if (self.fileTraining is not None) and (self.train_X.shape[-2] + self.train_X_added.shape[-2] > 0): - # I do not use directly input_transform_physics because I need all the columns, not of this specif iteration - train_X_Complete, _ = self.surrogate_parameters["transformationInputs"]( - self.train_X, - self.output, - self.surrogate_parameters, - self.surrogate_parameters["surrogate_transformation_variables_lasttime"], - ) + for i,output in enumerate(self.outputs): - train_Y, train_Yvar = outcome_transform_physics( - self.train_X, self.train_Y, self.train_Yvar - ) + # 
------------------------------------------------------------------------------------------------------------------------ + # Transform the points without the added from file + # ------------------------------------------------------------------------------------------------------------------------ - dv_names_Complete = ( - self.surrogate_parameters["surrogate_transformation_variables_lasttime"][self.output] - if ( - "surrogate_transformation_variables_lasttime" in self.surrogate_parameters - and self.surrogate_parameters["surrogate_transformation_variables_lasttime"] - is not None - ) - else [i for i in self.bounds] - ) + # I do not use directly input_transform_physics because I need all the columns, not of this specif iteration + train_X_Complete, _ = self.surrogate_parameters["transformationInputs"]( + self.train_X, + output, + self.surrogate_parameters, + self.surrogate_parameters["surrogate_transformation_variables_lasttime"], + ) - if self.train_X_added_full.shape[-1] < train_X_Complete.shape[-1]: - print( - "\t\t- Points from file have less input dimensions, extending with NaNs for writing new file", - typeMsg="w", - ) - self.train_X_added_full = torch.cat( - ( - self.train_X_added_full, - torch.full( + train_Y, train_Yvar = outcome_transform_physics( + self.train_X, self.train_Y[...,i].unsqueeze(-1), self.train_Yvar[...,i].unsqueeze(-1) + ) + + dv_names_Complete = ( + self.surrogate_parameters["surrogate_transformation_variables_lasttime"][output] + if ( + "surrogate_transformation_variables_lasttime" in self.surrogate_parameters + and self.surrogate_parameters["surrogate_transformation_variables_lasttime"] + is not None + ) + else [i for i in self.bounds] + ) + + if self.train_X_added_full.shape[-1] < train_X_Complete.shape[-1]: + print( + "\t\t- Points from file have less input dimensions, extending with NaNs for writing new file", + typeMsg="w", + ) + self.train_X_added_full = torch.cat( ( - self.train_X_added_full.shape[0], - train_X_Complete.shape[-1] - - self.train_X_added_full.shape[-1], + self.train_X_added_full, + torch.full( + ( + self.train_X_added_full.shape[-2], + train_X_Complete.shape[-1] + - self.train_X_added_full.shape[-1], + ), + torch.nan, + ), ), - torch.nan, - ), - ), - axis=-1, - ) - elif self.train_X_added_full.shape[-1] > train_X_Complete.shape[-1]: - print( - "\t\t- Points from file have more input dimensions, removing last dimensions for writing new file", - typeMsg="w", - ) - self.train_X_added_full = self.train_X_added_full[ - :, : train_X_Complete.shape[-1] - ] + axis=-1, + ) + elif self.train_X_added_full.shape[-1] > train_X_Complete.shape[-1]: + print( + "\t\t- Points from file have more input dimensions, removing last dimensions for writing new file", + typeMsg="w", + ) + self.train_X_added_full = self.train_X_added_full[..., : train_X_Complete.shape[-1]] - x = torch.cat((self.train_X_added_full, train_X_Complete), axis=0) - y = torch.cat((self.train_Y_added, train_Y), axis=0) - yvar = torch.cat((self.train_Yvar_added, train_Yvar), axis=0) + x = torch.cat((self.train_X_added_full, train_X_Complete), axis=-2) + y = torch.cat((self.train_Y_added, train_Y), axis=-2) + yvar = torch.cat((self.train_Yvar_added, train_Yvar), axis=-2) - # ------------------------------------------------------------------------------------------------------------------------ - # Merged data with existing data frame and write - # ------------------------------------------------------------------------------------------------------------------------ + # 
------------------------------------------------------------------------------------------------------------------------ + # Merged data with existing data frame and write + # ------------------------------------------------------------------------------------------------------------------------ - new_df = create_df_portals(x,y,yvar,dv_names_Complete,self.output) + new_df = create_df_portals(x,y,yvar,dv_names_Complete,output) - if self.fileTraining.exists(): + if self.fileTraining.exists(): - # Load the existing DataFrame from the HDF5 file - existing_df = pd.read_csv(self.fileTraining) + # Load the existing DataFrame from the HDF5 file + existing_df = pd.read_csv(self.fileTraining) - # Concatenate the existing DataFrame with the new DataFrame - combined_df = pd.concat([existing_df, new_df], ignore_index=True) + # Concatenate the existing DataFrame with the new DataFrame + combined_df = pd.concat([existing_df, new_df], ignore_index=True) - else: + else: - combined_df = new_df + combined_df = new_df - # Save the combined DataFrame back to the file - combined_df.to_csv(self.fileTraining, index=False) + # Save the combined DataFrame back to the file + combined_df.to_csv(self.fileTraining, index=False) # -------------------------- # PLOTTING AND POST-ANALYSIS @@ -770,37 +836,58 @@ def testTraining( return axs - def ensureMinimalVariationSuppressed(self, input_transform_physics, thr=1e-6): + def _remove_points(self): + + if len(self.avoidPoints) > 0: + print( + f"\t- Fitting without considering points: {self.avoidPoints}", + typeMsg="w", + ) + + self.train_X = torch.Tensor( + np.delete(self.train_X, self.avoidPoints, axis=0) + ).to(self.dfT) + self.train_Y = torch.Tensor( + np.delete(self.train_Y, self.avoidPoints, axis=0) + ).to(self.dfT) + self.train_Yvar = torch.Tensor( + np.delete(self.train_Yvar, self.avoidPoints, axis=0) + ).to(self.dfT) + + + def _ensure_small_variation_suppressed(self, input_transform_physics, thr=1e-6): """ In some cases, the added data from file might have extremely small variations in some of the fixed inputs, as compared to the trained data of this run. In such a case, modify this variation """ - # Do dimensions of the non-added points change? - x_transform = input_transform_physics(self.train_X) - indecesUnchanged = torch.where( - (x_transform.max(axis=0)[0] - x_transform.min(axis=0)[0]) - / x_transform.mean(axis=0)[0] - < thr - )[0] - - HasThisBeenApplied = 0 - - for i in indecesUnchanged: - if ( - (self.train_X_added[:, i] - x_transform[0, i]) / x_transform[0, i] - ).abs().max() < thr: - HasThisBeenApplied += 1 - for j in range(self.train_X_added.shape[0]): - self.train_X_added[j, i] = x_transform[0, i] - - if HasThisBeenApplied > 0: - print( - f"\t- Supression of small variations {thr:.1e} in added data applied to {HasThisBeenApplied} dims", - typeMsg="w", - ) + if (self.train_X_added.shape[-2] > 0) and (self.train_X.shape[-2] > 1): + + # Do dimensions of the non-added points change? 
+ x_transform = input_transform_physics(self.train_X) + indecesUnchanged = torch.where( + (x_transform.max(axis=-2)[0] - x_transform.min(axis=-2)[0]) + / x_transform.mean(axis=-2)[0] + < thr + )[0] + + HasThisBeenApplied = 0 + + for i in indecesUnchanged: + if ( + (self.train_X_added[:, i] - x_transform[0, i]) / x_transform[0, i] + ).abs().max() < thr: + HasThisBeenApplied += 1 + for j in range(self.train_X_added.shape[-2]): + self.train_X_added[...,j, i] = x_transform[...,0, i] + + if HasThisBeenApplied > 0: + print( + f"\t- Supression of small variations {thr:.1e} in added data applied to {HasThisBeenApplied} dims", + typeMsg="w", + ) - def ensureMinimumNoise(self): + def _ensure_minimum_noise(self): if ("MinimumRelativeNoise" in self.surrogateOptions) and ( self.surrogateOptions["MinimumRelativeNoise"] is not None ): @@ -874,21 +961,25 @@ def assess_optimization(self, track_fval): BOgraphics.printParam(param_name, param, extralab="\t\t\t") - -# Class to call the model posterior directly on transformed space (x and y) class fundamental_model_context(object): + ''' + This is a context manager that will temporarily disable the physics transformations (tf1) + in the surrogate model + ''' def __init__(self, surrogate_model): self.surrogate_model = surrogate_model def __enter__(self): # Works for individual models, not ModelList - self.surrogate_model.gpmodel.input_transform.tf1.flag_to_evaluate = False + for i in range(len(self.surrogate_model.gpmodel.input_transform.tf1.transforms)): + self.surrogate_model.gpmodel.input_transform.tf1.transforms[i].flag_to_evaluate = False self.surrogate_model.gpmodel.outcome_transform.tf1.flag_to_evaluate = False return self.surrogate_model def __exit__(self, *args): - self.surrogate_model.gpmodel.input_transform.tf1.flag_to_evaluate = True + for i in range(len(self.surrogate_model.gpmodel.input_transform.tf1.transforms)): + self.surrogate_model.gpmodel.input_transform.tf1.transforms[i].flag_to_evaluate = True self.surrogate_model.gpmodel.outcome_transform.tf1.flag_to_evaluate = True def create_df_portals(x, y, yvar, x_names, output, max_x = 20): diff --git a/src/mitim_tools/opt_tools/optimizers/BOTORCHoptim.py b/src/mitim_tools/opt_tools/optimizers/BOTORCHoptim.py index 1cae35b1..5c78f32a 100644 --- a/src/mitim_tools/opt_tools/optimizers/BOTORCHoptim.py +++ b/src/mitim_tools/opt_tools/optimizers/BOTORCHoptim.py @@ -1,5 +1,4 @@ import torch -import types import botorch import random from mitim_tools.opt_tools import OPTtools @@ -49,11 +48,17 @@ def findOptima(fun, optimization_params = {}, writeTrajectory=False): acq_evaluated = [] if writeTrajectory: - def new_call(self, x, *args, v=acq_evaluated, **kwargs): - f = fun_opt(x, *args, **kwargs) - v.append(f.max().item()) - return f - fun_opt.__call__ = types.MethodType(new_call, fun_opt) + class CustomFunctionWrapper: + def __init__(self, func, eval_list): + self.func = func + self.eval_list = eval_list + + def __call__(self, x, *args, **kwargs): + f = self.func(x, *args, **kwargs) + self.eval_list.append(f.max().item()) + return f + + fun_opt = CustomFunctionWrapper(fun_opt, acq_evaluated) seq_message = f'({"sequential" if sequential_q else "joint"}) ' if q>1 else '' print(f"\t\t- Optimizing using optimize_acqf: {q = } {seq_message}, {num_restarts = }, {raw_samples = }") diff --git a/src/mitim_tools/opt_tools/utils/TESTtools.py b/src/mitim_tools/opt_tools/utils/TESTtools.py index cab8a417..cee62b1d 100644 --- a/src/mitim_tools/opt_tools/utils/TESTtools.py +++ b/src/mitim_tools/opt_tools/utils/TESTtools.py @@ 
-50,6 +50,7 @@ def testBatchCapabilities(GPs, combinations=[2, 100, 1000]):
     for i in combinations:
         x = GPs.train_X[0:1, :].repeat(i, 1)
+
         y1 = GPs.predict(x)[0]
         y2 = GPs.predict(x[0:1, :])[0]
diff --git a/templates/main.namelist.json b/templates/main.namelist.json
index 6f32673e..ed9ecaf6 100644
--- a/templates/main.namelist.json
+++ b/templates/main.namelist.json
@@ -36,16 +36,16 @@
     "surrogateOptions": {
         "TypeKernel": 0,
         "TypeMean": 0,
-        "selectSurrogate": null,
+        "selectSurrogates": null,
         "FixedNoise": true,
         "ExtraNoise": false,
         "ConstrainNoise": -1e-3,
         "MinimumRelativeNoise": null,
         "stds_outside": null,
         "stds_outside_checker": 5,
-        "extrapointsFile": null,
-        "extrapointsModels": null,
-        "extrapointsModelsAvoidContent": null
+        "add_data_from_file": null,
+        "add_data_to_models": null,
+        "add_data_to_modelsAvoidContent": null
     },
     "StrategyOptions": {
         "boundsRefine": null,
diff --git a/tests/PORTALS_workflow.py b/tests/PORTALS_workflow.py
index f1c8c0ac..92c37990 100644
--- a/tests/PORTALS_workflow.py
+++ b/tests/PORTALS_workflow.py
@@ -4,7 +4,7 @@
 from mitim_modules.portals import PORTALSmain
 from mitim_tools import __mitimroot__
 
-cold_start = True
+cold_start = False
 
 (__mitimroot__ / "tests" / "scratch").mkdir(parents=True, exist_ok=True)
@@ -16,7 +16,7 @@
 os.system(f"rm -r {folderWork.resolve()}")
 
 # Let's not consume the entire computer resources when running test... limit to 4 threads
-torch.set_num_threads(8)
+# torch.set_num_threads(8)
 
 # --------------------------------------------------------------------------------------------
 # Optimization Class
@@ -24,7 +24,7 @@
 # Initialize class
 portals_fun = PORTALSmain.portals(folderWork)
-portals_fun.optimization_options["BO_iterations"] = 1
+portals_fun.optimization_options["BO_iterations"] = 5
 portals_fun.optimization_options["initial_training"] = 3
 portals_fun.MODELparameters["RhoLocations"] = [0.25, 0.45, 0.65, 0.85]
 portals_fun.INITparameters["removeFast"] = True
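
For reference, the short sketches below illustrate how some of the renamed options and new arguments in this diff might be used; they are sketches under stated assumptions, not code taken from the repository. First, the renamed surrogate data-reuse options (formerly extrapointsFile, extrapointsModels and extrapointsModelsAvoidContent), set on a PORTALS problem in the style of tests/PORTALS_workflow.py. The CSV path and output names are hypothetical; outputs whose names contain an entry of add_data_to_modelsAvoidContent (for example 'Tar') are excluded from reuse.

    # Sketch only: reuse surrogate data from a previous run under the new option names.
    surr = portals_fun.optimization_options["surrogateOptions"]
    surr["add_data_from_file"] = "previous_run/Outputs/surrogate_data.csv"  # hypothetical path
    surr["add_data_to_models"] = ["QeTurb_1", "QiTurb_1"]                   # hypothetical output names
    surr["add_data_to_modelsAvoidContent"] = ["Tar"]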
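
Second, a hedged sketch of the new tensor_opts argument that replaces the removed UseCUDAifAvailable auto-detection and the old TensorsType argument of STRATEGYtools.opt_evaluator. GPU use becomes an explicit opt-in and the default stays double precision on CPU; this assumes the PORTALS class forwards the argument to the parent opt_evaluator, as elsewhere in this diff.

    # Sketch only: request an explicit dtype/device for all optimization tensors.
    import torch
    tensor_opts = {"dtype": torch.double, "device": torch.device("cuda")}  # illustrative opt-in GPU use
    portals_fun = PORTALSmain.portals(folderWork, tensor_opts=tensor_opts)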
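
Third, the new STEPtools._fit_multioutput_model expects surrogateOptions["selectSurrogates"] to return a dictionary whose keys are the exclusive end indices of contiguous output groups and whose values are the surrogateOptions to apply to each group. The illustration below is inferred from the y[:, j:i] slicing in that method, not taken from PORTALStools.selectSurrogates itself.

    # Sketch only: fit outputs before the first 'Tar'-named one with one set of options,
    # and the remaining (target-like) outputs with another; both dicts are placeholders here.
    def selectSurrogates_sketch(outputs, surrogateOptions):
        n_first = next((k for k, name in enumerate(outputs) if "Tar" in name), len(outputs))
        return {
            n_first: dict(surrogateOptions),       # options for outputs[:n_first]
            len(outputs): dict(surrogateOptions),  # options for outputs[n_first:]
        }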
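
Finally, a usage sketch of surrogate_model.predict together with the fundamental_model_context shown above: with produceFundamental=True the physics transformations (tf1) are bypassed, so the query points must already be given in the physics-transformed input space. GP, x and x_tr are placeholders for a fitted model and query tensors.

    # Sketch only: posterior in the original input space vs. the transformed ("fundamental") space.
    mean, upper, lower, samples = GP.predict(x)
    mean_tr, upper_tr, lower_tr, _ = GP.predict(x_tr, produceFundamental=True)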