#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Aug 29 12:11:36 2024
@author: atiihone
"""
import pandas as pd
import numpy as np

from hper_util_gp import predict_points

def determine_data_fusion_points(data_fusion_XZ_accum,
                                 df_data_coll_params, acq_fun_params, x_next,
                                 current_surrogate_model_params, materials,
                                 bounds, k):
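    """
    Determine the locations from which data fusion (human opinion) data will
    be queried on the next round, for repetition/batch index k.

    NOTE: this docstring was added for documentation; the parameter roles
    below are inferred from how the arguments are used in this function.

    Parameters
    ----------
    data_fusion_XZ_accum : list of pandas.DataFrame
        Accumulated data fusion data; the first len(materials) columns of
        element k are assumed to hold the input locations.
    df_data_coll_params : dict
        Data collection settings: 'method' ('none', 'all', 'exclz' or 'eig'),
        'use_model', and 'c_grad'/'c_exclz'/'c_eig' where applicable.
    acq_fun_params : dict
        Acquisition function settings, including 'df_input_var' (data fusion
        input variable names) and 'df_model' (data fusion GP, used by 'eig').
    x_next : list of numpy.ndarray
        Suggested next sampling locations per repetition.
    current_surrogate_model_params : dict
        'lengthscale', 'variance' and 'max_gradient' of the current target
        surrogate model.
    materials : list
        Names of the input variables.
    bounds : list of dict
        Search space definition; bounds[0]['domain'] gives the domain limits.
    k : int
        Index of the current repetition/batch.

    Returns
    -------
    pandas.DataFrame
        Data fusion query locations for the next round (possibly empty).
    """
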
    # The locations from which the data fusion data for _the next round_ will
    # be queried are determined here.

    if df_data_coll_params['method'] == 'none':

        # 'model_none' or 'live_none'
        # Never sample from the model or query human, just add an empty df for
        # records.
        result = pd.DataFrame(columns=acq_fun_params['df_input_var'])
    elif df_data_coll_params['method'] == 'all':

        # Always sample human.
        result = pd.DataFrame(x_next[k],
                              columns=acq_fun_params['df_input_var'])

        if df_data_coll_params['use_model'] == False:

            # Request humans to give feedback on the specified samples.
            print('Give feedback on sample quality of these samples:\n',
                  result)
    elif ((df_data_coll_params['method'] == 'exclz') or
          (df_data_coll_params['method'] == 'eig')):

        # Gradient criterion.

        # Constant for the gradient limit.
        c_grad = df_data_coll_params['c_grad']

        # Parameters of the current surrogate model for the optimization
        # target.
        lengthscale_s = current_surrogate_model_params['lengthscale']
        variance_s = current_surrogate_model_params['variance']

        # Sample only if the suggested point has a larger gradient than this
        # limit.
        gradient_limit = np.max((np.sqrt(variance_s)/lengthscale_s)*c_grad)
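        # Note (added for clarity; assumes a squared-exponential/RBF kernel):
        # for such a GP, sqrt(variance_s)/lengthscale_s is the standard
        # deviation of the gradient of functions drawn from the prior, so
        # c_grad acts as a dimensionless multiplier on that natural gradient
        # scale. np.max covers the case where the lengthscale is a
        # per-dimension (ARD) vector.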
        # Max. gradients of the next suggested points in the surrogate model.
        grad_max_s_next = current_surrogate_model_params['max_gradient']

        # Pick new points for which the surrogate model has a high gradient,
        # no matter if there is an earlier data fusion point nearby.
        new_df_points_x_g = x_next[k][grad_max_s_next > gradient_limit]

        # THIS OPTION IS NOT IN USE.
        # Pick new points for which the surrogate model does not have
        # a high gradient but they are located far away from the
        # previously sampled points (in unknown region).
        new_df_points_x_u = x_next[k][grad_max_s_next <= gradient_limit]
        # If there are points to be considered based on the exclusion zone or
        # EIG criterion.
        if new_df_points_x_g.shape[0] > 0:

            # Drop the points that are excluded from the points to be queried
            # because there are previous human evaluations nearby.
            if df_data_coll_params['method'] == 'exclz':

                # Drop points with an earlier data fusion point nearby.
                # 'Nearby' means within c_exclz % of the domain length.
                c_exclz = df_data_coll_params['c_exclz']
                r_limit = (bounds[0]['domain'][1] -
                           bounds[0]['domain'][0]) * c_exclz / 100

                index = 0
                for l in range(len(new_df_points_x_g)):  # Should be u finally!

                    if data_fusion_XZ_accum[k].shape[0] > 0:

                        # Euclidean distances from the candidate point to all
                        # previously collected data fusion points.
                        if np.any(np.sqrt(np.sum((
                                data_fusion_XZ_accum[k].iloc[:, 0:len(materials)] -
                                new_df_points_x_g[index])**2, axis=1)) <
                                r_limit):

                            new_df_points_x_g = np.delete(new_df_points_x_g,
                                                          index, axis=0)
                            # TO DO: Test if index works correctly when batch
                            # BO is used!
                            message = 'Deleted a point based on r exclusion.'
                            # logging.log(21, message)

                        else:

                            index = index + 1
            elif df_data_coll_params['method'] == 'eig':

                # If the data fusion model exists already.
                if data_fusion_XZ_accum[k].shape[0] > 0:

                    # Drop points if the expected information gain for the
                    # human opinion model is too low.

                    # Use the human opinion model for the EIG test.
                    current_df_model = acq_fun_params['df_model']

                    # Predictive variance at each point x (from the data
                    # fusion model).
                    var_d_next = predict_points(
                        current_df_model, new_df_points_x_g)[1]

                    # Data fusion model y (noise) variance estimate.
                    vary_d = current_df_model.Gaussian_noise.variance[0]

                    message = ('Data fusion Gaussian noise variance: ' +
                               str(vary_d))
                    # logging.log(21, message)

                    index = 0
                    for l in range(len(new_df_points_x_g)):

                        eig = 0.5 * (np.log10(var_d_next[l, 0]/vary_d + 1))
                        c_eig = df_data_coll_params['c_eig']

                        # Scale the EIG threshold by the reference value
                        # eig_max (c_eig = 1 samples very little, c_eig = 0
                        # does not limit at all).
                        eig_max = - np.log10(1/2)

                        if eig < (eig_max * c_eig):

                            new_df_points_x_g = np.delete(
                                new_df_points_x_g, index, axis=0)
                            message = 'Deleted a point based on EIG.'
                            # logging.log(21, message)

                        else:

                            index = index + 1
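
        # Note (added for clarity): at this point, new_df_points_x_g holds
        # the suggested points that passed the gradient criterion and were
        # not dropped by the chosen filter: the exclusion zone ('exclz') or
        # the expected information gain threshold c_eig * log10(2) ('eig').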
        # NOT IN USE AT THE MOMENT.
        # Combine the two criteria.
        # np.append(new_df_points_x_g, new_df_points_x_u, axis = 0)
        new_df_points_x = new_df_points_x_g

        if new_df_points_x.shape[0] > 0:

            result = pd.DataFrame(new_df_points_x,
                                  columns=acq_fun_params['df_input_var'])

            if df_data_coll_params['use_model'] == False:

                # Request humans to give feedback on the specified samples.
                print('Give feedback on sample quality of these samples:\n',
                      result)

        else:

            # Add empty DataFrame
            result = pd.DataFrame(columns=acq_fun_params['df_input_var'])

    return result
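

# ---------------------------------------------------------------------------
# Minimal usage sketch (added for illustration; not part of the original
# module). It exercises only the 'all' branch, which needs just x_next,
# df_data_coll_params and acq_fun_params; the values below are hypothetical
# placeholders, and the remaining arguments are unused on this code path.
# ---------------------------------------------------------------------------
if __name__ == '__main__':

    # Two suggested points in a three-variable search space (repetition 0).
    x_next = [np.array([[0.2, 0.3, 0.5],
                        [0.6, 0.1, 0.3]])]
    df_data_coll_params = {'method': 'all', 'use_model': True}
    acq_fun_params = {'df_input_var': ['x0', 'x1', 'x2']}

    df_points = determine_data_fusion_points(
        data_fusion_XZ_accum=None, df_data_coll_params=df_data_coll_params,
        acq_fun_params=acq_fun_params, x_next=x_next,
        current_surrogate_model_params=None, materials=['x0', 'x1', 'x2'],
        bounds=None, k=0)

    print(df_points)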