@@ -39,26 +39,95 @@ def __init__(self, gp_model):
def _scale(self, x):
    """
    Scale inputs using the parent GP model's fitted input scaler.

    Parameters
    ----------
    x : array-like
        Points in the original (unscaled) input space.

    Returns
    -------
    array-like
        `x` transformed by the GP model's input scaling; `fit=False`
        guarantees the scaler learned at training time is reused, not refit.
    """
    return self.gp_model.apply_input_scaling(x, fit=False)
4141
def _safe_normalize(self, v: np.ndarray) -> np.ndarray:
    """
    Normalize `v` to sum to 1, falling back to uniform weights.

    Parameters
    ----------
    v : np.ndarray
        Non-negative weight basis values (may contain NaN).

    Returns
    -------
    np.ndarray
        `v / sum(v)` when the NaN-ignoring sum is finite and positive;
        otherwise a uniform vector `1/len(v)` (guarded against len 0).
    """
    s = float(np.nansum(v))
    # Degenerate basis (all-zero, negative total, or non-finite) -> uniform.
    if not np.isfinite(s) or s <= 0:
        return np.full_like(v, 1.0 / max(len(v), 1))
    return v / s
47+
def _compute_doe_weights(self) -> tuple[np.ndarray, dict]:
    """
    Compute weights for aggregating across GP output components.

    Return (weights, meta) for aggregating across GP output components:
    - PCA on  -> eigenvalue weights (explained_variance_ratio)
    - PCA off -> per-output variance in processed training space.

    Returns
    -------
    tuple[np.ndarray, dict]
        `weights` sums to 1 and has one entry per GP sub-model; `meta`
        records provenance (mode, basis name, raw basis values, sum, count)
        so the weighting is reproducible from the serialized output.
    """
    n_models = len(self.gp_model.model)

    if self.gp_model.use_pca:
        pinfo = self.gp_model.pca_info
        ncomp = int(pinfo['n_components'])
        evr = np.asarray(pinfo['explained_variance_ratio'])[:ncomp].astype(float)
        w = self._safe_normalize(evr)
        if len(w) != n_models:  # defensive: PCA info disagrees with model count
            w = np.full(n_models, 1.0 / max(n_models, 1))
            basis = 'uniform_fallback'
            basis_vals = np.ones(n_models, dtype=float)
        else:
            basis = 'pca_explained_variance_ratio'
            basis_vals = evr
        return w, {
            'mode': 'pca',
            'basis': basis,
            'basis_values': basis_vals.tolist(),
            'sum': float(np.sum(w)),
            'count': int(len(w)),
        }

    # no PCA: weight by per-output variance in the space the GP trained on
    Y = getattr(self.gp_model, 'y_train', None)  # noqa: N806
    if Y is None or Y.size == 0:
        # No training data available -> uniform weights.
        w = np.full(n_models, 1.0 / max(n_models, 1))
        return w, {
            'mode': 'no_pca',
            'basis': 'uniform_no_training_data',
            'basis_values': [1.0] * n_models,
            'sum': float(np.sum(w)),
            'count': int(len(w)),
        }

    # Work in the processed (scaled) output space if a scaler was fitted.
    if self.gp_model.output_scaler is not None:
        Y_proc = self.gp_model.output_scaler.transform(Y)  # noqa: N806
    else:
        Y_proc = Y  # noqa: N806

    raw = np.nanvar(Y_proc, axis=0, ddof=1).astype(float)
    # Replace non-finite / non-positive variances so no output is zeroed out.
    raw = np.where(np.isfinite(raw) & (raw > 0), raw, 1.0)
    w = self._safe_normalize(raw)

    if len(w) != n_models:  # defensive: output count disagrees with model count
        if len(w) > n_models:
            basis_vals = raw[:n_models]
            w = self._safe_normalize(basis_vals)
            basis = 'output_variance_truncated'
        else:
            pad = np.full(
                n_models - len(w), float(np.mean(raw)) if raw.size else 1.0
            )
            basis_vals = np.concatenate([raw, pad])
            w = self._safe_normalize(basis_vals)
            basis = 'output_variance_padded'
    else:
        basis = 'output_variance'
        basis_vals = raw

    return w, {
        'mode': 'no_pca',
        'basis': basis,
        'basis_values': basis_vals.tolist(),
        'sum': float(np.sum(w)),
        'count': int(len(w)),
    }
122+
42123 def _hyperparameters_for_doe (self ):
43124 """
44125 Compute the weighted average of kernel hyperparameters for DoE.
45126
46127 If PCA is used, weights are based on explained variance.
47128 Otherwise, uniform averaging is used.
48129 """
49- if self .gp_model .use_pca :
50- pca_info = self .gp_model .pca_info
51- n_components = pca_info ['n_components' ]
52- explained_variance_ratio = np .asarray (
53- pca_info ['explained_variance_ratio' ]
54- )[:n_components ]
55- if np .sum (explained_variance_ratio ) == 0 :
56- w = np .full (n_components , 1.0 / n_components )
57- else :
58- w = explained_variance_ratio / np .sum (explained_variance_ratio )
59- else :
60- n_models = len (self .gp_model .model )
61- w = np .full (n_models , 1.0 / n_models )
130+ w , _ = self ._compute_doe_weights ()
62131
63132 hyperparameters_matrix = [
64133 np .atleast_2d (model .kern .param_array ) for model in self .gp_model .model
@@ -108,7 +177,7 @@ def select_training_points(
108177 seed = None ,
109178 ):
110179 """
111- Efficient sequential DoE using MSEw or IMSEw with vectorized acquisition .
180+ Efficient sequential DoE using MSEw (equation 10) or IMSEw (equation 9) from Taflanidis et al. (https://doi.org/10.1016/j.ymssp.2025.113014) .
112181
113182 Parameters
114183 ----------
@@ -119,9 +188,9 @@ def select_training_points(
119188 mci_samples : array-like
120189 Monte Carlo integration samples (original space).
121190 use_mse_w : bool
122- Whether to use MSEw ( True) or IMSEw (False).
191+ Whether to use MSE equation (10) ( True) or IMSEw equation (9) (False).
123192 weights : array-like or None
124- Optional importance weights.
193+ Optional importance weights for integration points .
125194 n_samples : int
126195 Number of candidate points to generate (IMSEw only).
127196 seed : int or None
@@ -139,121 +208,136 @@ def select_training_points(
139208
140209 selected_points = []
141210
142- # 1. Fixed candidate pool
211+ # 1. Setup candidate pool based on acquisition function
143212 if use_mse_w :
213+ # MSE (equation 10): candidates are the integration points themselves
214+ mci_samples , weights = remove_duplicate_inputs (
215+ mci_samples ,
216+ weights
217+ if weights is not None
218+ else np .ones ((mci_samples .shape [0 ], 1 )),
219+ )
144220 candidate_pool = mci_samples .copy ()
145221 else :
222+ # IMSEw (equation 9): generate separate candidate pool via LHS
146223 bounds = compute_lhs_bounds (x_train , mci_samples , padding = 0 )
147224 candidate_pool = generate_lhs_candidates (n_samples , bounds , seed = seed )
148225
226+ # 2. Sequential selection
149227 for _ in range (n_points ):
150- # 2. Scale everything
228+ # Scale all arrays
151229 scaled_x_train = self ._scale (x_train )
152230 scaled_candidates = self ._scale (candidate_pool )
153231 scaled_mci_samples = self ._scale (mci_samples )
154232
155- # 3. Set GP inputs
233+ # Set GP with current training set
156234 self .gp_model_for_doe .set_XY (
157235 scaled_x_train , np .zeros ((scaled_x_train .shape [0 ], 1 ))
158236 )
159237
160- # 4. Predict variance at integration points
161- _ , pred_var = self .gp_model_for_doe .predict (scaled_mci_samples )
162- if weights is not None :
163- pred_var *= weights .reshape (- 1 , 1 ) # shape (n_mci, 1)
164-
165- # 5. Vectorized acquisition
238+ # Compute acquisition values based on selected method
166239 if use_mse_w :
167- # Just return the average variance (same for all candidates)
168- acquisition_values = np .mean (pred_var ) * np .ones (
169- (scaled_candidates .shape [0 ], 1 )
170- )
240+ # MSE (equation 10): σ²(θ_new|Θ^(k))
241+ _ , pred_var = self .gp_model_for_doe .predict (scaled_candidates )
242+
243+ # Apply importance weights if provided
244+ if weights is not None :
245+ pred_var *= weights .reshape (- 1 , 1 )
246+
247+ acquisition_values = pred_var
248+
171249 else :
172- # Covariance matrix: shape (n_mci, n_candidates)
250+ # IMSEw (equation 9): (1/N_s) * Σ[R(θ^(l), θ_new)^β * σ²(θ^(l)|Θ^(k))]
251+
252+ # Predict variance at integration points
253+ _ , pred_var = self .gp_model_for_doe .predict (scaled_mci_samples )
254+
255+ # Apply importance weights to integration points
256+ if weights is not None :
257+ pred_var *= weights .reshape (- 1 , 1 )
258+
259+ # Compute covariance K(θ^(l), θ_new)
173260 k_matrix = self .gp_model_for_doe .kern .K (
174261 scaled_mci_samples , scaled_candidates
175262 )
176263
177- # Normalize to get correlation matrix
178- kernel_var = self .gp_model_for_doe .kern .variance [0 ] # scalar
179- corr_matrix = k_matrix / kernel_var # element-wise division
264+ # Get diagonal kernel values for covariance normalization
265+ k_diag_mci = np .diag (
266+ self .gp_model_for_doe .kern .K (scaled_mci_samples )
267+ )
268+ k_diag_candidates = np .diag (
269+ self .gp_model_for_doe .kern .K (scaled_candidates )
270+ )
271+
272+ # Correlations : R(θ^(l), θ_new)
273+ corr_matrix = k_matrix / np .sqrt (
274+ np .outer (k_diag_mci , k_diag_candidates )
275+ )
180276
277+ # Apply beta exponent
181278 beta = 2.0 * scaled_x_train .shape [1 ]
182- # beta = 1.0 * scaled_x_train.shape[1]
183279
184- # Vectorized IMSEw: (corr^beta) * pred_var → mean over MCI samples
185- weighted_var = (
186- corr_matrix ** beta
187- ).T @ pred_var # shape (n_candidates, 1)
280+ # IMSEw computation: (corr^beta).T @ pred_var / N_s
281+ weighted_var = (corr_matrix ** beta ).T @ pred_var
188282 acquisition_values = weighted_var / scaled_mci_samples .shape [0 ]
189283
190- # 6. Select best candidate
284+ # Select candidate with maximum acquisition value
191285 idx = np .argmax (acquisition_values )
192286 next_point = candidate_pool [idx ]
193287 selected_points .append (next_point )
194288
195- # 7. Update training set and candidate pool
289+ # Update training set and candidate pool
196290 x_train = np .vstack ([x_train , next_point ])
197291 candidate_pool = np .delete (candidate_pool , idx , axis = 0 )
198292
293+ if use_mse_w :
294+ # For MSE: maintain alignment by deleting from mci_samples too
295+ mci_samples = np .delete (mci_samples , idx , axis = 0 )
296+ if weights is not None :
297+ weights = np .delete (weights , idx , axis = 0 )
298+ # For IMSEw: keep mci_samples constant (integration domain unchanged)
299+
199300 return np .array (selected_points )
200301
201302 def write_gp_for_doe_to_json (self , filepath : str | Path ):
202- """
203- Write DoE GP kernel hyperparameters and contributing model param_arrays to JSON.
204-
205- Parameters
206- ----------
207- filepath : str or Path
208- Output file path.
209- """
303+ """Write DoE GP kernel hyperparameters and contributing model param_arrays to JSON."""
210304 if not hasattr (self , 'gp_model_for_doe' ):
211305 msg = 'gp_model_for_doe has not been initialized.'
212306 raise RuntimeError (msg )
213307
214- kernel = self .gp_model_for_doe .kern
308+ # Ensure we have the current weighted vector available
309+ if not hasattr (self , 'weighted_hyperparameters' ):
310+ _ = self ._hyperparameters_for_doe ()
311+
312+ # Reuse the exact same weights/provenance used for aggregation
313+ weights , weights_meta = self ._compute_doe_weights ()
215314
216- # Detailed hyperparameters for the DoE model
315+ kernel = self . gp_model_for_doe . kern
217316 doe_hyperparams = {
218- param .name : {
219- 'value' : param .values .tolist () # noqa: PD011
220- if param .size > 1
221- else float (param .values ),
222- 'shape' : param .shape ,
317+ p .name : {
318+ 'value' : p .values .tolist () if p .size > 1 else float (p .values ), # noqa: PD011
319+ 'shape' : p .shape ,
223320 }
224- for param in kernel .parameters
321+ for p in kernel .parameters
225322 }
226323
227- # Aggregation weights
228- if self .gp_model .use_pca :
229- weights = np .asarray (self .gp_model .pca_info ['explained_variance_ratio' ])[
230- : self .gp_model .pca_info ['n_components' ]
231- ]
232- weights = (weights / np .sum (weights )).tolist ()
233- else :
234- weights = (
235- np .ones (len (self .gp_model .model )) / len (self .gp_model .model )
236- ).tolist ()
237-
238- # Contributing models' param_arrays only
239324 contributing_param_arrays = [
240325 gp .kern .param_array .tolist () for gp in self .gp_model .model
241326 ]
242327
243- # Output structure
244328 output = {
245329 'doe_kernel_type' : kernel .name ,
246- 'doe_ARD' : kernel . ARD ,
330+ 'doe_ARD' : getattr ( kernel , ' ARD' , None ) ,
247331 'doe_hyperparameters' : doe_hyperparams ,
248332 'weighted_param_array' : self .weighted_hyperparameters .tolist (),
249- 'aggregation_weights' : weights ,
333+ 'aggregation_weights' : weights .tolist (),
334+ 'aggregation_weights_meta' : weights_meta , # provenance for reproducibility
250335 'contributing_param_arrays' : contributing_param_arrays ,
251336 }
252337
253- # Write JSON file
254- filepath = Path (filepath )
255- filepath .parent .mkdir (parents = True , exist_ok = True )
256- with filepath .open ('w' ) as f :
338+ path = Path (filepath )
339+ path .parent .mkdir (parents = True , exist_ok = True )
340+ with path .open ('w' ) as f :
257341 json .dump (uq_utilities .make_json_serializable (output ), f , indent = 4 )
258342
259343
0 commit comments