Skip to content

Commit 5c2422d

Browse files
authored
Merge pull request NHERI-SimCenter#414 from bsaakash/master
abs - updates to surrogate-aided Bayesian calibration backend
2 parents dfe8c48 + 7f02981 commit 5c2422d

2 files changed

Lines changed: 396 additions & 154 deletions

File tree

modules/performUQ/common/adaptive_doe.py

Lines changed: 159 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -39,26 +39,95 @@ def __init__(self, gp_model):
3939
def _scale(self, x):
4040
return self.gp_model.apply_input_scaling(x, fit=False)
4141

42+
def _safe_normalize(self, v: np.ndarray) -> np.ndarray:
    """
    Normalize *v* so its entries sum to 1.

    Falls back to a uniform vector of the same shape when the (NaN-ignoring)
    sum is non-positive or non-finite, so callers always get usable weights.
    """
    total = float(np.nansum(v))
    if np.isfinite(total) and total > 0:
        return v / total
    # Degenerate sum: return uniform weights of the same shape/dtype as v.
    return np.full_like(v, 1.0 / max(len(v), 1))
47+
48+
def _compute_doe_weights(self) -> tuple[np.ndarray, dict]:
    """
    Compute weights for aggregating across GP output components.

    Return (weights, meta) for aggregating across GP output components:
    - PCA on -> eigenvalue weights (explained_variance_ratio)
    - PCA off -> per-output variance in processed training space.
    """
    n_models = len(self.gp_model.model)

    def build_meta(mode, basis, basis_vals, weights):
        # Provenance record that always accompanies the weight vector.
        return {
            'mode': mode,
            'basis': basis,
            'basis_values': np.asarray(basis_vals).tolist(),
            'sum': float(np.sum(weights)),
            'count': int(len(weights)),
        }

    if self.gp_model.use_pca:
        info = self.gp_model.pca_info
        n_comp = int(info['n_components'])
        evr = np.asarray(info['explained_variance_ratio'])[:n_comp].astype(float)
        weights = self._safe_normalize(evr)
        if len(weights) == n_models:
            basis, basis_vals = 'pca_explained_variance_ratio', evr
        else:
            # Defensive: component count disagrees with model count.
            weights = np.full(n_models, 1.0 / max(n_models, 1))
            basis, basis_vals = 'uniform_fallback', np.ones(n_models, dtype=float)
        return weights, build_meta('pca', basis, basis_vals, weights)

    # No PCA: weight by per-output variance in the space the GP trained on.
    y_train = getattr(self.gp_model, 'y_train', None)
    if y_train is None or y_train.size == 0:
        # No training data available: fall back to uniform weighting.
        weights = np.full(n_models, 1.0 / max(n_models, 1))
        return weights, build_meta(
            'no_pca', 'uniform_no_training_data', np.ones(n_models), weights
        )

    scaler = self.gp_model.output_scaler
    processed = y_train if scaler is None else scaler.transform(y_train)

    raw = np.nanvar(processed, axis=0, ddof=1).astype(float)
    # Replace non-finite / non-positive variances so normalization stays valid.
    raw = np.where(np.isfinite(raw) & (raw > 0), raw, 1.0)
    weights = self._safe_normalize(raw)

    if len(weights) == n_models:
        basis, basis_vals = 'output_variance', raw
    elif len(weights) > n_models:
        # Defensive: more outputs than models -> truncate and renormalize.
        basis_vals = raw[:n_models]
        weights = self._safe_normalize(basis_vals)
        basis = 'output_variance_truncated'
    else:
        # Defensive: fewer outputs than models -> pad with the mean variance.
        fill = float(np.mean(raw)) if raw.size else 1.0
        basis_vals = np.concatenate(
            [raw, np.full(n_models - len(weights), fill)]
        )
        weights = self._safe_normalize(basis_vals)
        basis = 'output_variance_padded'

    return weights, build_meta('no_pca', basis, basis_vals, weights)
122+
42123
def _hyperparameters_for_doe(self):
43124
"""
44125
Compute the weighted average of kernel hyperparameters for DoE.
45126
46127
If PCA is used, weights are based on explained variance.
47128
Otherwise, uniform averaging is used.
48129
"""
49-
if self.gp_model.use_pca:
50-
pca_info = self.gp_model.pca_info
51-
n_components = pca_info['n_components']
52-
explained_variance_ratio = np.asarray(
53-
pca_info['explained_variance_ratio']
54-
)[:n_components]
55-
if np.sum(explained_variance_ratio) == 0:
56-
w = np.full(n_components, 1.0 / n_components)
57-
else:
58-
w = explained_variance_ratio / np.sum(explained_variance_ratio)
59-
else:
60-
n_models = len(self.gp_model.model)
61-
w = np.full(n_models, 1.0 / n_models)
130+
w, _ = self._compute_doe_weights()
62131

63132
hyperparameters_matrix = [
64133
np.atleast_2d(model.kern.param_array) for model in self.gp_model.model
@@ -108,7 +177,7 @@ def select_training_points(
108177
seed=None,
109178
):
110179
"""
111-
Efficient sequential DoE using MSEw or IMSEw with vectorized acquisition.
180+
Efficient sequential DoE using MSEw (equation 10) or IMSEw (equation 9) from Taflanidis et al. (https://doi.org/10.1016/j.ymssp.2025.113014).
112181
113182
Parameters
114183
----------
@@ -119,9 +188,9 @@ def select_training_points(
119188
mci_samples : array-like
120189
Monte Carlo integration samples (original space).
121190
use_mse_w : bool
122-
Whether to use MSEw (True) or IMSEw (False).
191+
Whether to use MSE equation (10) (True) or IMSEw equation (9) (False).
123192
weights : array-like or None
124-
Optional importance weights.
193+
Optional importance weights for integration points.
125194
n_samples : int
126195
Number of candidate points to generate (IMSEw only).
127196
seed : int or None
@@ -139,121 +208,136 @@ def select_training_points(
139208

140209
selected_points = []
141210

142-
# 1. Fixed candidate pool
211+
# 1. Setup candidate pool based on acquisition function
143212
if use_mse_w:
213+
# MSE (equation 10): candidates are the integration points themselves
214+
mci_samples, weights = remove_duplicate_inputs(
215+
mci_samples,
216+
weights
217+
if weights is not None
218+
else np.ones((mci_samples.shape[0], 1)),
219+
)
144220
candidate_pool = mci_samples.copy()
145221
else:
222+
# IMSEw (equation 9): generate separate candidate pool via LHS
146223
bounds = compute_lhs_bounds(x_train, mci_samples, padding=0)
147224
candidate_pool = generate_lhs_candidates(n_samples, bounds, seed=seed)
148225

226+
# 2. Sequential selection
149227
for _ in range(n_points):
150-
# 2. Scale everything
228+
# Scale all arrays
151229
scaled_x_train = self._scale(x_train)
152230
scaled_candidates = self._scale(candidate_pool)
153231
scaled_mci_samples = self._scale(mci_samples)
154232

155-
# 3. Set GP inputs
233+
# Set GP with current training set
156234
self.gp_model_for_doe.set_XY(
157235
scaled_x_train, np.zeros((scaled_x_train.shape[0], 1))
158236
)
159237

160-
# 4. Predict variance at integration points
161-
_, pred_var = self.gp_model_for_doe.predict(scaled_mci_samples)
162-
if weights is not None:
163-
pred_var *= weights.reshape(-1, 1) # shape (n_mci, 1)
164-
165-
# 5. Vectorized acquisition
238+
# Compute acquisition values based on selected method
166239
if use_mse_w:
167-
# Just return the average variance (same for all candidates)
168-
acquisition_values = np.mean(pred_var) * np.ones(
169-
(scaled_candidates.shape[0], 1)
170-
)
240+
# MSE (equation 10): σ²(θ_new|Θ^(k))
241+
_, pred_var = self.gp_model_for_doe.predict(scaled_candidates)
242+
243+
# Apply importance weights if provided
244+
if weights is not None:
245+
pred_var *= weights.reshape(-1, 1)
246+
247+
acquisition_values = pred_var
248+
171249
else:
172-
# Covariance matrix: shape (n_mci, n_candidates)
250+
# IMSEw (equation 9): (1/N_s) * Σ[R(θ^(l), θ_new)^β * σ²(θ^(l)|Θ^(k))]
251+
252+
# Predict variance at integration points
253+
_, pred_var = self.gp_model_for_doe.predict(scaled_mci_samples)
254+
255+
# Apply importance weights to integration points
256+
if weights is not None:
257+
pred_var *= weights.reshape(-1, 1)
258+
259+
# Compute covariance K(θ^(l), θ_new)
173260
k_matrix = self.gp_model_for_doe.kern.K(
174261
scaled_mci_samples, scaled_candidates
175262
)
176263

177-
# Normalize to get correlation matrix
178-
kernel_var = self.gp_model_for_doe.kern.variance[0] # scalar
179-
corr_matrix = k_matrix / kernel_var # element-wise division
264+
# Get diagonal kernel values for covariance normalization
265+
k_diag_mci = np.diag(
266+
self.gp_model_for_doe.kern.K(scaled_mci_samples)
267+
)
268+
k_diag_candidates = np.diag(
269+
self.gp_model_for_doe.kern.K(scaled_candidates)
270+
)
271+
272+
# Correlations : R(θ^(l), θ_new)
273+
corr_matrix = k_matrix / np.sqrt(
274+
np.outer(k_diag_mci, k_diag_candidates)
275+
)
180276

277+
# Apply beta exponent
181278
beta = 2.0 * scaled_x_train.shape[1]
182-
# beta = 1.0 * scaled_x_train.shape[1]
183279

184-
# Vectorized IMSEw: (corr^beta) * pred_var → mean over MCI samples
185-
weighted_var = (
186-
corr_matrix**beta
187-
).T @ pred_var # shape (n_candidates, 1)
280+
# IMSEw computation: (corr^beta).T @ pred_var / N_s
281+
weighted_var = (corr_matrix**beta).T @ pred_var
188282
acquisition_values = weighted_var / scaled_mci_samples.shape[0]
189283

190-
# 6. Select best candidate
284+
# Select candidate with maximum acquisition value
191285
idx = np.argmax(acquisition_values)
192286
next_point = candidate_pool[idx]
193287
selected_points.append(next_point)
194288

195-
# 7. Update training set and candidate pool
289+
# Update training set and candidate pool
196290
x_train = np.vstack([x_train, next_point])
197291
candidate_pool = np.delete(candidate_pool, idx, axis=0)
198292

293+
if use_mse_w:
294+
# For MSE: maintain alignment by deleting from mci_samples too
295+
mci_samples = np.delete(mci_samples, idx, axis=0)
296+
if weights is not None:
297+
weights = np.delete(weights, idx, axis=0)
298+
# For IMSEw: keep mci_samples constant (integration domain unchanged)
299+
199300
return np.array(selected_points)
200301

201302
def write_gp_for_doe_to_json(self, filepath: str | Path):
    """Write DoE GP kernel hyperparameters and contributing model param_arrays to JSON."""
    if not hasattr(self, 'gp_model_for_doe'):
        msg = 'gp_model_for_doe has not been initialized.'
        raise RuntimeError(msg)

    # Make sure the aggregated hyperparameter vector exists before exporting it.
    if not hasattr(self, 'weighted_hyperparameters'):
        _ = self._hyperparameters_for_doe()

    # Reuse exactly the weights/provenance that the aggregation itself used.
    weights, weights_meta = self._compute_doe_weights()

    kernel = self.gp_model_for_doe.kern

    def describe(param):
        # Scalar parameters serialize as floats, vector parameters as lists.
        value = param.values.tolist() if param.size > 1 else float(param.values)
        return {'value': value, 'shape': param.shape}

    doe_hyperparams = {param.name: describe(param) for param in kernel.parameters}

    contributing_param_arrays = [
        model.kern.param_array.tolist() for model in self.gp_model.model
    ]

    output = {
        'doe_kernel_type': kernel.name,
        'doe_ARD': getattr(kernel, 'ARD', None),
        'doe_hyperparameters': doe_hyperparams,
        'weighted_param_array': self.weighted_hyperparameters.tolist(),
        'aggregation_weights': weights.tolist(),
        'aggregation_weights_meta': weights_meta,  # provenance for reproducibility
        'contributing_param_arrays': contributing_param_arrays,
    }

    out_path = Path(filepath)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open('w') as f:
        json.dump(uq_utilities.make_json_serializable(output), f, indent=4)
258342

259343

0 commit comments

Comments
 (0)