1 /* _______________________________________________________________________
2
3 PECOS: Parallel Environment for Creation Of Stochastics
4 Copyright (c) 2011, Sandia National Laboratories.
5 This software is distributed under the GNU Lesser General Public License.
6 For more information, see the README file in the top Pecos directory.
7 _______________________________________________________________________ */
8
9 #include "LHSDriver.hpp"
10 #include "BoostRNG_Monostate.hpp"
11 #include "RangeVariable.hpp"
12 #include "SetVariable.hpp"
13 #include "DiscreteSetRandomVariable.hpp"
14 #include "IntervalRandomVariable.hpp"
15 #include "HistogramBinRandomVariable.hpp"
16
17 static const char rcsId[]="@(#) $Id: LHSDriver.cpp 5248 2008-09-05 18:51:52Z wjbohnh $";
18
19 //#define DEBUG
20
21 #ifdef HAVE_LHS
22 #ifdef HAVE_CONFIG_H
23
24 // Use the classic, autotools Fortran name mangling macros in pecos_config.h
25 #define LHS_INIT_MEM_FC FC_FUNC_(lhs_init_mem,LHS_INIT_MEM)
26 #define LHS_PREP_FC FC_FUNC_(lhs_prep,LHS_PREP)
27 #define LHS_RUN_FC FC_FUNC_(lhs_run,LHS_RUN)
28 #define LHS_CLOSE_FC FC_FUNC_(lhs_close,LHS_CLOSE)
29 #define LHS_OPTIONS2_FC FC_FUNC_(lhs_options2,LHS_OPTIONS2)
30 #define LHS_DIST2_FC FC_FUNC_(lhs_dist2,LHS_DIST2)
31 #define LHS_UDIST2_FC FC_FUNC_(lhs_udist2,LHS_UDIST2)
32 #define LHS_CONST2_FC FC_FUNC_(lhs_const2,LHS_CONST2)
33 #define LHS_CORR2_FC FC_FUNC_(lhs_corr2,LHS_CORR2)
34 #define LHS_FILES2_FC FC_FUNC_(lhs_files2,LHS_FILES2)
35
36 #define defaultrnum1 FC_FUNC(defaultrnum1,DEFAULTRNUM1)
37 #define defaultrnum2 FC_FUNC(defaultrnum2,DEFAULTRNUM2)
38
39 #else
40
41 // Use the CMake-generated PREFIXED, fortran name mangling macros (no warnings)
42 #define LHS_INIT_MEM_FC LHS_GLOBAL_(lhs_init_mem,LHS_INIT_MEM)
43 #define LHS_PREP_FC LHS_GLOBAL_(lhs_prep,LHS_PREP)
44 #define LHS_RUN_FC LHS_GLOBAL_(lhs_run,LHS_RUN)
45 #define LHS_CLOSE_FC LHS_GLOBAL_(lhs_close,LHS_CLOSE)
46 //#define LHS_OPTIONS2_FC LHS_GLOBAL_(lhs_options2,LHS_OPTIONS2)
47 //#define LHS_DIST2_FC LHS_GLOBAL_(lhs_dist2,LHS_DIST2)
48 //#define LHS_UDIST2_FC LHS_GLOBAL_(lhs_udist2,LHS_UDIST2)
49 //#define LHS_CONST2_FC LHS_GLOBAL_(lhs_const2,LHS_CONST2)
50 //#define LHS_CORR2_FC LHS_GLOBAL_(lhs_corr2,LHS_CORR2)
51 //#define LHS_FILES2_FC LHS_GLOBAL_(lhs_files2,LHS_FILES2)
52 #define defaultrnum1 LHS_GLOBAL(defaultrnum1,DEFAULTRNUM1)
53 #define defaultrnum2 LHS_GLOBAL(defaultrnum2,DEFAULTRNUM2)
54
55 // BMA (20160315): Changed to use Fortran 2003 ISO C bindings.
56 // The Fortran symbol will be lowercase with same name as if in C
57 #define LHS_OPTIONS2_FC lhs_options2
58 #define LHS_DIST2_FC lhs_dist2
59 #define LHS_UDIST2_FC lhs_udist2
60 #define LHS_CONST2_FC lhs_const2
61 #define LHS_CORR2_FC lhs_corr2
62 #define LHS_FILES2_FC lhs_files2
63
64 #endif // HAVE_CONFIG_H
65
66 extern "C" {
67
68 // for these functions, a straight interface to F90 can be used
69 void LHS_INIT_MEM_FC( int& obs, int& seed, int& max_obs, int& max_samp_size,
70 int& max_var, int& max_interval, int& max_corr,
71 int& max_table, int& print_level, int& output_width,
72 int& ierror );
73
74 void LHS_PREP_FC( int& ierror, int& num_names, int& num_vars );
75
76 void LHS_RUN_FC( int& max_var, int& max_obs, int& max_names, int& ierror,
77 char* dist_names, int* name_order, Pecos::Real* ptvals,
78 int& num_names, Pecos::Real* sample_matrix, int& num_vars,
79 Pecos::Real* rank_matrix, int& rank_flag );
80
81 void LHS_CLOSE_FC( int& ierror );
82
83 // for these functions, bridge-function interfaces to F90 are required
84 void LHS_OPTIONS2_FC( int& num_replications, int& ptval_option,
85 const char* sampling_options, int& ierror );
86
87 void LHS_DIST2_FC( const char* label, int& ptval_flag, Pecos::Real& ptval,
88 const char* dist_type, Pecos::Real* dist_params,
89 int& num_params, int& ierror, int& dist_id, int& ptval_id );
90
91 void LHS_UDIST2_FC( const char* label, int& ptval_flag, Pecos::Real& ptval,
92 const char* dist_type, int& num_pts, Pecos::Real* x,
93 Pecos::Real* y, int& ierror, int& dist_id, int& ptval_id );
94
95 void LHS_CONST2_FC( const char* label, Pecos::Real& ptval, int& ierror,
96 int& ptval_id );
97
98 void LHS_CORR2_FC( char* label1, char* label2, Pecos::Real& corr, int& ierror );
99
100 void LHS_FILES2_FC( const char* lhsout, const char* lhsmsg, const char* lhstitl,
101 const char* lhsopts, int& ierror );
102
103 //void lhs_run2( int* max_var, int* max_obs, int* max_names, int* ierror,
104 // char* dist_names, int* name_order, double* ptvals,
105 // int* num_names, double* sample_matrix, int* num_vars );
106
107 Pecos::Real defaultrnum1( void );
108 Pecos::Real defaultrnum2( void );
109
110 //Pecos::Real rnumlhs10( void );
111 //Pecos::Real rnumlhs20( void );
112
113 }
114 #endif // HAVE_LHS
115
116 namespace Pecos {
117
118
abort_if_no_lhs()119 void LHSDriver::abort_if_no_lhs()
120 {
121 #ifndef HAVE_LHS
122 PCerr << "Error: LHSDriver not available as PECOS was configured without LHS."
123 << std::endl;
124 abort_handler(-1);
125 #endif
126 }
127
128
seed(int seed)129 void LHSDriver::seed(int seed)
130 {
131 randomSeed = seed;
132 // The Boost RNG is not set by LHS_INIT_MEM, so must be done here.
133 if (BoostRNG_Monostate::randomNum == BoostRNG_Monostate::mt19937)
134 BoostRNG_Monostate::seed(seed);
135 // This would be redundant since the f77 ISeed is set in LHS_INIT_MEM:
136 //else
137 // lhs_setseed(&seed);
138 }
139
140
rng(String unif_gen)141 void LHSDriver::rng(String unif_gen)
142 {
143
144 // check the environment (once only) for an RNG preference and cache it
145 static bool first_entry = true;
146 static const char *env_unifgen;
147 if (first_entry) {
148 env_unifgen = std::getenv("DAKOTA_LHS_UNIFGEN");
149 first_entry = false;
150 }
151
152 // the environment overrides the passed rng specification
153 if (env_unifgen) {
154 unif_gen = env_unifgen;
155 if (unif_gen != "rnum2" && unif_gen != "mt19937") {
156 PCerr << "Error: LHSDriver::rng() expected $DAKOTA_LHS_UNIFGEN to be "
157 << "\"rnum2\" or \"mt19937\", not \"" << env_unifgen << "\".\n"
158 << std::endl;
159 abort_handler(-1);
160 }
161 }
162
163 // now point the monostate RNG to the desired generator function
164 if (unif_gen == "mt19937" || unif_gen.empty()) {
165 BoostRNG_Monostate::randomNum = BoostRNG_Monostate::mt19937;
166 BoostRNG_Monostate::randomNum2 = BoostRNG_Monostate::mt19937;
167 allowSeedAdvance &= ~2; // drop 2 bit: disallow repeated seed update
168 }
169 else if (unif_gen == "rnum2") {
170 #ifdef HAVE_LHS
171 BoostRNG_Monostate::randomNum = (Rfunc)defaultrnum1;
172 BoostRNG_Monostate::randomNum2 = (Rfunc)defaultrnum2;
173 allowSeedAdvance |= 2; // add 2 bit: allow repeated seed update
174 #else
175 PCerr << "Error: LHSDriver::rng() Use of rnum2 for RNG selection is NOT "
176 << "supported in current (without-lhs) configuration" << std::endl;
177 abort_handler(-1);
178 #endif
179 }
180 else {
181 PCerr << "Error: LHSDriver::rng() expected string to be \"rnum2\" or "
182 << "\"mt19937\", not \"" << unif_gen << "\".\n" << std::endl;
183 abort_handler(-1);
184 }
185 }
186
187
188 //String LHSDriver::rng()
189 //{
190 // if (BoostRNG_Monostate::randomNum == BoostRNG_Monostate::random_num1)
191 // return "mt19937";
192 // if (BoostRNG_Monostate::randomNum == (Rfunc)rnumlhs10)
193 // return "rnum2";
194 // else
195 // return "";
196 //}
197
198
199 void LHSDriver::
lhs_dist_register(const char * var_name,const char * dist_name,size_t rv,const RealArray & dist_params)200 lhs_dist_register(const char* var_name, const char* dist_name, size_t rv,
201 const RealArray& dist_params)
202 {
203 String dist_string; f77name32(dist_name, dist_string);
204 String& var_string = lhsNames[rv]; f77name16(var_name, rv, var_string);
205 int num_params = dist_params.size(), err_code = 0, ptval_flag = 0, // inputs
206 dist_num, pv_num; // outputs (not used)
207 Real ptval = 0.;
208
209 LHS_DIST2_FC(var_string.data(), ptval_flag, ptval, dist_string.data(),
210 const_cast<Real*>(&dist_params[0]), num_params, err_code,
211 dist_num, pv_num);
212 check_error(err_code, "lhs_dist()", var_string.data());
213 }
214
215
216 void LHSDriver::
lhs_udist_register(const char * var_name,const char * dist_name,size_t rv,const RealArray & x_val,const RealArray & y_val)217 lhs_udist_register(const char* var_name, const char* dist_name, size_t rv,
218 const RealArray& x_val, const RealArray& y_val)
219 {
220 String dist_string; f77name32(dist_name, dist_string);
221 String& var_string = lhsNames[rv]; f77name16(var_name, rv, var_string);
222 int num_params = std::min(x_val.size(), y_val.size()), err_code = 0,
223 ptval_flag = 0, dist_num, pv_num;
224 Real ptval = 0.;
225
226 LHS_UDIST2_FC(var_string.data(), ptval_flag, ptval, dist_string.data(),
227 num_params, const_cast<Real*>(&x_val[0]),
228 const_cast<Real*>(&y_val[0]), err_code, dist_num, pv_num);
229 check_error(err_code, "lhs_udist()", var_string.data());
230 }
231
232
lhs_const_register(const char * var_name,size_t rv,Real pt_val)233 void LHSDriver::lhs_const_register(const char* var_name, size_t rv, Real pt_val)
234 {
235 String& var_string = lhsNames[rv]; f77name16(var_name, rv, var_string);
236 int err_code = 0, pv_num;
237
238 LHS_CONST2_FC(var_string.data(), pt_val, err_code, pv_num);
239 check_error(err_code, "lhs_const()", var_string.data());
240 }
241
242
243 /** While it would be desirable in some cases to carve this function
244 into smaller parts and allow multiple invocations of LHS_RUN
245 following a single initialization of types and arrays, the LHS
246 code does not currently allow this: it will return an error if
247 LHS_INIT/LHS_INIT_MEM, at least one distribution call (i.e.,
248 LHS_DIST, LHS_UDIST or LHS_SDIST), and LHS_PREP are not called
249 prior to each invocation of LHS_RUN. Since LHS_INIT/LHS_INIT_MEM
250 require input of a seed, the approach to computing multiple
251 distinct sample sets must employ advance_seed_sequence() to
252 re-seed multiple generate_samples() calls, rather than continuing
253 an existing random number sequence. */
254 void LHSDriver::
generate_samples(const std::vector<RandomVariable> & random_vars,const RealSymMatrix & corr,int num_samples,RealMatrix & samples,RealMatrix & sample_ranks,const BitArray & active_vars,const BitArray & active_corr)255 generate_samples(const std::vector<RandomVariable>& random_vars,
256 const RealSymMatrix& corr, int num_samples,
257 RealMatrix& samples, RealMatrix& sample_ranks,
258 const BitArray& active_vars, const BitArray& active_corr)
259 {
260 #ifdef HAVE_LHS
261 // generate samples within user-specified parameter distributions
262
263 // error check on program parameters
264 if (!num_samples) {
265 PCerr << "\nError: number of samples in LHSDriver::generate_samples() must "
266 << "be nonzero." << std::endl;
267 abort_handler(-1);
268 }
269
270 // active_vars identifies the active subset of random_vars for which we will
271 // generate samples; it will vary based on sampling context.
272 size_t i, num_rv = random_vars.size(), num_active_rv, av_cntr;
273 bool subset_rv = false;
274 if (active_vars.empty())
275 num_active_rv = num_rv;
276 else {
277 num_active_rv = active_vars.count();
278 if (num_active_rv < num_rv) subset_rv = true;
279 }
280 lhsNames.resize(num_active_rv);
281
282 // active_corr identifies the subset of random_vars for which we specify
283 // correlations; it need not vary based on sampling context, potentially
284 // indicating a static subset that admits correlations in general (e.g.,
285 // cont/disc aleatory RV in the current Dakota XML spec).
286 // > Both active subsets are relative to the full RV vector (i.e., not
287 // nested subsets) such that they overlay when specifying correlations
288 // to LHS, e.g., uncertain vars are (currently) active for sampling and
289 // correlations are (always) bound to aleatory uncertain vars, so both
290 // bits must be on to call LHS_CORR2.
291 // > avoid resizing the correlation matrix (inflating with 1's on diagonal)
292 // for active sampling context: the size of the corr matrix is defined by
293 // the (static) RV subset that admits correlations (corr.numRows()
294 // == active_corr.count()). Since this is independent of the active
295 // sampling context, a subset of corr matrix could be passed to LHS_CORR2.
296 bool correlation_flag = !corr.empty(), subset_corr = false;
297 int num_corr = 0, num_active_corr = 0;
298 if (correlation_flag) {
299 if (active_corr.empty())
300 num_corr = corr.numRows();
301 else {
302 for (i=0; i<num_rv; ++i)
303 if (active_corr[i])
304 { ++num_active_corr; if (active_vars[i]) ++num_corr; }
305 if (num_active_corr < num_rv) subset_corr = true;
306 }
307 }
308
309 int max_corr_size = (num_corr > 1) ? num_corr * (num_corr - 1) / 2 : -1,
310 max_var = num_active_rv, max_samp_size = max_var * num_samples,
311 max_interval = -1, max_table = -1, err_code = 0, print_level = 0,
312 output_width = 1;
313 // randomSeed passed below propagates to ISeed in the f77 rnum2, but does
314 // not propagate to Boost RNGs (LHSDriver::seed() must be used for that).
315 LHS_INIT_MEM_FC(num_samples, randomSeed, num_samples, max_samp_size, max_var,
316 max_interval, max_corr_size, max_table, print_level,
317 output_width, err_code);
318 check_error(err_code, "lhs_init_mem");
319
320 // set sample type to either LHS (default) or random Monte Carlo (optional)
321 bool call_lhs_option = false;
322 String option_string(" ");
323 if (sampleType == "random" || sampleType == "incremental_random")
324 { option_string = "RANDOM SAMPLE "; call_lhs_option = true; }
325 // set mixing option to either restricted pairing (default) or random pairing
326 // (optional). For enforcing user-specified correlation, restricted pairing
327 // is required. And for uncorrelated variables, restricted pairing results
328 // in lower correlation values than random pairing. For these reasons, the
329 // random pairing option is not currently active, although a specification
330 // option for it could be added in the future if a use arises.
331 bool random_pairing_flag = false; // this option hard-wired off for now
332 if (!correlation_flag && random_pairing_flag)
333 { option_string += "RANDOM PAIRING"; call_lhs_option = true; }
334 // else // use default of restricted pairing
335 option_string.resize(32, ' ');
336 if (call_lhs_option) {
337 // Don't null-terminate the string since the '\0' is not used in Fortran
338 int num_replic = 1, ptval_option = 1;
339 LHS_OPTIONS2_FC(num_replic, ptval_option, option_string.data(), err_code);
340 check_error(err_code, "lhs_options");
341 }
342
343 // Create files showing distributions and associated statistics. Avoid
344 // issues with null-terminated strings from C++ (which mess up the Fortran
345 // output) by using std::string::data().
346 String output_string("LHS_samples.out");
347 output_string.resize(32, ' ');
348 String message_string("LHS_distributions.out");
349 message_string.resize(32, ' ');
350 String title_string("Pecos::LHSDriver");
351 title_string.resize(32, ' ');
352 // From the LHS manual (p. 100): LHSRPTS is used to specify which reports LHS
353 // will print in the message output file. If LHSRPTS is omitted, the message
354 // file will contain only the title, run specifications, and descriptions of
355 // the distributions sampled. If LHSRPTS is included, it must be followed by
356 // one or more of the following three additional keywords:
357 // > CORR: Print both the achieved raw correlation matrix and the achieved
358 // rank correlation matrix.
359 // > HIST: Print a text-based histogram for each random variable.
360 // > DATA: Print the complete set of all data samples and their ranks.
361 // Pecos::LHSDriver::reportFlag is set from Dakota::Iterator::subIteratorFlag,
362 // which accomplishes two things: (1) it reduces some output when generating
363 // multiple sample sets (the report files get overwritten anyway), and (2) it
364 // avoids numerical problems with generating input variable histogram plots
365 // as trust regions become small in SBO (mainly an issue before conversion of
366 // f90 LHS to double precision).
367 String options_string = (reportFlag) ? "LHSRPTS CORR HIST DATA" : " ";
368 options_string.resize(32, ' ');
369 LHS_FILES2_FC(output_string.data(), message_string.data(),
370 title_string.data(), options_string.data(), err_code);
371 check_error(err_code, "lhs_files");
372
373 //////////////////////////////////////////////////////////
374 // Register RandomVariables with lhs_{dist,udist,const} //
375 //////////////////////////////////////////////////////////
376 RealArray dist_params;
377 for (i=0, av_cntr=0; i<num_rv; ++i) {
378 if (subset_rv && !active_vars[i]) continue; // skip this RV if not active
379
380 const RandomVariable& rv_i = random_vars[i];
381 switch (rv_i.type()) {
382 case CONTINUOUS_RANGE: {
383 std::shared_ptr<RangeVariable<Real>> rv_rep =
384 std::static_pointer_cast<RangeVariable<Real>>(rv_i.random_variable_rep());
385 Real l_bnd; rv_rep->pull_parameter(CR_LWR_BND, l_bnd);
386 Real u_bnd; rv_rep->pull_parameter(CR_UPR_BND, u_bnd);
387 check_finite(l_bnd, u_bnd);
388 if (u_bnd > l_bnd) {
389 dist_params.resize(2);
390 dist_params[0] = l_bnd; dist_params[1] = u_bnd;
391 lhs_dist_register("ContRange", "uniform", av_cntr, dist_params);
392 }
393 else {
394 check_range(l_bnd, u_bnd, true); // allow equal
395 lhs_const_register("ContRange", av_cntr, (Real)l_bnd);
396 }
397 break;
398 }
399 case DISCRETE_RANGE: {
400 std::shared_ptr<RangeVariable<int>> rv_rep =
401 std::static_pointer_cast<RangeVariable<int>>(rv_i.random_variable_rep());
402 int l_bnd; rv_rep->pull_parameter(DR_LWR_BND, l_bnd);
403 int u_bnd; rv_rep->pull_parameter(DR_UPR_BND, u_bnd);
404 check_finite(l_bnd, u_bnd);
405 if (u_bnd > l_bnd) {
406 RealArray x_val, y_val;
407 int_range_to_xy_pdf(l_bnd, u_bnd, x_val, y_val);
408 lhs_udist_register("DiscRange", "discrete histogram", av_cntr,
409 x_val, y_val);
410 }
411 else {
412 check_range(l_bnd, u_bnd, true); // allow equal
413 lhs_const_register("DiscRange", av_cntr, (Real)l_bnd);
414 }
415 break;
416 }
417 case DISCRETE_SET_INT: {
418 std::shared_ptr<SetVariable<int>> rv_rep =
419 std::static_pointer_cast<SetVariable<int>>(rv_i.random_variable_rep());
420 IntSet int_set; rv_rep->pull_parameter(DSI_VALUES, int_set);
421 size_t set_size = int_set.size();
422 if (set_size > 1) {
423 RealArray x_val, y_val;
424 set_to_xy_pdf(int_set, x_val, y_val); // set values
425 lhs_udist_register("DiscSetI", "discrete histogram", av_cntr,
426 x_val, y_val);
427 }
428 else if (set_size)
429 lhs_const_register("DiscSetI", av_cntr, (Real)(*int_set.begin()));
430 break;
431 }
432 case DISCRETE_SET_STRING: {
433 std::shared_ptr<SetVariable<String>> rv_rep =
434 std::static_pointer_cast<SetVariable<String>>(rv_i.random_variable_rep());
435 StringSet str_set; rv_rep->pull_parameter(DSS_VALUES, str_set);
436 int set_size = str_set.size();
437 if (set_size > 1) {
438 RealArray x_val, y_val;
439 int_range_to_xy_pdf(0, set_size - 1, x_val, y_val); // indices
440 lhs_udist_register("DiscSetS", "discrete histogram", av_cntr,
441 x_val, y_val);
442 }
443 else if (set_size)
444 lhs_const_register("DiscSetS", av_cntr, 0.);
445 break;
446 }
447 case DISCRETE_SET_REAL: {
448 std::shared_ptr<SetVariable<Real>> rv_rep =
449 std::static_pointer_cast<SetVariable<Real>>(rv_i.random_variable_rep());
450 RealSet real_set; rv_rep->pull_parameter(DSR_VALUES, real_set);
451 size_t set_size = real_set.size();
452 if (set_size > 1) {
453 RealArray x_val, y_val;
454 set_to_xy_pdf(real_set, x_val, y_val); // set values
455 lhs_udist_register("DiscSetR", "discrete histogram", av_cntr,
456 x_val, y_val);
457 }
458 else if (set_size)
459 lhs_const_register("DiscSetR", av_cntr, *real_set.begin());
460 break;
461 }
462 case NORMAL: case STD_NORMAL:
463 dist_params.resize(2);
464 rv_i.pull_parameter(N_MEAN, dist_params[0]);
465 rv_i.pull_parameter(N_STD_DEV, dist_params[1]);
466 lhs_dist_register("Normal", "normal", av_cntr, dist_params);
467 break;
468 case BOUNDED_NORMAL:
469 dist_params.resize(4);
470 rv_i.pull_parameter(N_MEAN, dist_params[0]);
471 rv_i.pull_parameter(N_STD_DEV, dist_params[1]);
472 rv_i.pull_parameter(N_LWR_BND, dist_params[2]);
473 rv_i.pull_parameter(N_UPR_BND, dist_params[3]);
474 check_range(dist_params[2], dist_params[3], false);
475 lhs_dist_register("Normal", "bounded normal", av_cntr, dist_params);
476 break;
477 case LOGNORMAL: // LognormalRandomVariable standardizes on Lambda/Zeta
478 dist_params.resize(2);
479 rv_i.pull_parameter(LN_LAMBDA, dist_params[0]);
480 rv_i.pull_parameter(LN_ZETA, dist_params[1]);
481 lhs_dist_register("Lognormal", "lognormal-n", av_cntr, dist_params);
482 break;
483 case BOUNDED_LOGNORMAL: // BoundedLognormalRandomVariable uses Lambda/Zeta
484 dist_params.resize(4);
485 rv_i.pull_parameter(LN_LAMBDA, dist_params[0]);
486 rv_i.pull_parameter(LN_ZETA, dist_params[1]);
487 rv_i.pull_parameter(LN_LWR_BND, dist_params[2]);
488 rv_i.pull_parameter(LN_UPR_BND, dist_params[3]);
489 check_range(dist_params[2], dist_params[3], false);
490 lhs_dist_register("Lognormal", "bounded lognormal-n",av_cntr,dist_params);
491 break;
492 case UNIFORM: case STD_UNIFORM:
493 dist_params.resize(2);
494 rv_i.pull_parameter(U_LWR_BND, dist_params[0]);
495 rv_i.pull_parameter(U_UPR_BND, dist_params[1]);
496 check_range(dist_params[0], dist_params[1], false);
497 check_finite(dist_params[0], dist_params[1]);
498 lhs_dist_register("Uniform", "uniform", av_cntr, dist_params);
499 break;
500 case LOGUNIFORM:
501 dist_params.resize(2);
502 rv_i.pull_parameter(LU_LWR_BND, dist_params[0]);
503 rv_i.pull_parameter(LU_UPR_BND, dist_params[1]);
504 check_range(dist_params[0], dist_params[1], false);
505 check_finite(dist_params[0], dist_params[1]);
506 lhs_dist_register("Loguniform", "loguniform", av_cntr, dist_params);
507 break;
508 case TRIANGULAR:
509 dist_params.resize(3);
510 rv_i.pull_parameter(T_LWR_BND, dist_params[0]);
511 rv_i.pull_parameter(T_MODE, dist_params[1]);
512 rv_i.pull_parameter(T_UPR_BND, dist_params[2]);
513 check_range(dist_params[0], dist_params[2], false);
514 check_finite(dist_params[0], dist_params[2]);
515 lhs_dist_register("Triangular", "triangular", av_cntr, dist_params);
516 break;
517 case EXPONENTIAL: case STD_EXPONENTIAL: {
518 dist_params.resize(1);
519 Real e_beta; rv_i.pull_parameter(E_BETA, e_beta);
520 dist_params[0] = 1./e_beta; // convert to LHS convention
521 lhs_dist_register("Exponential", "exponential", av_cntr, dist_params);
522 break;
523 }
524 case BETA: case STD_BETA:
525 dist_params.resize(4);
526 rv_i.pull_parameter(BE_LWR_BND, dist_params[0]);
527 rv_i.pull_parameter(BE_UPR_BND, dist_params[1]);
528 rv_i.pull_parameter(BE_ALPHA, dist_params[2]);
529 rv_i.pull_parameter(BE_BETA, dist_params[3]);
530 check_range(dist_params[0], dist_params[1], false);
531 check_finite(dist_params[0], dist_params[1]);
532 lhs_dist_register("Beta", "beta", av_cntr, dist_params);
533 break;
534 case GAMMA: case STD_GAMMA: {
535 dist_params.resize(2);
536 rv_i.pull_parameter(GA_ALPHA, dist_params[0]);
537 Real ga_beta; rv_i.pull_parameter(GA_BETA, ga_beta);
538 dist_params[1] = 1./ga_beta; // convert to LHS convention
539 lhs_dist_register("Gamma", "gamma", av_cntr, dist_params);
540 break;
541 }
542 case GUMBEL:
543 dist_params.resize(2);
544 rv_i.pull_parameter(GU_ALPHA, dist_params[0]);
545 rv_i.pull_parameter(GU_BETA, dist_params[1]);
546 lhs_dist_register("Gumbel", "gumbel", av_cntr, dist_params);
547 break;
548 case FRECHET:
549 dist_params.resize(2);
550 rv_i.pull_parameter(F_ALPHA, dist_params[0]);
551 rv_i.pull_parameter(F_BETA, dist_params[1]);
552 lhs_dist_register("Frechet", "frechet", av_cntr, dist_params);
553 break;
554 case WEIBULL:
555 dist_params.resize(2);
556 rv_i.pull_parameter(W_ALPHA, dist_params[0]);
557 rv_i.pull_parameter(W_BETA, dist_params[1]);
558 lhs_dist_register("Weibull", "weibull", av_cntr, dist_params);
559 break;
560 case HISTOGRAM_BIN: {
561 std::shared_ptr<HistogramBinRandomVariable> rv_rep =
562 std::static_pointer_cast<HistogramBinRandomVariable>
563 (rv_i.random_variable_rep());
564 RealRealMap h_bin_pairs; rv_rep->pull_parameter(H_BIN_PAIRS, h_bin_pairs);
565 RealArray x_val, y_val; bins_to_xy_cdf(h_bin_pairs, x_val, y_val);
566 // Note: continuous linear accumulates CDF with first y=0 and last y=1
567 lhs_udist_register("HistBin", "continuous linear", av_cntr, x_val, y_val);
568 break;
569 }
570 case POISSON:
571 dist_params.resize(1);
572 rv_i.pull_parameter(P_LAMBDA, dist_params[0]);
573 lhs_dist_register("Poisson", "poisson", av_cntr, dist_params);
574 break;
575 case BINOMIAL: {
576 dist_params.resize(2); unsigned int num_tr;
577 rv_i.pull_parameter(BI_P_PER_TRIAL, dist_params[0]);
578 rv_i.pull_parameter(BI_TRIALS, num_tr); dist_params[1] = (Real)num_tr;
579 lhs_dist_register("Binomial", "binomial", av_cntr, dist_params);
580 break;
581 }
582 case NEGATIVE_BINOMIAL: {
583 dist_params.resize(2); unsigned int num_tr;
584 rv_i.pull_parameter(NBI_P_PER_TRIAL, dist_params[0]);
585 rv_i.pull_parameter(NBI_TRIALS, num_tr); dist_params[1] = (Real)num_tr;
586 lhs_dist_register("NegBinomial", "negative binomial",av_cntr,dist_params);
587 break;
588 }
589 case GEOMETRIC:
590 dist_params.resize(1);
591 rv_i.pull_parameter(GE_P_PER_TRIAL, dist_params[0]);
592 lhs_dist_register("Geometric", "geometric", av_cntr, dist_params);
593 break;
594 case HYPERGEOMETRIC: {
595 dist_params.resize(3); unsigned int tot_pop, num_drw, sel_pop;
596 rv_i.pull_parameter(HGE_TOT_POP, tot_pop); dist_params[0] = (Real)tot_pop;
597 rv_i.pull_parameter(HGE_DRAWN, num_drw); dist_params[1] = (Real)num_drw;
598 rv_i.pull_parameter(HGE_SEL_POP, sel_pop); dist_params[2] = (Real)sel_pop;
599 lhs_dist_register("Hypergeom", "hypergeometric", av_cntr, dist_params);
600 break;
601 }
602 case HISTOGRAM_PT_INT: {
603 std::shared_ptr<DiscreteSetRandomVariable<int>> rv_rep =
604 std::static_pointer_cast<DiscreteSetRandomVariable<int>>
605 (rv_i.random_variable_rep());
606 IntRealMap ir_map; rv_rep->pull_parameter(H_PT_INT_PAIRS, ir_map);
607 size_t map_size = ir_map.size();
608 if (map_size > 1) {
609 RealArray x_val, y_val; map_to_xy_pdf(ir_map, x_val, y_val);
610 lhs_udist_register("HistPtInt", "discrete histogram", av_cntr,
611 x_val, y_val);
612 }
613 else if (map_size)
614 lhs_const_register("HistPtInt", av_cntr, (Real)ir_map.begin()->first);
615 break;
616 }
617 case HISTOGRAM_PT_STRING: {
618 std::shared_ptr<DiscreteSetRandomVariable<String>> rv_rep =
619 std::static_pointer_cast<DiscreteSetRandomVariable<String>>
620 (rv_i.random_variable_rep());
621 StringRealMap sr_map; rv_rep->pull_parameter(H_PT_STR_PAIRS, sr_map);
622 int map_size = sr_map.size();
623 if (map_size > 1) {
624 RealArray x_val, y_val; map_indices_to_xy_pdf(sr_map, x_val, y_val);
625 lhs_udist_register("HistPtString","discrete histogram",av_cntr,
626 x_val, y_val);
627 }
628 else if (map_size)
629 lhs_const_register("HistPtString", av_cntr, 0.);
630 break;
631 }
632 case HISTOGRAM_PT_REAL: {
633 std::shared_ptr<DiscreteSetRandomVariable<Real>> rv_rep =
634 std::static_pointer_cast<DiscreteSetRandomVariable<Real>>
635 (rv_i.random_variable_rep());
636 RealRealMap rr_map; rv_rep->pull_parameter(H_PT_REAL_PAIRS, rr_map);
637 size_t map_size = rr_map.size();
638 if (map_size > 1) {
639 RealArray x_val, y_val; map_to_xy_pdf(rr_map, x_val, y_val);
640 lhs_udist_register("HistPtReal", "discrete histogram", av_cntr,
641 x_val, y_val);
642 }
643 else if (map_size)
644 lhs_const_register("HistPtReal", av_cntr, rr_map.begin()->first);
645 break;
646 }
647 case CONTINUOUS_INTERVAL_UNCERTAIN: {
648 std::shared_ptr<IntervalRandomVariable<Real>> rv_rep =
649 std::static_pointer_cast<IntervalRandomVariable<Real>>
650 (rv_i.random_variable_rep());
651 RealRealPairRealMap ci_bpa; rv_rep->pull_parameter(CIU_BPA, ci_bpa);
652 // Note: continuous linear accumulates CDF with first y=0 and last y=1
653 RealArray x_val, y_val; intervals_to_xy_cdf(ci_bpa, x_val, y_val);
654 lhs_udist_register("ContInterval", "continuous linear", av_cntr,
655 x_val, y_val);
656 break;
657 }
658 case DISCRETE_INTERVAL_UNCERTAIN: {
659 std::shared_ptr<IntervalRandomVariable<int>> rv_rep =
660 std::static_pointer_cast<IntervalRandomVariable<int>>
661 (rv_i.random_variable_rep());
662 IntIntPairRealMap di_bpa; rv_rep->pull_parameter(DIU_BPA, di_bpa);
663 IntArray i_val; RealArray x_val, y_val;
664 intervals_to_xy_pdf(di_bpa, i_val, y_val); cast_data(i_val, x_val);
665 lhs_udist_register("DiscInterval", "discrete histogram", av_cntr,
666 x_val, y_val);
667 break;
668 }
669 case DISCRETE_UNCERTAIN_SET_INT: {
670 std::shared_ptr<DiscreteSetRandomVariable<int>> rv_rep =
671 std::static_pointer_cast<DiscreteSetRandomVariable<int>>
672 (rv_i.random_variable_rep());
673 IntRealMap ir_map; rv_rep->pull_parameter(DUSI_VALUES_PROBS, ir_map);
674 size_t map_size = ir_map.size();
675 if (map_size > 1) {
676 RealArray x_val, y_val; map_to_xy_pdf(ir_map, x_val, y_val);
677 lhs_udist_register("DiscUncSetI","discrete histogram", av_cntr,
678 x_val, y_val);
679 }
680 else if (map_size)
681 lhs_const_register("DiscUncSetI", av_cntr, (Real)ir_map.begin()->first);
682 break;
683 }
684 case DISCRETE_UNCERTAIN_SET_STRING: {
685 std::shared_ptr<DiscreteSetRandomVariable<String>> rv_rep =
686 std::static_pointer_cast<DiscreteSetRandomVariable<String>>
687 (rv_i.random_variable_rep());
688 StringRealMap sr_map; rv_rep->pull_parameter(DUSS_VALUES_PROBS, sr_map);
689 int map_size = sr_map.size();
690 if (map_size > 1) {
691 RealArray x_val, y_val; map_indices_to_xy_pdf(sr_map, x_val, y_val);
692 lhs_udist_register("DiscUncSetS","discrete histogram", av_cntr,
693 x_val, y_val);
694 }
695 else if (map_size)
696 lhs_const_register("DiscUncSetS", av_cntr, 0.);
697 break;
698 }
699 case DISCRETE_UNCERTAIN_SET_REAL: {
700 std::shared_ptr<DiscreteSetRandomVariable<Real>> rv_rep =
701 std::static_pointer_cast<DiscreteSetRandomVariable<Real>>
702 (rv_i.random_variable_rep());
703 RealRealMap rr_map; rv_rep->pull_parameter(DUSR_VALUES_PROBS, rr_map);
704 size_t map_size = rr_map.size();
705 if (map_size > 1) {
706 RealArray x_val, y_val; map_to_xy_pdf(rr_map, x_val, y_val);
707 lhs_udist_register("DiscUncSetR","discrete histogram", av_cntr,
708 x_val, y_val);
709 }
710 else if (map_size)
711 lhs_const_register("DiscUncSetR", av_cntr, rr_map.begin()->first);
712 break;
713 }
714 }
715 ++av_cntr;
716 }
717
718 /////////////////////////////////////////
719 // Register correlations with lhs_corr //
720 /////////////////////////////////////////
721 // specify the rank correlations among the RV. Only non-zero values in the
722 // lower triangular portion of the rank correlation matrix are specified.
723 if (correlation_flag) {
724 // Spec order: {cdv, ddv}, {cauv, dauv, corr}, {ceuv, deuv}, {csv, dsv}
725 // > pass in bit array for active RV's to sample + another for active corr's
726 // > Default empty arrays --> all RVs active; corr matrix applies to all RVs
727 size_t j, ac_cntr_i, ac_cntr_j, av_cntr_i, av_cntr_j;
728 bool av_i, av_j, ac_i, ac_j;
729 for (i=0, ac_cntr_i=0, av_cntr_i=0; i<num_rv; ++i) {
730 av_i = (!subset_rv || active_vars[i]);
731 ac_i = (!subset_corr || active_corr[i]);
732 if (av_i && ac_i) {
733 for (j=0, ac_cntr_j=0, av_cntr_j=0; j<i; ++j) {
734 av_j = (!subset_rv || active_vars[j]);
735 ac_j = (!subset_corr || active_corr[j]);
736 if (av_j && ac_j) {
737 Real corr_val = corr(ac_cntr_i, ac_cntr_j);
738 if (std::abs(corr_val) > SMALL_NUMBER) {
739 LHS_CORR2_FC(const_cast<char*>(lhsNames[av_cntr_i].data()),
740 const_cast<char*>(lhsNames[av_cntr_j].data()),
741 corr_val, err_code);
742 check_error(err_code, "lhs_corr");
743 }
744 }
745 if (av_j) ++av_cntr_j;
746 if (ac_j) ++ac_cntr_j;
747 }
748 }
749 if (av_i) ++av_cntr_i;
750 if (ac_i) ++ac_cntr_i;
751 }
752 }
753
754 /////////////////////
755 // RUN THE SAMPLER //
756 /////////////////////
757 // perform internal checks on input to LHS
758 int num_nam = num_rv, num_var = num_rv;
759 LHS_PREP_FC(err_code, num_nam, num_var);
760 check_error(err_code, "lhs_prep");
761
762 // allocate the memory to hold samples, pt values, variable names, etc.
763 int* index_list = new int [num_nam]; // output
764 Real* ptval_list = new Real [num_nam]; // output
765 char* dist_name_list = new char [16*num_nam]; // output
766 // dist_name_list is a bit tricky since the f90 array is declared as
767 // CHARACTER(LEN=16) :: LSTNAME(MAXNAM), which would seem to be a
768 // noncontiguous memory model. However, a char** does not map correctly to
769 // f90. Rather, f90 takes the contiguous memory block from the C++ char*
770 // allocation and indexes into it as if it were a vector of 16 char arrays
771 // arranged head to tail.
772
773 // The matrix of parameter samples from Fortran 90 is arranged in column-major
774 // order with all variables for sample 1, followed by all variables for
775 // sample 2, etc. Teuchos::SerialDenseMatrix using column-major memory layout
776 // as well, so use samples(var#,sample#) or samples[sample#][var#] for access.
777 if (samples.numRows() != num_var || samples.numCols() != num_samples)
778 samples.shapeUninitialized(num_var, num_samples);
779 if (sampleRanksMode && sample_ranks.empty()) {
780 if (sampleRanksMode == SET_RANKS || sampleRanksMode == SET_GET_RANKS) {
781 PCerr << "Error: empty sample ranks array cannot be set in Pecos::"
782 << "LHSDriver::get_parameter_sets()" << std::endl;
783 abort_handler(-1);
784 }
785 else if (sampleRanksMode == GET_RANKS)
786 sample_ranks.shapeUninitialized(num_var, num_samples);
787 }
788
789 // generate the samples
790 int rflag = sampleRanksMode; // short -> int
791 LHS_RUN_FC(max_var, num_samples, num_nam, err_code, dist_name_list,
792 index_list, ptval_list, num_nam, samples.values(), num_var,
793 sample_ranks.values(), rflag);
794 check_error(err_code, "lhs_run");
795
796 // deallocate LHS memory
797 LHS_CLOSE_FC(err_code);
798 check_error(err_code, "lhs_close");
799
800 // clean up memory
801 delete [] index_list;
802 delete [] ptval_list;
803 delete [] dist_name_list;
804 #endif // HAVE_LHS
805 }
806
807
808 void LHSDriver::
generate_unique_samples(const std::vector<RandomVariable> & random_vars,const RealSymMatrix & corr,int num_samples,RealMatrix & samples,RealMatrix & sample_ranks,const BitArray & active_vars,const BitArray & active_corr)809 generate_unique_samples(const std::vector<RandomVariable>& random_vars,
810 const RealSymMatrix& corr, int num_samples,
811 RealMatrix& samples, RealMatrix& sample_ranks,
812 const BitArray& active_vars,
813 const BitArray& active_corr)
814 {
815 // ***************************************************************************
816 // IMPORTANT NOTE: this heuristic approach emphasizes having unique discrete
817 // samples irregardless of their relative probability. As such, ANY STATISTICS
818 // GENERATED FROM THESE SAMPLES WILL BE INVALID --> routine should only be
819 // used for generating space-filling designs, e.g. for building surrogates
820 // or approximating epistemic bounds.
821 // ***************************************************************************
822 // Approach 1 (current): accept a new sample if unique in cont+disc space.
823 // > if mixed, first aggregate sample will be accepted with strata intact
824 // but discrete stratification will not be enhanced.
825 // > if discrete only, iteration will occur irregardless of strata/probs.
826 // Approach 2 (original): accept a new sample if unique in discrete subset.
827 // > if mixed or discrete, iteration will occur irregardless of strata/probs
828 // and accepted samples are only from newly generated sets.
829 // Approach 3 (TO DO?): preserve cont strata from a single cont+disc sample
830 // --> backfill only the disc subset with generated sets of discrete samples.
831 // > This approach preserves cont strata; but ignores disc strata/probs
832 // ***************************************************************************
833
834 // determine if the RV set has a finite set of possible sample points
835 size_t i, num_rv = random_vars.size();//, num_finite_dv = 0;
836 bool finite_combinations = true, no_mask = active_vars.empty();
837 size_t num_av = (no_mask) ? num_rv : active_vars.count();
838 // track discrete variables that have finite support
839 for (i=0; i<num_rv; ++i) {
840 if (no_mask || active_vars[i]) {
841 switch (random_vars[i].type()) {
842 case DISCRETE_RANGE:
843 case DISCRETE_SET_INT: case DISCRETE_SET_STRING: case DISCRETE_SET_REAL:
844 case BINOMIAL: case HYPERGEOMETRIC:
845 case HISTOGRAM_PT_INT: case HISTOGRAM_PT_STRING: case HISTOGRAM_PT_REAL:
846 case DISCRETE_INTERVAL_UNCERTAIN: case DISCRETE_UNCERTAIN_SET_INT:
847 case DISCRETE_UNCERTAIN_SET_STRING: case DISCRETE_UNCERTAIN_SET_REAL:
848 //++num_finite_dv; // if count needed, don't break out of for loop
849 break;
850 default: // any RV with countably/uncountably infinite support
851 finite_combinations = false; break;
852 }
853 }
854 if (!finite_combinations) break;
855 }
856
857 // if finite, compute number of possible discrete combinations
858 // > for range variables, use ub-lb+1
859 // > for BPA variables, overlay unique ranges
860 // > for {set,map} variables, use {set,map}.size()
861 // > for finite support in other discrete, compute #support pts
862 size_t max_unique = _NPOS;
863 //IntArray discrete_strata_1d; discrete_strata_1d.reserve(num_finite_dv);
864 if (finite_combinations) {
865 max_unique = 1;
866 for (i=0; i<num_rv; ++i)
867 if (no_mask || active_vars[i]) {
868 const RandomVariable& rv_i = random_vars[i];
869 switch (rv_i.type()) {
870 // discrete design, state
871 case DISCRETE_RANGE: {
872 int l_bnd; rv_i.pull_parameter(DR_LWR_BND, l_bnd);
873 int u_bnd; rv_i.pull_parameter(DR_UPR_BND, u_bnd);
874 //discrete_strata_1d.push_back(u_bnd - l_bnd + 1);
875 max_unique *= u_bnd - l_bnd + 1; break;
876 }
877 case DISCRETE_SET_INT: {
878 std::shared_ptr<SetVariable<int>> rv_rep =
879 std::static_pointer_cast<SetVariable<int>>
880 (rv_i.random_variable_rep());
881 IntSet i_set; rv_rep->pull_parameter(DSI_VALUES, i_set);
882 //discrete_strata_1d.push_back(i_set.size());
883 max_unique *= i_set.size(); break;
884 }
885 case DISCRETE_SET_STRING: {
886 std::shared_ptr<SetVariable<String>> rv_rep =
887 std::static_pointer_cast<SetVariable<String>>
888 (rv_i.random_variable_rep());
889 StringSet s_set; rv_rep->pull_parameter(DSS_VALUES, s_set);
890 //discrete_strata_1d.push_back(s_set.size());
891 max_unique *= s_set.size(); break;
892 }
893 case DISCRETE_SET_REAL: {
894 std::shared_ptr<SetVariable<Real>> rv_rep =
895 std::static_pointer_cast<SetVariable<Real>>
896 (rv_i.random_variable_rep());
897 RealSet r_set; rv_rep->pull_parameter(DSR_VALUES, r_set);
898 //discrete_strata_1d.push_back(r_set.size());
899 max_unique *= r_set.size(); break;
900 }
901 // discrete aleatory uncertain
902 case BINOMIAL: { // finite support
903 unsigned int num_tr; rv_i.pull_parameter(BI_TRIALS, num_tr);
904 //discrete_strata_1d.push_back(1 + num_tr);
905 max_unique *= 1 + num_tr; break;
906 }
907 case HYPERGEOMETRIC: { // finite support
908 unsigned int tot_p; rv_i.pull_parameter(HGE_TOT_POP, tot_p);
909 unsigned int sel_p; rv_i.pull_parameter(HGE_SEL_POP, sel_p);
910 unsigned int num_d; rv_i.pull_parameter(HGE_DRAWN, num_d);
911 //discrete_strata_1d.push_back(1 + std::min(num_d, sel_p) -
912 // std::max(0, sel_p + num_d - tot_p));
913 max_unique *= 1 + std::min(num_d, sel_p) + tot_p
914 - std::max(tot_p, sel_p + num_d); // care with unsigned
915 break;
916 }
917 case HISTOGRAM_PT_INT: {
918 std::shared_ptr<DiscreteSetRandomVariable<int>> rv_rep =
919 std::static_pointer_cast<DiscreteSetRandomVariable<int>>
920 (rv_i.random_variable_rep());
921 IntRealMap ir_map; rv_rep->pull_parameter(H_PT_INT_PAIRS, ir_map);
922 //discrete_strata_1d.push_back(ir_map.size());
923 max_unique *= ir_map.size(); break;
924 }
925 case HISTOGRAM_PT_STRING: {
926 std::shared_ptr<DiscreteSetRandomVariable<String>> rv_rep =
927 std::static_pointer_cast<DiscreteSetRandomVariable<String>>
928 (rv_i.random_variable_rep());
929 StringRealMap sr_map; rv_rep->pull_parameter(H_PT_STR_PAIRS, sr_map);
930 //discrete_strata_1d.push_back(sr_map.size());
931 max_unique *= sr_map.size(); break;
932 }
933 case HISTOGRAM_PT_REAL: {
934 std::shared_ptr<DiscreteSetRandomVariable<Real>> rv_rep =
935 std::static_pointer_cast<DiscreteSetRandomVariable<Real>>
936 (rv_i.random_variable_rep());
937 RealRealMap rr_map; rv_rep->pull_parameter(H_PT_REAL_PAIRS, rr_map);
938 //discrete_strata_1d.push_back(rr_map.size());
939 max_unique *= rr_map.size(); break;
940 }
941 // discrete epistemic uncertain
942 case DISCRETE_INTERVAL_UNCERTAIN: {
943 std::shared_ptr<IntervalRandomVariable<int>> rv_rep =
944 std::static_pointer_cast<IntervalRandomVariable<int>>
945 (rv_i.random_variable_rep());
946 IntIntPairRealMap di_bpa; rv_rep->pull_parameter(DIU_BPA, di_bpa);
947 // x_sort_unique contains ALL of the unique integer values for this
948 // discrete interval variable in increasing order. For example, if
949 // there are 3 intervals for a variable and the bounds are (1,4),
950 // (3,6), and [9,10], x_sorted will be (1,2,3,4,5,6,9,10).
951 IntSet x_sort_unique;
952 for (IIPRMCIter cit=di_bpa.begin(); cit!=di_bpa.end(); ++cit) {
953 const RealRealPair& bounds = cit->first;
954 int val, u_bnd = bounds.second;
955 for (val=bounds.first; val<=u_bnd; ++val)
956 x_sort_unique.insert(val);
957 }
958 //discrete_strata_1d.push_back(x_sort_unique.size());
959 max_unique *= x_sort_unique.size(); break;
960 }
961 case DISCRETE_UNCERTAIN_SET_INT: {
962 std::shared_ptr<DiscreteSetRandomVariable<int>> rv_rep =
963 std::static_pointer_cast<DiscreteSetRandomVariable<int>>
964 (rv_i.random_variable_rep());
965 IntRealMap ir_map; rv_rep->pull_parameter(DUSI_VALUES_PROBS, ir_map);
966 //discrete_strata_1d.push_back(ir_map.size());
967 max_unique *= ir_map.size(); break;
968 }
969 case DISCRETE_UNCERTAIN_SET_STRING: {
970 std::shared_ptr<DiscreteSetRandomVariable<String>> rv_rep =
971 std::static_pointer_cast<DiscreteSetRandomVariable<String>>
972 (rv_i.random_variable_rep());
973 StringRealMap sr_map;
974 rv_rep->pull_parameter(DUSS_VALUES_PROBS, sr_map);
975 //discrete_strata_1d.push_back(sr_map.size());
976 max_unique *= sr_map.size(); break;
977 }
978 case DISCRETE_UNCERTAIN_SET_REAL: {
979 std::shared_ptr<DiscreteSetRandomVariable<Real>> rv_rep =
980 std::static_pointer_cast<DiscreteSetRandomVariable<Real>>
981 (rv_i.random_variable_rep());
982 RealRealMap rr_map; rv_rep->pull_parameter(DUSR_VALUES_PROBS, rr_map);
983 //discrete_strata_1d.push_back(rr_map.size());
984 max_unique *= rr_map.size(); break;
985 }
986 }
987 }
988 }
989
990 bool complete = false;
991 RealMatrix new_samples; RealArray new_samp_i(num_av);
992 std::set<RealArray> unique_samples;
993 size_t unique_cntr = 0, iter = 0, max_iter = 1000;
994
995 // If the number of samples requested is greater than the maximum possible
996 // number of discrete samples then we must allow replicates of the discrete
997 // variables to obtain the desired number of variables. If not then we can
998 // proceed with generating a unique set of discrete samples.
999 if (max_unique < num_samples) {
1000
1001 // Allow iteration to continue if new > old
1002 PCout << "Warning: LHS backfill was requested, but the discrete variables "
1003 << "provided\n do not have enough unique values ("
1004 << max_unique << ") to obtain the number of\n samples "
1005 << "requested. Backfill iterations will be attempted to increase\n"
1006 << " the number of unique samples until no further progress "
1007 << "is detected." << std::endl;
1008
1009 size_t unique_cntr_prev = 0;
1010 while (!complete && iter < max_iter) {
1011 generate_samples(random_vars, corr, num_samples, new_samples,
1012 sample_ranks, active_vars, active_corr);
1013
1014 // Sort real-valued samples and replace until # unique >= # requested.
1015 // > For now, don't try to replace only the discrete subset.
1016 // > Preserve original sample ordering --> don't truncate an ordered set
1017 // (omitting tail of ordered set could bias coverage).
1018 for (i=0; i<num_samples; ++i) {
1019 const Real* samp_i = new_samples[i];
1020 if (test_unique(random_vars, active_vars, samp_i, unique_samples)) {
1021 copy_data(samp_i, num_av, samples[unique_cntr++]); // append
1022 if (unique_cntr >= num_samples)
1023 { complete = true; break; }
1024 }
1025 }
1026 ++iter;
1027 if (unique_cntr == unique_cntr_prev) complete = true;
1028 else unique_cntr_prev = unique_cntr;
1029 }
1030 }
1031 else {
1032 if (samples.numRows() != num_av || samples.numCols() != num_samples)
1033 samples.shapeUninitialized(num_av, num_samples);
1034
1035 // Currently sample_ranks will always be returned empty. It should only be
1036 // filled when NonDSampling::sampleRanksMode > 0. But I cannot see anywhere
1037 // in the code where this is true.
1038 //sample_ranks.shapeUninitialized( num_vars, num_samples );
1039
1040 /*
1041 // unique index of all discrete variables if any
1042 std::set<RealArray>::iterator it;
1043 std::set<RealArray> sorted_discrete_samples;
1044 RealArray discrete_sample( num_discrete_vars );
1045
1046 // Determine the columns in samples_rm that contain discrete variables
1047 IntVector discrete_samples_map( num_discrete_vars, false );
1048 for (int i=0; i<num_discrete_vars; i++)
1049 discrete_samples_map[i]=num_continuous_vars+i;
1050
1051 int num_unique_samples = 0;
1052 */
1053
1054 // Eliminate redundant samples by resampling if necessary. Could pad
1055 // num_samples in anticipation of duplicates, but this would alter LHS
1056 // stratification that could be intended, so use num_samples for now.
1057 while (!complete && iter < max_iter) {
1058 generate_samples(random_vars, corr, num_samples, new_samples,
1059 sample_ranks, active_vars, active_corr);
1060
1061 // Sort real-valued samples and replace until # unique >= # requested.
1062 // > For now, don't try to replace only the discrete subset.
1063 // > Preserve original sample ordering --> don't truncate an ordered set
1064 // (omitting tail of ordered set could bias coverage).
1065 for (i=0; i<num_samples; ++i) {
1066 const Real* samp_i = new_samples[i];
1067 if (test_unique(random_vars, active_vars, samp_i, unique_samples)) {
1068 copy_data(samp_i, num_av, samples[unique_cntr++]); // append
1069 if (unique_cntr >= num_samples)
1070 { complete = true; break; }
1071 }
1072 }
1073 ++iter;
1074
1075 ////////////////////////////////////////
1076 /*
1077 if (initial) { // pack initial sample set
1078 for (i=0; i<num_samples; ++i) { // or matrix->set<vector> ?
1079 //PCout << "[";
1080 for (int j=0; j<num_discrete_vars; j++) {
1081 int index = discrete_samples_map[j];
1082 discrete_sample[j] = samples_rm(index,i);
1083 //PCout << discrete_sample[j] << ",";
1084 }
1085 //PCout << "]\n";
1086 sorted_discrete_samples.insert(discrete_sample);
1087 if ( sorted_discrete_samples.size() > num_unique_samples ){
1088 // copy sample into samples matrix
1089 for (int j=0; j<num_vars; j++)
1090 samples(j,num_unique_samples) = samples_rm(j,i);
1091 num_unique_samples++;
1092 }
1093 }
1094 if (num_unique_samples == num_samples) complete = true;
1095 else initial = false;
1096 }
1097 else { // backfill duplicates with new samples
1098 //PCout << num_unique_samples << "," << sorted_discrete_samples.size()
1099 // << "," << num_discrete_vars << "," << num_vars << ","
1100 // << num_continuous_vars << std::endl;
1101
1102 for (i=0; i<num_samples; ++i) {
1103 if (num_unique_samples < num_samples) {
1104 //PCout << "[";
1105 for (int j=0; j<num_discrete_vars; j++) {
1106 int index = discrete_samples_map[j];
1107 discrete_sample[j] = samples_rm(index,i);
1108 //PCout << discrete_sample[j] << ",";
1109 }
1110 //PCout << "]\n";
1111 sorted_discrete_samples.insert(discrete_sample);
1112 if ( sorted_discrete_samples.size() > num_unique_samples ){
1113 // copy sample into samples matrix
1114 for (int j=0; j<num_vars; j++)
1115 samples(j,num_unique_samples) = samples_rm(j,i);
1116 num_unique_samples++;
1117 }
1118 }
1119 else
1120 { complete = true; break; }
1121 }
1122 }
1123 */
1124 ////////////////////////////////////////
1125 }
1126 }
1127 if (unique_cntr < num_samples) {
1128 PCerr << "Warning: iterations completed with number of unique samples ("
1129 << unique_cntr << ")\n less than target (" << num_samples
1130 << ")." << std::endl;
1131 samples.reshape(num_av, unique_cntr);
1132 }
1133 }
1134
1135
1136 bool LHSDriver::
test_unique(const std::vector<RandomVariable> & random_vars,const BitArray & active_vars,const Real * new_samp,std::set<RealArray> & unique_samples)1137 test_unique(const std::vector<RandomVariable>& random_vars,
1138 const BitArray& active_vars, const Real* new_samp,
1139 std::set<RealArray>& unique_samples)
1140 {
1141 bool full_match = false; // hardwire this for right now
1142
1143 size_t num_rv = random_vars.size();
1144 bool no_mask = active_vars.empty();
1145 RealArray new_samp_v;
1146 if (full_match) {
1147 size_t num_av = (no_mask) ? num_rv : active_vars.count();
1148 new_samp_v.resize(num_av);
1149 copy_data(new_samp, num_av, new_samp_v); // vector for sorting
1150 }
1151 else { // check for discrete match
1152 size_t i, cntr = 0;
1153 for (i=0; i<num_rv; ++i)
1154 if (no_mask || active_vars[i]) {
1155 switch (random_vars[i].type()) {
1156 case DISCRETE_RANGE:
1157 case DISCRETE_SET_INT: case DISCRETE_SET_STRING: case DISCRETE_SET_REAL:
1158 case POISSON: case BINOMIAL: case NEGATIVE_BINOMIAL:
1159 case GEOMETRIC: case HYPERGEOMETRIC: case HISTOGRAM_PT_INT:
1160 case HISTOGRAM_PT_STRING: case HISTOGRAM_PT_REAL:
1161 case DISCRETE_INTERVAL_UNCERTAIN: case DISCRETE_UNCERTAIN_SET_INT:
1162 case DISCRETE_UNCERTAIN_SET_STRING: case DISCRETE_UNCERTAIN_SET_REAL:
1163 new_samp_v.push_back(new_samp[cntr]); break;
1164 }
1165 ++cntr;
1166 }
1167 }
1168 // returns pair<iterator,bool>
1169 return unique_samples.insert(new_samp_v).second;
1170 }
1171
1172 } // namespace Pecos
1173