1# _______________________________________________________________________ 2# 3# DAKOTA: Design Analysis Kit for Optimization and Terascale Applications 4# Copyright 2014 Sandia Corporation. 5# This software is distributed under the GNU Lesser General Public License. 6# For more information, see the README file in the top Dakota directory. 7# _______________________________________________________________________ 8 9 10import os 11import subprocess 12import re 13import copy 14from collections import OrderedDict 15import numpy as np 16 17## Capture the output here, once. 18__OUTPUT = "" 19 20## Directory in the build tree that contains dakota and dakota_restart_util 21__BIN_DIR = "" 22 23def extract_moments(): 24 """Extract the moments from the global __OUTPUT 25 26 Returns: The moments structured as a list of dictionaries. 27 The items in the list are for executions, and the 28 key, value pairs in the dictionary are the response 29 descriptors and an numpy array of the moments 30 """ 31 global __OUTPUT 32 moments = [] 33 lines_iter = iter(__OUTPUT) 34 for line in lines_iter: 35 if line.startswith("Sample moment statistics"): 36 next(lines_iter) 37 moments.append({}) 38 moments_line = next(lines_iter).strip() 39 while(moments_line): 40 tokens = moments_line.split() 41 resp_desc = tokens[0] 42 moments_values = np.array([float(t) for t in tokens[1:]]) 43 moments[-1][resp_desc] = moments_values 44 moments_line = next(lines_iter).strip() 45 return moments 46 47def extract_moment_confidence_intervals(): 48 49 """Extract the moment confidence intervals 50 51 Returns: The confidence intervals structured as a list 52 of dictionaries. The keys of the dictionary are the 53 responses, and the values are a 2D list; 0th dimenion are 54 lower and upper, 1st dimenion are mean and standard deviation 55 """ 56 57 global __OUTPUT 58 cis = [] 59 lines_iter = iter(__OUTPUT) 60 for line in lines_iter: 61 if line.startswith("95% confidence intervals"): 62 cis.append({}) 63 nline = next(lines_iter) # discard header 64 nline = next(lines_iter).strip() # first line of data 65 while nline: 66 tokens = nline.split() 67 response = tokens[0] 68 row_values = [float(v) for v in tokens[1:]] 69 values = [] 70 values.append( [row_values[0], row_values[2]]) 71 values.append( [row_values[1], row_values[3]]) 72 cis[-1][response] = values 73 nline = next(lines_iter).strip() 74 return cis 75 76def extract_equiv_num_hf_evals(): 77 """Extract the equivalent number of high fideltiy evals 78 79 Returns a floating point value. 80 """ 81 global __OUTPUT 82 lines_iter = iter(__OUTPUT) 83 for line in lines_iter: 84 if line.startswith("<<<<< Equivalent number of high fidelity evaluations:"): 85 msg, val = line.split(':') 86 return float(val) 87 return None 88 89def extract_pdfs(): 90 """Extract the PDFs from the global __OUTPUT 91 92 Returns: The PDFs with lower and upper bins structured 93 as a list of dictionaries. The items in the list 94 are for executions, and the key, value pairs in the 95 dictionaries are the response descriptors and 2D lists 96 of the lower and upper bounds and the densities 97 with dimension (num_bins, 3) 98 """ 99 global __OUTPUT 100 pdfs = [] 101 lines_iter = iter(__OUTPUT) 102 for line in lines_iter: 103 if line.startswith("Probability Density Function (PDF)"): 104 pdfs.append({}) 105 nline = next(lines_iter).strip() # get either 'PDF for <descriptor>:" or a blank line 106 while nline: # Loop over the responses 107 desc = nline.split()[-1][:-1] 108 pdf_for_resp = [] 109 next(lines_iter) # Skip heading "Bin Lower..." 110 next(lines_iter) # Skip heading "----..." 111 nline = next(lines_iter) # Get the first line of data for this response 112 while not nline.startswith("PDF for") and nline: # loop over data 113 values = [float(t) for t in nline.split()] 114 pdf_for_resp.append(values) 115 nline = next(lines_iter).strip() 116 pdfs[-1][desc] = pdf_for_resp 117 return pdfs 118 119def extract_level_mapping_row(line): 120 """Tokenize one row of a level mappings table. 121 122 line: String containing a row 123 124 returns: A list of length 4 containing the table entries. 125 Blank table entries contain None. 126 """ 127 tokens = line.split(None, 1) 128 tlen = len(tokens[0]) 129 precision = tlen - 7 if tokens[0] == '-' else tlen - 6 130 width = precision + 7 131 132 result = 4*[None] 133 for i in range(0, 4): 134 begin = i*(width + 2) 135 try: 136 result[i] = float(line[begin:begin + width+2]) 137 except (IndexError, ValueError): 138 pass 139 return result 140 141def extract_level_mappings(): 142 """Extract the level mappings from the global __OUTPUT 143 144 Returns: The level mappings are structured as a list of 145 dictionaries. The items in the list are for executions, 146 and the key, value pairs in the dictionaries are the 147 response descriptors and 2D lists of the mappings. The 148 lists are (num_rows, 4), with None stored in empty 149 elements. 150 """ 151 152 global __OUTPUT 153 mappings = [] 154 lines_iter = iter(__OUTPUT) 155 for line in lines_iter: 156 if line.startswith("Level mappings for each"): 157 mappings.append({}) 158 nline = next(lines_iter).strip() # get 'Cumulative Distribution..' 159 while nline and not nline.startswith('--'): # Loop over the responses 160 desc = nline.split()[-1][:-1] 161 mappings_for_resp = [] 162 next(lines_iter) # Skip heading "Response Level.." 163 next(lines_iter) # Skip heading "----..." 164 nline = next(lines_iter) # Get the first line of data for this response 165 while not nline.startswith("Cumulative Distribution") and \ 166 not nline.startswith('--') and \ 167 nline: # loop over data 168 values = extract_level_mapping_row(nline) 169 mappings_for_resp.append(values) 170 nline = next(lines_iter).strip() 171 mappings[-1][desc] = mappings_for_resp 172 return mappings 173 174def extract_correlations_helper(corr_type = None): 175 """Extract the simple/simple rank correlation matrices from the global __OUTPUT 176 177 Returns: A list of all the simple (for corr_type == "pearson") or simple rank (otherwise) 178 correlation matrices. The list elements are pairs (2-tuples). The first item is a list 179 of the factor labels. The second is a "2D" list of the matrix elements. 180 """ 181 if corr_type == "pearson": 182 heading = "Simple Correlation Matrix" 183 else: 184 heading = "Simple Rank Correlation Matrix" 185 correlations = [] 186 lines_iter = iter(__OUTPUT) 187 for line in lines_iter: 188 if line.startswith(heading): 189 nline = next(lines_iter) 190 factors = nline.split() 191 num_factors = len(factors) 192 coef = [] 193 for i in range(num_factors): 194 coef.append(num_factors*[0.0]) 195 nline = next(lines_iter) 196 tokens = [float(t) for t in nline.split()[1:]] 197 for j in range(i+1): 198 coef[i][j] = tokens[j] 199 # symmetrize 200 for i in range(num_factors): 201 for j in range(i+1, num_factors): 202 coef[i][j] = coef[j][i] 203 # store 204 correlations.append( (factors, coef)) 205 return correlations 206 207def extract_simple_rank_correlations(): 208 return extract_correlations_helper("spearman") 209 210def extract_simple_correlations(): 211 return extract_correlations_helper("pearson") 212 213def extract_partial_correlations_helper(corr_type = None): 214 """Extract the partial/partial rank correlation matrices from the global __OUTPUT 215 216 Returns: A list of all the partial (for corr_type == "pearson") or partial rank (otherwise) 217 correlation matrices. The list elements are dictionaries, with response descriptors as keys. 218 The values are pairs (2-tuples). The first item is a list of variables. The second item is 219 a list of correlation values. 220 """ 221 if corr_type == "pearson": 222 heading = "Partial Correlation Matrix" 223 else: 224 heading = "Partial Rank Correlation Matrix" 225 correlations = [] 226 lines_iter = iter(__OUTPUT) 227 for line in lines_iter: 228 if line.startswith(heading): 229 nline = next(lines_iter) 230 inc_corr = {} 231 responses = nline.split() 232 for r in responses: 233 inc_corr[r] = ([], []) 234 nline = next(lines_iter).strip() 235 while(nline): 236 tokens = nline.split() 237 var = tokens[0] 238 val = [float(v) for v in tokens[1:]] 239 for i, r in enumerate(responses): 240 inc_corr[r][0].append(var) 241 inc_corr[r][1].append(val[i]) 242 nline = next(lines_iter).strip() 243 correlations.append(inc_corr) 244 return correlations 245 246def extract_partial_rank_correlations(): 247 return extract_partial_correlations_helper("spearman") 248 249def extract_partial_correlations(): 250 return extract_partial_correlations_helper("pearson") 251 252def extract_best_helper(result, labeled=False): 253 """Extract the best parameters/residuals/objectives/constraints 254 255 Return as a list of lists. The outer list may be over 256 executions solution sets. (TODO: This may not be expressive enough, 257 because there could be multiple executions and multiple sets). If 258 the result is labeled (with variable or response descriptors), the inner 259 lists contain tuples of (label, value) are returned. 260 """ 261 heading = "<<<<< Best " + result 262 best = [] 263 lines_iter = iter(__OUTPUT) 264 for line in lines_iter: 265 if line.startswith(heading): 266 nline = next(lines_iter) # the first variable 267 best_set = [] 268 while(nline[0] != "<"): 269 # variables may be float, int, or string valued. Try to convert. 270 # String variables can contain whitespace, so split from the right 271 # to separate the variable descriptor from the value. 272 if labeled: 273 val, label = nline.rsplit(None,1) 274 try: 275 val = int(val) 276 except ValueError: 277 try: 278 val = float(val) 279 except ValueError: 280 pass 281 best_set.append( (label, val) ) 282 else: 283 best_set.append(float(nline)) 284 nline = next(lines_iter) 285 best.append(best_set) 286 return best 287 288def extract_best_parameters(): 289 return extract_best_helper("parameters", True) 290 291def extract_best_residuals(): 292 return extract_best_helper("residual terms", False) 293 294def extract_best_objectives(): 295 return extract_best_helper("objective", False) 296 297def extract_best_constraints(): 298 return extract_best_helper("constraint", False) 299 300def extract_best_parameter_confidence_intervals(): 301 """Extract parameter confidence intervals 302 303 Return as a list of dictionaries. The keys of the dictionaries are variable 304 descriptors, and the values are tuples of lower and upper bounds 305 """ 306 line_re = re.compile(r"(.+?):\s+\[\s+(.+?),\s+(.+)\s+\]") 307 cis = [] 308 lines_iter = iter(__OUTPUT) 309 for line in lines_iter: 310 if line.startswith("Confidence Intervals on Cal"): 311 nline = next(lines_iter).strip() #the first variable 312 ci = {} 313 while(nline): 314 m = line_re.match(nline) 315 ci[m.group(1)] = (float(m.group(2)), float(m.group(3))) 316 nline = next(lines_iter).strip() 317 cis.append(ci) 318 return cis 319 320def extract_best_residual_norms(): 321 """Extract norm 322 323 Returns a list of norms 324 """ 325 norms = [] 326 lines_iter = iter(__OUTPUT) 327 for line in lines_iter: 328 if line.startswith("<<<<< Best residual norm"): 329 norm = line.split()[5] 330 # strip off the semicolon 331 norm = norm[:-1] 332 norms.append(float(norm)) 333 return norms 334 335def extract_multi_start_results(): 336 """Extract results summary from a multi_start study, including 337 initial points, best points, and best responses""" 338 lines_iter = iter(__OUTPUT) 339 for line in lines_iter: 340 if line.startswith("<<<<< Results summary:"): 341 break 342 label_line = next(lines_iter) 343 # 1. split the labels 344 # 2. throw away the first token ("set_id") 345 # 3. Grab labels that don't end in *. These are the starting points. 346 # 4. Grab labels that do end in *. These are the best points. This list may be 347 # longer because it can include non-continuous design variables 348 # 5. Grab remaining labels, which are all responses. 349 all_labels = label_line.split()[1:] 350 num_starts = 0 351 start_labels = [] 352 num_best = 0 353 best_labels = [] 354 for label in all_labels: 355 if label[-1] == '*': 356 break 357 num_starts += 1 358 start_labels.append(label) 359 for label in all_labels[num_starts:]: 360 if label[-1] != '*': 361 break 362 num_best += 1 363 best_labels.append(label[:-1]) # snip off * 364 num_funcs = len(all_labels) - num_best - num_starts 365 func_labels = all_labels[num_best + num_starts:] 366 # Begin reading values 367 results = {"start_labels":start_labels, 368 "best_labels":best_labels, 369 "function_labels":func_labels, 370 "starts":[], 371 "best":[], 372 "functions":[]} 373 374 while True: 375 value_line = next(lines_iter).strip() 376 if not value_line: 377 break 378 values = [] 379 for value in value_line.split()[1:]: 380 try: 381 values.append(int(value)) 382 except ValueError: 383 try: 384 values.append(float(value)) 385 except ValueError: 386 values.append(value) 387 results["starts"].append(values[:num_starts]) 388 results["best"].append(values[num_starts:num_starts+num_best]) 389 results["functions"].append(values[num_starts+num_best:]) 390 return results 391 392def extract_pareto_set_results(): 393 """Extract results summary from a pareto_set study. These 394 include the weights, variables, and functions.""" 395 lines_iter = iter(__OUTPUT) 396 for line in lines_iter: 397 if line.startswith("<<<<< Results summary:"): 398 break 399 label_line = next(lines_iter) 400 # 1. split the labels. These are weights, best params, best responses 401 # 2. throw away the first token ("set_id") 402 # 3. Grab weight labels (w\d+). These are the weights. The number responses equals the 403 # number of weights. 404 # 4. Grab the best parameters labels (by count: total number - 2*number of weights) 405 # 5. Grab the best responses 406 all_labels = label_line.split()[1:] 407 num_weights = 0 408 weight_labels = [] 409 num_best = 0 410 best_labels = [] 411 weight_re = re.compile("w\d+$") 412 for label in all_labels: 413 if weight_re.match(label) is None: 414 break 415 num_weights += 1 416 weight_labels.append(label) 417 num_best = len(all_labels) - num_weights*2 418 for label in all_labels[num_weights:-num_weights]: 419 best_labels.append(label) 420 num_funcs = num_weights 421 func_labels = all_labels[-num_weights:] 422 # Begin reading values 423 results = {"weight_labels":weight_labels, 424 "best_labels":best_labels, 425 "function_labels":func_labels, 426 "weights":[], 427 "best":[], 428 "functions":[]} 429 430 while True: 431 value_line = next(lines_iter).strip() 432 if not value_line: 433 break 434 values = [] 435 for value in value_line.split()[1:]: 436 try: 437 values.append(int(value)) 438 except ValueError: 439 try: 440 values.append(float(value)) 441 except ValueError: 442 values.append(value) 443 results["weights"].append(values[:num_weights]) 444 results["best"].append(values[num_weights:-num_funcs]) 445 results["functions"].append(values[-num_funcs:]) 446 return results 447 448 449 450 451 452 453 454def read_tabular_data(tabular_file): 455 """Read an annotated format Dakota tabular file. Convert everything 456 to floating point numbers. Return an ordered dict of string:lists""" 457 data = OrderedDict() 458 with open(tabular_file,"r") as f: 459 columns = f.readline().split() 460 for c in columns: 461 data[c] = [] 462 for row in f: 463 tokens = row.split() 464 data["%eval_id"].append(int(tokens[0])) 465 data["interface"].append(tokens[1]) 466 for c, t in zip(columns[2:], tokens[2:]): 467 try: 468 data[c].append(int(t)) 469 except ValueError: 470 try: 471 data[c].append(float(t)) 472 except ValueError: 473 data[c].append(t) 474 return data 475 476def restart_variables(row): 477 478 variables = {} 479 num_relaxed_di = int(row[18]) 480 num_relaxed_dr_index = 18+num_relaxed_di+1 481 num_relaxed_dr = int(row[num_relaxed_dr_index]) 482 483 num_cont_vars_index = num_relaxed_dr_index + num_relaxed_dr + 1 484 num_cont_vars = int(row[num_cont_vars_index]) 485 if num_cont_vars != 0: 486 cont_vars = OrderedDict() 487 start = num_cont_vars_index+1 488 end = start + num_cont_vars*2 489 for i in range(start, end, 2): 490 cont_vars[row[i+1]] = float(row[i]) 491 variables["continuous"] = cont_vars 492 num_di_vars_index = num_cont_vars_index + 2*num_cont_vars + 1 493 num_di_vars = int(row[num_di_vars_index]) 494 if num_di_vars != 0: 495 di_vars = OrderedDict() 496 start = num_di_vars_index+1 497 end = start + num_di_vars*2 498 for i in range(start, end, 2): 499 di_vars[row[i+1]] = int(row[i]) 500 variables["discrete_int"] = di_vars 501 num_ds_vars_index = num_di_vars_index + 2*num_di_vars + 1 502 num_ds_vars = int(row[num_ds_vars_index]) 503 if num_ds_vars != 0: 504 ds_vars = OrderedDict() 505 start = num_ds_vars_index+1 506 end = start + num_ds_vars*2 507 for i in range(start, end, 2): 508 ds_vars[row[i+1]] = row[i] 509 variables["discrete_string"] = ds_vars 510 num_dr_vars_index = num_ds_vars_index + 2*num_ds_vars + 1 511 num_dr_vars = int(row[num_dr_vars_index]) 512 if num_dr_vars != 0: 513 dr_vars = OrderedDict() 514 start = num_dr_vars_index+1 515 end = start + num_dr_vars*2 516 for i in range(start, end, 2): 517 dr_vars[row[i+1]] = row[i] 518 variables["discrete_real"] = dr_vars 519 return variables 520 521def restart_response(row): 522 data = {} 523 data["interface"] = row[0] 524 num_functions = int(row[2]) 525 num_deriv_vars = int(row[3]) 526 grad_flag = row[4] == "1" 527 hess_flag = row[5] == "1" 528 asv_start_index = 6 529 asv_end_index = asv_start_index + num_functions 530 data["asv"] = [int(a) for a in row[asv_start_index:asv_end_index]] 531 dvv_start_index = asv_end_index 532 dvv_end_index = dvv_start_index + num_deriv_vars 533 data["dvv"] = [int(a) for a in row[dvv_start_index:dvv_end_index]] 534 labels_start_index = dvv_end_index 535 labels_end_index = labels_start_index + num_functions 536 labels = row[labels_start_index:labels_end_index] 537 data["response"] = OrderedDict() 538 for d in labels: 539 data["response"][d] = {} 540 fv_index = labels_end_index 541 for a, d in zip(data["asv"], labels): 542 if a & 1: 543 data["response"][d]["function"] = float(row[fv_index]) 544 fv_index += 1 545 if grad_flag: 546 for a, d in zip(data["asv"], labels): 547 if a & 2: 548 g_start = fv_index 549 g_end = fv_index + num_deriv_vars 550 data["response"][d]["gradient"] = [float(c) for c in row[g_start:g_end]] 551 fv_index = g_end 552 if hess_flag: 553 for a, d in zip(data["asv"], labels): 554 if a & 4: 555 h_start = fv_index 556 h_end = fv_index + sum(range(num_deriv_vars+1)) 557 data["response"][d]["hessian"] = [float(c) for c in row[h_start:h_end]] 558 fv_index = h_end 559 data["eval_id"] = int(row[-1]) 560 return data 561 562def read_restart_file(restart_file): 563 file_stem = restart_file.rsplit(".", 1)[0] 564 neutral_file = file_stem + ".neu" 565 global __BIN_DIR 566 dru_path = os.path.join(__BIN_DIR,"dakota_restart_util") 567 with open(os.devnull,"w") as fnull: 568 subprocess.call([dru_path, 569 "to_neutral", 570 restart_file, 571 neutral_file], stdout=fnull) 572 # data structure 573 #{ 574 # "variables":{"continuous":{columns:[]}, 575 # "discrete_int":{columns:[]}, 576 # "discrete_string":{columns:[]}, 577 # "discrete_real":{columns:[]}, 578 # } 579 # "response":{"label":[{"function":val, "gradient":[], "hessian":[]}]}, 580 # "asv":[ [] ], 581 # "eval_id":[], 582 # "dvv":[ [] ], 583 # "interface":[], 584 # 585 #} 586 with open(neutral_file,"r") as f: 587 file_data = f.readlines() 588 # Allocate storage 589 data = {} 590 var_r = restart_variables(file_data[0].split()) 591 data["variables"] = {} 592 for t, vs in list(var_r.items()): # iterate continuous, di, ds, dr 593 data["variables"][t] = OrderedDict() 594 for d, v in list(vs.items()): 595 data["variables"][t][d] = [] 596 resp_r = restart_response(file_data[1].split()) 597 data["response"] = OrderedDict() 598 for d, r in list(resp_r["response"].items()): 599 data["response"][d] = [] 600 data["asv"] = [] 601 data["eval_id"] = [] 602 data["dvv"] = [] 603 data["interface"] = [] 604 605 for i in range(0,len(file_data),2): 606 var_row = file_data[i].split() 607 resp_row = file_data[i+1].split() 608 var_r = restart_variables(var_row) 609 resp_r = restart_response(resp_row) 610 for t, vs in list(var_r.items()): 611 for d, v in list(vs.items()): 612 data["variables"][t][d].append(v) 613 for d, r in list(resp_r["response"].items()): 614 data["response"][d].append(copy.deepcopy(r)) 615 data["asv"].append(resp_r["asv"][:]) 616 data["eval_id"].append(resp_r["eval_id"]) 617 data["dvv"].append(resp_r["dvv"][:]) 618 data["interface"].append(resp_r["interface"]) 619 return data 620 621 622def run_dakota(input_file): 623 """Run Dakota on the input_file and capture output 624 625 input_file: string containing a path to the input file 626 627 """ 628 global __OUTPUT 629 global __BIN_DIR 630 dakota_path = os.path.join(__BIN_DIR,"dakota") 631 output = subprocess.check_output([dakota_path,input_file], stderr=subprocess.STDOUT) 632 __OUTPUT = output.decode('utf-8').split('\n') 633 634def set_executable_dir(bindir): 635 """Set the directory in the build tree that contains the executables""" 636 global __BIN_DIR 637 __BIN_DIR = bindir 638