#  _______________________________________________________________________
#
#  DAKOTA: Design Analysis Kit for Optimization and Terascale Applications
#  Copyright 2014 Sandia Corporation.
#  This software is distributed under the GNU Lesser General Public License.
#  For more information, see the README file in the top Dakota directory.
#  _______________________________________________________________________


import os
import subprocess
import re
import copy
from collections import OrderedDict
import numpy as np

## Console output captured from the most recent Dakota run, as a list of lines.
__OUTPUT = ""

## Directory in the build tree that contains dakota and dakota_restart_util
__BIN_DIR = ""

def extract_moments():
    """Extract the moments from the global __OUTPUT

    Returns: The moments structured as a list of dictionaries.
        The items in the list are for executions, and the
        key, value pairs in the dictionary are the response
        descriptors and a numpy array of the moments
    """
    global __OUTPUT
    moments = []
    lines_iter = iter(__OUTPUT)
    for line in lines_iter:
        if line.startswith("Sample moment statistics"):
            next(lines_iter)  # skip the column-heading line
            moments.append({})
            moments_line = next(lines_iter).strip()
            while moments_line:
                tokens = moments_line.split()
                resp_desc = tokens[0]
                moments_values = np.array([float(t) for t in tokens[1:]])
                moments[-1][resp_desc] = moments_values
                moments_line = next(lines_iter).strip()
    return moments

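# Illustrative usage sketch (not part of the original module): shows how the
# nested structure returned by extract_moments() is indexed. The descriptor
# "response_fn_1" and the moment ordering (Dakota's summary prints mean,
# std dev, skewness, kurtosis) are assumptions about the study.
def _example_moments_usage():
    moments = extract_moments()
    first_execution = moments[0]             # dict keyed by response descriptor
    vals = first_execution["response_fn_1"]  # numpy array of moments
    mean, std_dev = vals[0], vals[1]
    return mean, std_dev
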
def extract_moment_confidence_intervals():
    """Extract the moment confidence intervals

    Returns: The confidence intervals structured as a list
      of dictionaries. The keys of the dictionary are the
      responses, and the values are a 2D list; the 0th dimension
      is lower and upper, and the 1st dimension is mean and
      standard deviation.
    """
    global __OUTPUT
    cis = []
    lines_iter = iter(__OUTPUT)
    for line in lines_iter:
        if line.startswith("95% confidence intervals"):
            cis.append({})
            nline = next(lines_iter)  # discard header
            nline = next(lines_iter).strip()  # first line of data
            while nline:
                tokens = nline.split()
                response = tokens[0]
                row_values = [float(v) for v in tokens[1:]]
                values = []
                values.append([row_values[0], row_values[2]])
                values.append([row_values[1], row_values[3]])
                cis[-1][response] = values
                nline = next(lines_iter).strip()
    return cis

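# Illustrative sketch of indexing the structure documented above; the
# descriptor "response_fn_1" is an assumption. Indexing is
# cis[execution][descriptor][bound][statistic], where bound 0/1 is
# lower/upper and statistic 0/1 is mean/standard deviation.
def _example_ci_usage():
    cis = extract_moment_confidence_intervals()
    ci = cis[0]["response_fn_1"]
    mean_lower, mean_upper = ci[0][0], ci[1][0]
    return mean_lower, mean_upper
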
def extract_equiv_num_hf_evals():
    """Extract the equivalent number of high fidelity evals

    Returns a floating point value, or None if not found.
    """
    global __OUTPUT
    lines_iter = iter(__OUTPUT)
    for line in lines_iter:
        if line.startswith("<<<<< Equivalent number of high fidelity evaluations:"):
            msg, val = line.split(':')
            return float(val)
    return None

def extract_pdfs():
    """Extract the PDFs from the global __OUTPUT

    Returns: The PDFs structured as a list of dictionaries.
        The items in the list are for executions, and the
        key, value pairs in the dictionaries are the response
        descriptors and 2D lists of the lower and upper bin
        bounds and the densities, with dimension (num_bins, 3)
    """
    global __OUTPUT
    pdfs = []
    lines_iter = iter(__OUTPUT)
    for line in lines_iter:
        if line.startswith("Probability Density Function (PDF)"):
            pdfs.append({})
            nline = next(lines_iter).strip()  # either "PDF for <descriptor>:" or a blank line
            while nline:  # loop over the responses
                desc = nline.split()[-1][:-1]  # descriptor, with trailing ':' removed
                pdf_for_resp = []
                next(lines_iter)  # skip heading "Bin Lower..."
                next(lines_iter)  # skip heading "----..."
                nline = next(lines_iter)  # first line of data for this response
                while nline and not nline.startswith("PDF for"):  # loop over data
                    values = [float(t) for t in nline.split()]
                    pdf_for_resp.append(values)
                    nline = next(lines_iter).strip()
                pdfs[-1][desc] = pdf_for_resp
    return pdfs

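# Illustrative sketch: iterate the (num_bins, 3) rows for one response. Each
# row holds the lower bin bound, the upper bin bound, and the density; the
# descriptor "response_fn_1" is an assumption.
def _example_pdf_usage():
    pdfs = extract_pdfs()
    for lower, upper, density in pdfs[0]["response_fn_1"]:
        print("[%g, %g): %g" % (lower, upper, density))
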
def extract_level_mapping_row(line):
    """Tokenize one row of a level mappings table.

    line: String containing a row

    returns: A list of length 4 containing the table entries.
        Blank table entries contain None.
    """
    # Deduce the fixed field width from the first token; a leading '-' adds
    # one character beyond the standard exponential formatting.
    tokens = line.split(None, 1)
    tlen = len(tokens[0])
    precision = tlen - 7 if tokens[0][0] == '-' else tlen - 6
    width = precision + 7

    result = 4*[None]
    for i in range(0, 4):
        begin = i*(width + 2)
        try:
            result[i] = float(line[begin:begin + width + 2])
        except (IndexError, ValueError):
            pass
    return result

def extract_level_mappings():
    """Extract the level mappings from the global __OUTPUT

    Returns: The level mappings are structured as a list of
        dictionaries. The items in the list are for executions,
        and the key, value pairs in the dictionaries are the
        response descriptors and 2D lists of the mappings. The
        lists are (num_rows, 4), with None stored in empty
        elements.
    """
    global __OUTPUT
    mappings = []
    lines_iter = iter(__OUTPUT)
    for line in lines_iter:
        if line.startswith("Level mappings for each"):
            mappings.append({})
            nline = next(lines_iter).strip()  # get 'Cumulative Distribution...'
            while nline and not nline.startswith('--'):  # loop over the responses
                desc = nline.split()[-1][:-1]
                mappings_for_resp = []
                next(lines_iter)  # skip heading "Response Level..."
                next(lines_iter)  # skip heading "----..."
                nline = next(lines_iter)  # first line of data for this response
                while nline and \
                        not nline.startswith("Cumulative Distribution") and \
                        not nline.startswith('--'):  # loop over data
                    values = extract_level_mapping_row(nline)
                    mappings_for_resp.append(values)
                    nline = next(lines_iter).strip()
                mappings[-1][desc] = mappings_for_resp
    return mappings

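# Illustrative sketch of walking one level-mappings table. The descriptor
# "response_fn_1" is an assumption, and which of the four columns are
# populated depends on the study's level mapping specification.
def _example_level_mappings_usage():
    mappings = extract_level_mappings()
    for row in mappings[0]["response_fn_1"]:
        print(row)  # list of length 4; blank table cells are None
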
def extract_correlations_helper(corr_type=None):
    """Extract the simple/simple rank correlation matrices from the global __OUTPUT

    Returns: A list of all the simple (for corr_type == "pearson") or simple rank (otherwise)
      correlation matrices. The list elements are pairs (2-tuples). The first item is a list
      of the factor labels. The second is a "2D" list of the matrix elements.
    """
    if corr_type == "pearson":
        heading = "Simple Correlation Matrix"
    else:
        heading = "Simple Rank Correlation Matrix"
    correlations = []
    lines_iter = iter(__OUTPUT)
    for line in lines_iter:
        if line.startswith(heading):
            nline = next(lines_iter)
            factors = nline.split()
            num_factors = len(factors)
            coef = []
            # The matrix is printed as a lower triangle; read it row by row.
            for i in range(num_factors):
                coef.append(num_factors*[0.0])
                nline = next(lines_iter)
                tokens = [float(t) for t in nline.split()[1:]]
                for j in range(i+1):
                    coef[i][j] = tokens[j]
            # symmetrize
            for i in range(num_factors):
                for j in range(i+1, num_factors):
                    coef[i][j] = coef[j][i]
            # store
            correlations.append((factors, coef))
    return correlations

def extract_simple_rank_correlations():
    return extract_correlations_helper("spearman")

def extract_simple_correlations():
    return extract_correlations_helper("pearson")

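# Illustrative sketch: look up the coefficient between two factors in the
# first simple correlation matrix. The factor labels "x1" and "response_fn_1"
# are assumptions about the study.
def _example_correlation_usage():
    factors, coef = extract_simple_correlations()[0]
    i = factors.index("x1")
    j = factors.index("response_fn_1")
    return coef[i][j]  # equals coef[j][i] after symmetrization
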
def extract_partial_correlations_helper(corr_type=None):
    """Extract the partial/partial rank correlation matrices from the global __OUTPUT

    Returns: A list of all the partial (for corr_type == "pearson") or partial rank (otherwise)
      correlation matrices. The list elements are dictionaries, with response descriptors as keys.
      The values are pairs (2-tuples). The first item is a list of variables. The second item is
      a list of correlation values.
    """
    if corr_type == "pearson":
        heading = "Partial Correlation Matrix"
    else:
        heading = "Partial Rank Correlation Matrix"
    correlations = []
    lines_iter = iter(__OUTPUT)
    for line in lines_iter:
        if line.startswith(heading):
            nline = next(lines_iter)
            inc_corr = {}
            responses = nline.split()
            for r in responses:
                inc_corr[r] = ([], [])
            nline = next(lines_iter).strip()
            while nline:
                tokens = nline.split()
                var = tokens[0]
                val = [float(v) for v in tokens[1:]]
                for i, r in enumerate(responses):
                    inc_corr[r][0].append(var)
                    inc_corr[r][1].append(val[i])
                nline = next(lines_iter).strip()
            correlations.append(inc_corr)
    return correlations

def extract_partial_rank_correlations():
    return extract_partial_correlations_helper("spearman")

def extract_partial_correlations():
    return extract_partial_correlations_helper("pearson")

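# Illustrative sketch: collect the partial correlation of every variable with
# one response into a dict. The descriptor "response_fn_1" is an assumption.
def _example_partial_correlation_usage():
    corr = extract_partial_correlations()[0]
    variables, values = corr["response_fn_1"]
    return dict(zip(variables, values))
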
def extract_best_helper(result, labeled=False):
    """Extract the best parameters/residuals/objectives/constraints

    Return as a list of lists. The outer list is over executions or
      solution sets. (TODO: This may not be expressive enough,
      because there could be multiple executions and multiple sets). If
      the result is labeled (with variable or response descriptors), the inner
      lists contain (label, value) tuples; otherwise they contain bare values.
    """
    heading = "<<<<< Best " + result
    best = []
    lines_iter = iter(__OUTPUT)
    for line in lines_iter:
        if line.startswith(heading):
            nline = next(lines_iter)  # the first variable
            best_set = []
            while nline[0] != "<":
                # Variables may be float, int, or string valued. Try to convert.
                # String variables can contain whitespace, so split from the right
                # to separate the variable descriptor from the value.
                if labeled:
                    val, label = nline.rsplit(None, 1)
                    try:
                        val = int(val)
                    except ValueError:
                        try:
                            val = float(val)
                        except ValueError:
                            pass
                    best_set.append((label, val))
                else:
                    best_set.append(float(nline))
                nline = next(lines_iter)
            best.append(best_set)
    return best

def extract_best_parameters():
    return extract_best_helper("parameters", True)

def extract_best_residuals():
    return extract_best_helper("residual terms", False)

def extract_best_objectives():
    return extract_best_helper("objective", False)

def extract_best_constraints():
    return extract_best_helper("constraint", False)

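# Illustrative sketch: because extract_best_parameters() is labeled, each
# solution set is a list of (descriptor, value) tuples and converts directly
# to a dict.
def _example_best_usage():
    best_params = extract_best_parameters()
    return dict(best_params[0])  # first execution/solution set
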
def extract_best_parameter_confidence_intervals():
    """Extract parameter confidence intervals

    Return as a list of dictionaries. The keys of the dictionaries are variable
      descriptors, and the values are tuples of lower and upper bounds
    """
    line_re = re.compile(r"(.+?):\s+\[\s+(.+?),\s+(.+)\s+\]")
    cis = []
    lines_iter = iter(__OUTPUT)
    for line in lines_iter:
        if line.startswith("Confidence Intervals on Cal"):
            nline = next(lines_iter).strip()  # the first variable
            ci = {}
            while nline:
                m = line_re.match(nline)
                ci[m.group(1)] = (float(m.group(2)), float(m.group(3)))
                nline = next(lines_iter).strip()
            cis.append(ci)
    return cis

def extract_best_residual_norms():
    """Extract the best residual norms

    Returns a list of norms, one per matching result
    """
    norms = []
    lines_iter = iter(__OUTPUT)
    for line in lines_iter:
        if line.startswith("<<<<< Best residual norm"):
            norm = line.split()[5]
            # strip off the trailing semicolon
            norm = norm[:-1]
            norms.append(float(norm))
    return norms

def extract_multi_start_results():
    """Extract results summary from a multi_start study, including
    initial points, best points, and best responses"""
    lines_iter = iter(__OUTPUT)
    for line in lines_iter:
        if line.startswith("<<<<< Results summary:"):
            break
    label_line = next(lines_iter)
    # 1. Split the labels.
    # 2. Throw away the first token ("set_id").
    # 3. Grab labels that don't end in *. These are the starting points.
    # 4. Grab labels that do end in *. These are the best points. This list may
    #    be longer because it can include non-continuous design variables.
    # 5. Grab remaining labels, which are all responses.
    all_labels = label_line.split()[1:]
    num_starts = 0
    start_labels = []
    num_best = 0
    best_labels = []
    for label in all_labels:
        if label[-1] == '*':
            break
        num_starts += 1
        start_labels.append(label)
    for label in all_labels[num_starts:]:
        if label[-1] != '*':
            break
        num_best += 1
        best_labels.append(label[:-1])  # snip off *
    num_funcs = len(all_labels) - num_best - num_starts
    func_labels = all_labels[num_best + num_starts:]
    # Begin reading values
    results = {"start_labels": start_labels,
               "best_labels": best_labels,
               "function_labels": func_labels,
               "starts": [],
               "best": [],
               "functions": []}

    while True:
        value_line = next(lines_iter).strip()
        if not value_line:
            break
        values = []
        for value in value_line.split()[1:]:
            try:
                values.append(int(value))
            except ValueError:
                try:
                    values.append(float(value))
                except ValueError:
                    values.append(value)
        results["starts"].append(values[:num_starts])
        results["best"].append(values[num_starts:num_starts+num_best])
        results["functions"].append(values[num_starts+num_best:])
    return results

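# Illustrative sketch: pair each starting point with its best point and best
# responses using the parallel lists returned above.
def _example_multi_start_usage():
    results = extract_multi_start_results()
    for start, best, funcs in zip(results["starts"], results["best"],
                                  results["functions"]):
        print(dict(zip(results["start_labels"], start)),
              dict(zip(results["best_labels"], best)),
              dict(zip(results["function_labels"], funcs)))
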
def extract_pareto_set_results():
    """Extract results summary from a pareto_set study. These
    include the weights, variables, and functions."""
    lines_iter = iter(__OUTPUT)
    for line in lines_iter:
        if line.startswith("<<<<< Results summary:"):
            break
    label_line = next(lines_iter)
    # 1. Split the labels. These are weights, best params, best responses.
    # 2. Throw away the first token ("set_id").
    # 3. Grab weight labels (w\d+). The number of responses equals the
    #    number of weights.
    # 4. Grab the best parameter labels (by count: total number - 2*number of weights).
    # 5. Grab the best responses.
    all_labels = label_line.split()[1:]
    num_weights = 0
    weight_labels = []
    num_best = 0
    best_labels = []
    weight_re = re.compile(r"w\d+$")
    for label in all_labels:
        if weight_re.match(label) is None:
            break
        num_weights += 1
        weight_labels.append(label)
    num_best = len(all_labels) - num_weights*2
    for label in all_labels[num_weights:-num_weights]:
        best_labels.append(label)
    num_funcs = num_weights
    func_labels = all_labels[-num_weights:]
    # Begin reading values
    results = {"weight_labels": weight_labels,
               "best_labels": best_labels,
               "function_labels": func_labels,
               "weights": [],
               "best": [],
               "functions": []}

    while True:
        value_line = next(lines_iter).strip()
        if not value_line:
            break
        values = []
        for value in value_line.split()[1:]:
            try:
                values.append(int(value))
            except ValueError:
                try:
                    values.append(float(value))
                except ValueError:
                    values.append(value)
        results["weights"].append(values[:num_weights])
        results["best"].append(values[num_weights:-num_funcs])
        results["functions"].append(values[-num_funcs:])
    return results

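# Illustrative sketch: walk the Pareto set, pairing each weight vector with
# the corresponding best parameters and responses.
def _example_pareto_usage():
    results = extract_pareto_set_results()
    for weights, best, funcs in zip(results["weights"], results["best"],
                                    results["functions"]):
        print(weights, dict(zip(results["best_labels"], best)), funcs)
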
def read_tabular_data(tabular_file):
    """Read an annotated format Dakota tabular file. Convert values to
    ints or floats where possible, leaving the rest as strings. Return
    an ordered dict of string:list"""
    data = OrderedDict()
    with open(tabular_file, "r") as f:
        columns = f.readline().split()
        for c in columns:
            data[c] = []
        for row in f:
            tokens = row.split()
            data["%eval_id"].append(int(tokens[0]))
            data["interface"].append(tokens[1])
            for c, t in zip(columns[2:], tokens[2:]):
                try:
                    data[c].append(int(t))
                except ValueError:
                    try:
                        data[c].append(float(t))
                    except ValueError:
                        data[c].append(t)
    return data

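# Illustrative sketch: read a tabular file and average one column.
# "dakota_tabular.dat" is Dakota's default tabular file name; the column
# label "response_fn_1" is an assumption about the study.
def _example_tabular_usage():
    data = read_tabular_data("dakota_tabular.dat")
    column = data["response_fn_1"]
    return sum(column)/len(column)
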
def restart_variables(row):
    """Parse the variables record of one neutral-file evaluation into a
    dict of OrderedDicts keyed by variable type."""
    variables = {}
    # Counts of relaxed discrete int/real variables precede the variable data.
    num_relaxed_di = int(row[18])
    num_relaxed_dr_index = 18+num_relaxed_di+1
    num_relaxed_dr = int(row[num_relaxed_dr_index])

    # Each variable block is a count followed by (value, descriptor) pairs.
    num_cont_vars_index = num_relaxed_dr_index + num_relaxed_dr + 1
    num_cont_vars = int(row[num_cont_vars_index])
    if num_cont_vars != 0:
        cont_vars = OrderedDict()
        start = num_cont_vars_index+1
        end = start + num_cont_vars*2
        for i in range(start, end, 2):
            cont_vars[row[i+1]] = float(row[i])
        variables["continuous"] = cont_vars
    num_di_vars_index = num_cont_vars_index + 2*num_cont_vars + 1
    num_di_vars = int(row[num_di_vars_index])
    if num_di_vars != 0:
        di_vars = OrderedDict()
        start = num_di_vars_index+1
        end = start + num_di_vars*2
        for i in range(start, end, 2):
            di_vars[row[i+1]] = int(row[i])
        variables["discrete_int"] = di_vars
    num_ds_vars_index = num_di_vars_index + 2*num_di_vars + 1
    num_ds_vars = int(row[num_ds_vars_index])
    if num_ds_vars != 0:
        ds_vars = OrderedDict()
        start = num_ds_vars_index+1
        end = start + num_ds_vars*2
        for i in range(start, end, 2):
            ds_vars[row[i+1]] = row[i]
        variables["discrete_string"] = ds_vars
    num_dr_vars_index = num_ds_vars_index + 2*num_ds_vars + 1
    num_dr_vars = int(row[num_dr_vars_index])
    if num_dr_vars != 0:
        dr_vars = OrderedDict()
        start = num_dr_vars_index+1
        end = start + num_dr_vars*2
        for i in range(start, end, 2):
            dr_vars[row[i+1]] = row[i]
        variables["discrete_real"] = dr_vars
    return variables

def restart_response(row):
    """Parse the response record of one neutral-file evaluation. The ASV
    bits select which of function value (1), gradient (2), and Hessian (4)
    are present for each response."""
    data = {}
    data["interface"] = row[0]
    num_functions = int(row[2])
    num_deriv_vars = int(row[3])
    grad_flag = row[4] == "1"
    hess_flag = row[5] == "1"
    asv_start_index = 6
    asv_end_index = asv_start_index + num_functions
    data["asv"] = [int(a) for a in row[asv_start_index:asv_end_index]]
    dvv_start_index = asv_end_index
    dvv_end_index = dvv_start_index + num_deriv_vars
    data["dvv"] = [int(a) for a in row[dvv_start_index:dvv_end_index]]
    labels_start_index = dvv_end_index
    labels_end_index = labels_start_index + num_functions
    labels = row[labels_start_index:labels_end_index]
    data["response"] = OrderedDict()
    for d in labels:
        data["response"][d] = {}
    fv_index = labels_end_index
    for a, d in zip(data["asv"], labels):
        if a & 1:
            data["response"][d]["function"] = float(row[fv_index])
            fv_index += 1
    if grad_flag:
        for a, d in zip(data["asv"], labels):
            if a & 2:
                g_start = fv_index
                g_end = fv_index + num_deriv_vars
                data["response"][d]["gradient"] = [float(c) for c in row[g_start:g_end]]
                fv_index = g_end
    if hess_flag:
        for a, d in zip(data["asv"], labels):
            if a & 4:
                # Hessians are stored as a packed triangle of length n*(n+1)/2
                h_start = fv_index
                h_end = fv_index + sum(range(num_deriv_vars+1))
                data["response"][d]["hessian"] = [float(c) for c in row[h_start:h_end]]
                fv_index = h_end
    data["eval_id"] = int(row[-1])
    return data

def read_restart_file(restart_file):
    file_stem = restart_file.rsplit(".", 1)[0]
    neutral_file = file_stem + ".neu"
    global __BIN_DIR
    dru_path = os.path.join(__BIN_DIR, "dakota_restart_util")
    with open(os.devnull, "w") as fnull:
        subprocess.call([dru_path,
                         "to_neutral",
                         restart_file,
                         neutral_file], stdout=fnull)
    # data structure
    #{
    #    "variables":{"continuous":{columns:[]},
    #                 "discrete_int":{columns:[]},
    #                 "discrete_string":{columns:[]},
    #                 "discrete_real":{columns:[]},
    #                 }
    #    "response":{"label":[{"function":val, "gradient":[], "hessian":[]}]},
    #    "asv":[ [] ],
    #    "eval_id":[],
    #    "dvv":[ [] ],
    #    "interface":[],
    #
    #}
    with open(neutral_file, "r") as f:
        file_data = f.readlines()
    # Allocate storage, using the first evaluation to discover the
    # variable and response labels
    data = {}
    var_r = restart_variables(file_data[0].split())
    data["variables"] = {}
    for t, vs in list(var_r.items()):  # iterate continuous, di, ds, dr
        data["variables"][t] = OrderedDict()
        for d, v in list(vs.items()):
            data["variables"][t][d] = []
    resp_r = restart_response(file_data[1].split())
    data["response"] = OrderedDict()
    for d, r in list(resp_r["response"].items()):
        data["response"][d] = []
    data["asv"] = []
    data["eval_id"] = []
    data["dvv"] = []
    data["interface"] = []

    # Each evaluation occupies two lines: variables, then responses
    for i in range(0, len(file_data), 2):
        var_row = file_data[i].split()
        resp_row = file_data[i+1].split()
        var_r = restart_variables(var_row)
        resp_r = restart_response(resp_row)
        for t, vs in list(var_r.items()):
            for d, v in list(vs.items()):
                data["variables"][t][d].append(v)
        for d, r in list(resp_r["response"].items()):
            data["response"][d].append(copy.deepcopy(r))
        data["asv"].append(resp_r["asv"][:])
        data["eval_id"].append(resp_r["eval_id"])
        data["dvv"].append(resp_r["dvv"][:])
        data["interface"].append(resp_r["interface"])
    return data


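# Illustrative sketch: read a restart file (set_executable_dir() must point at
# a build containing dakota_restart_util first) and list the continuous
# variable values from the first evaluation. "dakota.rst" is Dakota's default
# restart file name.
def _example_restart_usage():
    data = read_restart_file("dakota.rst")
    for desc, values in data["variables"]["continuous"].items():
        print(desc, values[0])
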
def run_dakota(input_file):
    """Run Dakota on the input_file and capture output

    input_file: string containing a path to the input file

    """
    global __OUTPUT
    global __BIN_DIR
    dakota_path = os.path.join(__BIN_DIR, "dakota")
    output = subprocess.check_output([dakota_path, input_file], stderr=subprocess.STDOUT)
    __OUTPUT = output.decode('utf-8').split('\n')

def set_executable_dir(bindir):
    """Set the directory in the build tree that contains the executables"""
    global __BIN_DIR
    __BIN_DIR = bindir

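# Illustrative end-to-end sketch tying the module together. The build path and
# input file name are assumptions; run_dakota() must succeed before any of the
# extract_* helpers have output to parse.
def _example_driver():
    set_executable_dir("/path/to/dakota/build/bin")  # assumed build location
    run_dakota("sampling_study.in")                  # assumed input file
    print(extract_moments())
    print(extract_moment_confidence_intervals())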