1#!/usr/bin/env python3 2# -*- coding: utf-8 -*- 3 4# Copyright (C) 2018 Mate Soos 5# 6# This program is free software; you can redistribute it and/or 7# modify it under the terms of the GNU General Public License 8# as published by the Free Software Foundation; version 2 9# of the License. 10# 11# This program is distributed in the hope that it will be useful, 12# but WITHOUT ANY WARRANTY; without even the implied warranty of 13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14# GNU General Public License for more details. 15# 16# You should have received a copy of the GNU General Public License 17# along with this program; if not, write to the Free Software 18# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 19# 02110-1301, USA. 20 21# pylint: disable=invalid-name,line-too-long,too-many-locals,consider-using-sys-exit 22 23import numpy as np 24import pandas as pd 25import matplotlib.pyplot as plt 26import sklearn 27import sklearn.metrics 28import re 29import ast 30import math 31import time 32import os.path 33import sqlite3 34import functools 35try: 36 from termcolor import cprint 37except ImportError: 38 termcolor_avail = False 39else: 40 termcolor_avail = True 41 42from pprint import pprint 43try: 44 import mlflow 45except ImportError: 46 mlflow_avail = False 47else: 48 mlflow_avail = True 49 50 51class QueryHelper: 52 def __init__(self, dbfname): 53 if not os.path.isfile(dbfname): 54 print("ERROR: Database file '%s' does not exist" % dbfname) 55 exit(-1) 56 57 self.conn = sqlite3.connect(dbfname) 58 self.c = self.conn.cursor() 59 60 def __enter__(self): 61 return self 62 63 def __exit__(self, exc_type, exc_value, traceback): 64 self.conn.commit() 65 self.conn.close() 66 67 68class QueryFill (QueryHelper): 69 def create_indexes(self, verbose=False, used_clauses="used_clauses"): 70 t = time.time() 71 print("Recreating indexes...") 72 queries = """ 73 create index `idxclid6-4` on `reduceDB` (`clauseID`, `conflicts`) 74 create index `idxclidUCLS-2` on `{used_clauses}` ( `clauseID`, `used_at`); 75 create index `idxcl_last_in_solver-1` on `cl_last_in_solver` ( `clauseID`, `conflicts`); 76 """.format(used_clauses=used_clauses) 77 for l in queries.split('\n'): 78 t2 = time.time() 79 80 if verbose: 81 print("Creating index: ", l) 82 self.c.execute(l) 83 if verbose: 84 print("Index creation T: %-3.2f s" % (time.time() - t2)) 85 86 print("indexes created T: %-3.2f s" % (time.time() - t)) 87 88 def delete_and_create_all(self): 89 tables = ["short", "long", "forever"] 90 91 t = time.time() 92 q = """DROP TABLE IF EXISTS `used_later`;""" 93 self.c.execute(q) 94 95 for table in tables: 96 q = """ 97 DROP TABLE IF EXISTS `used_later_{name}`; 98 """ 99 self.c.execute(q.format(name=table)) 100 101 # Create and fill used_later_X tables 102 q_create = """ 103 create table `used_later_{name}` ( 104 `clauseID` bigint(20) NOT NULL, 105 `rdb0conflicts` bigint(20) NOT NULL, 106 `used_later_{name}` bigint(20), 107 `offset` bigint(20) NOT NULL 108 );""" 109 self.c.execute(q_create.format(name="short")) 110 self.c.execute(q_create.format(name="long")) 111 self.c.execute(q_create.format(name="forever")) 112 113 # Create used_later table 114 t = time.time() 115 q = """ 116 create table `used_later` ( 117 `clauseID` bigint(20) NOT NULL, 118 `rdb0conflicts` bigint(20) NOT NULL, 119 `used_later` bigint(20) 120 );""" 121 self.c.execute(q) 122 123 idxs = """ 124 create index `used_later_{name}_idx3` on `used_later_{name}` (used_later_{name}); 125 create index `used_later_{name}_idx1` on `used_later_{name}` (`clauseID`, rdb0conflicts, offset); 126 create index `used_later_{name}_idx2` on `used_later_{name}` (`clauseID`, rdb0conflicts, used_later_{name}, offset);""" 127 128 for table in tables: 129 for l in idxs.format(name=table).split('\n'): 130 self.c.execute(l) 131 132 idxs = """ 133 create index `used_later_idx3` on `used_later` (`used_later`); 134 create index `used_later_idx1` on `used_later` (`clauseID`, rdb0conflicts); 135 create index `used_later_idx2` on `used_later` (`clauseID`, rdb0conflicts, used_later); 136 """ 137 t = time.time() 138 for l in idxs.split('\n'): 139 self.c.execute(l) 140 141 print("used_later* dropped and recreated T: %-3.2f s" % (time.time() - t)) 142 143 def fill_used_later(self, used_clauses="used_clauses"): 144 # Fill used_later table 145 t = time.time() 146 q = """insert into used_later 147 ( 148 `clauseID`, 149 `rdb0conflicts`, 150 `used_later` 151 ) 152 SELECT 153 rdb0.clauseID 154 , rdb0.conflicts 155 , count(ucl.used_at) as `useful_later` 156 FROM 157 reduceDB as rdb0 158 left join {used_clauses} as ucl 159 160 -- for any point later than now 161 -- reduceDB is always present, used_later may not be, hence left join 162 on (ucl.clauseID = rdb0.clauseID 163 and ucl.used_at > rdb0.conflicts) 164 165 join cl_last_in_solver 166 on cl_last_in_solver.clauseID = rdb0.clauseID 167 168 WHERE 169 rdb0.clauseID != 0 170 and cl_last_in_solver.conflicts >= rdb0.conflicts 171 172 group by rdb0.conflicts, rdb0.clauseID;""" 173 174 self.c.execute(q.format(used_clauses=used_clauses)) 175 print("used_later filled T: %-3.2f s" % (time.time() - t)) 176 177 def fill_used_later_X(self, name, duration, offset=0, 178 used_clauses="used_clauses", 179 forever=False): 180 q_fill = """ 181 insert into used_later_{name} 182 ( 183 `clauseID`, 184 `rdb0conflicts`, 185 `used_later_{name}`, 186 `offset` 187 ) 188 SELECT 189 rdb0.clauseID 190 , rdb0.conflicts 191 , count(ucl.used_at) as `used_later_{name}` 192 , {offset} 193 194 FROM 195 reduceDB as rdb0 196 left join {used_clauses} as ucl 197 198 -- reduceDB is always present, {used_clauses} may not be, hence left join 199 on (ucl.clauseID = rdb0.clauseID 200 and ucl.used_at > (rdb0.conflicts+{offset}) 201 and ucl.used_at <= (rdb0.conflicts+{duration}+{offset})) 202 203 join cl_last_in_solver 204 on cl_last_in_solver.clauseID = rdb0.clauseID 205 206 WHERE 207 rdb0.clauseID != 0 208 and (cl_last_in_solver.conflicts >= (rdb0.conflicts + {duration} + {offset}) 209 or 1=={forever}) 210 211 group by rdb0.clauseID, rdb0.conflicts;""" 212 213 t = time.time() 214 self.c.execute(q_fill.format( 215 name=name, used_clauses=used_clauses, 216 duration=duration, offset=offset, forever=int(forever))) 217 print("used_later_%s filled T: %-3.2f s" % 218 (name, time.time() - t)) 219 220 221def write_mit_header(f): 222 f.write("""/****************************************** 223Copyright (c) 2018, Mate Soos 224 225Permission is hereby granted, free of charge, to any person obtaining a copy 226of this software and associated documentation files (the "Software"), to deal 227in the Software without restriction, including without limitation the rights 228to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 229copies of the Software, and to permit persons to whom the Software is 230furnished to do so, subject to the following conditions: 231 232The above copyright notice and this permission notice shall be included in 233all copies or substantial portions of the Software. 234 235THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 236IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 237FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 238AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 239LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 240OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 241THE SOFTWARE. 242***********************************************/\n\n""") 243 244def parse_configs(confs): 245 match = re.match(r"^([0-9]*)-([0-9]*)$", confs) 246 if not match: 247 print("ERROR: we cannot parse your config options: '%s'" % confs) 248 exit(-1) 249 250 conf_from = int(match.group(1)) 251 conf_to = int(match.group(2))+1 252 if conf_to <= conf_from: 253 print("ERROR: Conf range is not increasing") 254 exit(-1) 255 256 print("Running configs:", range(conf_from, conf_to)) 257 return conf_from, conf_to 258 259 260def get_features(fname): 261 best_features = [] 262 check_file_exists(fname) 263 with open(fname, "r") as f: 264 for l in f: 265 l = l.strip() 266 if l != "": 267 best_features.append(l) 268 269 return best_features 270 271 272def helper_divide(dividend, divisor, df, features, verb, name=None): 273 """ 274 to be used like: 275 import functools 276 divide = functools.partial(helper.divide, df=df, features=features, verb=options.verbose) 277 """ 278 279 # dividend feature not present 280 #if dividend not in features: 281 #return None 282 283 # divisorfeature not present 284 #if divisor not in features: 285 #return None 286 287 # divide 288 if verb: 289 print("Dividing. dividend: '%s' divisor: '%s' " % (dividend, divisor)) 290 291 if name is None: 292 name = "(" + dividend + "/" + divisor + ")" 293 294 df[name] = df[dividend].div(df[divisor]+0.000000001) 295 return name 296 297def helper_larger_than(lhs, rhs, df, features, verb): 298 """ 299 to be used like: 300 import functools 301 larger_than = functools.partial(helper.larger_than, df=df, features=features, verb=options.verbose) 302 """ 303 304 # divide 305 if verb: 306 print("Calulating '%s' >: '%s' " % (lhs, rhs)) 307 308 name = "(" + lhs + ">" + rhs + ")" 309 df[name] = (df[lhs] > df[rhs]).astype(int) 310 return name 311 312def helper_add(toadd, df, features, verb): 313 """ 314 to be used like: 315 import functools 316 larger_than = functools.partial(helper.larger_than, df=df, features=features, verb=options.verbose) 317 """ 318 319 # add 320 if verb: 321 print("Calulating: the feature addition of: %s", toadd) 322 323 name = "(" 324 for i in range(1, len(toadd)): 325 name = toadd[i] 326 if i < len(toadd)-1: 327 name+="+" 328 name += ")" 329 330 df[name] = df[toadd[0]] 331 for i in range(1, len(toadd)): 332 df[name] += df[toadd[i]] 333 return name 334 335 336def dangerous(conn): 337 conn.execute("PRAGMA journal_mode = MEMORY") 338 conn.execute("PRAGMA synchronous = OFF") 339 340 341def drop_idxs(conn): 342 q = """ 343 SELECT name FROM sqlite_master WHERE type == 'index' 344 """ 345 conn.execute(q) 346 rows = conn.fetchall() 347 queries = "" 348 for row in rows: 349 print("Will delete index:", row[0]) 350 queries += "drop index if exists `%s`;\n" % row[0] 351 352 t = time.time() 353 for q in queries.split("\n"): 354 conn.execute(q) 355 356 print("Removed indexes: T: %-3.2f s"% (time.time() - t)) 357 358 359def get_columns(tablename, verbose, conn): 360 q = "pragma table_info(%s);" % tablename 361 conn.execute(q) 362 rows = conn.fetchall() 363 columns = [] 364 for row in rows: 365 if verbose: 366 print("Using column in table {tablename}: {col}".format( 367 tablename=tablename, col=row[1])) 368 columns.append(row[1]) 369 370 return columns 371 372def query_fragment(tablename, not_cols, short_name, verbose, conn): 373 cols = get_columns(tablename, verbose, conn) 374 filtered_cols = list(set(cols).difference(not_cols)) 375 ret = "" 376 for col in filtered_cols: 377 ret += ", {short_name}.`{col}` as `{short_name}.{col}`\n".format( 378 col=col, short_name=short_name) 379 380 if verbose: 381 print("query for short name {short_name}: {ret}".format( 382 short_name=short_name, ret=ret)) 383 384 return ret 385 386 387def not_inside(not_these, inside_here): 388 for not_this in not_these: 389 if not_this in inside_here: 390 return False 391 392 return True 393 394def add_computed_szfeat_for_clustering(df): 395 print("Adding computed clustering features...") 396 397 todiv = [] 398 for x in list(df): 399 not_these = ["std", "min", "mean", "_per_", "binary", "vcg", "pnr", "horn", "max"] 400 if "szfeat_cur" in x and x[-3:] != "var" and not_inside(not_these, x): 401 todiv.append(x) 402 403 # relative data 404 cols = list(df) 405 for col in cols: 406 if "szfeat_cur" in col: 407 for divper in todiv: 408 df["("+col+"/"+divper+")"] = df[col]/df[divper] 409 df["("+col+"<"+divper+")"] = (df[col] < df[divper]).astype(int) 410 411 print("Added computed features.") 412 413 414# to check for too large or NaN values: 415def check_too_large_or_nan_values(df, features): 416 print("Checking for too large or NaN values...") 417 # features = df.columns.values.flatten().tolist() 418 index = 0 419 for index, row in df[features].iterrows(): 420 for x, name in zip(row, features): 421 try: 422 np.isfinite(x) 423 except: 424 print("Name:", name) 425 print("Prolbem with value:", x) 426 print(row) 427 428 if not np.isfinite(x) or x > np.finfo(np.float32).max: 429 print("issue with data for features: ", name, x) 430 index += 1 431 432 print("Checking finished.") 433 434 435def print_confusion_matrix(cm, 436 normalize=False, 437 title='Confusion matrix', 438 cmap=plt.cm.Blues): 439 """ 440 This function prints and plots the confusion matrix. 441 Normalization can be applied by setting `normalize=True`. 442 """ 443 if normalize: 444 cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] 445 446 print(title) 447 if mlflow_avail: 448 mlflow.log_metric(title, cm[0][0]) 449 np.set_printoptions(precision=2) 450 print(cm) 451 452 453def calc_min_split_point(df, min_samples_split): 454 split_point = int(float(df.shape[0])*min_samples_split) 455 if split_point < 10: 456 split_point = 10 457 print("Minimum split point: ", split_point) 458 return split_point 459 460 461def calc_regression_error(data, features, to_predict, clf, toprint, 462 average="binary", highlight=False): 463 X_data = data[features] 464 y_data = data[to_predict] 465 print("Number of elements:", X_data.shape) 466 if data.shape[0] <= 1: 467 print("Cannot calculate regression error, too few elements") 468 return None 469 y_pred = clf.predict(X_data) 470 main_error = sklearn.metrics.mean_squared_error(y_data, y_pred) 471 print("Mean squared error is: ", main_error) 472 473 for start,end in [(0,10), (1,10), (10, 100), (100, 1000), (1000,10000), (10000, 1000000)]: 474 x = "--> Strata %10d <= %20s < %10d " % (start, to_predict, end) 475 myfilt = data[(data[to_predict] >= start) & (data[to_predict] < end)] 476 X_data = myfilt[features] 477 y_data = myfilt[to_predict] 478 y = " -- elements: {:20}".format(str(X_data.shape)) 479 if myfilt.shape[0] <= 1: 480 #print("Cannot calculate regression error, too few elements") 481 error = -1 482 else: 483 y_pred = clf.predict(X_data) 484 error = sklearn.metrics.mean_squared_error(y_data, y_pred) 485 print("%s %s msqe: %13.1lf" % (x, y, error)) 486 487 for start,end in [(0,3), (3,8), (8, 15), (15, 25), (25,50), (50, 100), (100, 1000000)]: 488 x = "--> Strata %10d <= %20s < %10d " % (start, "rdb0.glue", end) 489 myfilt = data[(data["rdb0.glue"] >= start) & (data["rdb0.glue"] < end)] 490 X_data = myfilt[features] 491 y_data = myfilt[to_predict] 492 y = " -- elements: {:20}".format(str(X_data.shape)) 493 if myfilt.shape[0] <= 1: 494 error = -1 495 else: 496 y_pred = clf.predict(X_data) 497 error = sklearn.metrics.mean_squared_error(y_data, y_pred) 498 print("%s %s msqe: %13.1lf" % (x, y, error)) 499 500 return main_error 501 502 503def conf_matrixes(data, features, to_predict, clf, toprint, 504 average="binary", highlight=False): 505 # get data 506 X_data = data[features] 507 y_data = data[to_predict] 508 print("Number of elements:", X_data.shape) 509 if data.shape[0] <= 1: 510 print("Cannot calculate confusion matrix, too few elements") 511 return None, None, None, None 512 513 # Preform prediction 514 def f(x): 515 if x > 0.5: 516 return 1 517 else: 518 return 0 519 520 y_pred = clf.predict(X_data) 521 #print("type(y_pred[0]): ", type(y_pred[0])) 522 if type(y_pred[0]) == np.float32: 523 y_pred = np.array([f(x) for x in y_pred]) 524 525 # calc acc, precision, recall 526 accuracy = sklearn.metrics.accuracy_score( 527 y_data, y_pred) 528 529 precision = sklearn.metrics.precision_score( 530 y_data, y_pred, pos_label=1, average=average) 531 532 533 recall = sklearn.metrics.recall_score( 534 y_data, y_pred, pos_label=1, average=average) 535 536 # ROC AUC 537 predsi = np.array(y_pred) 538 y_testi = pd.DataFrame(y_data)["x.class"].squeeze() 539 try: 540 roc_auc = sklearn.metrics.roc_auc_score(y_testi, predsi) 541 except: 542 print("NOTE: ROC AUC is set to 0 because of completely one-sided OK/BAD") 543 roc_auc = 0 544 545 # record to mlflow 546 if mlflow_avail: 547 mlflow.log_metric(toprint + " -- accuracy", accuracy) 548 mlflow.log_metric(toprint + " -- precision", precision) 549 mlflow.log_metric(toprint + " -- recall", recall) 550 mlflow.log_metric(toprint + " -- roc_auc", roc_auc) 551 552 color = "white" 553 bckgrnd = "on_grey" 554 if highlight: 555 color="green" 556 bckgrnd = "on_grey" 557 558 txt = "%s prec : %-3.4f recall: %-3.4f accuracy: %-3.4f roc_auc: %-3.4f" 559 vals = (toprint, precision, recall, accuracy, roc_auc) 560 if termcolor_avail: 561 cprint(txt % vals , color, bckgrnd) 562 else: 563 cprint(txt % vals) 564 565 # Plot confusion matrix 566 cnf_matrix = sklearn.metrics.confusion_matrix( 567 y_true=y_data, y_pred=y_pred) 568 print_confusion_matrix( 569 cnf_matrix, 570 title='Confusion matrix without normalization -- %s' % toprint) 571 print_confusion_matrix( 572 cnf_matrix, normalize=True, 573 title='Normalized confusion matrix -- %s' % toprint) 574 575 return roc_auc 576 577 578def check_file_exists(fname): 579 try: 580 f = open(fname) 581 except IOError: 582 print("File '%s' not accessible" % fname) 583 exit(-1) 584 finally: 585 f.close() 586 587 588def clear_data_from_str(df): 589 values2nums = {'luby': 0, 'glue': 1, 'geom': 2} 590 df.loc[:, ('cl.cur_restart_type')] = \ 591 df.loc[:, ('cl.cur_restart_type')].map(values2nums) 592 593 df.loc[:, ('rdb0.cur_restart_type')] = \ 594 df.loc[:, ('rdb0.cur_restart_type')].map(values2nums) 595 596 df.loc[:, ('rst_cur.restart_type')] = \ 597 df.loc[:, ('rst_cur.restart_type')].map(values2nums) 598 599 if "rdb1.cur_restart_type" in df: 600 df.loc[:, ('rdb1.cur_restart_type')] = \ 601 df.loc[:, ('rdb1.cur_restart_type')].map(values2nums) 602 603 604def output_to_classical_dot(clf, features, fname): 605 sklearn.tree.export_graphviz(clf, out_file=fname, 606 feature_names=features, 607 class_names=clf.classes_, 608 filled=True, rounded=True, 609 special_characters=True, 610 proportion=True) 611 print("Run dot:") 612 print("dot -Tpng {fname} -o {fname}.png".format(fname=fname)) 613 print("gwenview {fname}.png".format(fname=fname)) 614 615 616def print_feature_ranking(clf, X_train, top_num_features, features, plot=False): 617 best_features = [] 618 importances = clf.feature_importances_ 619 std = np.std( 620 [tree.feature_importances_ for tree in clf.estimators_], axis=0) 621 indices = np.argsort(importances)[::-1] 622 indices = indices[:top_num_features] 623 myrange = min(X_train.shape[1], top_num_features) 624 625 # Print the feature ranking 626 print("Feature ranking:") 627 628 for f in range(myrange): 629 print("%-3d %-55s -- %8.4f" % 630 (f + 1, features[indices[f]], importances[indices[f]])) 631 best_features.append(features[indices[f]]) 632 633 # Plot the feature importances of the clf 634 if plot: 635 plot_feature_importances(importances, indices, myrange, std, features) 636 637 return best_features 638 639 640def plot_feature_importances(importances, indices, myrange, std, features): 641 plt.figure() 642 plt.title("Feature importances") 643 plt.bar(range(myrange), importances[indices], 644 color="r", align="center", 645 yerr=std[indices]) 646 plt.xticks(range(myrange), [features[x] 647 for x in indices], rotation=45) 648 plt.xlim([-1, myrange]) 649 650 651def cldata_add_computed_features(df, verbose): 652 print("Adding computed features...") 653 del df["cl.conflicts"] 654 divide = functools.partial(helper_divide, df=df, features=list(df), verb=verbose) 655 larger_than = functools.partial(helper_larger_than, df=df, features=list(df), verb=verbose) 656 add = functools.partial(helper_add, df=df, features=list(df), verb=verbose) 657 658 # relative overlaps 659 print("Relative overlaps...") 660 divide("cl.num_total_lits_antecedents", "cl.antec_sum_size_hist") 661 662 # deleting this feature which is NONE 663 del df["cl.antecedents_glue_long_reds_avg"] 664 del df["cl.antecedents_glue_long_reds_max"] 665 del df["cl.antecedents_glue_long_reds_min"] 666 del df["cl.antecedents_glue_long_reds_var"] 667 del df["cl.antecedents_long_red_age_avg"] 668 del df["cl.antecedents_long_red_age_var"] 669 del df["cl.decision_level_hist"] 670 del df["sum_cl_use.first_confl_used"] 671 del df["sum_cl_use.last_confl_used"] 672 673 # ************ 674 # TODO decision level and branch depth are the same, right??? 675 # ************ 676 print("size/glue/trail rel...") 677 divide("cl.trail_depth_level", "cl.trail_depth_level_hist") 678 679 rst_cur_all_props = add(["rst_cur.propBinRed", 680 "rst_cur.propBinIrred", 681 "rst_cur.propLongRed", 682 "rst_cur.propLongIrred"]) 683 684 divide("cl.num_total_lits_antecedents", "cl.num_antecedents") 685 686 # sum RDB 687 orig_cols = list(df) 688 for col in orig_cols: 689 if ("rdb0" in col) and "restart_type" not in col: 690 col2 = col.replace("rdb0", "rdb1") 691 cboth = "("+col+"+"+col2+")" 692 df[cboth] = df[col]+df[col2] 693 694 rdb0_act_ranking_rel = divide("rdb0.act_ranking", "rdb0.tot_cls_in_db", name="rdb0_act_ranking_rel") 695 rdb1_act_ranking_rel = divide("rdb1.act_ranking", "rdb1.tot_cls_in_db", name="rdb1_act_ranking_rel") 696 rdb0_plus_rdb1_ranking_rel = add([rdb0_act_ranking_rel, rdb1_act_ranking_rel]) 697 698 divide("rdb0.sum_uip1_used", "cl.time_inside_solver") 699 divide("rdb1.sum_uip1_used", "cl.time_inside_solver") 700 divide("rdb0.sum_propagations_made", "cl.time_inside_solver") 701 divide("rdb1.sum_propagations_made", "cl.time_inside_solver") 702 703 divisors = [ 704 "cl.size_hist" 705 , "cl.glue_hist" 706 , "rdb0.glue" 707 , "cl.orig_glue" 708 , "cl.glue_before_minim" 709 , "cl.glue_hist_queue" 710 , "cl.glue_hist_long" 711 # , "cl.decision_level_hist" 712 , "cl.num_resolutions_hist_lt" 713 # , "cl.trail_depth_level_hist" 714 # , "cl.backtrack_level_hist" 715 , "cl.branch_depth_hist_queue" 716 , "cl.antec_overlap_hist" 717 , "(cl.num_total_lits_antecedents/cl.num_antecedents)" 718 , "cl.num_antecedents" 719 , rdb0_act_ranking_rel 720 , rdb1_act_ranking_rel 721 #, "szfeat_cur.var_cl_ratio" 722 , "cl.time_inside_solver" 723 #, "((double)(rdb0.act_ranking_rel+rdb1.act_ranking_rel)/2.0)" 724 #, "sqrt(rdb0.act_ranking_rel)" 725 #, "sqrt(rdb1.act_ranking_rel)" 726 #, "sqrt(rdb0_and_rdb1.act_ranking_rel_avg)" 727 # , "cl.num_overlap_literals" 728 # , "rst_cur.resolutions" 729 #, "rdb0.act_ranking_top_10" 730 ] 731 732 # Thanks to Chai Kian Ming Adam for the idea of using LOG instead of SQRT 733 # add LOG 734 if True: 735 toadd = [] 736 for divisor in divisors: 737 x = "log2("+divisor+")" 738 df[x] = df[divisor].apply(np.log2) 739 toadd.append(x) 740 divisors.extend(toadd) 741 742 # relative data 743 cols = list(df) 744 for col in cols: 745 if ("rdb" in col or "cl." in col or "rst" in col) and "restart_type" not in col and "tot_cls_in" not in col and "rst_cur" not in col: 746 for divisor in divisors: 747 divide(divisor, col) 748 divide(col, divisor) 749 750 divisors.extend([ 751 rst_cur_all_props 752 , "rdb0.last_touched_diff" 753 , "rdb0.used_for_uip_creation" 754 , "rdb1.used_for_uip_creation" 755 , "rdb0.propagations_made" 756 , "rdb1.propagations_made" 757 , "rdb0.sum_propagations_made" 758 , "(rdb0.sum_propagations_made/cl.time_inside_solver)" 759 , "(rdb1.sum_propagations_made/cl.time_inside_solver)" 760 , "(rdb0.sum_uip1_used/cl.time_inside_solver)" 761 , "(rdb1.sum_uip1_used/cl.time_inside_solver)"]) 762 763 # smaller/larger than 764 if False: 765 for col in cols: 766 if ("rdb" in col or "cl." in col or "rst" in col) and "restart_type" not in col: 767 for divisor in divisors: 768 larger_than(col, divisor) 769 770 # satzilla stuff 771 if False: 772 divisors = [ 773 "szfeat_cur.numVars", 774 "szfeat_cur.numClauses", 775 "szfeat_cur.var_cl_ratio", 776 "szfeat_cur.avg_confl_size", 777 "szfeat_cur.avg_branch_depth", 778 "szfeat_cur.red_glue_distr_mean" 779 ] 780 for col in orig_cols: 781 if "szfeat" in col: 782 for divisor in divisors: 783 if "min" not in divisor: 784 divide(col, divisor) 785 larger_than(col, divisor) 786 787 # relative RDB 788 if True: 789 print("Relative RDB...") 790 for col in orig_cols: 791 if "rdb0" in col and "restart_type" not in col: 792 rdb0 = col 793 rdb1 = col.replace("rdb0", "rdb1") 794 larger_than(rdb0, rdb1) 795 796 raw_col = col.replace("rdb0.", "") 797 if raw_col not in ["sum_propagations_made", "propagations_made", "dump_no", "conflicts_made", "used_for_uip_creation", "sum_uip1_used", "activity_rel", "last_touched_diff", "ttl"]: 798 print(rdb0) 799 divide(rdb0, rdb1) 800 801 # smaller-or-greater comparisons 802 print("smaller-or-greater comparisons...") 803 larger_than("cl.antec_sum_size_hist", "cl.num_total_lits_antecedents") 804 larger_than("cl.antec_overlap_hist", "cl.num_overlap_literals") 805 806 # print("flatten/list...") 807 #old = set(df.columns.values.flatten().tolist()) 808 #df = df.dropna(how="all") 809 #new = set(df.columns.values.flatten().tolist()) 810 #if len(old - new) > 0: 811 #print("ERROR: a NaN number turned up") 812 #print("columns: ", (old - new)) 813 #assert(False) 814 #exit(-1) 815