1#!/usr/local/bin/python3.8 2 3# python3 status: compatible 4 5# ver 1.0: July, 2014 6# Update, ver 1.1: Sept 2014 7# Updated, ver 1.2: Sept 2014 8# [PT: Aug 31, 2020] updated to Python 3 with "2to3 -w .." 9# [PT: Sep 1, 2020] continuing to tweak, based on 2to3 updates 10# 11# File of helper functions for fat_mvm_*.py. 12# 13# 14# 15############################################################################# 16 17 18import numpy as np 19import csv 20import sys 21 22from glob import glob 23 24np.set_printoptions(linewidth=200) 25 26# Have to check 5 lines in 27NLinesGridHead = 5 28 29HEADER_Nroi = 'Number_of_network_ROIs' 30HEADER_Ngrid = 'Number_of_grid_matrices' 31HEADER_EleID = 'Special_element_ID' 32HEADER_Labels = 'WITH_ROI_LABELS' 33 34HEADER_ALL = [HEADER_Nroi, HEADER_Ngrid, HEADER_EleID, HEADER_Labels] 35 36# This will be ROI name connector, in line with ZSS usage in SUMA, 37# etc. 38ConnNames = '__' 39 40HeaderRowTrue = 1 41FirstColLabels = 1 # first read in col is subj name-labels 42GridCheckVar = 'NT' 43 44# for writing file for MVM 45ColOne_SUB = 'Subj' 46ColEnding = ['ROI', 'matrPar', 'Ausgang_val'] 47 48MVM_file_postfix = '_MVMtbl.txt' 49MVM_matchlog_postfix = '_MVMprep.log' 50 51LOG_LABEL_colmatr = 'Matrixfile_ID' 52LOG_LABEL_colsubj = 'CSV_Subj' 53LOG_LABEL_roilist = 'ROI_list' 54LOG_LABEL_parlist = 'Parameter_list' 55VAR_LABEL_roilistQ = 'Qvar_list' 56VAR_LABEL_roilistC = 'Cvar_list' 57VAR_LABEL_roilistI = 'Ivar_list' 58LOG_LABEL_command = 'Made_by_command' 59 60OldFash_parnames = ['NT', 'fNT', 'FA', 'sFA', 'MD', \ 61 'sMD', 'RD', 'sRD', 'L1', 'sL1', \ 62 'NV'] 63 64 65#class MVM_rev: 66# def __init__(self, var, par): 67# self.var = var 68# self.par = par 69# self.isquant = 1 70# self.mean_tstat = 0 71# self.minp_ROI = '' 72 73 74###------------------------------------------------------------------ 75###------------------------------------------------------------------ 76###---------------- START: review functions and output -------------- 77###------------------------------------------------------------------ 78###------------------------------------------------------------------ 79 80def BreakLineList(x): 81 82 y = [] 83 for line in x: 84 y.append(line.split()) 85 86 87 return y 88 89###------------------------------------------------------------------ 90 91def Find_ANOVAE(x): 92 ''' input is a line list broken into lines of words.''' 93 94 Nline = len(x) 95 96 RESULTS = [] 97 list_par = [] 98 list_var = [] 99 100 for i in range(Nline): 101 if len(x[i]) > 0 : 102 if (x[i][1] == 'RESULTS:') and (x[i][2] == 'ANOVA') : 103 par = x[i][-1] # name of DTI/FMRI par 104 nval = int(x[i+1][0]) # where the num of vars is stored 105 values = [] 106 names = [] 107 for j in range( nval ): 108 ii = i + 3 + j # skip to vars 109 if x[ii][-1] != '(Intercept)' : 110 values.append(x[ii][-3]) 111 names.append(x[ii][-1]) 112 RESULTS.append(values) 113 list_par.append(par) 114 list_var.append(names) 115 116 117 npar = len(RESULTS) 118 if npar <1: 119 print("ERROR! No ANOVA results?!?") 120 sys.exit(33) 121 122 nvar = len( RESULTS[0] ) 123 if nvar < 1 : 124 print("ERROR! No ANOVA variables?!?") 125 sys.exit(34) 126 127 z = np.array(RESULTS, dtype = float) 128 129 # return the array, and it's two lists of labels 130 return z, list_par, list_var[0] 131 132 133###------------------------------------------------------------------ 134 135 136 137 138 139def Find_POSTHOC(x): 140 ''' input is a line list broken into lines of words.''' 141 142 Nline = len(x) 143 144 RESULTS = [] 145 list_par = [] 146# list_var = [] 147 148 for i in range(Nline): 149 if len(x[i]) > 0 : 150 if (x[i][1] == 'RESULTS:') and (x[i][2] == 'Post') : 151 par = x[i][-1] # name of DTI/FMRI par 152 nglt = int(x[i+1][0]) # where the num of glts is stored 153 values = [] 154 names = [] 155 ii = i + 3 # jump ahead to data 156 phrois, phvars = Find_PosthocVars_and_NROI( x[ii:ii+nglt] ) 157 Nphrois = len(phrois) 158 Nphvars = len(phvars) 159 tmpv = np.zeros( Nphvars ) 160 for j in range( Nphvars ): 161 for k in range( Nphrois ): 162 ll = ii + j + k*Nphvars 163 tmpv[j]+= np.float(x[ll][-5]) # t-stat 164 tmpv[j]/= Nphrois 165 values.append(tmpv[j]) 166 RESULTS.append(values) 167 list_par.append(par) 168# list_var.append(names) 169 170 npar = len(RESULTS) 171 if npar <1: 172 print("No post hoc results-- guess you didn't want any?") 173 return [],[],[] 174 else: 175 nvar = len( RESULTS[0] ) 176 if nvar < 1 : 177 print("ERROR! No post hoc variables?!?") 178 sys.exit(35) 179 180 z = np.array(RESULTS, dtype = float) 181 182 # return the array, and it's two lists of labels 183 return z, list_par, phvars 184 185###------------------------------------------------------------------ 186 187def Find_PosthocVars_and_NROI( x ): 188 189 nvals = len(x) 190 191 phrois = [] 192 phvars = [] 193 194 for y in x: 195 twopiece = y[-1].split('--') # Sep,2015: new parser 196 if not(phrois.__contains__(twopiece[0])): 197 phrois.append(twopiece[0]) 198 199 if not(phvars.__contains__(twopiece[1])): 200 phvars.append(twopiece[1]) 201 202 if len(phvars)*len(phrois) != nvals: 203 print("Problem! number of posthoc vars, ROIs and tests not matching!") 204 sys.exit(189) 205 206 return phrois, phvars 207 208###------------------------------------------------------------------ 209 210###### [PT: Sep 1, 2020] comment this func out, because the 211###### fat_mvm_review.py program that calls this func (a program that 212###### never even fully made it to beta testing) is being deprecated. 213# 214## ref mat allows us to use group pvalue mat to censor 215#def ScreenAndFileOutput(PO, TO, FI_mat, FI_par, FI_var, FI_indi, TS, 216# REF_mat): 217# 218# STARTER = 0 219# if not(TS): 220# STARTER = 1 221# 222# nvar = len(FI_var) 223# npar = len(FI_par) 224# 225# calc_indent = 14*nvar+1 226# 227# if (npar, nvar) != np.shape(FI_mat) : 228# print("Weird error in numbers not matching internally!") 229# sys.exit(35) 230# 231# # first lining ... 232# if STARTER: TS.append('# %12s' % (FI_var[0])) 233# for j in range(nvar): 234# print("%14s" % (FI_var[j]), end=' ') 235# if (j>0): 236# if STARTER: TS.append("%14s" % FI_var[j]) 237# print("") 238# if STARTER: TS.append("\n") 239# 240# TS.append('#%s# %s\n' % ('-'*calc_indent, FI_indi)) 241# 242# 243# 244# 245# # ... the rest. 246# # use of 'REF_mat' to allow censoring by ANOVA values 247# for i in range(npar): 248# for j in range(nvar): 249# #if FI_mat[i,j] < TO : 250# if REF_mat[i,j] < TO : 251# out = "%.4e" % (FI_mat[i,j]) 252# fout = out 253# else: 254# out = '-' 255# fout = '0' 256# print("%14s" % (out), end=' ') 257# TS.append("%14s" % (fout)) 258# print(" %s" % FI_par[i]) 259# TS.append(" # %s\n" % FI_par[i]) 260# 261# 262# if not(TS): 263# print("**ERROR! Empty file?") 264# sys.exit(12) 265# 266# return TS 267 268 269''' OLD version 270 271def ScreenAndFileOutput(PO, TO, FI_mat, FI_par, FI_var): 272 273 nvar = len(FI_var) 274 npar = len(FI_par) 275 276 if (npar, nvar) != np.shape(FI_mat) : 277 print "Weird error in numbers not matching internally!" 278 sys.exit(35) 279 280 if PO: 281 outname = PO + '_REV.txt' 282 f = open(outname, 'w') 283 284 # first lining ... 285 if PO: f.write('# %12s' % (FI_var[0])) 286 for j in range(nvar): 287 print "%14s" % (FI_var[j]), 288 if (j>0) and PO: 289 f.write("%14s" % FI_var[j]) 290 print "" 291 if PO: f.write("\n") 292 293 # ... the rest. 294 for i in range(npar): 295 for j in range(nvar): 296 if FI_mat[i,j] < TO: 297 out = "%.4e" % (FI_mat[i,j]) 298 fout = out 299 else: 300 out = '-' 301 fout = '0' 302 print "%14s" % (out), 303 if PO: f.write("%14s" % (fout)) 304 print " %s" % FI_par[i] 305 if PO: f.write(" # %s\n" % FI_par[i]) 306 if PO: f.close() 307 308 return 1 309''' 310 311 312 313###------------------------------------------------------------------ 314###------------------------------------------------------------------ 315###----------------- sTOP: review functions and output -------------- 316###------------------------------------------------------------------ 317###------------------------------------------------------------------ 318 319###------------------------------------------------------------------ 320###------------------------------------------------------------------ 321###---------------- START: Subsets of categorical vars -------------- 322###------------------------------------------------------------------ 323###------------------------------------------------------------------ 324 325def SplitVar_Sublist(L, idx): 326 '''Take in a list, and a 'which iteration is this' value called 327 idx. Return either the first or second list in a lone-comma 328 separated line.''' 329 330 N = L.count(',') 331 332 # only one list, can only go with first 333 if N==0: 334 if idx == 0 : 335 return set(L) 336 else: 337 return set([]) 338 elif N==1: 339 mm = L.index(',') 340 if idx == 0 : 341 return set(L[:mm]) 342 else: 343 return set(L[mm+1:]) 344 else: 345 print("Error reading variable list: too many commas!") 346 print("\t There should only be a single comma, separating at most " 347 "two lists.") 348 sys.exit(18) 349 350 351 352 353 354def Pars_CatVars_in_Listfile( file_listvars, 355 par_list, 356 var_iscateg, 357 var_isinterac, 358 Ninter ): 359 '''User can specify subset of cat var for modelling. Might be 360 useful if there are a lot of subcategories, and we want some 361 pairwise tests. Performed after we know where and what are the 362 categorical variables (both lone and/or interaction ones. 363 Returns: updated lists of categorical and interacting vars.''' 364 365 coldat = ReturnFirstUncommentedSection_ofReadlines(file_listvars) 366 for i in range(len(coldat)): 367 if len(coldat[i]) > 1: 368 print("Found a variable for subsetting") 369 370 if var_iscateg[i]: 371 newsub_set = set(coldat[i][1:]) # all other vars in that row 372 oldvar_set = set(var_iscateg[i]) 373 nomatch = list(oldvar_set.__ixor__(newsub_set)) 374 print("Going to remove:", nomatch) 375 for x in nomatch: 376 var_iscateg[i].remove(x) 377 378 elif var_isinterac[i]: 379 ### with variable interaction terms, use a "lone" 380 ### comma to separate the sub-lists; if you only want 381 ### a subset of the second variable, then still use a 382 ### lone comma before it 383 for j in range(2): 384 if var_isinterac[i][2][j]: 385 newsub_set = SplitVar_Sublist(coldat[i][1:], j) 386 oldvar_set = set(var_isinterac[i][2][j]) 387 nomatch = list(oldvar_set.__ixor__(newsub_set)) 388 print("Going to remove:", nomatch) 389 for x in nomatch: 390 var_isinterac[i][2][j].remove(x) 391 392 393 return var_iscateg, var_isinterac 394 395 396 397###------------------------------------------------------------------ 398###------------------------------------------------------------------ 399###---------------- STOP: Subsets of categorical vars --------------- 400###------------------------------------------------------------------ 401###------------------------------------------------------------------ 402 403 404###------------------------------------------------------------------ 405###------------------------------------------------------------------ 406###--------------- START: Interaction modeling features ------------- 407###------------------------------------------------------------------ 408###------------------------------------------------------------------ 409 410def VarLists_to_Strings_for_MVM(var_list, 411 var_isinterac, 412 var_iscateg, 413 CAT_PAIR_COMP ): 414 '''Get counts and things from varlists, interaction and category 415 info.''' 416 417 Nvar = len(var_list) 418 419 # calc number of GLTs/ROI (= Nvartout) and number of Quant var 420 Nvartout = Nvar 421 var_list_quant = [] 422 varQ_str = '' 423 var_list_inter = [] 424 varI_str = '' 425 extra_qVar = '' # Jan,2015 -- added to pick up qvar in interac term 426 varC_str = '' 427 for i in range(Nvar): 428 x = var_list[i] 429 430 if not(var_isinterac[i][0][0]): 431 if var_list.__contains__(x): 432 if not( var_iscateg[i] ): 433 var_list_quant.append(x) 434 varQ_str+= " %s" % x 435 else: 436 nc = len(var_iscateg[i]) 437 Nvartout+= nc-1 # individual ttests 438 if CAT_PAIR_COMP : 439 Nvartout+= (nc*(nc-1))/2 # pairwise combinatorial 440 else: 441 varI_str = " " 442 ncombo = 1 443 for j in range( 2 ): 444 varI_str+= var_isinterac[i][1][j] 445 if var_isinterac[i][2][j] : 446 # binomial-type comparisons for each 447 nc = len(var_isinterac[i][2][j]) 448 ncombo*= (nc*(nc-1))/2 449 varI_str+= "(" 450 for ii in var_isinterac[i][2][j]: 451 varI_str+= " %s" % ii 452 varI_str+= " )" 453 else: # Jan,2015 454 extra_qVar = "%s" % var_isinterac[i][1][j] 455 if j == 0 : 456 varI_str+= " %s " % var_isinterac[i][0][0] 457 Nvartout+= ncombo-1 458 459 for i in range(Nvar): 460 if var_iscateg[i]: 461 varC_str+= " %s(" % var_list[i] 462 for y in var_iscateg[i]: 463 varC_str+= " %s" % y 464 varC_str+= " )" 465 466 467 468 469 return Nvartout, var_list_quant, varQ_str, var_list_inter, \ 470 varI_str, varC_str, extra_qVar 471 472 473 474 475 476 477 478 479###------------------------------------------------------------------ 480 481def CheckFor_Cats_and_Inters( tab_data, 482 tab_colvars, 483 tab_coltypes, 484 var_list ): 485 '''Return 1 if there is an interaction, and ''' 486 487 simple_cat = [] 488 489 interac_terms = [] 490 count_interac = 0 491 492 for x in var_list: 493 # return either an empty list (no interac) or a list of 494 # the interacting ones 495 496 cats = [] # default if not cat var or interac 497 inter_names = [] # default if no interacs 498 Ncats = 0 499 500 #print "TEST1 for var:",x 501 check, inter_type = IsEntry_interaction(x) 502 503 #print "\t->has check:", check,", and intertype:", inter_type 504 505 #interac_terms = [inter_type] 506 507 # if there is a non-empty list returned, check that both terms 508 # in it are ok to use 509 if check: 510 # inter_names will be a list of: lists (of categorical vars) 511 # or empty lists (quant var) 512 inter_names = CheckVar_and_FindCategVar( tab_data, \ 513 tab_colvars, \ 514 tab_coltypes, \ 515 check ) 516 count_interac+=1 517 518 # want to ensure categorical variable is first in list 519 if not( inter_names[0] ): 520 inter_names.reverse() 521 check.reverse() 522 523 for i in range(len(inter_names)): 524 if inter_names[i]: 525 Ncats+=1 526 527 528 # when there is interaction, have: 529 # [0] type of interaction (or empty for none) 530 # [1] var names (cat first or both) 531 # [2] cat var names (or empty for quant) 532 #interac_terms.append(check) 533 #interac_terms.append(inter_names) 534 else: 535 # redefine cats if coming through here 536 cats = CheckVar_and_FindCategVar( tab_data, \ 537 tab_colvars, \ 538 tab_coltypes, \ 539 [x] ) 540 cats = list(cats[0]) 541 #print "\t\> has cat:", cats 542 simple_cat.append(cats) 543 interac_terms.append( [ [inter_type, Ncats], check, inter_names] ) 544 return simple_cat, interac_terms, count_interac 545 546 547 548def IsEntry_interaction(str_var): 549 '''Return an empty list if there is no interaction, or return a 550 list of the interacting terms if there be/were one.''' 551 552 type_col = ':' # this was a mistake to include! Ooops. 553 type_ast = '*' 554 555 num_colon = str_var.count(type_col) 556 num_aster = str_var.count(type_ast) 557 558 if (num_colon + num_aster) > 1: 559 print("Error! At most 1 interaction per var is permitted currently!") 560 sys.exit(14) 561 562 if num_colon : 563 print("**ERROR! The ':' does not exist as a usable interaction call!") 564 sys.exit(156) 565 #return str_var.split(type_col), type_col 566 elif num_aster : 567 return str_var.split(type_ast), type_ast 568 else: 569 return [], '' 570 571 572###------------------------------------------------------------------ 573###------------------------------------------------------------------ 574###---------------- STOP: Interaction modeling features ------------- 575###------------------------------------------------------------------ 576###------------------------------------------------------------------ 577 578 579###------------------------------------------------------------------ 580###------------------------------------------------------------------ 581###-------------------- START: Row selection features --------------- 582###------------------------------------------------------------------ 583###------------------------------------------------------------------ 584 585#''' Input the grid file name, and return: 586# 1) a list of data [NMAT, NROI, NROI]; 587# 2) a list of labels [NMAT]; 588# 3) a supplementary dictionary for each calling use; 589# 4) and a list of (int) ROI labels (which may not be consecutive).''' 590 591def Select_Row_From_FATmat(X4, outname, ele, ele_ind, UseL, ExternLabsOK): 592 593 Ngrid = len( X4[1] ) 594 ROI_LABS = list(X4[UseL]) 595 Nroi = len( ROI_LABS ) 596 597 # have tested previously to make sure that ROI_LABS contains ele 598 #ele_ind = ROI_LABS.index(ele) 599 600 f = open(outname, 'w') 601 f.write('# %d # %s\n' % (Nroi, HEADER_Nroi)) 602 f.write('# %d # %s\n' % (Ngrid, HEADER_Ngrid)) 603 604 if ExternLabsOK: 605 # everything should have labels now 606 f.write('# %s # %s\n' % (ele, HEADER_EleID)) 607 for roi in X4[4]: 608 f.write("%12s\t" % roi) 609 f.write('\n') 610 611 for roi in X4[3]: 612 f.write("%12s\t" % roi) 613 f.write('\n') 614 615 616 617 for i in range(Ngrid): 618 f.write('# %s\n' % X4[1][i]) 619 for k in range(Nroi): 620 if X4[1][i]=='NT': 621 f.write('%12d\t' % X4[0][i][ele_ind][k]) 622 else: 623 f.write('%12e\t' % X4[0][i][ele_ind][k]) 624 f.write('\n') 625 626 f.close() 627 628 629 630 631 632 633###------------------------------------------------------------------ 634 635def MakeRowFile_From_FATmat(fname, outname, ele, ExternLabsOK): 636 637 X4 = LoadInGridOrNetcc(fname) 638 639 UseL = 3 640 if (len(X4) >= 4) and ExternLabsOK: 641 if X4[4]: 642 UseL = 4 643 644 # make labels for WITH_LABELS... 645 if not(X4[4]): 646 for x in X4[3]: 647 # take half of element id 648 name = ElementNameMaker(x, x, 0) 649 650 #### [PT: Aug 31, 2020] in going from py 2to3, had to 651 #### update this to integer div, in order to be able to 652 #### use as index 653 # lele = (len(name)-len(ConnNames))/2 654 lele = (len(name)-len(ConnNames))//2 655 X4[4].extend( name[:lele] ) 656 657 if X4[UseL].__contains__(ele): 658 ele_ind = X4[UseL].index(ele) 659 y = Select_Row_From_FATmat(X4, 660 outname, 661 ele, 662 ele_ind, 663 UseL, 664 ExternLabsOK) 665 # extra check: even if LABELS are used, might still refer to ID 666 # number ok 667 elif (UseL == 4) and X4[3].__contains__(ele): 668 print("*+ Even though you have labels in the file, it looks like " 669 "your 'roi' selection is just a digit.") 670 print("\t-> Not a problem-- row selection will be done based on ROI " 671 "number (and not on labeltable-info).") 672 ele_ind = X4[3].index(ele) 673 y = Select_Row_From_FATmat(X4, 674 outname, 675 ele, 676 ele_ind, 677 3, 678 ExternLabsOK) 679 else: 680 print("** Error: chosen element " 681 "'%s' doesn't appear in file's ROI list!" % ele) 682 print("For reference, the list of ROIs is:") 683 for x in X4[UseL]: 684 print("\t",x) 685 sys.exit(55) 686 687 return y 688 689###------------------------------------------------------------------ 690 691def DefaultNamingOutRowfile(list_all, ele, ExternLabsOK): 692 693 out = [] 694 695 # take half of element id 696 name = ElementNameMaker(ele, ele, ExternLabsOK) 697 #### [PT: Aug 31, 2020] in going from py 2to3, had to update this 698 #### to integer div, in order to be able to use as index 699 # lele = (len(name)-len(ConnNames))/2 700 lele = (len(name)-len(ConnNames))//2 701 roi = name[:lele] 702 703 for x in list_all: 704 if not(x[-5:] == '.grid') and not(x[-6:] == '.netcc'): 705 print("ERROR-- doesn't look like this is a *.grid or *netcc file!") 706 elif x[-5:] == '.grid' : 707 out.append(x[:-5]+'_grid_'+roi+'.row') 708 else: 709 out.append(x[:-6]+'_netcc_'+roi+'.row') 710 711 return out 712 713###------------------------------------------------------------------ 714###------------------------------------------------------------------ 715###--------------------- STOP: Row selection features --------------- 716###------------------------------------------------------------------ 717###------------------------------------------------------------------ 718 719 720###------------------------------------------------------------------ 721###------------------------------------------------------------------ 722###----------------------- START: Assorted -------------------------- 723###------------------------------------------------------------------ 724###------------------------------------------------------------------ 725 726def RecapAttach(comm_str, opt, arg): 727 '''Silly parsing of commandline options+arguments in order to be able 728 to return the command with delineating apostrophes around user-input 729 lists.''' 730 731 comm_str+=' '+opt 732 if arg: 733 if opt[:2]=='--': 734 comm_str+="=" 735 else: 736 comm_str+=" " 737 comm_str+="'"+arg+"' " 738 else: 739 comm_str+=" " 740 741 return comm_str 742 743###------------------------------------------------------------------ 744 745def LoadInTable(fname): 746 ''' open up a file, and return a 2-tuple: (raw) data set and header.''' 747 748 fff = open(fname, 'r') 749 x = fff.readlines() 750 fff.close() 751 752 N = len(x) 753 754 print('++ Opening and reading table file...') 755 756 data = [] 757 header = [] 758 759 idx = 0 760 for row in x[:]: 761 if not(idx) and HeaderRowTrue: 762 tt = row.split() 763 if tt[-1] == '\\': 764 header = tt[:-1] 765 else: 766 header = tt 767 idx+= 1 768 else: 769 #print row.split() 770 tt = row.split() 771 if tt[-1] == '\\': 772 data.append(tt[:-1]) 773 else: 774 data.append(tt) 775 776 header = Check_Header_Spaces(header) 777 778 print('++ ... done opening and reading table file.') 779 780 return data, header 781 782#---------------------------------------------------------------- 783 784# Sep,2015: new function to make table from subnetwork of ROIs 785def MakeSubTable(file_table, pref_subnet, roi_list): 786 787 tab_raw, tab_colvars = LoadInTable(file_table) 788 Ncol = len(tab_colvars) 789 Nrois = len(roi_list) 790 791 # check for 'ROI' in col heading 792 indroi = Check_EleIn_ROIlist(tab_colvars, ColEnding[0]) 793 if indroi < 0: 794 print("** ERROR: can't find 'ROI' in column headings of") 795 print(" the table file: %s." % file_table) 796 sys.exit(543) 797 798 list_count = np.zeros(Nrois, dtype=int) 799 tab_subnet = [] 800 # go through row by row and keep those that have the 'right' ROI 801 for x in tab_raw: 802 if roi_list.__contains__(x[indroi]) : 803 tab_subnet.append(x) 804 list_count[roi_list.index(x[indroi])] = 1 805 806 # check that each chosen ROI was found 807 if not(list_count.all()) : 808 for i in range(Nrois): 809 if not( list_count[i]): 810 # fixed indexing mistake in roi_list[]. [PT, March 27, 2017] 811 print("** ERROR: can't find selected ROI %s in the original table list" % roi_list[i]) 812 print("** Please select again!") 813 sys.exit(544) 814 else: 815 print("++ Found all %d ROIs in the desired subnet list:" % Nrois) 816 for x in roi_list: 817 print("\t %s" % x) 818 819 Lx, Ly = np.shape(tab_subnet) 820 821 # write output 822 fff = open(pref_subnet, 'w') 823 for x in tab_colvars: 824 fff.write("%s " % x) 825 fff.write(" \\\n") 826 for i in range(Lx-1): 827 for x in tab_subnet[i]: 828 fff.write("%s " % x) 829 fff.write(" \\\n") 830 for x in tab_subnet[Lx-1]: 831 fff.write("%s " % x) 832 fff.write("\n") 833 834 fff.close() 835 836 print("++ New subnetwork table has been made: %s" % pref_subnet) 837 838 return 1 839 840 841#---------------------------------------------------------------- 842 843def CheckVar_and_FindCategVar(tab_data, tab_colvars, tab_coltypes, par_list): 844 845 Npar = len(par_list) 846 Nsub, Ncol = np.shape(tab_data) 847 iscateg = [] 848 for i in range(Npar): 849 if tab_colvars.__contains__(par_list[i]): 850 j = tab_colvars.index(par_list[i]) 851 if tab_coltypes[j]==str : 852 terms = set() 853 for k in range(Nsub): 854 855 terms.add(tab_data[k][j]) 856 listterms = list(terms) 857 listterms.sort() 858 iscateg.append(listterms) 859 else: 860 iscateg.append([]) 861 else: 862 print("** Error! Variable %s is not in the data table" \ 863 % par_list[i]) 864 sys.exit(11) 865 866 return iscateg 867 868 869#---------------------------------------------------------------- 870 871def GetFromLogFile(fname, ltype): 872 873 if not(fname): 874 print("** Error: can't open log file: '%s'." % fname) 875 sys.exit(9) 876 877 fff = open(fname,'r') 878 x = fff.readlines() 879 fff.close() 880 881 N = len(x) 882 883 for start in range(N): 884 if (x[start].split()[0] == '#') and (x[start].split()[1] == ltype): 885 #print "FOUND", x[start].split() 886 break 887 888 outlist = [] 889 for i in range(start+1,N): 890 if x[i].split()[0] == '#': 891 break 892 else: 893 outlist.append(x[i].split()[0]) 894 895 if not(outlist): 896 print("** Error: empty list from log file: '%s'." % fname) 897 sys.exit(10) 898 899 return outlist 900 901 902###------------------------------------------------------------------ 903###------------------------------------------------------------------ 904###------------------------ STOP: Assorted -------------------------- 905###------------------------------------------------------------------ 906###------------------------------------------------------------------ 907 908 909 910###------------------------------------------------------------------ 911###------------------------------------------------------------------ 912###--------------- START: Final prep output operations -------------- 913###------------------------------------------------------------------ 914###------------------------------------------------------------------ 915 916def Write_MVM_File(PREFIX, \ 917 csv_data, \ 918 csv_colvars, \ 919 grid_ROIlist, \ 920 grid_VARlist, \ 921 grid_tabled): 922 923 fff = open(PREFIX+MVM_file_postfix,'w') 924 925 ### 'csv_colvars': rename 0th col 926 ### and we add three more col to output: ROI, VarName, VarVal 927 csv_colvars[0] = ColOne_SUB 928 Nsub, Ncsvar = np.shape(csv_data) 929 Nroi = len(grid_ROIlist) 930 Npar = len(grid_VARlist) 931 Ncol = Ncsvar+len(ColEnding) 932 933 934 for i in csv_colvars: 935 fff.write("%s " % i) 936 for i in ColEnding: 937 fff.write("%s " % i) 938 fff.write(" \\\n") 939 940 for i in range(Nsub): 941 for x in range(Nroi): 942 for y in range(Npar): 943 for j in range(Ncsvar): 944 fff.write("%s " % str(csv_data[i][j])) 945 fff.write("%s " % grid_ROIlist[x]) 946 fff.write("%s " % grid_VARlist[y]) 947 fff.write("%s " % str(grid_tabled[i,x,y])) 948 if not( i==(Nsub-1) and x==(Nroi-1) and y==(Npar-1) ): 949 fff.write(" \\\n") 950 else: 951 fff.write(" \n") 952 953 fff.close() 954 955 return 1 956 957#-------------------------------------------------------------------- 958 959def Check_EleIn_ROIlist(ROIlist, ele): 960 try: 961 return ROIlist.index(ele) 962 except ValueError: 963 return -1 964 965#-------------------------------------------------------------------- 966 967def MakeGridTableau(DICT_CSV_grid, \ 968 csv_data, \ 969 grid_data, \ 970 grid_ROIlist, \ 971 grid_VARlist, \ 972 grid_subj, 973 ExternLabsOK): 974 '''Take a lot of matrix file info in, as well as the CSV file and 975 conversion dictionary, and return a tableau of necessary 976 variable¶meter values from all the subjects' matrix file data. 977 Output is a list ordered by CSV subject, for ease in final 978 writing.''' 979 980 # here, gridVARlist is actually a list of PARAMETERS. 981 982 print('++ Starting to select and convert matrix data to usable ' 983 'form for output...') 984 985 Nsub = len(csv_data) 986 Nroi = len(grid_ROIlist) 987 Nvar = len(grid_VARlist) 988 MM = np.zeros((Nsub, Nroi, Nvar)) 989 990 # put in order of CSV stuff 991 for i in range(Nsub): 992 csv_subb = csv_data[i][0] 993 # which sub in grid_data 994 grid_subb = DICT_CSV_grid[csv_subb] 995 ii = Check_EleIn_ROIlist(grid_subj, grid_subb) 996 #print "HI!", i, ii 997 if ii>=0: 998 UseL = 3 999 if (len(grid_data[ii]) >= 4) and ExternLabsOK: 1000 if grid_data[ii][4]: 1001 UseL = 4 1002 Ntar = len(grid_data[ii][UseL]) 1003 for j in range(Nroi): 1004 for x in range(Ntar): 1005 for y in range(x+1,Ntar): 1006 # inefficient search... 1007 ele = ElementNameMaker(grid_data[ii][UseL][x], 1008 grid_data[ii][UseL][y], 1009 UseL-3) 1010 if grid_ROIlist[j] == ele: 1011 #print ii,j, ele 1012 for k in range(Nvar): 1013 var = grid_VARlist[k] 1014 idx = grid_data[ii][2][var] 1015 # which matrix in grid[i][0] 1016 MM[i,j,k] = grid_data[ii][0][idx][x][y] 1017 1018 print('++ ... done converting matrix stuff for outputting in table.') 1019 return MM 1020 1021###------------------------------------------------------------------ 1022###------------------------------------------------------------------ 1023###---------------- STOP: Final prep output operations -------------- 1024###------------------------------------------------------------------ 1025###------------------------------------------------------------------ 1026 1027###------------------------------------------------------------------ 1028###------------------------------------------------------------------ 1029###-------- START: Make/Check/Print CSV and matrix matching --------- 1030###------------------------------------------------------------------ 1031###------------------------------------------------------------------ 1032 1033def MakeDict_CSV_to_grid(grid_subj, csv_subj, file_match): 1034 1035 if IsFirstUncommentedSection_Multicol(file_match): 1036 CtG = MakeDict_CSV_to_grid_FILE(grid_subj, csv_subj,file_match) 1037 else: 1038 CtG = MakeDict_CSV_to_grid_NOFILE(grid_subj, csv_subj) 1039 return CtG 1040 1041 1042#--------------------------------------------------------------------- 1043 1044def IsFirstUncommentedSection_Multicol(fname): 1045 '''Check all lines of input to see if there's more than one 1046 column present.''' 1047 tt = ReturnFirstUncommentedSection_ofReadlines(fname) 1048 if tt: 1049 for i in range(len(tt)): 1050 if len(tt[i]) > 1: 1051 return 1 1052 return 0 1053 1054#--------------------------------------------------------------------- 1055 1056def ReturnFirstUncommentedSection_ofReadlines(fname): 1057 1058 if not(fname): 1059 return [] 1060 1061 fff = open(fname,'r') 1062 x = fff.readlines() 1063 fff.close() 1064 1065 N = len(x) 1066 1067 # ignore preliminary comments 1068 start = 0 1069 while x[start].split()[0] == '#': 1070 start+=1 1071 1072 temp = [] 1073 for i in range(start,N): 1074 y = x[i].split() 1075 if y[0] == '#': # stop if you hit a comment 1076 break 1077 else: 1078 temp.append(y) 1079 return temp 1080 1081def MakeDict_CSV_to_grid_FILE(grid_subj, csv_subj,file_match): 1082 1083 print("++ Start matching of CSV and matrix subject data (with file)...") 1084 1085 temp = ReturnFirstUncommentedSection_ofReadlines(file_match) 1086 print("LIST IS:") 1087 Nsub = len(temp) 1088 print("++ Looks like there are %d subjects in matching file: '%s'." \ 1089 % (Nsub, file_match )) 1090 1091 CtG = {} 1092 for i in range(Nsub): 1093 CtG[ temp[i][1] ] = temp[i][0] 1094 print(temp[i][1], temp[i][0]) 1095 1096 print("++ ... done matching CSV and matrix subject data.") 1097 1098 return CtG 1099 1100#------------------------------------------------------------------------ 1101 1102def MakeDict_CSV_to_grid_NOFILE(grid_subj, csv_subj): 1103 1104 print("++ Start matching of CSV and matrix subject data (without file)...") 1105 1106 CtG = {} 1107 for a in csv_subj: 1108 for b in grid_subj: 1109 if b.find(a) != -1: 1110 if a in CtG: 1111 print('Error: OVERWRITING MATCHES! Need uniquity.') 1112 #return {} 1113 else: 1114 CtG[a]=b 1115 1116 print("++ ... done matching CSV and matrix subject data.") 1117 1118 return CtG 1119 1120###-------------------------------------------------------------------- 1121 1122def CheckDict_CSV_to_grid(CtG, grid_subj, csv_subj, csv_data): 1123 1124 print("++ Start checking dictionary of CSV and matrix subject data...") 1125 Nsub0 = len(csv_subj) 1126 1127 csv_subj_new = [] 1128 csv_data_new = [] 1129 1130 BADNESS = 0 1131 # check if all csv sub are used 1132 for i in range( Nsub0 ): 1133 a = csv_subj[i] 1134 if a in CtG: 1135 csv_subj_new.append(a) 1136 csv_data_new.append(csv_data[i]) 1137 else: 1138 print('*+ Warning: CSV subj %s does not have a matrix match!' \ 1139 % a) 1140 BADNESS+=1 1141 # check if all grid matr subs are used: 1142 y = list(CtG.values()) 1143 for b in grid_subj: 1144 if not( y.__contains__(b) ): 1145 print('*+ Warning: matrix subj %s does not have a CSV subj match!' \ 1146 % b) 1147 #BADNESS+=1 1148 1149# if BADNESS : 1150# print "++ Possibly bad CSV<->matrix matching: ", 1151# print "either incomplete or nonunique." 1152# print "\tBut perhaps you are purposefully selecting subset(s)?" 1153# else: 1154# print "++ Successful matching: each CSV entry had single matrix match." 1155 1156 print("++ ... done checking dictionary.") 1157 1158 return csv_data_new, csv_subj_new 1159 1160###---------------------------------------------------------------------- 1161 1162def Write_CSVandMatrix_log(PREFIX, \ 1163 CtG, \ 1164 ROIlist, \ 1165 PARlist, \ 1166 ARGstr): 1167 '''Printout file of what I think the CSV and matrix file matches 1168 are, so user can doublecheck. Output file is called 1169 'PREFIX_MVM_var.log'.''' 1170 1171 fname = PREFIX+MVM_matchlog_postfix 1172 fff = open(fname,'w') 1173 x = list(CtG.keys()) 1174 x.sort() 1175 1176 fff.write("# %s :\n# %s\n" % (LOG_LABEL_command, ARGstr) ) 1177 1178 # CSV and matrix matching 1179 fff.write("# %-50s\t%s\n" % (LOG_LABEL_colmatr, LOG_LABEL_colsubj)) 1180 for i in x: 1181 fff.write("%-52s\t%s\n" % (CtG[i], i) ) 1182 1183 # list of ROIs 1184 fff.write("# %s\n" % (LOG_LABEL_roilist)) 1185 for i in ROIlist: 1186 fff.write("%s\n" % (i) ) 1187 1188 # list of ROIs 1189 fff.write("# %s\n" % (LOG_LABEL_parlist)) 1190 for i in PARlist: 1191 fff.write("%s\n" % (i) ) 1192 1193 fff.close() 1194 1195# print "\tYou can check file '%s' for CSV<->matrix matching log." % fname 1196 return 1 1197 1198###------------------------------------------------------------------ 1199###------------------------------------------------------------------ 1200###--------- STOP: Make/Check/Print CSV and matrix matching --------- 1201###------------------------------------------------------------------ 1202###------------------------------------------------------------------ 1203 1204 1205###------------------------------------------------------------------ 1206###------------------------------------------------------------------ 1207###------------ START: Matrix element and set operations ------------ 1208###------------------------------------------------------------------ 1209###------------------------------------------------------------------ 1210 1211def ReadSection_and_Column(file_list, colnum): 1212 1213 temp = ReturnFirstUncommentedSection_ofReadlines(file_list) 1214 N = len(temp) 1215 list_of_subj = [] 1216 for i in range(N): 1217 if len(temp[i]) > colnum: 1218 list_of_subj.append(temp[i][colnum]) 1219 1220 if not( list_of_subj): 1221 print("*+ No output column number %d in file %s." % \ 1222 (colnum, file_list)) 1223 1224 return list_of_subj 1225 1226def GroupListOfFullMatrs(file_IDer, file_listmatch, colnum): 1227 '''Take file ID argument (probably from user commandline input), 1228 happily read all the grids/matrices, and return: 1229 1) a list of 4-tuples of the data matrices; 1230 2) list of the subjects (mainly for checking)''' 1231 1232 print('++ Starting to find and open matrix files...') 1233 if file_listmatch: 1234 list_of_subj = ReadSection_and_Column(file_listmatch, colnum) 1235 1236# fff = open(file_listmatch,'r') 1237# x = fff.readlines() 1238# fff.close() 1239# temp = ReturnFirstUncommentedSection_ofReadlines(x) 1240# N = len(temp) 1241# list_of_subj = [] 1242# for i in range(N): 1243# list_of_subj.append(temp[i][1]) 1244 elif file_IDer: 1245 list_of_subj = glob(file_IDer) 1246 else: 1247 print("** Error! Cannot read in matrix files.") 1248 sys.exit(4) 1249 1250 All_sub_grid = [] 1251 for ff in list_of_subj: 1252 x = LoadInGridOrNetcc(ff) 1253 All_sub_grid.append(x) 1254 1255 if len(All_sub_grid) == 0: 1256 print('** ERROR! No subjects found when looking for group!') 1257 sys.exit(5) #return [], [] 1258 else: 1259 print('\tFound %d subjects in the group.' % len(All_sub_grid)) 1260 1261 1262 print('++ ... done finding and opening matrix files.') 1263 1264 return All_sub_grid, list_of_subj 1265 1266###------------------------------------------------------------------ 1267 1268def Check_Matr_type(LM): 1269 1270 N = len(LM) 1271 counts = np.zeros(3) # GRID: 0; NETCC: 1; OTHER:2 1272 1273 for i in range(N): 1274 mpost = LM[i][-5:] 1275 if mpost =='.grid': 1276 counts[0]+=1 1277 elif mpost =='netcc': 1278 counts[1]+=1 1279 else: 1280 counts[2]+=1 1281 1282 if counts[0] == N: 1283 print('++ Looks like all matrices are GRID. Fine.') 1284 return 'GRID' 1285 elif counts[1] == N: 1286 print('++ Looks like all matrices are NETCC. Fine.') 1287 return 'NETCC' 1288 else: 1289 print('*+ Warning: nonuniform or foreign type of matrix files!') 1290 return 'OTHER' 1291 1292 return 1293 1294def FindGroupwiseTargets(All_sub_grid, ftype, ExternLabsOK, UNION=0): 1295 '''Take a list of 4-tuples representing subject data (and a string 1296 of what filetype it is), go through all, and find set of 1297 intersecting matrix elements based on labels. When done, return 1298 that list of matrix elements, AND a list of the variables.''' 1299 1300 print('++ Going through sets of matrix element ROIs...') 1301 Nsub = len(All_sub_grid) 1302 1303 if Nsub == 0: 1304 print('** ERROR! No subjects found when looking for matrix set!') 1305 return [] 1306 else: 1307 print('\tFound %d subjects for finding ROI elements.' % Nsub) 1308 1309 ## For finding the set of ROI elements 1310 1311 # START EDITING HERE WITH USING LABELS 1312 a = GetSet(All_sub_grid[0], ftype, ExternLabsOK) 1313 1314 otxt = '\tThe number of elements in the ROI matrix set is:\n\t ' 1315 #print('\tThe number of elements in the ROI matrix set is:\n\t ', end=' ') 1316 for i in range(1,Nsub): 1317 otxt+= '{}, '.format(len(a)) 1318 #print('%d,' % len(a), end=' ') 1319 if UNION: 1320 # updating with the *union* of regions 1321 a.update(GetSet(All_sub_grid[i], ftype, ExternLabsOK)) 1322 else: 1323 a.intersection_update(GetSet(All_sub_grid[i], ftype, ExternLabsOK)) 1324 otxt+= '{}.'.format(len(a)) 1325 print(otxt) 1326 #print('%d.' % len(a)) 1327 1328 temp = list(a) 1329 temp.sort() 1330 print('\tThe set of ROIs is:') 1331 for t in temp: 1332 print('\t %s' % t) 1333 1334 ## For finding the set of parameters 1335 b = set(All_sub_grid[0][1]) 1336 for i in range(1,Nsub): 1337 b.intersection_update(set(All_sub_grid[i][1])) 1338 print('\tThe (final) number of parameters in the ROI matrix set is:' 1339 '{}.'.format(len(b))) 1340 1341 temp2 = list(b) 1342 temp2.sort() 1343 print('\tThe set of parameters is (alphabetically):') 1344 #print('\t ', end=' ') 1345 otxt2 = '\t ' 1346 for t in temp2: 1347 otxt2+= ' {} '.format(t) 1348 #print(' %s ' % t, end=' ') 1349 #print('') 1350 print(otxt2) 1351 print('++ ... done getting set of ROIs.') 1352 return temp, temp2 #a, b 1353 1354 1355 1356####### [PT: Sep 1, 2020] the original version of this func after 1357####### 2to3; going back to replace the parts with end=' ' (see 1358####### above), for backward compatability. 1359#def FindGroupwiseTargets(All_sub_grid, ftype, ExternLabsOK, UNION=0): 1360# '''Take a list of 4-tuples representing subject data (and a string 1361# of what filetype it is), go through all, and find set of 1362# intersecting matrix elements based on labels. When done, return 1363# that list of matrix elements, AND a list of the variables.''' 1364# 1365# print('++ Going through sets of matrix element ROIs...') 1366# Nsub = len(All_sub_grid) 1367# 1368# if Nsub == 0: 1369# print('** ERROR! No subjects found when looking for matrix set!') 1370# return [] 1371# else: 1372# print('\tFound %d subjects for finding ROI elements.' % Nsub) 1373# 1374# ## For finding the set of ROI elements 1375# 1376# # START EDITING HERE WITH USING LABELS 1377# a = GetSet(All_sub_grid[0], ftype, ExternLabsOK) 1378# print('\tThe number of elements in the ROI matrix set is:\n\t ', end=' ') 1379# for i in range(1,Nsub): 1380# print('%d,' % len(a), end=' ') 1381# if UNION: 1382# # updating with the *union* of regions 1383# a.update(GetSet(All_sub_grid[i], ftype, ExternLabsOK)) 1384# else: 1385# a.intersection_update(GetSet(All_sub_grid[i], ftype, ExternLabsOK)) 1386# print('%d.' % len(a)) 1387# 1388# temp = list(a) 1389# temp.sort() 1390# print('\tThe set of ROIs is:') 1391# for t in temp: 1392# print('\t %s' % t) 1393# 1394# ## For finding the set of parameters 1395# b = set(All_sub_grid[0][1]) 1396# for i in range(1,Nsub): 1397# b.intersection_update(set(All_sub_grid[i][1])) 1398# print('\tThe (final) number of parameters in the ROI matrix set is:' 1399# '{}.'.format(len(b))) 1400# 1401# temp2 = list(b) 1402# temp2.sort() 1403# print('\tThe set of parameters is (alphabetically):') 1404# print('\t ', end=' ') 1405# for t in temp2: 1406# print(' %s ' % t, end=' ') 1407# print('') 1408# print('++ ... done getting set of ROIs.') 1409# return temp, temp2 #a, b 1410 1411 1412###------------------------------------------------------------------ 1413 1414def GetSet(x, ftype, ExternLabsOK): 1415 '''Input one of the 4-tuples returned by LoadInGridOrNetcc. 1416 Output is a 'set' of nonempty target-connecting elements. 1417 FOR NOW: this is specifically for a GRID file, where there's 1418 a nice 'NT' set of ints.''' 1419 1420 if not(ftype=='GRID' or ftype=='NETCC'): 1421 print("** ERROR: unrecognized matrix type. Don't know what to do.") 1422 sys.exit(34) 1423 1424 # default: use the boring old numbers themselves as ROI labels, 1425 # just zero-padding them. 1426 # switch: if there's an externally input list of labels, such as 1427 # from a labeltable, then use those. 1428 UseL = 3 1429 if (len(x) >= 4) and ExternLabsOK: 1430 if x[4]: 1431 UseL = 4 1432 1433 1434 #if ftype=='GRID': # later, grid vs netcc 1435 Ntar = len(x[UseL]) 1436 roi_set = set() # empty set 1437 # should be UHT, nondiagonal selector 1438 for i in range(Ntar): 1439 for j in range(i+1,Ntar): 1440 # NETCC: all UHT elements 1441 # GRID: UHT elements, with nonzero 'NT' values 1442 if ((ftype=='GRID') and x[0][x[2][GridCheckVar]][i,j]) \ 1443 or (ftype=='NETCC'): 1444 roi_set.add(ElementNameMaker(x[UseL][i], x[UseL][j],UseL-3)) 1445 1446 return roi_set 1447 1448###------------------------------------------------------------------ 1449 1450def ElementNameMaker(A, B, AS_IS): 1451 '''Current format for turning integer ROI labels A and B into ROI 1452 element name. If AS_IS=0, then zero pad. Else: use names as they 1453 is.''' 1454 # above, if UseL=3, then AS_IS=0 --> zero pad. 1455 1456 # paranoia 1457 if (type(A) != str) or (type(B) != str): 1458 print("** ERROR: problem in element name-maker. Help!") 1459 sys.exit(43) 1460 1461 # first case is original style 1462 # second case is if data labels are used 1463 if not(AS_IS) and A.isdigit() and B.isdigit(): 1464 aint = int(A) 1465 bint = int(B) 1466 1467 if aint > 999: 1468 print("*+ POSSIBLE error in element maker: big label %d > 999!" % A) 1469 print("\tUgliness may ensue!") 1470 if bint > 999: 1471 print("*+ POSSIBLE error in element maker: big label %d > 999!" % B) 1472 print("\tUgliness may ensue!") 1473 1474 x = str("%03d" % (aint)) 1475 y = str("%03d" % (bint)) 1476 1477 else: 1478 x = str(A) 1479 y = str(B) 1480 1481 return x+ConnNames+y 1482 1483#### Old version: used to treat ROI labels of elements as ints; 1484#### new possibility that they will be strings now-- so treat all as str 1485#def ElementNameMaker(A, B): 1486# '''Current format for turning integer ROI labels into 1487# ROI element name.''' 1488# # paranoia 1489# if (type(A) != int) or (type(A) != int): 1490# print "ERROR in element maker: why not 'int' element labels?" 1491# if A > 999: 1492# print "POSSIBLE error in element maker: big label %d > 999!" % A 1493# if B > 999: 1494# print "POSSIBLE error in element maker: big label %d > 999!" % B 1495# 1496# x = str("%03d" % (A)) 1497# y = str("%03d" % (B)) 1498# return x+'_'+y 1499 1500 1501 1502###------------------------------------------------------------------ 1503###------------------------------------------------------------------ 1504###------------- STOP: Matrix element and set operations ------------ 1505###------------------------------------------------------------------ 1506###------------------------------------------------------------------ 1507 1508 1509###------------------------------------------------------------------ 1510###------------------------------------------------------------------ 1511###----------------- START: Reading in CSV files -------------------- 1512###------------------------------------------------------------------ 1513###------------------------------------------------------------------ 1514 1515# [PT: Aug. 31, 2020] this had to be updated a bit in the 2to3 shift 1516def LoadInCSV(fname): 1517 ''' open up a file, and return a 2-tuple: (raw) data set and header.''' 1518 1519 print('++ Opening and reading CSV file...') 1520 1521 data = [] 1522 CSVheader = [] 1523 1524 idx = 0 1525 1526 # [PT: Sep 1, 2020] In order to have backwards compatability with 1527 # Python2, the """newline=''""" was removed from this open 1528 # command. This doesn't appear to negatively affect Python3 1529 # results, so keeping like this now. 1530 with open(fname) as csvfile : 1531 #with open(fname, newline='') as csvfile : 1532 csvread = csv.reader(csvfile) #, delimiter=' ', quotechar='|') 1533 1534 for row in csvread: 1535 if not(idx) and HeaderRowTrue: 1536 CSVheader = row 1537 idx+= 1 1538 else: 1539 data.append(row) #print row 1540 1541 CSVheader = Check_Header_Spaces(CSVheader) 1542 1543 print('++ ... done opening and reading CSV file.') 1544 1545 return data, CSVheader 1546 1547###### [PT: Aug 31, 2020] ... and this is the old Python v2 form of 1548###### the above func 1549#def LoadInCSV(fname): 1550# ''' open up a file, and return a 2-tuple: (raw) data set and header.''' 1551# 1552# filein = open(fname, 'rb') 1553# csvread = csv.reader(filein) 1554# 1555# print '++ Opening and reading CSV file...' 1556# 1557# data = [] 1558# CSVheader = [] 1559# 1560# idx = 0 1561# for row in csvread: 1562# if not(idx) and HeaderRowTrue: 1563# CSVheader = row 1564# idx+= 1 1565# else: 1566# data.append(row) #print row 1567# filein.close() 1568# 1569# CSVheader = Check_Header_Spaces(CSVheader) 1570# 1571# print '++ ... done opening and reading CSV file.' 1572# 1573# return data, CSVheader 1574 1575###------------------------------------------------------------------ 1576 1577def Check_Header_Spaces(X): 1578 1579 N = len(X) 1580 1581 for s in range(N): 1582 X[s] = X[s].strip() # first strip away leading/trailing WS; aug,2015 1583 nn = X[s].rfind(' ') 1584 if nn >= 0: 1585 bleep = str(X[s]) # cheap copy 1586 X[s] = X[s].replace(' ','_') 1587 print("\tWarning: space in header name; replacing: '%s' -> '%s'." \ 1588 % (bleep, X[s])) 1589 1590 return X 1591 1592###------------------------------------------------------------------ 1593 1594def Choose_IntFloatStr(s): 1595 '''Check and see if the input string 's' represents a number: 1596 -> if it does, then return int (default) or float version; 1597 -> else, return the same string back. 1598 Returns error if the input is not of type 'str'. 1599 ''' 1600 1601 if not(type(s) == str): 1602 print("BAD USAGE OF THIS FUNCTION: " 1603 "\tinput argument should be a string, not %s." % type(s)) 1604 return s 1605 1606 try: 1607 float(s) 1608 try: 1609 return int(s) 1610 except ValueError: 1611 return float(s) 1612 except: 1613 return s 1614 1615###------------------------------------------------------------------ 1616 1617def ConvertCSVfromStr(dat1, head1, NA_WARN): 1618 '''Take the all-string input and header lists in, 1619 and return a copy with ints, floats and strings.''' 1620 1621 print('++ Converting (hopefully appropriate) strings to numbers...') 1622 1623 # careful copying... need to recursively copy each list 1624 # unattachedly 1625 1626 # Jan,2015 !!!! 1627 # dat2 = [list(x) for x in dat1] # OLD method 1628 # ---> new method: skip empty lines 1629 FOUND_WS = 0 1630 FOUND_EL = 0 1631 dat2 = [] 1632 for x in dat1: 1633 if x: # check if empty line 1634 if x[0].split() : # check if just whitespace 1635 dat2.append(x) 1636 else: 1637 FOUND_WS = 1 1638 else: 1639 FOUND_EL = 1 1640 1641 if FOUND_WS: 1642 print("\t FYI: removed at least one whitespace line from CSV file.") 1643 if FOUND_EL: 1644 print("\t FYI: removed at least one empty line from CSV file.") 1645 1646 head2 = list(head1) 1647 1648 Lx,Ly = np.shape(dat2) 1649 for i in range(Lx): 1650 # usually, have first column be name labels, even if numeric 1651 for j in range(FirstColLabels, Ly): 1652 dat2[i][j] = Choose_IntFloatStr(dat2[i][j]) 1653 1654 # initialize 1655 for j in range(Ly): 1656 head2[j] = type(dat2[0][j]) 1657 1658 for j in range(Ly): 1659 ty0 = head2[j] 1660 for i in range(1,Lx): 1661 ty = type(dat2[i][j]) 1662 if not(ty0 == ty): 1663 if ty0 == str : 1664 print("Odd mixing in types of data in Col {} (={}) " 1665 "\t-> seeing both {} and {}." 1666 "".format(j, head1[j], ty0, ty)) 1667 elif ty0 == int: 1668 if ty == float: 1669 print("\tCSV col {} -> making all floats. {}" 1670 "".format(head1[j], dat2[i][j])) 1671 head2[j] = float # Oct, 2014 fix 1672 for k in range(Lx): 1673 if type(dat2[k][j]) == int: 1674 dat2[k][j] = float(dat2[k][j]) 1675 else: 1676 "Odd combo..." 1677 elif (dat2[i][j] == 'NA'): 1678 if NA_WARN: 1679 #print "Loc (%d, %d) has value 'NA'." % (i,j) 1680 print("*+ Warning: 'NA' value in Col %d (='%s') !" \ 1681 % ( j, head1[j] )) 1682 else: 1683 print("Odd mixing in types of data in Col {} (={}) " 1684 "\t-> seeing both {} and {}." 1685 "".format( j, head1[j], ty0, ty)) 1686 elif ty0 == float: 1687 if ty == int: 1688 #print "\tin CSV col %d ->" % j, 1689 #print "making %d float." % dat2[i][j] 1690 dat2[i][j] = float(dat2[i][j]) 1691 elif dat2[i][j] == 'NA': 1692 if NA_WARN: 1693 #print "Loc (%d, %d) has value 'NA'." % (i,j) 1694 print("*+ Warning: 'NA' value in Col %d (='%s') !" \ 1695 % ( j, head1[j] )) 1696 else: 1697 print("Odd mixing in types of data in Col {} (={}) " 1698 "\t-> seeing both {} and {}." 1699 "".format( j, head1[j], ty0, ty)) 1700 else: 1701 print("Extra odd: non str, int or float type? %s" % ty0) 1702 1703 print('++ ... done with CSV str->numeric conversions.') 1704 1705 return dat2, head2 1706 1707 1708###------------------------------------------------------------------ 1709###------------------------------------------------------------------ 1710###----------------- STOP: Reading in CSV files --------------------- 1711###------------------------------------------------------------------ 1712###------------------------------------------------------------------ 1713 1714 1715###------------------------------------------------------------------ 1716###------------------------------------------------------------------ 1717###--------------START: Reading in GRID/NETCC files ----------------- 1718###------------------------------------------------------------------ 1719###------------------------------------------------------------------ 1720 1721def LoadInGridOrNetcc(fname): 1722 ''' Input the grid file name, and return: 1723 1) a list of data [NMAT, NROI, NROI]; 1724 2) a list of labels [NMAT]; 1725 3) a supplementary dictionary for each calling use; 1726 4) and a list of (int) ROI labels (which may not be consecutive).''' 1727 1728 x = ReadInGridOrNetcc(fname) 1729 y1,y2,y3,y4,y5 = SepHeaderFile(x, fname) 1730 return y1,y2,y3,y4,y5 1731 1732###------------------------------------------------------------------ 1733 1734def ReadInGridOrNetcc(fname): 1735 ''' open, read file, return raw/unsplit output.''' 1736 fff = open(fname,'r') 1737 x = fff.readlines() 1738 fff.close() 1739 return x 1740 1741###------------------------------------------------------------------ 1742 1743def SepHeaderFile(RawX, fname): 1744 ''' Take in the data and return: 1745 1) a list of data [NMAT, NROI, NROI]; 1746 2) a list of labels [NMAT]; 1747 3) a supplementary dictionary for each calling use; 1748 4) and a list of (int) ROI labels (which may not be consecutive).''' 1749 1750 Nroi,Nmat,ListLabels,ReadNext,ROI_str_labs = \ 1751 HeaderInfo(RawX[:NLinesGridHead]) # header 1752 1753 y = RawX[ReadNext:] # info grids 1754 1755 y = RemoveEmptyWhitespaceFromReadlines(y, fname) 1756 1757 Ly = len(y) 1758 1759 if not( Ly == ((Nroi+1) * Nmat) ): 1760 print("ERROR reading grid file!") 1761 print("Number of non-header lines don't match Nmatrices (%d)" % (Nmat)) 1762 return [0,0] 1763 1764 OUT_data = [] 1765 OUT_labs = [] 1766 OUT_dlab = {} 1767 ctr = 0 1768 for i in range(Nmat): 1769 OUT_labs.append(y[ctr].split()[1]) 1770 OUT_dlab[OUT_labs[i]]=i 1771 ctr+=1 1772 if (OUT_labs[i] == 'NT'): 1773 temp = np.zeros((Nroi,Nroi),dtype=int) 1774 for j in range(Nroi): 1775 temp[j,:] = [int(xx) for xx in y[ctr].split()] 1776 ctr+=1 1777 else: 1778 temp = np.zeros((Nroi,Nroi)) 1779 for j in range(Nroi): 1780 temp[j,:] = [float(xx) for xx in y[ctr].split()] 1781 ctr+=1 1782 1783 OUT_data.append(np.array(temp)) 1784 return OUT_data, OUT_labs, OUT_dlab,ListLabels,ROI_str_labs 1785 1786###------------------------------------------------------------------ 1787 1788def RemoveEmptyWhitespaceFromReadlines(x, fname): 1789 1790 N = len(x) 1791 whitespace = 0 1792 for i in range(1,N+1,1): 1793 if x[-i].split() == []: 1794 whitespace = i 1795 else: 1796 break 1797 1798 if whitespace: 1799 print("\tNB: ignored %d lines of whitespace from the end of '%s'." \ 1800 % ( whitespace, fname )) 1801 return x[:-whitespace] 1802 else: 1803 return x 1804 1805###------------------------------------------------------------------ 1806 1807def HeaderInfo(RawX): 1808 ''' Read in necessary numbers from first few lines. ''' 1809 if not(RawX[0].split()[0] == '#'): 1810 print("** ERROR: the file format doesn't look modern!") 1811 print("**\t-> run Grid_Modernizer?") 1812 sys.exit(52) 1813 1814 Nroi = int(RawX[0].split()[1]) 1815 if Nroi <=0: 1816 print("ERROR reading header! Number of ROIs is: %d" % Nroi) 1817 sys.exit(53) 1818 1819 Nmat = int(RawX[1].split()[1]) 1820 if Nmat <=0: 1821 print("ERROR reading header! Number of Matrices is: %d" % Nmat) 1822 sys.exit(54) 1823 1824 # Sept 2014: might have labels here. 1825 # defaults 1826 #HAVE_LABS = 0 1827 listreadline = 2 1828 ReadNext = 3 1829 ROI_str_labs = [] 1830 # check about changing if LABELS are input 1831 temp = RawX[2].split() 1832 if len(temp) > 1: 1833 if temp[1] == HEADER_Labels: 1834 #HAVE_LABS = 1 1835 listreadline = 4 1836 ReadNext = 5 1837 ROI_str_labs = RawX[3].split() 1838 ll = len(ROI_str_labs) 1839 if not( ll == Nroi ): 1840 print("ERROR reading header!") 1841 print(" Number of ROIs (%d) doesn't match stringnames (%d)" % \ 1842 (Nroi,ll)) 1843 sys.exit(55) 1844 1845 #if HAVE_LABS : 1846 # print "++ Have ROI LABELS: reading." 1847 1848 ListLabels=RawX[listreadline].split() 1849 ll = len(ListLabels) 1850 if not( ll == Nroi ): 1851 print("ERROR reading header!") 1852 print(" Number of ROIs (%d) doesn't match Labels (%d)" % (Nroi,ll)) 1853 sys.exit(55) 1854 1855 ### Don't do this--> may have non-numeric labels 1856# for i in range(ll): 1857# ListLabels[i] = int(ListLabels[i]) 1858# if ListLabels[i] <=0: 1859# print "ERROR reading header! ListLabel[%d]=%d." % \ 1860# (i,ListLabels[i]) 1861# sys.exit(56) 1862 1863 return Nroi, Nmat, ListLabels, ReadNext, ROI_str_labs 1864 1865###------------------------------------------------------------------ 1866###------------------------------------------------------------------ 1867###-------------- STOP: Reading in GRID/NETCC files ----------------- 1868###------------------------------------------------------------------ 1869###------------------------------------------------------------------ 1870 1871 1872 1873###------------------------------------------------------------------ 1874###------------------------------------------------------------------ 1875###---------- START: Modernizing/rewriting info into better --------- 1876###------------------------------------------------------------------ 1877###------------------------------------------------------------------ 1878 1879 1880def OldFash_Grid_modernize(fname, outname): 1881 1882 x = ReadInGridOrNetcc(fname) 1883 y = mod_func_break(x, fname, outname) 1884 1885 return y 1886 1887###------------------------------------------------------------------ 1888 1889def mod_func_break(X, fname, outname): 1890 1891 lnum = 4 # starting line number for reading as of Sept 2013 1892 1893 # 1) 1894 Nroi = int(X[0].split()[0]) 1895 1896 # 2) 1897 ROI_LABS = X[2].split() 1898 1899 # 3) 1900 Npar = len(OldFash_parnames) 1901 Y = np.zeros((Npar,Nroi,Nroi)) 1902 1903 for i in range( Npar ): 1904 k = 0 1905 for j in range(lnum,lnum+Nroi): 1906 Y[i,k,:] = np.array(X[j].split(),dtype=np.float32) 1907 k+=1 1908 lnum = j+2 1909 1910 z = mod_func_write(Nroi, ROI_LABS, Y, fname, outname) 1911 1912 return Y, z 1913 1914###------------------------------------------------------------------ 1915 1916def DefaultNamingOutGrid(list_all): 1917 1918 out = [] 1919 1920 for x in list_all: 1921 if not(x[-5:] == '.grid'): 1922 print("ERROR-- doesn't look like you're modernizing a *.grid file!") 1923 1924 out.append(x[:-5]+'_MOD'+x[-5:]) 1925 1926 return out 1927 1928 1929def mod_func_write(Nroi, ROI_LABS, Y, fname, outname): 1930 1931 Ngrid=np.shape(Y)[0] 1932 if not(Ngrid == len(OldFash_parnames)): 1933 print("ERROR-- doesn't look like you have the right number of") 1934 print("\tmatrices in the *.grid file!") 1935 1936 f = open(outname, 'w') 1937 f.write('# %d # %s\n' % (Nroi, HEADER_Nroi)) 1938 f.write('# %d # %s\n' % (Ngrid, HEADER_Ngrid)) 1939 1940 #f.write('# %s \n' % HEADER_Labels) 1941 #for roi in ROI_LABS: 1942 # tt = "Funcky%dFresh" % int(roi) 1943 # f.write("%12s\t" % tt) 1944 #f.write('\n') 1945 1946 for roi in ROI_LABS: 1947 f.write("%12s\t" % roi) 1948 f.write('\n') 1949 1950 for i in range(Ngrid): 1951 f.write('# %s\n' % OldFash_parnames[i]) 1952 for j in range(Nroi): 1953 for k in range(Nroi): 1954 if OldFash_parnames[i]=='NT': 1955 f.write('%12d\t' % Y[i][j][k]) 1956 else: 1957 f.write('%12e\t' % Y[i][j][k]) 1958 f.write('\n') 1959 1960 f.close() 1961 1962 return (1) 1963 1964###------------------------------------------------------------------ 1965###------------------------------------------------------------------ 1966###------------ STOP: Modernizing/rewriting info into better --------- 1967###------------------------------------------------------------------ 1968###------------------------------------------------------------------ 1969 1970 1971 1972 1973 1974### ----------------------------------------------------------------- 1975### ----------------------------------------------------------------- 1976 1977