1#!/usr/local/bin/python3.8
2
3# python3 status: compatible
4
5# ver 1.0:  July, 2014
6# Update, ver 1.1: Sept 2014
7# Updated, ver 1.2: Sept 2014
8# [PT: Aug 31, 2020] updated to Python 3 with "2to3 -w .."
9# [PT: Sep  1, 2020] continuing to tweak, based on 2to3 updates
10#
11# File of helper functions for fat_mvm_*.py.
12#
13#
14#
15#############################################################################
16
17
18import numpy as np
19import csv
20import sys
21
22from glob import glob
23
24np.set_printoptions(linewidth=200)
25
26# Have to check 5 lines in
27NLinesGridHead = 5
28
29HEADER_Nroi = 'Number_of_network_ROIs'
30HEADER_Ngrid = 'Number_of_grid_matrices'
31HEADER_EleID = 'Special_element_ID'
32HEADER_Labels = 'WITH_ROI_LABELS'
33
34HEADER_ALL = [HEADER_Nroi, HEADER_Ngrid, HEADER_EleID, HEADER_Labels]
35
36# This will be ROI name connector, in line with ZSS usage in SUMA,
37# etc.
38ConnNames = '__'
39
40HeaderRowTrue = 1
41FirstColLabels = 1    # first read in col is subj name-labels
42GridCheckVar = 'NT'
43
44# for writing file for MVM
45ColOne_SUB = 'Subj'
46ColEnding = ['ROI', 'matrPar', 'Ausgang_val']
47
48MVM_file_postfix = '_MVMtbl.txt'
49MVM_matchlog_postfix = '_MVMprep.log'
50
51LOG_LABEL_colmatr = 'Matrixfile_ID'
52LOG_LABEL_colsubj = 'CSV_Subj'
53LOG_LABEL_roilist = 'ROI_list'
54LOG_LABEL_parlist = 'Parameter_list'
55VAR_LABEL_roilistQ = 'Qvar_list'
56VAR_LABEL_roilistC = 'Cvar_list'
57VAR_LABEL_roilistI = 'Ivar_list'
58LOG_LABEL_command = 'Made_by_command'
59
60OldFash_parnames = ['NT', 'fNT', 'FA', 'sFA', 'MD',         \
61                        'sMD', 'RD', 'sRD', 'L1', 'sL1',    \
62                        'NV']
63
64
65#class MVM_rev:
66#    def __init__(self, var, par):
67#        self.var = var
68#        self.par = par
69#        self.isquant = 1
70#        self.mean_tstat = 0
71#        self.minp_ROI = ''
72
73
74###------------------------------------------------------------------
75###------------------------------------------------------------------
76###---------------- START: review functions and output --------------
77###------------------------------------------------------------------
78###------------------------------------------------------------------
79
80def BreakLineList(x):
81
82    y = []
83    for line in x:
84        y.append(line.split())
85
86
87    return y
88
89###------------------------------------------------------------------
90
91def Find_ANOVAE(x):
92    ''' input is a line list broken into lines of words.'''
93
94    Nline = len(x)
95
96    RESULTS = []
97    list_par = []
98    list_var = []
99
100    for i in range(Nline):
101        if len(x[i]) > 0 :
102            if (x[i][1] == 'RESULTS:') and (x[i][2] == 'ANOVA') :
103                par = x[i][-1]      # name of DTI/FMRI par
104                nval = int(x[i+1][0])    # where the num of vars is stored
105                values = []
106                names = []
107                for j in range( nval ):
108                    ii = i + 3 + j  # skip to vars
109                    if x[ii][-1] != '(Intercept)' :
110                        values.append(x[ii][-3])
111                        names.append(x[ii][-1])
112                RESULTS.append(values)
113                list_par.append(par)
114                list_var.append(names)
115
116
117    npar = len(RESULTS)
118    if npar <1:
119        print("ERROR! No ANOVA results?!?")
120        sys.exit(33)
121
122    nvar = len( RESULTS[0] )
123    if nvar < 1 :
124        print("ERROR! No ANOVA variables?!?")
125        sys.exit(34)
126
127    z = np.array(RESULTS, dtype = float)
128
129    # return the array, and it's two lists of labels
130    return z, list_par, list_var[0]
131
132
133###------------------------------------------------------------------
134
135
136
137
138
139def Find_POSTHOC(x):
140    ''' input is a line list broken into lines of words.'''
141
142    Nline = len(x)
143
144    RESULTS = []
145    list_par = []
146#    list_var = []
147
148    for i in range(Nline):
149        if len(x[i]) > 0 :
150            if (x[i][1] == 'RESULTS:') and (x[i][2] == 'Post') :
151                par = x[i][-1]           # name of DTI/FMRI par
152                nglt = int(x[i+1][0])    # where the num of glts is stored
153                values = []
154                names = []
155                ii = i + 3               # jump ahead to data
156                phrois, phvars = Find_PosthocVars_and_NROI( x[ii:ii+nglt] )
157                Nphrois = len(phrois)
158                Nphvars = len(phvars)
159                tmpv = np.zeros( Nphvars )
160                for j in range( Nphvars ):
161                    for k in range( Nphrois ):
162                        ll = ii + j + k*Nphvars
163                        tmpv[j]+= np.float(x[ll][-5]) # t-stat
164                    tmpv[j]/= Nphrois
165                    values.append(tmpv[j])
166                RESULTS.append(values)
167                list_par.append(par)
168#                list_var.append(names)
169
170    npar = len(RESULTS)
171    if npar <1:
172        print("No post hoc results-- guess you didn't want any?")
173        return [],[],[]
174    else:
175        nvar = len( RESULTS[0] )
176        if nvar < 1 :
177            print("ERROR! No post hoc variables?!?")
178            sys.exit(35)
179
180        z = np.array(RESULTS, dtype = float)
181
182        # return the array, and it's two lists of labels
183        return z, list_par, phvars
184
185###------------------------------------------------------------------
186
187def Find_PosthocVars_and_NROI( x ):
188
189    nvals = len(x)
190
191    phrois = []
192    phvars = []
193
194    for y in x:
195        twopiece = y[-1].split('--') # Sep,2015: new parser
196        if not(phrois.__contains__(twopiece[0])):
197            phrois.append(twopiece[0])
198
199        if not(phvars.__contains__(twopiece[1])):
200            phvars.append(twopiece[1])
201
202    if len(phvars)*len(phrois) != nvals:
203        print("Problem! number of posthoc vars, ROIs and tests not matching!")
204        sys.exit(189)
205
206    return phrois, phvars
207
208###------------------------------------------------------------------
209
210###### [PT: Sep 1, 2020] comment this func out, because the
211###### fat_mvm_review.py program that calls this func (a program that
212###### never even fully made it to beta testing) is being deprecated.
213#
214## ref mat allows us to use group pvalue mat to censor
215#def ScreenAndFileOutput(PO, TO, FI_mat, FI_par, FI_var, FI_indi, TS,
216#                        REF_mat):
217#
218#    STARTER = 0
219#    if not(TS):
220#        STARTER = 1
221#
222#    nvar = len(FI_var)
223#    npar = len(FI_par)
224#
225#    calc_indent = 14*nvar+1
226#
227#    if  (npar, nvar) != np.shape(FI_mat) :
228#        print("Weird error in numbers not matching internally!")
229#        sys.exit(35)
230#
231#    # first lining ...
232#    if STARTER: TS.append('# %12s' % (FI_var[0]))
233#    for j in range(nvar):
234#        print("%14s" % (FI_var[j]), end=' ')
235#        if (j>0):
236#            if STARTER: TS.append("%14s" % FI_var[j])
237#    print("")
238#    if STARTER: TS.append("\n")
239#
240#    TS.append('#%s#   %s\n' % ('-'*calc_indent, FI_indi))
241#
242#
243#
244#
245#    # ... the rest.
246#    # use of 'REF_mat' to allow censoring by ANOVA values
247#    for i in range(npar):
248#        for j in range(nvar):
249#            #if FI_mat[i,j] < TO :
250#            if REF_mat[i,j] < TO :
251#                out = "%.4e" % (FI_mat[i,j])
252#                fout = out
253#            else:
254#                out = '-'
255#                fout = '0'
256#            print("%14s" % (out), end=' ')
257#            TS.append("%14s" % (fout))
258#        print("  %s" % FI_par[i])
259#        TS.append("  # %s\n" % FI_par[i])
260#
261#
262#    if not(TS):
263#        print("**ERROR! Empty file?")
264#        sys.exit(12)
265#
266#    return TS
267
268
269''' OLD version
270
271def ScreenAndFileOutput(PO, TO, FI_mat, FI_par, FI_var):
272
273    nvar = len(FI_var)
274    npar = len(FI_par)
275
276    if  (npar, nvar) != np.shape(FI_mat) :
277        print "Weird error in numbers not matching internally!"
278        sys.exit(35)
279
280    if PO:
281        outname = PO + '_REV.txt'
282        f = open(outname, 'w')
283
284    # first lining ...
285    if PO: f.write('# %12s' % (FI_var[0]))
286    for j in range(nvar):
287        print "%14s" % (FI_var[j]),
288        if (j>0) and PO:
289            f.write("%14s" % FI_var[j])
290    print ""
291    if PO: f.write("\n")
292
293    # ... the rest.
294    for i in range(npar):
295        for j in range(nvar):
296            if FI_mat[i,j] < TO:
297                out = "%.4e" % (FI_mat[i,j])
298                fout = out
299            else:
300                out = '-'
301                fout = '0'
302            print "%14s" % (out),
303            if PO: f.write("%14s" % (fout))
304        print "  %s" % FI_par[i]
305        if PO: f.write("  # %s\n" % FI_par[i])
306    if PO: f.close()
307
308    return 1
309'''
310
311
312
313###------------------------------------------------------------------
314###------------------------------------------------------------------
315###----------------- sTOP: review functions and output --------------
316###------------------------------------------------------------------
317###------------------------------------------------------------------
318
319###------------------------------------------------------------------
320###------------------------------------------------------------------
321###---------------- START: Subsets of categorical vars --------------
322###------------------------------------------------------------------
323###------------------------------------------------------------------
324
325def SplitVar_Sublist(L, idx):
326    '''Take in a list, and a 'which iteration is this' value called
327    idx. Return either the first or second list in a lone-comma
328    separated line.'''
329
330    N = L.count(',')
331
332    # only one list, can only go with first
333    if N==0:
334        if idx == 0 :
335            return set(L)
336        else:
337            return set([])
338    elif N==1:
339        mm = L.index(',')
340        if idx == 0 :
341            return set(L[:mm])
342        else:
343            return set(L[mm+1:])
344    else:
345        print("Error reading variable list: too many commas!")
346        print("\t There should only be a single comma, separating at most "
347              "two lists.")
348        sys.exit(18)
349
350
351
352
353
354def Pars_CatVars_in_Listfile( file_listvars,
355                              par_list,
356                              var_iscateg,
357                              var_isinterac,
358                              Ninter ):
359    '''User can specify subset of cat var for modelling. Might be
360    useful if there are a lot of subcategories, and we want some
361    pairwise tests.  Performed after we know where and what are the
362    categorical variables (both lone and/or interaction ones.
363    Returns: updated lists of categorical and interacting vars.'''
364
365    coldat = ReturnFirstUncommentedSection_ofReadlines(file_listvars)
366    for i in range(len(coldat)):
367        if len(coldat[i]) > 1:
368            print("Found a variable for subsetting")
369
370            if var_iscateg[i]:
371                newsub_set = set(coldat[i][1:]) # all other vars in that row
372                oldvar_set = set(var_iscateg[i])
373                nomatch = list(oldvar_set.__ixor__(newsub_set))
374                print("Going to remove:", nomatch)
375                for x in nomatch:
376                    var_iscateg[i].remove(x)
377
378            elif var_isinterac[i]:
379                ### with variable interaction terms, use a "lone"
380                ### comma to separate the sub-lists; if you only want
381                ### a subset of the second variable, then still use a
382                ### lone comma before it
383                for j in range(2):
384                    if var_isinterac[i][2][j]:
385                        newsub_set = SplitVar_Sublist(coldat[i][1:], j)
386                        oldvar_set = set(var_isinterac[i][2][j])
387                        nomatch = list(oldvar_set.__ixor__(newsub_set))
388                        print("Going to remove:", nomatch)
389                        for x in nomatch:
390                            var_isinterac[i][2][j].remove(x)
391
392
393    return var_iscateg, var_isinterac
394
395
396
397###------------------------------------------------------------------
398###------------------------------------------------------------------
399###---------------- STOP: Subsets of categorical vars ---------------
400###------------------------------------------------------------------
401###------------------------------------------------------------------
402
403
404###------------------------------------------------------------------
405###------------------------------------------------------------------
406###--------------- START: Interaction modeling features -------------
407###------------------------------------------------------------------
408###------------------------------------------------------------------
409
410def VarLists_to_Strings_for_MVM(var_list,
411                                var_isinterac,
412                                var_iscateg,
413                                CAT_PAIR_COMP ):
414    '''Get counts and things from varlists, interaction and category
415    info.'''
416
417    Nvar = len(var_list)
418
419    # calc number of GLTs/ROI (= Nvartout) and number of Quant var
420    Nvartout = Nvar
421    var_list_quant = []
422    varQ_str = ''
423    var_list_inter = []
424    varI_str = ''
425    extra_qVar = '' # Jan,2015 -- added to pick up qvar in interac term
426    varC_str = ''
427    for i in range(Nvar):
428        x = var_list[i]
429
430        if not(var_isinterac[i][0][0]):
431            if var_list.__contains__(x):
432                if not( var_iscateg[i] ):
433                    var_list_quant.append(x)
434                    varQ_str+= "  %s" % x
435                else:
436                    nc = len(var_iscateg[i])
437                    Nvartout+= nc-1              # individual ttests
438                    if CAT_PAIR_COMP :
439                        Nvartout+= (nc*(nc-1))/2 # pairwise combinatorial
440        else:
441            varI_str = "  "
442            ncombo = 1
443            for j in range( 2 ):
444                varI_str+= var_isinterac[i][1][j]
445                if var_isinterac[i][2][j] :
446                    # binomial-type comparisons for each
447                    nc = len(var_isinterac[i][2][j])
448                    ncombo*= (nc*(nc-1))/2
449                    varI_str+= "("
450                    for ii in var_isinterac[i][2][j]:
451                        varI_str+= " %s" % ii
452                    varI_str+= " )"
453                else: # Jan,2015
454                    extra_qVar = "%s" % var_isinterac[i][1][j]
455                if j == 0 :
456                    varI_str+= " %s " % var_isinterac[i][0][0]
457            Nvartout+= ncombo-1
458
459    for i in range(Nvar):
460        if var_iscateg[i]:
461            varC_str+= "  %s(" % var_list[i]
462            for y in var_iscateg[i]:
463                varC_str+= " %s" % y
464            varC_str+= " )"
465
466
467
468
469    return Nvartout, var_list_quant, varQ_str, var_list_inter, \
470        varI_str, varC_str, extra_qVar
471
472
473
474
475
476
477
478
479###------------------------------------------------------------------
480
481def CheckFor_Cats_and_Inters( tab_data,
482                              tab_colvars,
483                              tab_coltypes,
484                              var_list ):
485    '''Return 1 if there is an interaction, and  '''
486
487    simple_cat = []
488
489    interac_terms = []
490    count_interac = 0
491
492    for x in var_list:
493        # return either an empty list (no interac) or a list of
494        # the interacting ones
495
496        cats = []          # default if not cat var or interac
497        inter_names = []    # default if no interacs
498        Ncats = 0
499
500        #print "TEST1 for var:",x
501        check, inter_type = IsEntry_interaction(x)
502
503        #print "\t->has check:", check,", and intertype:", inter_type
504
505        #interac_terms = [inter_type]
506
507        # if there is a non-empty list returned, check that both terms
508        # in it are ok to use
509        if check:
510            # inter_names will be a list of: lists (of categorical vars)
511            # or empty lists (quant var)
512            inter_names = CheckVar_and_FindCategVar( tab_data,       \
513                                                       tab_colvars,  \
514                                                       tab_coltypes, \
515                                                       check )
516            count_interac+=1
517
518            # want to ensure categorical variable is first in list
519            if not( inter_names[0] ):
520                inter_names.reverse()
521                check.reverse()
522
523            for i in range(len(inter_names)):
524                if inter_names[i]:
525                    Ncats+=1
526
527
528            # when there is interaction, have:
529            # [0] type of interaction (or empty for none)
530            # [1] var names (cat first or both)
531            # [2] cat var names (or empty for quant)
532            #interac_terms.append(check)
533            #interac_terms.append(inter_names)
534        else:
535            # redefine cats if coming through here
536            cats = CheckVar_and_FindCategVar( tab_data,       \
537                                                  tab_colvars,  \
538                                                  tab_coltypes, \
539                                                  [x] )
540            cats = list(cats[0])
541        #print "\t\> has cat:", cats
542        simple_cat.append(cats)
543        interac_terms.append( [ [inter_type, Ncats], check, inter_names] )
544    return simple_cat, interac_terms, count_interac
545
546
547
548def IsEntry_interaction(str_var):
549    '''Return an empty list if there is no interaction, or return a
550    list of the interacting terms if there be/were one.'''
551
552    type_col = ':' # this was a mistake to include!  Ooops.
553    type_ast = '*'
554
555    num_colon = str_var.count(type_col)
556    num_aster = str_var.count(type_ast)
557
558    if (num_colon + num_aster) > 1:
559        print("Error! At most 1 interaction per var is permitted currently!")
560        sys.exit(14)
561
562    if num_colon :
563        print("**ERROR! The ':' does not exist as a usable interaction call!")
564        sys.exit(156)
565        #return str_var.split(type_col), type_col
566    elif num_aster :
567        return str_var.split(type_ast), type_ast
568    else:
569        return [], ''
570
571
572###------------------------------------------------------------------
573###------------------------------------------------------------------
574###---------------- STOP: Interaction modeling features -------------
575###------------------------------------------------------------------
576###------------------------------------------------------------------
577
578
579###------------------------------------------------------------------
580###------------------------------------------------------------------
581###-------------------- START: Row selection features ---------------
582###------------------------------------------------------------------
583###------------------------------------------------------------------
584
585#''' Input the grid file name, and return:
586#    1)  a list of data [NMAT, NROI, NROI];
587#    2)  a list of labels [NMAT];
588#    3)  a supplementary dictionary for each calling use;
589#    4)  and a list of (int) ROI labels (which may not be consecutive).'''
590
591def Select_Row_From_FATmat(X4, outname, ele, ele_ind, UseL, ExternLabsOK):
592
593    Ngrid = len( X4[1] )
594    ROI_LABS = list(X4[UseL])
595    Nroi = len( ROI_LABS )
596
597    # have tested previously to make sure that ROI_LABS contains ele
598    #ele_ind = ROI_LABS.index(ele)
599
600    f = open(outname, 'w')
601    f.write('# %d # %s\n' % (Nroi, HEADER_Nroi))
602    f.write('# %d # %s\n' % (Ngrid, HEADER_Ngrid))
603
604    if ExternLabsOK:
605        # everything should have labels now
606        f.write('# %s # %s\n' % (ele, HEADER_EleID))
607        for roi in X4[4]:
608            f.write("%12s\t" % roi)
609        f.write('\n')
610
611    for roi in X4[3]:
612        f.write("%12s\t" % roi)
613    f.write('\n')
614
615
616
617    for i in range(Ngrid):
618        f.write('# %s\n' % X4[1][i])
619        for k in range(Nroi):
620            if X4[1][i]=='NT':
621                f.write('%12d\t' % X4[0][i][ele_ind][k])
622            else:
623                f.write('%12e\t' % X4[0][i][ele_ind][k])
624        f.write('\n')
625
626    f.close()
627
628
629
630
631
632
633###------------------------------------------------------------------
634
635def MakeRowFile_From_FATmat(fname, outname, ele, ExternLabsOK):
636
637    X4 = LoadInGridOrNetcc(fname)
638
639    UseL = 3
640    if (len(X4) >= 4) and ExternLabsOK:
641        if X4[4]:
642            UseL = 4
643
644    # make labels for WITH_LABELS...
645    if not(X4[4]):
646        for x in X4[3]:
647            # take half of element id
648            name = ElementNameMaker(x, x, 0)
649
650            #### [PT: Aug 31, 2020] in going from py 2to3, had to
651            #### update this to integer div, in order to be able to
652            #### use as index
653            # lele = (len(name)-len(ConnNames))/2
654            lele = (len(name)-len(ConnNames))//2
655            X4[4].extend( name[:lele] )
656
657    if X4[UseL].__contains__(ele):
658        ele_ind = X4[UseL].index(ele)
659        y = Select_Row_From_FATmat(X4,
660                                   outname,
661                                   ele,
662                                   ele_ind,
663                                   UseL,
664                                   ExternLabsOK)
665    # extra check: even if LABELS are used, might still refer to ID
666    # number ok
667    elif (UseL == 4) and X4[3].__contains__(ele):
668        print("*+ Even though you have labels in the file, it looks like "
669              "your 'roi' selection is just a digit.")
670        print("\t-> Not a problem-- row selection will be done based on ROI "
671              "number (and not on labeltable-info).")
672        ele_ind = X4[3].index(ele)
673        y = Select_Row_From_FATmat(X4,
674                                   outname,
675                                   ele,
676                                   ele_ind,
677                                   3,
678                                   ExternLabsOK)
679    else:
680        print("** Error: chosen element "
681              "'%s' doesn't appear in file's ROI list!" % ele)
682        print("For reference, the list of ROIs is:")
683        for x in X4[UseL]:
684            print("\t",x)
685        sys.exit(55)
686
687    return y
688
689###------------------------------------------------------------------
690
691def DefaultNamingOutRowfile(list_all, ele, ExternLabsOK):
692
693    out = []
694
695    # take half of element id
696    name = ElementNameMaker(ele, ele, ExternLabsOK)
697    #### [PT: Aug 31, 2020] in going from py 2to3, had to update this
698    #### to integer div, in order to be able to use as index
699    # lele = (len(name)-len(ConnNames))/2
700    lele = (len(name)-len(ConnNames))//2
701    roi = name[:lele]
702
703    for x in list_all:
704        if not(x[-5:] == '.grid') and not(x[-6:] == '.netcc'):
705            print("ERROR-- doesn't look like this is a *.grid or *netcc file!")
706        elif x[-5:] == '.grid' :
707            out.append(x[:-5]+'_grid_'+roi+'.row')
708        else:
709            out.append(x[:-6]+'_netcc_'+roi+'.row')
710
711    return out
712
713###------------------------------------------------------------------
714###------------------------------------------------------------------
715###--------------------- STOP: Row selection features ---------------
716###------------------------------------------------------------------
717###------------------------------------------------------------------
718
719
720###------------------------------------------------------------------
721###------------------------------------------------------------------
722###----------------------- START: Assorted --------------------------
723###------------------------------------------------------------------
724###------------------------------------------------------------------
725
726def RecapAttach(comm_str, opt, arg):
727    '''Silly parsing of commandline options+arguments in order to be able
728    to return the command with delineating apostrophes around user-input
729    lists.'''
730
731    comm_str+=' '+opt
732    if arg:
733        if opt[:2]=='--':
734            comm_str+="="
735        else:
736            comm_str+=" "
737        comm_str+="'"+arg+"' "
738    else:
739        comm_str+=" "
740
741    return comm_str
742
743###------------------------------------------------------------------
744
745def LoadInTable(fname):
746    ''' open up a file, and return a 2-tuple: (raw) data set and header.'''
747
748    fff = open(fname, 'r')
749    x = fff.readlines()
750    fff.close()
751
752    N = len(x)
753
754    print('++ Opening and reading table file...')
755
756    data = []
757    header = []
758
759    idx = 0
760    for row in x[:]:
761        if not(idx) and HeaderRowTrue:
762            tt = row.split()
763            if tt[-1] == '\\':
764                header = tt[:-1]
765            else:
766                header = tt
767            idx+= 1
768        else:
769            #print row.split()
770            tt = row.split()
771            if tt[-1] == '\\':
772                data.append(tt[:-1])
773            else:
774                data.append(tt)
775
776    header = Check_Header_Spaces(header)
777
778    print('++ ... done opening and reading table file.')
779
780    return data, header
781
782#----------------------------------------------------------------
783
784# Sep,2015: new function to make table from subnetwork of ROIs
785def MakeSubTable(file_table, pref_subnet, roi_list):
786
787    tab_raw, tab_colvars = LoadInTable(file_table)
788    Ncol = len(tab_colvars)
789    Nrois = len(roi_list)
790
791    # check for 'ROI' in col heading
792    indroi = Check_EleIn_ROIlist(tab_colvars, ColEnding[0])
793    if indroi < 0:
794        print("** ERROR: can't find 'ROI' in column headings of")
795        print("    the table file:  %s." % file_table)
796        sys.exit(543)
797
798    list_count = np.zeros(Nrois, dtype=int)
799    tab_subnet = []
800    # go through row by row and keep those that have the 'right' ROI
801    for x in tab_raw:
802        if roi_list.__contains__(x[indroi]) :
803            tab_subnet.append(x)
804            list_count[roi_list.index(x[indroi])] = 1
805
806    # check that each chosen ROI was found
807    if not(list_count.all()) :
808        for i in range(Nrois):
809            if not( list_count[i]):
810                # fixed indexing mistake in roi_list[].  [PT, March 27, 2017]
811                print("** ERROR: can't find selected ROI %s in the original table list" % roi_list[i])
812        print("** Please select again!")
813        sys.exit(544)
814    else:
815        print("++ Found all %d ROIs in the desired subnet list:" % Nrois)
816        for x in roi_list:
817            print("\t  %s" % x)
818
819    Lx, Ly = np.shape(tab_subnet)
820
821    # write output
822    fff = open(pref_subnet, 'w')
823    for x in tab_colvars:
824        fff.write("%s " % x)
825    fff.write("  \\\n")
826    for i in range(Lx-1):
827        for x in tab_subnet[i]:
828            fff.write("%s " % x)
829        fff.write("  \\\n")
830    for x in tab_subnet[Lx-1]:
831        fff.write("%s " % x)
832    fff.write("\n")
833
834    fff.close()
835
836    print("++ New subnetwork table has been made: %s" % pref_subnet)
837
838    return 1
839
840
841#----------------------------------------------------------------
842
843def CheckVar_and_FindCategVar(tab_data, tab_colvars, tab_coltypes, par_list):
844
845    Npar = len(par_list)
846    Nsub, Ncol = np.shape(tab_data)
847    iscateg = []
848    for i in range(Npar):
849        if tab_colvars.__contains__(par_list[i]):
850            j = tab_colvars.index(par_list[i])
851            if tab_coltypes[j]==str :
852                terms = set()
853                for k in range(Nsub):
854
855                    terms.add(tab_data[k][j])
856                listterms = list(terms)
857                listterms.sort()
858                iscateg.append(listterms)
859            else:
860                iscateg.append([])
861        else:
862            print("** Error! Variable %s is not in the data table" \
863             % par_list[i])
864            sys.exit(11)
865
866    return iscateg
867
868
869#----------------------------------------------------------------
870
871def GetFromLogFile(fname, ltype):
872
873    if not(fname):
874        print("** Error: can't open log file: '%s'." % fname)
875        sys.exit(9)
876
877    fff = open(fname,'r')
878    x = fff.readlines()
879    fff.close()
880
881    N = len(x)
882
883    for start in range(N):
884        if (x[start].split()[0] == '#') and (x[start].split()[1] == ltype):
885            #print "FOUND", x[start].split()
886            break
887
888    outlist = []
889    for i in range(start+1,N):
890        if x[i].split()[0] == '#':
891            break
892        else:
893            outlist.append(x[i].split()[0])
894
895    if not(outlist):
896        print("** Error: empty list from log file: '%s'." % fname)
897        sys.exit(10)
898
899    return outlist
900
901
902###------------------------------------------------------------------
903###------------------------------------------------------------------
904###------------------------ STOP: Assorted --------------------------
905###------------------------------------------------------------------
906###------------------------------------------------------------------
907
908
909
910###------------------------------------------------------------------
911###------------------------------------------------------------------
912###--------------- START: Final prep output operations --------------
913###------------------------------------------------------------------
914###------------------------------------------------------------------
915
916def Write_MVM_File(PREFIX,            \
917                   csv_data,          \
918                   csv_colvars,       \
919                   grid_ROIlist,      \
920                   grid_VARlist,      \
921                   grid_tabled):
922
923    fff = open(PREFIX+MVM_file_postfix,'w')
924
925    ### 'csv_colvars':  rename 0th col
926    ### and we add three more col to output: ROI, VarName, VarVal
927    csv_colvars[0] = ColOne_SUB
928    Nsub, Ncsvar = np.shape(csv_data)
929    Nroi = len(grid_ROIlist)
930    Npar = len(grid_VARlist)
931    Ncol = Ncsvar+len(ColEnding)
932
933
934    for i in csv_colvars:
935        fff.write("%s " % i)
936    for i in ColEnding:
937        fff.write("%s " % i)
938    fff.write(" \\\n")
939
940    for i in range(Nsub):
941        for x in range(Nroi):
942            for y in range(Npar):
943                for j in range(Ncsvar):
944                    fff.write("%s " % str(csv_data[i][j]))
945                fff.write("%s " % grid_ROIlist[x])
946                fff.write("%s " % grid_VARlist[y])
947                fff.write("%s " % str(grid_tabled[i,x,y]))
948                if not( i==(Nsub-1) and x==(Nroi-1) and y==(Npar-1) ):
949                    fff.write("  \\\n")
950                else:
951                    fff.write("  \n")
952
953    fff.close()
954
955    return 1
956
957#--------------------------------------------------------------------
958
959def Check_EleIn_ROIlist(ROIlist, ele):
960    try:
961        return ROIlist.index(ele)
962    except ValueError:
963        return -1
964
965#--------------------------------------------------------------------
966
967def MakeGridTableau(DICT_CSV_grid, \
968                    csv_data,      \
969                    grid_data,     \
970                    grid_ROIlist,  \
971                    grid_VARlist,  \
972                    grid_subj,
973                    ExternLabsOK):
974    '''Take a lot of matrix file info in, as well as the CSV file and
975    conversion dictionary, and return a tableau of necessary
976    variable&parameter values from all the subjects' matrix file data.
977    Output is a list ordered by CSV subject, for ease in final
978    writing.'''
979
980    # here, gridVARlist is actually a list of PARAMETERS.
981
982    print('++ Starting to select and convert matrix data to usable '
983          'form for output...')
984
985    Nsub = len(csv_data)
986    Nroi = len(grid_ROIlist)
987    Nvar = len(grid_VARlist)
988    MM = np.zeros((Nsub, Nroi, Nvar))
989
990    # put in order of CSV stuff
991    for i in range(Nsub):
992        csv_subb = csv_data[i][0]
993        # which sub in grid_data
994        grid_subb = DICT_CSV_grid[csv_subb]
995        ii = Check_EleIn_ROIlist(grid_subj, grid_subb)
996        #print "HI!", i, ii
997        if ii>=0:
998            UseL = 3
999            if (len(grid_data[ii]) >= 4) and ExternLabsOK:
1000                if grid_data[ii][4]:
1001                    UseL = 4
1002            Ntar = len(grid_data[ii][UseL])
1003            for j in range(Nroi):
1004                for x in range(Ntar):
1005                    for y in range(x+1,Ntar):
1006                        # inefficient search...
1007                        ele = ElementNameMaker(grid_data[ii][UseL][x],
1008                                               grid_data[ii][UseL][y],
1009                                               UseL-3)
1010                        if grid_ROIlist[j] == ele:
1011                            #print ii,j, ele
1012                            for k in range(Nvar):
1013                                var = grid_VARlist[k]
1014                                idx = grid_data[ii][2][var]
1015                                # which matrix in grid[i][0]
1016                                MM[i,j,k] = grid_data[ii][0][idx][x][y]
1017
1018    print('++ ... done converting matrix stuff for outputting in table.')
1019    return MM
1020
1021###------------------------------------------------------------------
1022###------------------------------------------------------------------
1023###---------------- STOP: Final prep output operations --------------
1024###------------------------------------------------------------------
1025###------------------------------------------------------------------
1026
1027###------------------------------------------------------------------
1028###------------------------------------------------------------------
1029###-------- START: Make/Check/Print CSV and matrix matching ---------
1030###------------------------------------------------------------------
1031###------------------------------------------------------------------
1032
1033def MakeDict_CSV_to_grid(grid_subj, csv_subj, file_match):
1034
1035    if IsFirstUncommentedSection_Multicol(file_match):
1036        CtG = MakeDict_CSV_to_grid_FILE(grid_subj, csv_subj,file_match)
1037    else:
1038        CtG = MakeDict_CSV_to_grid_NOFILE(grid_subj, csv_subj)
1039    return CtG
1040
1041
1042#---------------------------------------------------------------------
1043
1044def IsFirstUncommentedSection_Multicol(fname):
1045    '''Check all lines of input to see if there's more than one
1046    column present.'''
1047    tt = ReturnFirstUncommentedSection_ofReadlines(fname)
1048    if tt:
1049        for i in range(len(tt)):
1050            if len(tt[i]) > 1:
1051                return 1
1052    return 0
1053
1054#---------------------------------------------------------------------
1055
1056def ReturnFirstUncommentedSection_ofReadlines(fname):
1057
1058    if not(fname):
1059        return []
1060
1061    fff = open(fname,'r')
1062    x = fff.readlines()
1063    fff.close()
1064
1065    N = len(x)
1066
1067    # ignore preliminary comments
1068    start = 0
1069    while x[start].split()[0] == '#':
1070        start+=1
1071
1072    temp = []
1073    for i in range(start,N):
1074        y = x[i].split()
1075        if y[0] == '#': # stop if you hit a comment
1076            break
1077        else:
1078            temp.append(y)
1079    return temp
1080
1081def MakeDict_CSV_to_grid_FILE(grid_subj, csv_subj,file_match):
1082
1083    print("++ Start matching of CSV and matrix subject data (with file)...")
1084
1085    temp = ReturnFirstUncommentedSection_ofReadlines(file_match)
1086    print("LIST IS:")
1087    Nsub = len(temp)
1088    print("++ Looks like there are %d subjects in matching file: '%s'."  \
1089     % (Nsub, file_match ))
1090
1091    CtG = {}
1092    for i in range(Nsub):
1093        CtG[ temp[i][1] ] = temp[i][0]
1094        print(temp[i][1], temp[i][0])
1095
1096    print("++ ... done matching CSV and matrix subject data.")
1097
1098    return CtG
1099
1100#------------------------------------------------------------------------
1101
1102def MakeDict_CSV_to_grid_NOFILE(grid_subj, csv_subj):
1103
1104    print("++ Start matching of CSV and matrix subject data (without file)...")
1105
1106    CtG = {}
1107    for a in csv_subj:
1108        for b in grid_subj:
1109            if b.find(a) != -1:
1110                if a in CtG:
1111                    print('Error: OVERWRITING MATCHES! Need uniquity.')
1112                    #return {}
1113                else:
1114                    CtG[a]=b
1115
1116    print("++ ... done matching CSV and matrix subject data.")
1117
1118    return CtG
1119
1120###--------------------------------------------------------------------
1121
1122def CheckDict_CSV_to_grid(CtG, grid_subj, csv_subj, csv_data):
1123
1124    print("++ Start checking dictionary of CSV and matrix subject data...")
1125    Nsub0 = len(csv_subj)
1126
1127    csv_subj_new = []
1128    csv_data_new = []
1129
1130    BADNESS = 0
1131    # check if all csv sub are used
1132    for i in range( Nsub0 ):
1133        a = csv_subj[i]
1134        if a in CtG:
1135            csv_subj_new.append(a)
1136            csv_data_new.append(csv_data[i])
1137        else:
1138            print('*+ Warning: CSV subj %s does not have a matrix match!' \
1139             % a)
1140            BADNESS+=1
1141    # check if all grid matr subs are used:
1142    y = list(CtG.values())
1143    for b in grid_subj:
1144        if not( y.__contains__(b) ):
1145            print('*+ Warning: matrix subj %s does not have a CSV subj match!' \
1146             % b)
1147            #BADNESS+=1
1148
1149#    if BADNESS :
1150#        print "++ Possibly bad CSV<->matrix matching: ",
1151#        print "either incomplete or nonunique."
1152#        print "\tBut perhaps you are purposefully selecting subset(s)?"
1153#    else:
1154#        print "++ Successful matching: each CSV entry had single matrix match."
1155
1156    print("++ ... done checking dictionary.")
1157
1158    return csv_data_new, csv_subj_new
1159
1160###----------------------------------------------------------------------
1161
1162def Write_CSVandMatrix_log(PREFIX,             \
1163                              CtG,             \
1164                              ROIlist,         \
1165                              PARlist,         \
1166                              ARGstr):
1167    '''Printout file of what I think the CSV and matrix file matches
1168    are, so user can doublecheck.  Output file is called
1169    'PREFIX_MVM_var.log'.'''
1170
1171    fname = PREFIX+MVM_matchlog_postfix
1172    fff = open(fname,'w')
1173    x = list(CtG.keys())
1174    x.sort()
1175
1176    fff.write("# %s :\n# %s\n" % (LOG_LABEL_command, ARGstr) )
1177
1178    # CSV and matrix matching
1179    fff.write("# %-50s\t%s\n" % (LOG_LABEL_colmatr, LOG_LABEL_colsubj))
1180    for i in x:
1181        fff.write("%-52s\t%s\n" % (CtG[i], i) )
1182
1183    # list of ROIs
1184    fff.write("# %s\n" % (LOG_LABEL_roilist))
1185    for i in ROIlist:
1186        fff.write("%s\n" % (i) )
1187
1188    # list of ROIs
1189    fff.write("# %s\n" % (LOG_LABEL_parlist))
1190    for i in PARlist:
1191        fff.write("%s\n" % (i) )
1192
1193    fff.close()
1194
1195#    print "\tYou can check file '%s' for CSV<->matrix matching log." % fname
1196    return 1
1197
1198###------------------------------------------------------------------
1199###------------------------------------------------------------------
1200###--------- STOP: Make/Check/Print CSV and matrix matching ---------
1201###------------------------------------------------------------------
1202###------------------------------------------------------------------
1203
1204
1205###------------------------------------------------------------------
1206###------------------------------------------------------------------
1207###------------ START: Matrix element and set operations ------------
1208###------------------------------------------------------------------
1209###------------------------------------------------------------------
1210
1211def ReadSection_and_Column(file_list, colnum):
1212
1213    temp = ReturnFirstUncommentedSection_ofReadlines(file_list)
1214    N = len(temp)
1215    list_of_subj = []
1216    for i in range(N):
1217        if len(temp[i]) > colnum:
1218            list_of_subj.append(temp[i][colnum])
1219
1220    if not( list_of_subj):
1221        print("*+ No output column number %d in file %s." % \
1222         (colnum, file_list))
1223
1224    return list_of_subj
1225
1226def GroupListOfFullMatrs(file_IDer, file_listmatch, colnum):
1227    '''Take file ID argument (probably from user commandline input),
1228    happily read all the grids/matrices, and return:
1229    1) a list of 4-tuples of the data matrices;
1230    2) list of the subjects (mainly for checking)'''
1231
1232    print('++ Starting to find and open matrix files...')
1233    if file_listmatch:
1234        list_of_subj = ReadSection_and_Column(file_listmatch, colnum)
1235
1236#        fff = open(file_listmatch,'r')
1237#        x = fff.readlines()
1238#        fff.close()
1239#        temp = ReturnFirstUncommentedSection_ofReadlines(x)
1240#        N = len(temp)
1241#        list_of_subj = []
1242#        for i in range(N):
1243#            list_of_subj.append(temp[i][1])
1244    elif file_IDer:
1245        list_of_subj = glob(file_IDer)
1246    else:
1247        print("** Error! Cannot read in matrix files.")
1248        sys.exit(4)
1249
1250    All_sub_grid = []
1251    for ff in list_of_subj:
1252        x = LoadInGridOrNetcc(ff)
1253        All_sub_grid.append(x)
1254
1255    if len(All_sub_grid) == 0:
1256        print('** ERROR! No subjects found when looking for group!')
1257        sys.exit(5) #return [], []
1258    else:
1259        print('\tFound %d subjects in the group.' % len(All_sub_grid))
1260
1261
1262    print('++ ... done finding and opening matrix files.')
1263
1264    return All_sub_grid, list_of_subj
1265
1266###------------------------------------------------------------------
1267
1268def Check_Matr_type(LM):
1269
1270    N = len(LM)
1271    counts = np.zeros(3) # GRID: 0;  NETCC: 1;  OTHER:2
1272
1273    for i in range(N):
1274        mpost = LM[i][-5:]
1275        if mpost =='.grid':
1276            counts[0]+=1
1277        elif mpost =='netcc':
1278            counts[1]+=1
1279        else:
1280            counts[2]+=1
1281
1282    if counts[0] == N:
1283        print('++ Looks like all matrices are GRID. Fine.')
1284        return 'GRID'
1285    elif counts[1] == N:
1286        print('++ Looks like all matrices are NETCC. Fine.')
1287        return 'NETCC'
1288    else:
1289        print('*+ Warning: nonuniform or foreign type of matrix files!')
1290        return 'OTHER'
1291
1292    return
1293
1294def FindGroupwiseTargets(All_sub_grid, ftype, ExternLabsOK, UNION=0):
1295    '''Take a list of 4-tuples representing subject data (and a string
1296    of what filetype it is), go through all, and find set of
1297    intersecting matrix elements based on labels.  When done, return
1298    that list of matrix elements, AND a list of the variables.'''
1299
1300    print('++ Going through sets of matrix element ROIs...')
1301    Nsub = len(All_sub_grid)
1302
1303    if Nsub == 0:
1304        print('** ERROR! No subjects found when looking for matrix set!')
1305        return []
1306    else:
1307        print('\tFound %d subjects for finding ROI elements.' % Nsub)
1308
1309    ## For finding the set of ROI elements
1310
1311    # START EDITING HERE WITH USING LABELS
1312    a = GetSet(All_sub_grid[0], ftype, ExternLabsOK)
1313
1314    otxt = '\tThe number of elements in the ROI matrix set is:\n\t   '
1315    #print('\tThe number of elements in the ROI matrix set is:\n\t  ', end=' ')
1316    for i in range(1,Nsub):
1317        otxt+= '{}, '.format(len(a))
1318        #print('%d,' % len(a), end=' ')
1319        if UNION:
1320            # updating with the *union* of regions
1321            a.update(GetSet(All_sub_grid[i], ftype, ExternLabsOK))
1322        else:
1323            a.intersection_update(GetSet(All_sub_grid[i], ftype, ExternLabsOK))
1324    otxt+= '{}.'.format(len(a))
1325    print(otxt)
1326    #print('%d.' % len(a))
1327
1328    temp = list(a)
1329    temp.sort()
1330    print('\tThe set of ROIs is:')
1331    for t in temp:
1332        print('\t   %s' % t)
1333
1334    ## For finding the set of parameters
1335    b = set(All_sub_grid[0][1])
1336    for i in range(1,Nsub):
1337        b.intersection_update(set(All_sub_grid[i][1]))
1338    print('\tThe (final) number of parameters in the ROI matrix set is:'
1339          '{}.'.format(len(b)))
1340
1341    temp2 = list(b)
1342    temp2.sort()
1343    print('\tThe set of parameters is (alphabetically):')
1344    #print('\t ', end=' ')
1345    otxt2 = '\t  '
1346    for t in temp2:
1347        otxt2+= ' {}  '.format(t)
1348        #print(' %s ' % t, end=' ')
1349    #print('')
1350    print(otxt2)
1351    print('++ ... done getting set of ROIs.')
1352    return temp, temp2  #a, b
1353
1354
1355
1356####### [PT: Sep 1, 2020] the original version of this func after
1357####### 2to3; going back to replace the parts with end=' ' (see
1358####### above), for backward compatability.
1359#def FindGroupwiseTargets(All_sub_grid, ftype, ExternLabsOK, UNION=0):
1360#    '''Take a list of 4-tuples representing subject data (and a string
1361#    of what filetype it is), go through all, and find set of
1362#    intersecting matrix elements based on labels.  When done, return
1363#    that list of matrix elements, AND a list of the variables.'''
1364#
1365#    print('++ Going through sets of matrix element ROIs...')
1366#    Nsub = len(All_sub_grid)
1367#
1368#    if Nsub == 0:
1369#        print('** ERROR! No subjects found when looking for matrix set!')
1370#        return []
1371#    else:
1372#        print('\tFound %d subjects for finding ROI elements.' % Nsub)
1373#
1374#    ## For finding the set of ROI elements
1375#
1376#    # START EDITING HERE WITH USING LABELS
1377#    a = GetSet(All_sub_grid[0], ftype, ExternLabsOK)
1378#    print('\tThe number of elements in the ROI matrix set is:\n\t  ', end=' ')
1379#    for i in range(1,Nsub):
1380#        print('%d,' % len(a), end=' ')
1381#        if UNION:
1382#            # updating with the *union* of regions
1383#            a.update(GetSet(All_sub_grid[i], ftype, ExternLabsOK))
1384#        else:
1385#            a.intersection_update(GetSet(All_sub_grid[i], ftype, ExternLabsOK))
1386#    print('%d.' % len(a))
1387#
1388#    temp = list(a)
1389#    temp.sort()
1390#    print('\tThe set of ROIs is:')
1391#    for t in temp:
1392#        print('\t   %s' % t)
1393#
1394#    ## For finding the set of parameters
1395#    b = set(All_sub_grid[0][1])
1396#    for i in range(1,Nsub):
1397#        b.intersection_update(set(All_sub_grid[i][1]))
1398#    print('\tThe (final) number of parameters in the ROI matrix set is:'
1399#          '{}.'.format(len(b)))
1400#
1401#    temp2 = list(b)
1402#    temp2.sort()
1403#    print('\tThe set of parameters is (alphabetically):')
1404#    print('\t ', end=' ')
1405#    for t in temp2:
1406#        print(' %s ' % t, end=' ')
1407#    print('')
1408#    print('++ ... done getting set of ROIs.')
1409#    return temp, temp2  #a, b
1410
1411
1412###------------------------------------------------------------------
1413
1414def GetSet(x, ftype, ExternLabsOK):
1415    '''Input one of the 4-tuples returned by LoadInGridOrNetcc.
1416    Output is a 'set' of nonempty target-connecting elements.
1417    FOR NOW: this is specifically for a GRID file, where there's
1418    a nice 'NT' set of ints.'''
1419
1420    if not(ftype=='GRID' or ftype=='NETCC'):
1421        print("** ERROR: unrecognized matrix type. Don't know what to do.")
1422        sys.exit(34)
1423
1424    # default: use the boring old numbers themselves as ROI labels,
1425    # just zero-padding them.
1426    # switch: if there's an externally input list of labels, such as
1427    # from a labeltable, then use those.
1428    UseL = 3
1429    if (len(x) >= 4) and ExternLabsOK:
1430        if x[4]:
1431            UseL = 4
1432
1433
1434    #if ftype=='GRID': # later, grid vs netcc
1435    Ntar = len(x[UseL])
1436    roi_set = set()  # empty set
1437    # should be UHT, nondiagonal selector
1438    for i in range(Ntar):
1439        for j in range(i+1,Ntar):
1440            # NETCC: all UHT elements
1441            # GRID: UHT elements, with nonzero 'NT' values
1442            if ((ftype=='GRID') and x[0][x[2][GridCheckVar]][i,j]) \
1443                 or (ftype=='NETCC'):
1444                roi_set.add(ElementNameMaker(x[UseL][i], x[UseL][j],UseL-3))
1445
1446    return roi_set
1447
1448###------------------------------------------------------------------
1449
1450def ElementNameMaker(A, B, AS_IS):
1451    '''Current format for turning integer ROI labels A and B into ROI
1452    element name.  If AS_IS=0, then zero pad.  Else: use names as they
1453    is.'''
1454    # above, if UseL=3, then AS_IS=0 --> zero pad.
1455
1456    # paranoia
1457    if (type(A) != str) or (type(B) != str):
1458        print("** ERROR: problem in element name-maker. Help!")
1459        sys.exit(43)
1460
1461    # first case is original style
1462    # second case is if data labels are used
1463    if not(AS_IS) and A.isdigit() and B.isdigit():
1464        aint = int(A)
1465        bint = int(B)
1466
1467        if aint > 999:
1468            print("*+ POSSIBLE error in element maker: big label %d > 999!" % A)
1469            print("\tUgliness may ensue!")
1470        if bint > 999:
1471            print("*+ POSSIBLE error in element maker: big label %d > 999!" % B)
1472            print("\tUgliness may ensue!")
1473
1474        x = str("%03d" % (aint))
1475        y = str("%03d" % (bint))
1476
1477    else:
1478        x = str(A)
1479        y = str(B)
1480
1481    return x+ConnNames+y
1482
1483#### Old version: used to treat ROI labels of elements as ints;
1484#### new possibility that they will be strings now-- so treat all as str
1485#def ElementNameMaker(A, B):
1486#    '''Current format for turning integer ROI labels into
1487#    ROI element name.'''
1488#    # paranoia
1489#    if (type(A) != int) or (type(A) != int):
1490#        print "ERROR in element maker: why not 'int' element labels?"
1491#    if A > 999:
1492#        print "POSSIBLE error in element maker: big label %d > 999!" % A
1493#    if B > 999:
1494#        print "POSSIBLE error in element maker: big label %d > 999!" % B
1495#
1496#    x = str("%03d" % (A))
1497#    y = str("%03d" % (B))
1498#    return x+'_'+y
1499
1500
1501
1502###------------------------------------------------------------------
1503###------------------------------------------------------------------
1504###------------- STOP: Matrix element and set operations ------------
1505###------------------------------------------------------------------
1506###------------------------------------------------------------------
1507
1508
1509###------------------------------------------------------------------
1510###------------------------------------------------------------------
1511###----------------- START: Reading in CSV files --------------------
1512###------------------------------------------------------------------
1513###------------------------------------------------------------------
1514
1515# [PT: Aug. 31, 2020] this had to be updated a bit in the 2to3 shift
1516def LoadInCSV(fname):
1517    ''' open up a file, and return a 2-tuple: (raw) data set and header.'''
1518
1519    print('++ Opening and reading CSV file...')
1520
1521    data = []
1522    CSVheader = []
1523
1524    idx = 0
1525
1526    # [PT: Sep 1, 2020] In order to have backwards compatability with
1527    # Python2, the """newline=''""" was removed from this open
1528    # command.  This doesn't appear to negatively affect Python3
1529    # results, so keeping like this now.
1530    with open(fname) as csvfile :
1531    #with open(fname, newline='') as csvfile :
1532        csvread = csv.reader(csvfile) #, delimiter=' ', quotechar='|')
1533
1534        for row in csvread:
1535            if not(idx) and HeaderRowTrue:
1536                CSVheader = row
1537                idx+= 1
1538            else:
1539                data.append(row) #print row
1540
1541    CSVheader = Check_Header_Spaces(CSVheader)
1542
1543    print('++ ... done opening and reading CSV file.')
1544
1545    return data, CSVheader
1546
1547###### [PT: Aug 31, 2020] ... and this is the old Python v2 form of
1548###### the above func
1549#def LoadInCSV(fname):
1550#    ''' open up a file, and return a 2-tuple: (raw) data set and header.'''
1551#
1552#    filein = open(fname, 'rb')
1553#    csvread = csv.reader(filein)
1554#
1555#    print '++ Opening and reading CSV file...'
1556#
1557#    data = []
1558#    CSVheader = []
1559#
1560#    idx = 0
1561#    for row in csvread:
1562#        if not(idx) and HeaderRowTrue:
1563#            CSVheader = row
1564#            idx+= 1
1565#        else:
1566#            data.append(row) #print row
1567#    filein.close()
1568#
1569#    CSVheader = Check_Header_Spaces(CSVheader)
1570#
1571#    print '++ ... done opening and reading CSV file.'
1572#
1573#    return data, CSVheader
1574
1575###------------------------------------------------------------------
1576
1577def Check_Header_Spaces(X):
1578
1579    N = len(X)
1580
1581    for s in range(N):
1582        X[s] = X[s].strip()    # first strip away leading/trailing WS; aug,2015
1583        nn = X[s].rfind(' ')
1584        if nn >= 0:
1585            bleep = str(X[s])  # cheap copy
1586            X[s] = X[s].replace(' ','_')
1587            print("\tWarning: space in header name; replacing: '%s' -> '%s'." \
1588             % (bleep, X[s]))
1589
1590    return X
1591
1592###------------------------------------------------------------------
1593
1594def Choose_IntFloatStr(s):
1595    '''Check and see if the input string 's' represents a number:
1596          -> if it does, then return int (default) or float version;
1597          -> else, return the same string back.
1598       Returns error if the input is not of type 'str'.
1599    '''
1600
1601    if not(type(s) == str):
1602        print("BAD USAGE OF THIS FUNCTION: "
1603              "\tinput argument should be a string, not %s." % type(s))
1604        return s
1605
1606    try:
1607        float(s)
1608        try:
1609            return int(s)
1610        except ValueError:
1611            return float(s)
1612    except:
1613        return s
1614
1615###------------------------------------------------------------------
1616
1617def ConvertCSVfromStr(dat1, head1, NA_WARN):
1618    '''Take the all-string input and header lists in,
1619       and return a copy with ints, floats and strings.'''
1620
1621    print('++ Converting (hopefully appropriate) strings to numbers...')
1622
1623    # careful copying...  need to recursively copy each list
1624    # unattachedly
1625
1626    # Jan,2015 !!!!
1627    #    dat2 = [list(x) for x in dat1] # OLD method
1628    # ---> new method: skip empty lines
1629    FOUND_WS = 0
1630    FOUND_EL = 0
1631    dat2 = []
1632    for x in dat1:
1633        if x:                      # check if empty line
1634            if x[0].split() :      # check if just whitespace
1635                dat2.append(x)
1636            else:
1637                FOUND_WS = 1
1638        else:
1639            FOUND_EL = 1
1640
1641    if FOUND_WS:
1642        print("\t FYI: removed at least one whitespace line from CSV file.")
1643    if FOUND_EL:
1644        print("\t FYI: removed at least one empty line from CSV file.")
1645
1646    head2 = list(head1)
1647
1648    Lx,Ly = np.shape(dat2)
1649    for i in range(Lx):
1650        # usually, have first column be name labels, even if numeric
1651        for j in range(FirstColLabels, Ly):
1652            dat2[i][j] = Choose_IntFloatStr(dat2[i][j])
1653
1654    # initialize
1655    for j in range(Ly):
1656        head2[j] = type(dat2[0][j])
1657
1658    for j in range(Ly):
1659        ty0 = head2[j]
1660        for i in range(1,Lx):
1661            ty = type(dat2[i][j])
1662            if not(ty0 == ty):
1663                if ty0 == str :
1664                    print("Odd mixing in types of data in Col {} (={}) "
1665                          "\t-> seeing both {} and {}."
1666                          "".format(j, head1[j], ty0, ty))
1667                elif ty0 == int:
1668                    if ty == float:
1669                        print("\tCSV col {} -> making all floats. {}"
1670                              "".format(head1[j], dat2[i][j]))
1671                        head2[j] = float    # Oct, 2014 fix
1672                        for k in range(Lx):
1673                            if type(dat2[k][j]) == int:
1674                                dat2[k][j] = float(dat2[k][j])
1675                            else:
1676                                "Odd combo..."
1677                    elif (dat2[i][j] == 'NA'):
1678                        if NA_WARN:
1679                        #print "Loc (%d, %d) has value 'NA'." % (i,j)
1680                            print("*+ Warning: 'NA' value in Col %d (='%s') !" \
1681                             % ( j, head1[j] ))
1682                    else:
1683                        print("Odd mixing in types of data in Col {} (={}) "
1684                              "\t-> seeing both {} and {}."
1685                              "".format( j, head1[j], ty0, ty))
1686                elif ty0 == float:
1687                    if ty == int:
1688                        #print "\tin CSV col %d ->" % j,
1689                        #print "making %d float." % dat2[i][j]
1690                        dat2[i][j] = float(dat2[i][j])
1691                    elif dat2[i][j] == 'NA':
1692                        if NA_WARN:
1693                        #print "Loc (%d, %d) has value 'NA'." % (i,j)
1694                            print("*+ Warning: 'NA' value in Col %d (='%s') !" \
1695                             % ( j, head1[j] ))
1696                    else:
1697                        print("Odd mixing in types of data in Col {} (={}) "
1698                              "\t-> seeing both {} and {}."
1699                              "".format( j, head1[j], ty0, ty))
1700                else:
1701                    print("Extra odd: non str, int or float type? %s" % ty0)
1702
1703    print('++ ... done with CSV str->numeric conversions.')
1704
1705    return dat2, head2
1706
1707
1708###------------------------------------------------------------------
1709###------------------------------------------------------------------
1710###----------------- STOP: Reading in CSV files ---------------------
1711###------------------------------------------------------------------
1712###------------------------------------------------------------------
1713
1714
1715###------------------------------------------------------------------
1716###------------------------------------------------------------------
1717###--------------START: Reading in GRID/NETCC files -----------------
1718###------------------------------------------------------------------
1719###------------------------------------------------------------------
1720
1721def LoadInGridOrNetcc(fname):
1722    ''' Input the grid file name, and return:
1723    1)  a list of data [NMAT, NROI, NROI];
1724    2)  a list of labels [NMAT];
1725    3)  a supplementary dictionary for each calling use;
1726    4)  and a list of (int) ROI labels (which may not be consecutive).'''
1727
1728    x = ReadInGridOrNetcc(fname)
1729    y1,y2,y3,y4,y5 = SepHeaderFile(x, fname)
1730    return y1,y2,y3,y4,y5
1731
1732###------------------------------------------------------------------
1733
1734def ReadInGridOrNetcc(fname):
1735    ''' open, read file, return raw/unsplit output.'''
1736    fff = open(fname,'r')
1737    x = fff.readlines()
1738    fff.close()
1739    return x
1740
1741###------------------------------------------------------------------
1742
1743def SepHeaderFile(RawX, fname):
1744    ''' Take in the data and return:
1745    1)  a list of data [NMAT, NROI, NROI];
1746    2)  a list of labels [NMAT];
1747    3)  a supplementary dictionary for each calling use;
1748    4)  and a list of (int) ROI labels (which may not be consecutive).'''
1749
1750    Nroi,Nmat,ListLabels,ReadNext,ROI_str_labs = \
1751     HeaderInfo(RawX[:NLinesGridHead]) # header
1752
1753    y = RawX[ReadNext:]  # info grids
1754
1755    y = RemoveEmptyWhitespaceFromReadlines(y, fname)
1756
1757    Ly = len(y)
1758
1759    if not( Ly == ((Nroi+1) * Nmat) ):
1760        print("ERROR reading grid file!")
1761        print("Number of non-header lines don't match Nmatrices (%d)" % (Nmat))
1762        return [0,0]
1763
1764    OUT_data = []
1765    OUT_labs = []
1766    OUT_dlab = {}
1767    ctr = 0
1768    for i in range(Nmat):
1769        OUT_labs.append(y[ctr].split()[1])
1770        OUT_dlab[OUT_labs[i]]=i
1771        ctr+=1
1772        if (OUT_labs[i] == 'NT'):
1773            temp = np.zeros((Nroi,Nroi),dtype=int)
1774            for j in range(Nroi):
1775                temp[j,:] = [int(xx) for xx in y[ctr].split()]
1776                ctr+=1
1777        else:
1778            temp = np.zeros((Nroi,Nroi))
1779            for j in range(Nroi):
1780                temp[j,:] = [float(xx) for xx in y[ctr].split()]
1781                ctr+=1
1782
1783        OUT_data.append(np.array(temp))
1784    return OUT_data, OUT_labs, OUT_dlab,ListLabels,ROI_str_labs
1785
1786###------------------------------------------------------------------
1787
1788def RemoveEmptyWhitespaceFromReadlines(x, fname):
1789
1790    N = len(x)
1791    whitespace = 0
1792    for i in range(1,N+1,1):
1793        if x[-i].split() == []:
1794            whitespace = i
1795        else:
1796            break
1797
1798    if whitespace:
1799        print("\tNB: ignored %d lines of whitespace from the end of '%s'." \
1800     % ( whitespace, fname ))
1801        return x[:-whitespace]
1802    else:
1803        return x
1804
1805###------------------------------------------------------------------
1806
1807def HeaderInfo(RawX):
1808    ''' Read in necessary numbers from first few lines. '''
1809    if not(RawX[0].split()[0] == '#'):
1810        print("** ERROR: the file format doesn't look modern!")
1811        print("**\t-> run Grid_Modernizer?")
1812        sys.exit(52)
1813
1814    Nroi = int(RawX[0].split()[1])
1815    if Nroi <=0:
1816        print("ERROR reading header! Number of ROIs is: %d" % Nroi)
1817        sys.exit(53)
1818
1819    Nmat = int(RawX[1].split()[1])
1820    if Nmat <=0:
1821        print("ERROR reading header! Number of Matrices is: %d" % Nmat)
1822        sys.exit(54)
1823
1824    # Sept 2014: might have labels here.
1825    # defaults
1826    #HAVE_LABS = 0
1827    listreadline = 2
1828    ReadNext = 3
1829    ROI_str_labs = []
1830    # check about changing if LABELS are input
1831    temp = RawX[2].split()
1832    if len(temp) > 1:
1833        if temp[1] == HEADER_Labels:
1834            #HAVE_LABS = 1
1835            listreadline = 4
1836            ReadNext = 5
1837            ROI_str_labs = RawX[3].split()
1838            ll = len(ROI_str_labs)
1839            if not( ll == Nroi ):
1840                print("ERROR reading header!")
1841                print("  Number of ROIs (%d) doesn't match stringnames (%d)" % \
1842                 (Nroi,ll))
1843                sys.exit(55)
1844
1845    #if HAVE_LABS :
1846    #    print "++ Have ROI LABELS: reading."
1847
1848    ListLabels=RawX[listreadline].split()
1849    ll = len(ListLabels)
1850    if not( ll == Nroi ):
1851        print("ERROR reading header!")
1852        print("  Number of ROIs (%d) doesn't match Labels (%d)" % (Nroi,ll))
1853        sys.exit(55)
1854
1855    ### Don't do this--> may have non-numeric labels
1856#    for i in range(ll):
1857#        ListLabels[i] = int(ListLabels[i])
1858#        if ListLabels[i] <=0:
1859#            print "ERROR reading header! ListLabel[%d]=%d." % \
1860#            (i,ListLabels[i])
1861#            sys.exit(56)
1862
1863    return Nroi, Nmat, ListLabels, ReadNext, ROI_str_labs
1864
1865###------------------------------------------------------------------
1866###------------------------------------------------------------------
1867###-------------- STOP: Reading in GRID/NETCC files -----------------
1868###------------------------------------------------------------------
1869###------------------------------------------------------------------
1870
1871
1872
1873###------------------------------------------------------------------
1874###------------------------------------------------------------------
1875###---------- START: Modernizing/rewriting info into better ---------
1876###------------------------------------------------------------------
1877###------------------------------------------------------------------
1878
1879
1880def OldFash_Grid_modernize(fname, outname):
1881
1882    x = ReadInGridOrNetcc(fname)
1883    y = mod_func_break(x, fname, outname)
1884
1885    return y
1886
1887###------------------------------------------------------------------
1888
1889def mod_func_break(X, fname, outname):
1890
1891    lnum = 4 # starting line number for reading as of Sept 2013
1892
1893    # 1)
1894    Nroi = int(X[0].split()[0])
1895
1896    # 2)
1897    ROI_LABS = X[2].split()
1898
1899    # 3)
1900    Npar = len(OldFash_parnames)
1901    Y = np.zeros((Npar,Nroi,Nroi))
1902
1903    for i in range( Npar ):
1904        k = 0
1905        for j in range(lnum,lnum+Nroi):
1906            Y[i,k,:] = np.array(X[j].split(),dtype=np.float32)
1907            k+=1
1908        lnum = j+2
1909
1910    z = mod_func_write(Nroi, ROI_LABS, Y, fname, outname)
1911
1912    return Y, z
1913
1914###------------------------------------------------------------------
1915
1916def DefaultNamingOutGrid(list_all):
1917
1918    out = []
1919
1920    for x in list_all:
1921        if not(x[-5:] == '.grid'):
1922            print("ERROR-- doesn't look like you're modernizing a *.grid file!")
1923
1924        out.append(x[:-5]+'_MOD'+x[-5:])
1925
1926    return out
1927
1928
1929def mod_func_write(Nroi, ROI_LABS, Y, fname, outname):
1930
1931    Ngrid=np.shape(Y)[0]
1932    if not(Ngrid == len(OldFash_parnames)):
1933        print("ERROR-- doesn't look like you have the right number of")
1934        print("\tmatrices in the *.grid file!")
1935
1936    f = open(outname, 'w')
1937    f.write('# %d # %s\n' % (Nroi, HEADER_Nroi))
1938    f.write('# %d # %s\n' % (Ngrid, HEADER_Ngrid))
1939
1940    #f.write('# %s \n' % HEADER_Labels)
1941    #for roi in ROI_LABS:
1942    #    tt = "Funcky%dFresh" % int(roi)
1943    #    f.write("%12s\t" % tt)
1944    #f.write('\n')
1945
1946    for roi in ROI_LABS:
1947        f.write("%12s\t" % roi)
1948    f.write('\n')
1949
1950    for i in range(Ngrid):
1951        f.write('# %s\n' % OldFash_parnames[i])
1952        for j in range(Nroi):
1953            for k in range(Nroi):
1954                if OldFash_parnames[i]=='NT':
1955                    f.write('%12d\t' % Y[i][j][k])
1956                else:
1957                    f.write('%12e\t' % Y[i][j][k])
1958            f.write('\n')
1959
1960    f.close()
1961
1962    return (1)
1963
1964###------------------------------------------------------------------
1965###------------------------------------------------------------------
1966###------------ STOP: Modernizing/rewriting info into better ---------
1967###------------------------------------------------------------------
1968###------------------------------------------------------------------
1969
1970
1971
1972
1973
1974### -----------------------------------------------------------------
1975### -----------------------------------------------------------------
1976
1977