1#!/usr/local/bin/python3.8
2
3########################################################################
4## 05/2018 Justin Rajendra
5## wrapper for Gang's Bayesian Group Analysis
6
7## system libraries
8import sys, os, glob, subprocess, re, argparse, csv, signal, textwrap, shutil
9
10## location and name of the R program that is run
11R_script = sys.path[0]+'/BayesianGroupAna.R'
12
13## check for valid integers
14def int_gt_0(v):
15    try:
16        vi = int(v)
17        if vi >= 1:
18            return v
19        else:
20            raise argparse.ArgumentTypeError("%r is not an integer >= 1."%(v,))
21    except:
22        raise argparse.ArgumentTypeError("%r is not an integer >= 1."%(v,))
23
24def zero2one(p):
25    try:
26        float(p)
27    except:
28        raise argparse.ArgumentTypeError("%r is not positive value < 1."%(p,))
29    if float(p) < 0.0 or float(p) > 1.0:
30        raise argparse.ArgumentTypeError("%r is not positive value < 1."%(p,))
31    return p
32
33## check for rectiness of mvm table
34def data_is_rect(mdata):
35    if mdata == None: return 1
36    if len(mdata) == 0: return 1
37    rlen = len(mdata[0])
38    for row in mdata:
39        if len(row) != rlen: return 0
40    return 1
41
42########################################################################
43## parse command line arguments / build help
44
45## make parser with help
46parser = argparse.ArgumentParser(prog=str(sys.argv[0]),
47                                 formatter_class=argparse.RawDescriptionHelpFormatter,
48                                 description=textwrap.dedent('''\
49------------------------------------------
50Overview ~1~
51
52    This program conducts Bayesian Group Analysis (BGA) on a list
53    (e.g., 6 or more) of regions of interest (ROIs) as laid out in Chen et al.
54    (2018, https://www.biorxiv.org/content/early/2018/02/20/238998).
55    Compared to the conventional univariate GLM in which each voxel or ROI is
56    considered autonomous and analyzed independently, BGA pools and shares the
57    information across the ROIs in a multilevel system. It is the
58    probability of incorrect sign, instead of false positive rate that is
59    controlled. In other words, there is only one BGA model that incorporates
60    the data from all ROIs.
61
62    This will explore the effect of X on Y at each ROI. The computation may
63    take a few minutes or more depending on the amount of input data and
64    model complexity. The final inferences are conducted through the
65    posterior distribution or quantile intervals for each effect that are
66    provided in a table in the output. A boxplot can also be generated if
67    requested with -plot.
68
69    The computation requires that the R package "brms" be installed
70    (e.g., through rPkgsInstall).
71    More info on the brms package can be found here:
72        https://CRAN.R-project.org/package=brms
73    And the brms reference manual is here:
74        https://cran.r-project.org/web/packages/brms/brms.pdf
75
76Details ~1~
77
78    Similar to 3dMVM and 3dLME, a data table  should be created containing
79    the input data and relevant variables (with at least 3 columns: subject
80    labels, ROI labels, response variable values).
81
82    The -dataTable should be formated as follows:
83
84        Subj  ROI   some_y  some_x other_x
85        S001  roi1  0.12    0.056  0.356
86        S001  roi2  0.65    0.232  0.231
87        S002  roi1  0.14    0.456  0.856
88        S002  roi2  0.64    0.432  0.431
89        ...
90
91    The Subj and ROI columns must be included with the exact spelling!!
92    If there are no x variables, only the intercept will be calculated.
93
94Outputs ~1~
95
96    Given -prefix is "gangBGA" and -x is "some_x", the default outputs are the
97    following files:
98
99        gangBGA_summary.txt:
100            Summary of the brmsfit object from R.
101
102        gangBGA_rhats.csv:
103            rhats for each effect and x variable combination.
104
105        gangBGA_Intercept_table.csv:
106            Table with the MedianEst, StdDev, 2.50%, 5%, 50%, 95%, and 97.50%
107            of each ROI for the Intercept term.
108
109        gangBGA_some_x_table.csv:
110            The same table as the Intercept but for the some_x variable.
111
112Caveats ~1~
113
114    All x variables are centered by default.
115
116    The boxplot with -plot is not a standard boxplot.
117    It is a plot of the 2.50%, 5%, 50%, 95%, 97.50% percentiles.
118    The coloring of the boxes is determined by where the zero line crosses the
119    box and whiskers.
120        White:  The zero line crosses the main box (between 5% and 95%).
121        Purple: The zero line crosses between the whiskers and the main box.
122                (2.50% to 5%) OR (95% to 97.50%)
123        Red:    The zero line does not cross the box or the whiskers.
124
125    Additional plot types for -more_plots include (not sure all of these work):
126        hist dens hist_by_chain dens_overlay violin intervalsareas
127        acf acf_bar trace trace_highlight rhat rhat_hist neff neff_hist
128        nuts_acceptance nuts_divergence nuts_stepsize nuts_treedepth
129        nuts_energy
130
131    Tables and plots will be created for the intercept and all specified x
132    variables separately. So there may be a lot of output.
133
134Examples ~1~
135
136    Minimum requirement only calculates the intercept (may not be useful).
137        BayesianGroupAna.py -dataTable my_roi_data.txt -y zscore
138
139    More useful. Calculates 2 x variables and saves out some plots.
140        BayesianGroupAna.py -dataTable my_roi_data.txt  \\
141                            -prefix dock_of_the_bayes   \\
142                            -y zscore -x some_x other_x \\
143                            -chains 4 -iterations 1000  \\
144                            -plot -more_plots rhat violin
145
146------------------------------------------
147
148Options ~1~
149
150                                 '''),epilog=textwrap.dedent('''\
151------------------------------------------
152Justin Rajendra circa 05/2018
1534 Gang Box...
154Keep on keeping on!
155------------------------------------------
156                                 '''))
157
158parser._optionals.title = 'Optional arguments'
159required = parser.add_argument_group('Required arguments')
160
161parser._action_groups.reverse()
162
163## required
164required.add_argument('-dataTable',type=str,help='Input text file.',
165                      required=True)
166required.add_argument('-y',type=str,help='Column name for the y variable.',
167                      required=True,metavar='VAR')
168
169## optional
170parser.add_argument('-help',action='help',help='Show this help.')
171parser.add_argument('-prefix',type=str,default="BayesOut",
172                    help="Name of the output file.")
173parser.add_argument('-x',type=str,default='1',nargs='+',metavar='VAR',
174                      help=('Column name for the x variables. '+
175                            'If not specified, only the intercept will be '+
176                            'added.'))
177parser.add_argument('-no_center',action='store_true',default=False,
178                    help=("Disable centering on the x variables. "+
179                          "Maybe useful if you centered manually."))
180parser.add_argument('-iterations',type=int_gt_0,default=1000,metavar='ITER',
181                    help=("Number of total iterations per chain including"+
182                          " warmup. Default [1000]"))
183parser.add_argument('-chains',type=int_gt_0,default=4,
184                    help="Number of Markov chains. Default [4]")
185parser.add_argument('-control_list',type=str,default="",metavar="LIST",
186                    help=("Comma separated list of control parameters to"+
187                          " pass to the brm function. (example:"+
188                          " 'adapt_delta=0.99,max_treedepth=20')."+
189                          " Default is the brm function defaults"))
190parser.add_argument('-plot',action='store_true',default=False,
191                    help="Output box, fit, and posterior prediction plots.")
192parser.add_argument('-more_plots',type=str,default='',nargs='+',
193                    metavar='TYPE',help=('Output "stanplots" given different'+
194                                         ' types of plot names.'))
195parser.add_argument('-RData',action='store_true',default=False,
196                    help="Save the R session workspace and data.")
197parser.add_argument('-seed',type=int_gt_0,default=1234,
198                    help="Seed to generate random number. Default [1234]")
199parser.add_argument('-overwrite',action='store_true',default=False,
200                    help="Overwrites the output files.")
201
202## if nothing, show help
203if len(sys.argv) == 1:
204    parser.print_help()
205    sys.exit(1)
206
207## do the parsing
208args = parser.parse_args()
209
210########################################################################
211## collect the arguments (need strings) and validate
212InFile = args.dataTable
213y_var = args.y
214x_var = args.x
215OutFile = args.prefix
216NumIter = str(args.iterations)
217NumChains = str(args.chains)
218control_list = args.control_list
219InSeed = str(args.seed)
220StanPlots = args.more_plots
221overwrite = args.overwrite
222
223## make caps for R code (check for plots)
224plot = "TRUE" if args.plot or len(StanPlots) > 0 else "FALSE"
225NoCenter = "TRUE" if args.no_center else "FALSE"
226RData = "TRUE" if args.RData else "FALSE"
227
228########################################################################
229## validate
230
231## check for output rhats
232if os.path.isfile(OutFile+"_rhats.csv") and not overwrite:
233    print("\nERROR: Output file ("+OutFile+"_rhats.csv) exists!!\n")
234    sys.exit(1)
235
236## check for input file
237if not os.path.isfile(InFile):
238    print("\nERROR: Input file ("+InFile+") not found!!\n")
239    sys.exit(1)
240
241## make sure the table is valid
242test_table = []
243with open(InFile) as fin:
244    rows = (re.split('[ \t,;]+', line) for line in fin)  ## any delimiter?
245    for row in rows:
246        test_table.append(row)
247if not data_is_rect(test_table):
248    print("\nERROR: Input file ("+InFile+") is not a rectangular table!!\n")
249    sys.exit(1)
250
251## check plotting options
252## standard plot choices
253stanplot_types = ['hist','dens','hist_by_chain','dens_overlay','violin',
254                  'intervals','areas','acf','acf_bar','trace',
255                  'trace_highlight','rhat','rhat_hist','neff',
256                  'neff_hist','nuts_acceptance','nuts_divergence',
257                  'nuts_stepsize','nuts_treedepth',' nuts_energy']
258
259## do not work yet: 'scatter',
260
261## check the plotting types
262if plot == "TRUE":
263    print("\nPlotting: \nboxes\nfit\nPostPred")
264    if set(StanPlots) < set(stanplot_types):
265        for s in StanPlots: print(s)
266        print("")
267        StanPlots = " ".join(StanPlots)
268    else:
269        print("")
270        for p in StanPlots:
271            if p not in stanplot_types:
272                print("ERROR: "+p+" is not a valid plot type!!!")
273        print("")
274        sys.exit(1)
275
276########################################################################
277## check the variable names
278
279print("\nVariables in the table are:\n")
280var_names = [item.strip() for item in test_table[0]]
281for var in var_names: print(var)
282print("")
283
284if "Subj" not in var_names:
285    print("\nERROR: Missing 'Subj' column!! Spelling counts!!\n")
286    sys.exit(1)
287if "ROI" not in var_names:
288    print("\nERROR: Missing 'ROI' column!! Spelling counts!!\n")
289    sys.exit(1)
290if y_var not in var_names:
291    print("\nERROR: "+y_var+" is not in the list of variables!! "+
292          "Spelling counts!!\n")
293    sys.exit(1)
294if x_var != '1':
295    for x in x_var:
296        if x not in var_names:
297            print("\nERROR: "+x+" is not in the list of variables!!"+
298                  " Spelling counts!!\n")
299            sys.exit(1)
300
301## convert x_vars to space separated string
302x_var = " ".join(x_var)
303
304## add intercept if necessary
305if x_var is not "1":
306    x_var = "1 "+x_var
307
308########################################################################
309## run the R script with arguments
310
311subprocess.call(['Rscript',R_script,InFile,y_var,x_var,OutFile,
312                 NumIter,NumChains,control_list,InSeed,os.getcwd(),plot,
313                 StanPlots,RData,NoCenter])
314
315## check output
316if os.path.isfile(OutFile+"_rhats.csv"):
317    print("\nProcessing complete! Output is "+OutFile+"_rhats.csv\n")
318    sys.exit(0)
319else:
320    print("\nERROR: Something failed in R!! There is no output file!\n")
321    sys.exit(1)
322
323
324