1#!/usr/local/bin/python3.8 2 3######################################################################## 4## 05/2018 Justin Rajendra 5## wrapper for Gang's Bayesian Group Analysis 6 7## system libraries 8import sys, os, glob, subprocess, re, argparse, csv, signal, textwrap, shutil 9 10## location and name of the R program that is run 11R_script = sys.path[0]+'/BayesianGroupAna.R' 12 13## check for valid integers 14def int_gt_0(v): 15 try: 16 vi = int(v) 17 if vi >= 1: 18 return v 19 else: 20 raise argparse.ArgumentTypeError("%r is not an integer >= 1."%(v,)) 21 except: 22 raise argparse.ArgumentTypeError("%r is not an integer >= 1."%(v,)) 23 24def zero2one(p): 25 try: 26 float(p) 27 except: 28 raise argparse.ArgumentTypeError("%r is not positive value < 1."%(p,)) 29 if float(p) < 0.0 or float(p) > 1.0: 30 raise argparse.ArgumentTypeError("%r is not positive value < 1."%(p,)) 31 return p 32 33## check for rectiness of mvm table 34def data_is_rect(mdata): 35 if mdata == None: return 1 36 if len(mdata) == 0: return 1 37 rlen = len(mdata[0]) 38 for row in mdata: 39 if len(row) != rlen: return 0 40 return 1 41 42######################################################################## 43## parse command line arguments / build help 44 45## make parser with help 46parser = argparse.ArgumentParser(prog=str(sys.argv[0]), 47 formatter_class=argparse.RawDescriptionHelpFormatter, 48 description=textwrap.dedent('''\ 49------------------------------------------ 50Overview ~1~ 51 52 This program conducts Bayesian Group Analysis (BGA) on a list 53 (e.g., 6 or more) of regions of interest (ROIs) as laid out in Chen et al. 54 (2018, https://www.biorxiv.org/content/early/2018/02/20/238998). 55 Compared to the conventional univariate GLM in which each voxel or ROI is 56 considered autonomous and analyzed independently, BGA pools and shares the 57 information across the ROIs in a multilevel system. It is the 58 probability of incorrect sign, instead of false positive rate that is 59 controlled. In other words, there is only one BGA model that incorporates 60 the data from all ROIs. 61 62 This will explore the effect of X on Y at each ROI. The computation may 63 take a few minutes or more depending on the amount of input data and 64 model complexity. The final inferences are conducted through the 65 posterior distribution or quantile intervals for each effect that are 66 provided in a table in the output. A boxplot can also be generated if 67 requested with -plot. 68 69 The computation requires that the R package "brms" be installed 70 (e.g., through rPkgsInstall). 71 More info on the brms package can be found here: 72 https://CRAN.R-project.org/package=brms 73 And the brms reference manual is here: 74 https://cran.r-project.org/web/packages/brms/brms.pdf 75 76Details ~1~ 77 78 Similar to 3dMVM and 3dLME, a data table should be created containing 79 the input data and relevant variables (with at least 3 columns: subject 80 labels, ROI labels, response variable values). 81 82 The -dataTable should be formated as follows: 83 84 Subj ROI some_y some_x other_x 85 S001 roi1 0.12 0.056 0.356 86 S001 roi2 0.65 0.232 0.231 87 S002 roi1 0.14 0.456 0.856 88 S002 roi2 0.64 0.432 0.431 89 ... 90 91 The Subj and ROI columns must be included with the exact spelling!! 92 If there are no x variables, only the intercept will be calculated. 93 94Outputs ~1~ 95 96 Given -prefix is "gangBGA" and -x is "some_x", the default outputs are the 97 following files: 98 99 gangBGA_summary.txt: 100 Summary of the brmsfit object from R. 101 102 gangBGA_rhats.csv: 103 rhats for each effect and x variable combination. 104 105 gangBGA_Intercept_table.csv: 106 Table with the MedianEst, StdDev, 2.50%, 5%, 50%, 95%, and 97.50% 107 of each ROI for the Intercept term. 108 109 gangBGA_some_x_table.csv: 110 The same table as the Intercept but for the some_x variable. 111 112Caveats ~1~ 113 114 All x variables are centered by default. 115 116 The boxplot with -plot is not a standard boxplot. 117 It is a plot of the 2.50%, 5%, 50%, 95%, 97.50% percentiles. 118 The coloring of the boxes is determined by where the zero line crosses the 119 box and whiskers. 120 White: The zero line crosses the main box (between 5% and 95%). 121 Purple: The zero line crosses between the whiskers and the main box. 122 (2.50% to 5%) OR (95% to 97.50%) 123 Red: The zero line does not cross the box or the whiskers. 124 125 Additional plot types for -more_plots include (not sure all of these work): 126 hist dens hist_by_chain dens_overlay violin intervalsareas 127 acf acf_bar trace trace_highlight rhat rhat_hist neff neff_hist 128 nuts_acceptance nuts_divergence nuts_stepsize nuts_treedepth 129 nuts_energy 130 131 Tables and plots will be created for the intercept and all specified x 132 variables separately. So there may be a lot of output. 133 134Examples ~1~ 135 136 Minimum requirement only calculates the intercept (may not be useful). 137 BayesianGroupAna.py -dataTable my_roi_data.txt -y zscore 138 139 More useful. Calculates 2 x variables and saves out some plots. 140 BayesianGroupAna.py -dataTable my_roi_data.txt \\ 141 -prefix dock_of_the_bayes \\ 142 -y zscore -x some_x other_x \\ 143 -chains 4 -iterations 1000 \\ 144 -plot -more_plots rhat violin 145 146------------------------------------------ 147 148Options ~1~ 149 150 '''),epilog=textwrap.dedent('''\ 151------------------------------------------ 152Justin Rajendra circa 05/2018 1534 Gang Box... 154Keep on keeping on! 155------------------------------------------ 156 ''')) 157 158parser._optionals.title = 'Optional arguments' 159required = parser.add_argument_group('Required arguments') 160 161parser._action_groups.reverse() 162 163## required 164required.add_argument('-dataTable',type=str,help='Input text file.', 165 required=True) 166required.add_argument('-y',type=str,help='Column name for the y variable.', 167 required=True,metavar='VAR') 168 169## optional 170parser.add_argument('-help',action='help',help='Show this help.') 171parser.add_argument('-prefix',type=str,default="BayesOut", 172 help="Name of the output file.") 173parser.add_argument('-x',type=str,default='1',nargs='+',metavar='VAR', 174 help=('Column name for the x variables. '+ 175 'If not specified, only the intercept will be '+ 176 'added.')) 177parser.add_argument('-no_center',action='store_true',default=False, 178 help=("Disable centering on the x variables. "+ 179 "Maybe useful if you centered manually.")) 180parser.add_argument('-iterations',type=int_gt_0,default=1000,metavar='ITER', 181 help=("Number of total iterations per chain including"+ 182 " warmup. Default [1000]")) 183parser.add_argument('-chains',type=int_gt_0,default=4, 184 help="Number of Markov chains. Default [4]") 185parser.add_argument('-control_list',type=str,default="",metavar="LIST", 186 help=("Comma separated list of control parameters to"+ 187 " pass to the brm function. (example:"+ 188 " 'adapt_delta=0.99,max_treedepth=20')."+ 189 " Default is the brm function defaults")) 190parser.add_argument('-plot',action='store_true',default=False, 191 help="Output box, fit, and posterior prediction plots.") 192parser.add_argument('-more_plots',type=str,default='',nargs='+', 193 metavar='TYPE',help=('Output "stanplots" given different'+ 194 ' types of plot names.')) 195parser.add_argument('-RData',action='store_true',default=False, 196 help="Save the R session workspace and data.") 197parser.add_argument('-seed',type=int_gt_0,default=1234, 198 help="Seed to generate random number. Default [1234]") 199parser.add_argument('-overwrite',action='store_true',default=False, 200 help="Overwrites the output files.") 201 202## if nothing, show help 203if len(sys.argv) == 1: 204 parser.print_help() 205 sys.exit(1) 206 207## do the parsing 208args = parser.parse_args() 209 210######################################################################## 211## collect the arguments (need strings) and validate 212InFile = args.dataTable 213y_var = args.y 214x_var = args.x 215OutFile = args.prefix 216NumIter = str(args.iterations) 217NumChains = str(args.chains) 218control_list = args.control_list 219InSeed = str(args.seed) 220StanPlots = args.more_plots 221overwrite = args.overwrite 222 223## make caps for R code (check for plots) 224plot = "TRUE" if args.plot or len(StanPlots) > 0 else "FALSE" 225NoCenter = "TRUE" if args.no_center else "FALSE" 226RData = "TRUE" if args.RData else "FALSE" 227 228######################################################################## 229## validate 230 231## check for output rhats 232if os.path.isfile(OutFile+"_rhats.csv") and not overwrite: 233 print("\nERROR: Output file ("+OutFile+"_rhats.csv) exists!!\n") 234 sys.exit(1) 235 236## check for input file 237if not os.path.isfile(InFile): 238 print("\nERROR: Input file ("+InFile+") not found!!\n") 239 sys.exit(1) 240 241## make sure the table is valid 242test_table = [] 243with open(InFile) as fin: 244 rows = (re.split('[ \t,;]+', line) for line in fin) ## any delimiter? 245 for row in rows: 246 test_table.append(row) 247if not data_is_rect(test_table): 248 print("\nERROR: Input file ("+InFile+") is not a rectangular table!!\n") 249 sys.exit(1) 250 251## check plotting options 252## standard plot choices 253stanplot_types = ['hist','dens','hist_by_chain','dens_overlay','violin', 254 'intervals','areas','acf','acf_bar','trace', 255 'trace_highlight','rhat','rhat_hist','neff', 256 'neff_hist','nuts_acceptance','nuts_divergence', 257 'nuts_stepsize','nuts_treedepth',' nuts_energy'] 258 259## do not work yet: 'scatter', 260 261## check the plotting types 262if plot == "TRUE": 263 print("\nPlotting: \nboxes\nfit\nPostPred") 264 if set(StanPlots) < set(stanplot_types): 265 for s in StanPlots: print(s) 266 print("") 267 StanPlots = " ".join(StanPlots) 268 else: 269 print("") 270 for p in StanPlots: 271 if p not in stanplot_types: 272 print("ERROR: "+p+" is not a valid plot type!!!") 273 print("") 274 sys.exit(1) 275 276######################################################################## 277## check the variable names 278 279print("\nVariables in the table are:\n") 280var_names = [item.strip() for item in test_table[0]] 281for var in var_names: print(var) 282print("") 283 284if "Subj" not in var_names: 285 print("\nERROR: Missing 'Subj' column!! Spelling counts!!\n") 286 sys.exit(1) 287if "ROI" not in var_names: 288 print("\nERROR: Missing 'ROI' column!! Spelling counts!!\n") 289 sys.exit(1) 290if y_var not in var_names: 291 print("\nERROR: "+y_var+" is not in the list of variables!! "+ 292 "Spelling counts!!\n") 293 sys.exit(1) 294if x_var != '1': 295 for x in x_var: 296 if x not in var_names: 297 print("\nERROR: "+x+" is not in the list of variables!!"+ 298 " Spelling counts!!\n") 299 sys.exit(1) 300 301## convert x_vars to space separated string 302x_var = " ".join(x_var) 303 304## add intercept if necessary 305if x_var is not "1": 306 x_var = "1 "+x_var 307 308######################################################################## 309## run the R script with arguments 310 311subprocess.call(['Rscript',R_script,InFile,y_var,x_var,OutFile, 312 NumIter,NumChains,control_list,InSeed,os.getcwd(),plot, 313 StanPlots,RData,NoCenter]) 314 315## check output 316if os.path.isfile(OutFile+"_rhats.csv"): 317 print("\nProcessing complete! Output is "+OutFile+"_rhats.csv\n") 318 sys.exit(0) 319else: 320 print("\nERROR: Something failed in R!! There is no output file!\n") 321 sys.exit(1) 322 323 324