#!/usr/bin/env python
###############################################################################
#                                                                             #
#    groopm                                                                   #
#                                                                             #
#    Entry point. See groopm/groopm.py for internals                          #
#                                                                             #
#    Copyright (C) Michael Imelfort                                           #
#                                                                             #
###############################################################################
#                                                                             #
#          .d8888b.                                     888b     d888         #
#         d88P  Y88b                                    8888b   d8888         #
#         888    888                                    88888b.d88888         #
#         888        888d888  .d88b.   .d88b.  88888b.  888Y88888P888         #
#         888  88888 888P"   d88""88b d88""88b 888 "88b 888 Y888P 888         #
#         888    888 888     888  888 888  888 888  888 888  Y8P  888         #
#         Y88b  d88P 888     Y88..88P Y88..88P 888 d88P 888   "   888         #
#          "Y8888P88 888      "Y88P"   "Y88P"  88888P"  888       888         #
#                                              888                            #
#                                              888                            #
#                                              888                            #
#                                                                             #
###############################################################################
#                                                                             #
#    This program is free software: you can redistribute it and/or modify     #
#    it under the terms of the GNU General Public License as published by     #
#    the Free Software Foundation, either version 3 of the License, or        #
#    (at your option) any later version.                                      #
#                                                                             #
#    This program is distributed in the hope that it will be useful,          #
#    but WITHOUT ANY WARRANTY; without even the implied warranty of           #
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the             #
#    GNU General Public License for more details.                             #
#                                                                             #
#    You should have received a copy of the GNU General Public License        #
#    along with this program. If not, see <http://www.gnu.org/licenses/>.     #
#                                                                             #
###############################################################################

__author__ = "Michael Imelfort"
__copyright__ = "Copyright 2012-2014"
__credits__ = ["Michael Imelfort"]
__license__ = "GPL3"
__version__ = "0.3.4"
__maintainer__ = "Michael Imelfort"
__email__ = "mike@mikeimelfort.com"
__status__ = "Released"

###############################################################################

import argparse
import sys
import re
from groopm import groopm

###############################################################################
###############################################################################
###############################################################################
###############################################################################

def printHelp():
    """Print the GroopM banner, version and sub-command overview to stdout.

    The module-level ``__version__`` string is interpolated into the banner.
    """
    # Single-argument call form: produces the same output whether ``print``
    # is the Python 2 statement or the Python 3 function.
    print('''\

             ...::: GroopM :::...

     Automagical metagenomic binning FTW!

   -------------------------------------------------------------------------
                                  version: %s
   -------------------------------------------------------------------------

    Typical workflow:

    groopm parse        -> Load the raw data and save to disk
    groopm core         -> Create core bins
    groopm refine       -> Refine these cores a little
    groopm recruit      -> Add more contigs to the cores
    groopm extract      -> Extract binned contigs or reads

    Extra features:

        Utilities:

    groopm merge        -> Merge two or more bins
    groopm split        -> Split a bin into N parts
    groopm delete       -> Delete a bin

        Printing, plotting:

    groopm explore      -> Methods for viewing bin layouts
    groopm plot         -> Plot bins
    groopm highlight    -> Highlight individual bins and apply labels
    groopm flyover      -> Create a movie of your data
    groopm print        -> Print summary statistics

        Import, export:

    groopm dump         -> Write database fields to csv

    USE: groopm OPTION -h to see detailed options
    ''' % __version__)

# Help line for the currently disabled 'import' sub-command:
#    groopm import       -> Import data from csv
if __name__ == '__main__':
    # Script entry point: build one argparse sub-parser per GroopM
    # sub-command, handle the top-level help / version flags by hand, then
    # pass the parsed options to groopm.GroopMOptionsParser.

    # every sub-parser shows option defaults in its -h output
    defaults_formatter = argparse.ArgumentDefaultsHelpFormatter

    #-------------------------------------------------
    # initialise the options parser
    # add_help=False: the top-level -h/--help is handled manually further
    # down so that printHelp() is shown instead of argparse's generated usage
    parser = argparse.ArgumentParser(add_help=False)
    subparsers = parser.add_subparsers(help="--", dest='subparser_name')

    ##################################################
    # Typical workflow
    ##################################################

    #-------------------------------------------------
    # parse raw data and save
    file_parser = subparsers.add_parser('parse',
                                        formatter_class=defaults_formatter,
                                        help='parse raw data and save to disk',
                                        description='Parse raw data and save to disk')
    file_parser.add_argument('dbname', help="name of the database being created")
    file_parser.add_argument('reference', help="fasta file containing bam reference sequences")
    file_parser.add_argument('bamfiles', nargs='+', help="bam files to parse")
    file_parser.add_argument('-t', '--threads', type=int, default=1, help="number of threads to use during BAM parsing")
    file_parser.add_argument('-f', '--force', action="store_true", default=False, help="overwrite existing DB file without prompting")
    file_parser.add_argument('-c', '--cutoff', type=int, default=500, help="cutoff contig size during parsing")

    #-------------------------------------------------
    # load saved data and make bin cores
    core_builder = subparsers.add_parser('core',
                                         formatter_class=defaults_formatter,
                                         help='load saved data and make bin cores',
                                         description='Load saved data and make bin cores')
    core_builder.add_argument('dbname', help="name of the database to open")
    core_builder.add_argument('-c', '--cutoff', type=int, default=1500, help="cutoff contig size for core creation")
    core_builder.add_argument('-s', '--size', type=int, default=10, help="minimum number of contigs which define a core")
    core_builder.add_argument('-b', '--bp', type=int, default=1000000, help="cumulative size of contigs which define a core regardless of number of contigs")
    core_builder.add_argument('-f', '--force', action="store_true", default=False, help="overwrite existing DB file without prompting")
    core_builder.add_argument('-g', '--graphfile', help="output graph of micro bin mergers")
    core_builder.add_argument('-p', '--plot', action="store_true", default=False, help="create plots of bins after basic refinement")
    # type=int so a level given on the command line has the same type as the
    # integer default (without it the value arrives as a string)
    core_builder.add_argument('-m', '--multiplot', type=int, default=0, help="create plots during core creation - (0-3) MAKES MANY IMAGES!")

    #-------------------------------------------------
    # refine bins
    bin_refiner = subparsers.add_parser('refine',
                                        formatter_class=defaults_formatter,
                                        help='merge similar bins / split chimeric ones',
                                        description='Merge similar bins and split chimeric ones')
    bin_refiner.add_argument('dbname', help="name of the database to open")
#    bin_refiner.add_argument('-b', '--bids', nargs='+', type=int, default=None, help="bin ids to use (None for all)")
    bin_refiner.add_argument('-a', '--auto', action="store_true", default=False, help="automatically refine bins")
    bin_refiner.add_argument('-r', '--no_transform', action="store_true", default=False, help="skip data transformation (3 stoits only)")
    bin_refiner.add_argument('-p', '--plot', action="store_true", default=False, help="create plots of bins after refinement")

    #-------------------------------------------------
    # enlarge bins
    bin_expander = subparsers.add_parser('recruit',
                                         formatter_class=defaults_formatter,
                                         help='load saved data and enlarge bins',
                                         description='Recruit more contigs into existing bins')
    bin_expander.add_argument('dbname', help="name of the database to open")
    bin_expander.add_argument('-c', '--cutoff', type=int, default=500, help="cutoff contig size")
    bin_expander.add_argument('-f', '--force', action="store_true", default=False, help="overwrite existing db file without prompting")
    bin_expander.add_argument('-s', '--step', default=200, type=int, help="step size for iterative recruitment")
    bin_expander.add_argument('-i', '--inclusivity', default=2.5, type=float, help="make recruitment more or less inclusive")

    #-------------------------------------------------
    # extract reads and contigs from saved
    bin_extractor = subparsers.add_parser('extract',
                                          formatter_class=defaults_formatter,
                                          help='extract contigs or reads based on bin affiliations',
                                          description='Extract contigs or reads based on bin affiliations')
    bin_extractor.add_argument('dbname', help="name of the database to open")
    bin_extractor.add_argument('data', nargs='+', help="data file(s) to extract from, bam or fasta")
    bin_extractor.add_argument('-b', '--bids', nargs='+', type=int, default=None, help="bin ids to use (None for all)")
    bin_extractor.add_argument('-m', '--mode', default="contigs", help="what to extract [reads, contigs]")
    bin_extractor.add_argument('-o', '--out_folder', default="", help="write to this folder (None for current dir)")
    bin_extractor.add_argument('-p', '--prefix', default="", help="prefix to apply to output files")

    bin_extractor.add_argument('-c', '--cutoff', type=int, default=0, help=">>CONTIG MODE ONLY<< cutoff contig size (0 for no cutoff)")

    bin_extractor.add_argument('--mix_bams', action="store_true", default=False, help=">>READ MODE ONLY<< use the same file for multiple bam files")
    bin_extractor.add_argument('--mix_groups', action="store_true", default=False, help=">>READ MODE ONLY<< use the same files for multiple group groups")
    bin_extractor.add_argument('--mix_reads', action="store_true", default=False, help=">>READ MODE ONLY<< use the same files for paired/unpaired reads")
    bin_extractor.add_argument('--interleave', action="store_true", default=False, help=">>READ MODE ONLY<< interleave paired reads in ouput files")
    bin_extractor.add_argument('--headers_only', action="store_true", default=False, help=">>READ MODE ONLY<< extract only (unique) headers")
    bin_extractor.add_argument('--no_gzip', action="store_true", default=False, help="do not gzip output files")

    bin_extractor.add_argument('--mapping_quality', type=int, default=0, help=">>READ MODE ONLY<< mapping quality threshold")
    bin_extractor.add_argument('--use_secondary', action="store_true", default=False, help=">>READ MODE ONLY<< use reads marked with the secondary flag")
    bin_extractor.add_argument('--use_supplementary', action="store_true", default=False, help=">>READ MODE ONLY<< use reads marked with the supplementary flag")
    bin_extractor.add_argument('--max_distance', type=int, default=1000, help=">>READ MODE ONLY<< maximum allowable edit distance from query to reference")

    bin_extractor.add_argument('-v', '--verbose', action="store_true", default=False, help=">>READ MODE ONLY<< be verbose")
    bin_extractor.add_argument('-t', '--threads', type=int, default=1, help=">>READ MODE ONLY<< maximum number of threads to use")

    ##################################################
    # Utilities
    ##################################################

    #-------------------------------------------------
    # combine two or more bins into one
    bin_merger = subparsers.add_parser('merge',
                                       formatter_class=defaults_formatter,
                                       help='merge 2 or more bins')
    bin_merger.add_argument('dbname', help="name of the database to open")
    bin_merger.add_argument('bids', nargs='+', type=int, help="bin ids to merge.")
    bin_merger.add_argument('-f', '--force', action="store_true", default=False, help="merge without prompting")

    #-------------------------------------------------
    # split a bin into two parts
    bin_splitter = subparsers.add_parser('split',
                                         formatter_class=defaults_formatter,
                                         help='split a bin into n pieces')
    bin_splitter.add_argument('dbname', help="name of the database to open")
    bin_splitter.add_argument('bid', type=int, help="bin id to split")
    bin_splitter.add_argument('parts', type=int, help="number of parts to split the bin into")
    bin_splitter.add_argument('-m', '--mode', default="kmer", help="profile to split on [kmer, cov]")
    bin_splitter.add_argument('-f', '--force', action="store_true", default=False, help="split without prompting")

    #-------------------------------------------------
    # delete bins
    bin_deleter = subparsers.add_parser('delete',
                                        formatter_class=defaults_formatter,
                                        help='delete bins')
    bin_deleter.add_argument('dbname', help="name of the database to open")
    bin_deleter.add_argument('bids', nargs='+', type=int, help="bin ids to delete")
    bin_deleter.add_argument('-f', '--force', action="store_true", default=False, help="delete without prompting")

    ##################################################
    # Plotting
    ##################################################

    #-------------------------------------------------
    # visualise all bins
    bin_explorer = subparsers.add_parser('explore',
                                         formatter_class=defaults_formatter,
                                         help='explore and validate bins')
    bin_explorer.add_argument('dbname', help="name of the database to open")
    bin_explorer.add_argument('-b', '--bids', nargs='+', type=int, default=None, help="bin ids to plot (None for all)")
    bin_explorer.add_argument('-c', '--cutoff', type=int, default=1000, help="cutoff contig size")
    bin_explorer.add_argument('-m', '--mode', default="binids", help="Exploration mode [binpoints, binids, allcontigs, unbinnedcontigs, binnedcontigs, binassignments, compare, sidebyside, together]")
    bin_explorer.add_argument('-r', '--no_transform', action="store_true", default=False, help="skip data transformation (3 stoits only)")
    bin_explorer.add_argument('-k', '--kmers', action="store_true", default=False, help="include kmers in figure [only used when mode == together]")
    bin_explorer.add_argument('-p', '--points', action="store_true", default=False, help="ignore contig lengths when plotting")
    bin_explorer.add_argument('-C', '--cm', default="HSV", help="set colormap [HSV, Accent, Blues, Spectral, Grayscale, Discrete, DiscretePaired]")

    #-------------------------------------------------
    # flyover  --- usually this is basically an easter egg. If you find it then have fun
    bin_pilot = subparsers.add_parser('flyover',
                                      formatter_class=defaults_formatter,
                                      help='create a purdy flyover plot of the bins you made')
    bin_pilot.add_argument('dbname', help="name of the database to open")
    bin_pilot.add_argument('-b', '--bids', nargs='+', type=int, default=None, help="bin ids to concentrate on (None for all)")
    bin_pilot.add_argument('-c', '--cutoff', type=int, default=1000, help="cutoff contig size")
    bin_pilot.add_argument('-p', '--points', action="store_true", default=False, help="ignore contig lengths when plotting")
    bin_pilot.add_argument('-P', '--prefix', default="file", help="prefix to append to start of output files")
    bin_pilot.add_argument('-t', '--title', default="", help="title to add to output images")
    bin_pilot.add_argument('-B', '--colorbar', action="store_true", default=False, help="show the colorbar")
    bin_pilot.add_argument('-f', '--format', default="jpeg", help="file format output images")
    bin_pilot.add_argument('--fps', type=float, default=10, help="frames per second")
    bin_pilot.add_argument('--totalTime', type=float, default=120., help="how long the movie should go for (seconds)")
    bin_pilot.add_argument('--firstFade', type=float, default=0.05, help="what percentage of the movie is devoted to the unbinned contigs")

    #-------------------------------------------------
    # plot a bin/bins
    bin_plotter = subparsers.add_parser('plot',
                                        formatter_class=defaults_formatter,
                                        help='plot bins')
    bin_plotter.add_argument('dbname', help="name of the database to open")
    bin_plotter.add_argument('-b', '--bids', nargs='+', type=int, default=None, help="bin ids to plot (None for all)")
    bin_plotter.add_argument('-t', '--tag', default="BIN", help="tag to add to output filename")
    bin_plotter.add_argument('-f', '--folder', default="", help="save plots in folder")
    bin_plotter.add_argument('-p', '--points', action="store_true", default=False, help="ignore contig lengths when plotting")
    bin_plotter.add_argument('-C', '--cm', default="HSV", help="set colormap [HSV, Accent, Blues, Spectral, Grayscale, Discrete, DiscretePaired]")

    #-------------------------------------------------
    # produce fancy image for publications
    bin_highlighter = subparsers.add_parser('highlight',
                                            formatter_class=defaults_formatter,
                                            help='highlight specific bins')
    bin_highlighter.add_argument('dbname', help="name of the database to open")
    bin_highlighter.add_argument('-P', '--place', action="store_true", default=False, help="use this to help work out azimuth/elevation parameters")
    bin_highlighter.add_argument('-L', '--binlabels', default="", help="replace bin IDs with user specified labels (use 'none' to force no labels)")
    bin_highlighter.add_argument('-C', '--contigcolors', default="", help="specify contig colors")
    bin_highlighter.add_argument('-r', '--radius', action="store_true", default=False, help="draw placement radius to help with label moving")
    bin_highlighter.add_argument('-c', '--cutoff', type=int, default=1000, help="cutoff contig size")
    bin_highlighter.add_argument('-e', '--elevation', type=float, default=25.0, help="elevation in printed image")
    bin_highlighter.add_argument('-a', '--azimuth', type=float, default=-45.0, help="azimuth in printed image")
    bin_highlighter.add_argument('-f', '--file', default="gmview", help="name of image file to produce")
    bin_highlighter.add_argument('-t', '--filetype', default="jpg", help="Type of file to produce")
    # type=int so a user-supplied resolution is numeric like the default
    bin_highlighter.add_argument('-d', '--dpi', type=int, default=300, help="Image resolution")
    bin_highlighter.add_argument('-s', '--show', action="store_true", default=False, help="load image in viewer only")
    bin_highlighter.add_argument('-p', '--points', action="store_true", default=False, help="ignore contig lengths when plotting")
    bin_highlighter.add_argument('-b', '--bids', nargs='+', type=int, default=None, help="bin ids to plot (None for all)")

    #-------------------------------------------------
    # print bin information
    bin_printer = subparsers.add_parser('print',
                                        formatter_class=defaults_formatter,
                                        help='print bin information')
    bin_printer.add_argument('dbname', help="name of the database to open")
    bin_printer.add_argument('-b', '--bids', nargs='+', type=int, default=None, help="bin ids to print (None for all)")
    bin_printer.add_argument('-o', '--outfile', default="", help="print to file not STDOUT")
    bin_printer.add_argument('-f', '--format', default='bins', help="output format [bins, contigs]")
    bin_printer.add_argument('-u', '--unbinned', action="store_true", default=False, help="print unbinned contig IDs too")

    ##################################################
    # Import Export
    ##################################################

    #-------------------------------------------------
    # dump data to file
    data_dumper = subparsers.add_parser('dump',
                                        formatter_class=defaults_formatter,
                                        help='write database to text file')
    data_dumper.add_argument('dbname', help="name of the database to open")
    data_dumper.add_argument('-f', '--fields', default="names,bins", help="fields to extract: Build a comma separated list from [names, mers, gc, coverage, tcoverage, ncoverage, lengths, bins] or just use 'all']")
    data_dumper.add_argument('-o', '--outfile', default="GMdump.csv", help="write data to this file")
    data_dumper.add_argument('-s', '--separator', default=",", help="data separator")
    data_dumper.add_argument('--no_headers', action="store_true", default=False, help="don't add headers")

    # the 'import' sub-command is deliberately switched off; the definition
    # is kept so it can be re-enabled by flipping this condition
    if False:
        #-------------------------------------------------
        # import from file
        data_importer = subparsers.add_parser('import',
                                              formatter_class=defaults_formatter,
                                              help='import information from ')
        data_importer.add_argument('dbname', help="name of the database to open")
        data_importer.add_argument('infile', help="file with data to import")
        data_importer.add_argument('-t', '--fields', default="bins", help="data type to import. [bins]")
        data_importer.add_argument('-s', '--separator', default=",", help="data separator")
        data_importer.add_argument('--has_headers', action="store_true", default=False, help="file contains headers")

    ##################################################
    # System
    ##################################################

    #-------------------------------------------------
    # get and check options
    # Only the first command-line token is inspected for the top-level
    # version / help flags; anything else goes to argparse.
    version_flags = ('-v', '--v', '-version', '--version')
    help_flags = ('-h', '--h', '-help', '--help')

    args = None
    if len(sys.argv) == 1:
        # no sub-command given: show the overview and exit cleanly
        printHelp()
        sys.exit(0)
    elif sys.argv[1] in version_flags:
        print("GroopM: version %s %s %s" % (__version__,
                                            __copyright__,
                                            __author__))
        sys.exit(0)
    elif sys.argv[1] in help_flags:
        printHelp()
        sys.exit(0)
    else:
        args = parser.parse_args()

    #-------------------------------------------------
    # do what we came here to do
    try:
        GM_parser = groopm.GroopMOptionsParser(__version__)
        # flip to True to profile a run with cProfile (stats go to 'prof')
        if False:
            import cProfile
            cProfile.run('GM_parser.parseOptions(args)', 'prof')
            ##########################################
            ##########################################
            # Use this in python console!
            #import pstats
            #p = pstats.Stats('prof')
            #p.sort_stats('cumulative').print_stats(10)
            #p.sort_stats('time').print_stats(10)
            ##########################################
            ##########################################
        else:
            GM_parser.parseOptions(args)
    except Exception:
        # Report the exception type, then re-raise so the traceback and the
        # non-zero exit status are preserved. SystemExit / KeyboardInterrupt
        # are not Exception subclasses and pass through without the banner.
        print("Unexpected error: %s" % (sys.exc_info()[0],))
        raise

###############################################################################
###############################################################################
###############################################################################
###############################################################################