1#!/usr/bin/env python
2###############################################################################
3#                                                                             #
4#    groopm                                                                   #
5#                                                                             #
6#    Entry point. See groopm/groopm.py for internals                          #
7#                                                                             #
8#    Copyright (C) Michael Imelfort                                           #
9#                                                                             #
10###############################################################################
11#                                                                             #
12#          .d8888b.                                    888b     d888          #
13#         d88P  Y88b                                   8888b   d8888          #
14#         888    888                                   88888b.d88888          #
15#         888        888d888 .d88b.   .d88b.  88888b.  888Y88888P888          #
16#         888  88888 888P"  d88""88b d88""88b 888 "88b 888 Y888P 888          #
17#         888    888 888    888  888 888  888 888  888 888  Y8P  888          #
18#         Y88b  d88P 888    Y88..88P Y88..88P 888 d88P 888   "   888          #
19#          "Y8888P88 888     "Y88P"   "Y88P"  88888P"  888       888          #
20#                                             888                             #
21#                                             888                             #
22#                                             888                             #
23#                                                                             #
24###############################################################################
25#                                                                             #
26#    This program is free software: you can redistribute it and/or modify     #
27#    it under the terms of the GNU General Public License as published by     #
28#    the Free Software Foundation, either version 3 of the License, or        #
29#    (at your option) any later version.                                      #
30#                                                                             #
31#    This program is distributed in the hope that it will be useful,          #
32#    but WITHOUT ANY WARRANTY; without even the implied warranty of           #
33#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the            #
34#    GNU General Public License for more details.                             #
35#                                                                             #
36#    You should have received a copy of the GNU General Public License        #
37#    along with this program. If not, see <http://www.gnu.org/licenses/>.     #
38#                                                                             #
39###############################################################################
40
41__author__ = "Michael Imelfort"
42__copyright__ = "Copyright 2012-2014"
43__credits__ = ["Michael Imelfort"]
44__license__ = "GPL3"
45__version__ = "0.3.4"
46__maintainer__ = "Michael Imelfort"
47__email__ = "mike@mikeimelfort.com"
48__status__ = "Released"
49
50###############################################################################
51
52import argparse
53import sys
54import re
55from groopm import groopm
56
57###############################################################################
58###############################################################################
59###############################################################################
60###############################################################################
61
def printHelp():
    """Print the GroopM banner, version and an overview of all subcommands.

    The text is written to stdout. The module-level ``__version__`` string is
    interpolated into the banner. This overview is shown instead of an
    argparse-generated top-level help message.
    """
    # A single parenthesised argument makes this valid both as a Python 2
    # print statement and as a Python 3 print() call, with identical output.
    print('''\

                             ...::: GroopM :::...

                     Automagical metagenomic binning FTW!

   -------------------------------------------------------------------------
                                  version: %s
   -------------------------------------------------------------------------

    Typical workflow:

    groopm parse        -> Load the raw data and save to disk
    groopm core         -> Create core bins
    groopm refine       -> Refine these cores a little
    groopm recruit      -> Add more contigs to the cores
    groopm extract      -> Extract binned contigs or reads

    Extra features:

        Utilities:

    groopm merge        -> Merge two or more bins
    groopm split        -> Split a bin into N parts
    groopm delete       -> Delete a bin

        Printing, plotting:

    groopm explore      -> Methods for viewing bin layouts
    groopm plot         -> Plot bins
    groopm highlight    -> Highlight individual bins and apply labels
    groopm flyover      -> Create a movie of your data
    groopm print        -> Print summary statistics

        Import, export:

    groopm dump         -> Write database fields to csv

    USE: groopm OPTION -h to see detailed options
    ''' % __version__)
103
104#    groopm import       -> Import data from csv
105
106if __name__ == '__main__':
107
108    #-------------------------------------------------
109    # intialise the options parser
110    parser = argparse.ArgumentParser(add_help=False)
111    subparsers = parser.add_subparsers(help="--", dest='subparser_name')
112
113    ##################################################
114    # Typical workflow
115    ##################################################
116
117    #-------------------------------------------------
118    # parse raw data and save
119    file_parser = subparsers.add_parser('parse',
120                                        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
121                                        help='parse raw data and save to disk',
122                                        description='Parse raw data and save to disk')
123    file_parser.add_argument('dbname', help="name of the database being created")
124    file_parser.add_argument('reference', help="fasta file containing bam reference sequences")
125    file_parser.add_argument('bamfiles', nargs='+', help="bam files to parse")
126    file_parser.add_argument('-t', '--threads', type=int, default=1, help="number of threads to use during BAM parsing")
127    file_parser.add_argument('-f', '--force', action="store_true", default=False, help="overwrite existing DB file without prompting")
128    file_parser.add_argument('-c', '--cutoff', type=int, default=500, help="cutoff contig size during parsing")
129
130    #-------------------------------------------------
131    # load saved data and make bin cores
132    core_builder = subparsers.add_parser('core',
133                                        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
134                                        help='load saved data and make bin cores',
135                                        description='Load saved data and make bin cores')
136    core_builder.add_argument('dbname', help="name of the database to open")
137    core_builder.add_argument('-c', '--cutoff', type=int, default=1500, help="cutoff contig size for core creation")
138    core_builder.add_argument('-s', '--size', type=int, default=10, help="minimum number of contigs which define a core")
139    core_builder.add_argument('-b', '--bp', type=int, default=1000000, help="cumulative size of contigs which define a core regardless of number of contigs")
140    core_builder.add_argument('-f', '--force', action="store_true", default=False, help="overwrite existing DB file without prompting")
141    core_builder.add_argument('-g', '--graphfile', help="output graph of micro bin mergers")
142    core_builder.add_argument('-p', '--plot', action="store_true", default=False, help="create plots of bins after basic refinement")
143    core_builder.add_argument('-m', '--multiplot', default=0, help="create plots during core creation - (0-3) MAKES MANY IMAGES!")
144
145    #-------------------------------------------------
146    # refine bins
147    bin_refiner = subparsers.add_parser('refine',
148                                        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
149                                        help='merge similar bins / split chimeric ones',
150                                        description='Merge similar bins and split chimeric ones')
151    bin_refiner.add_argument('dbname', help="name of the database to open")
152#    bin_refiner.add_argument('-b', '--bids', nargs='+', type=int, default=None, help="bin ids to use (None for all)")
153    bin_refiner.add_argument('-a', '--auto', action="store_true", default=False, help="automatically refine bins")
154    bin_refiner.add_argument('-r', '--no_transform', action="store_true", default=False, help="skip data transformation (3 stoits only)")
155    bin_refiner.add_argument('-p', '--plot', action="store_true", default=False, help="create plots of bins after refinement")
156
157    #-------------------------------------------------
158    # enlarge bins
159    bin_expander = subparsers.add_parser('recruit',
160                                        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
161                                        help='load saved data and enlarge bins',
162                                        description='Recruit more contigs into existing bins')
163    bin_expander.add_argument('dbname', help="name of the database to open")
164    bin_expander.add_argument('-c', '--cutoff', type=int, default=500, help="cutoff contig size")
165    bin_expander.add_argument('-f', '--force', action="store_true", default=False, help="overwrite existing db file without prompting")
166    bin_expander.add_argument('-s', '--step', default=200, type=int, help="step size for iterative recruitment")
167    bin_expander.add_argument('-i', '--inclusivity', default=2.5, type=float, help="make recruitment more or less inclusive")
168
169    #-------------------------------------------------
170    # extract reads and contigs from saved
171    bin_extractor = subparsers.add_parser('extract',
172                                        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
173                                        help='extract contigs or reads based on bin affiliations',
174                                        description='Extract contigs or reads based on bin affiliations')
175    bin_extractor.add_argument('dbname', help="name of the database to open")
176    bin_extractor.add_argument('data', nargs='+', help="data file(s) to extract from, bam or fasta")
177    bin_extractor.add_argument('-b', '--bids', nargs='+', type=int, default=None, help="bin ids to use (None for all)")
178    bin_extractor.add_argument('-m', '--mode', default="contigs", help="what to extract [reads, contigs]")
179    bin_extractor.add_argument('-o', '--out_folder', default="", help="write to this folder (None for current dir)")
180    bin_extractor.add_argument('-p', '--prefix', default="", help="prefix to apply to output files")
181
182    bin_extractor.add_argument('-c', '--cutoff', type=int, default=0, help=">>CONTIG MODE ONLY<< cutoff contig size (0 for no cutoff)")
183
184    bin_extractor.add_argument('--mix_bams', action="store_true", default=False, help=">>READ MODE ONLY<< use the same file for multiple bam files")
185    bin_extractor.add_argument('--mix_groups', action="store_true", default=False, help=">>READ MODE ONLY<< use the same files for multiple group groups")
186    bin_extractor.add_argument('--mix_reads', action="store_true", default=False, help=">>READ MODE ONLY<< use the same files for paired/unpaired reads")
187    bin_extractor.add_argument('--interleave', action="store_true", default=False, help=">>READ MODE ONLY<< interleave paired reads in ouput files")
188    bin_extractor.add_argument('--headers_only', action="store_true", default=False, help=">>READ MODE ONLY<< extract only (unique) headers")
189    bin_extractor.add_argument('--no_gzip', action="store_true", default=False, help="do not gzip output files")
190
191    bin_extractor.add_argument('--mapping_quality', type=int, default=0, help=">>READ MODE ONLY<< mapping quality threshold")
192    bin_extractor.add_argument('--use_secondary', action="store_true", default=False, help=">>READ MODE ONLY<< use reads marked with the secondary flag")
193    bin_extractor.add_argument('--use_supplementary', action="store_true", default=False, help=">>READ MODE ONLY<< use reads marked with the supplementary flag")
194    bin_extractor.add_argument('--max_distance', type=int, default=1000, help=">>READ MODE ONLY<< maximum allowable edit distance from query to reference")
195
196    bin_extractor.add_argument('-v', '--verbose', action="store_true", default=False, help=">>READ MODE ONLY<< be verbose")
197    bin_extractor.add_argument('-t', '--threads', type=int, default=1, help=">>READ MODE ONLY<< maximum number of threads to use")
198
199    ##################################################
200    # Utilities
201    ##################################################
202
203    #-------------------------------------------------
204    # combine two or more bins into one
205    bin_merger = subparsers.add_parser('merge',
206                                        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
207                                        help='merge 2 or more bins')
208    bin_merger.add_argument('dbname', help="name of the database to open")
209    bin_merger.add_argument('bids', nargs='+', type=int, help="bin ids to merge.")
210    bin_merger.add_argument('-f', '--force', action="store_true", default=False, help="merge without prompting")
211
212    #-------------------------------------------------
213    # split a bin into two parts
214    bin_splitter = subparsers.add_parser('split',
215                                        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
216                                        help='split a bin into n pieces')
217    bin_splitter.add_argument('dbname', help="name of the database to open")
218    bin_splitter.add_argument('bid', type=int, help="bin id to split")
219    bin_splitter.add_argument('parts', type=int, help="number of parts to split the bin into")
220    bin_splitter.add_argument('-m', '--mode', default="kmer", help="profile to split on [kmer, cov]")
221    bin_splitter.add_argument('-f', '--force', action="store_true", default=False, help="split without prompting")
222
223    #-------------------------------------------------
224    # delete bins
225    bin_deleter = subparsers.add_parser('delete',
226                                        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
227                                        help='delete bins')
228    bin_deleter.add_argument('dbname', help="name of the database to open")
229    bin_deleter.add_argument('bids', nargs='+', type=int, help="bin ids to delete")
230    bin_deleter.add_argument('-f', '--force', action="store_true", default=False, help="delete without prompting")
231
232    ##################################################
233    # Plotting
234    ##################################################
235
236    #-------------------------------------------------
237    # visualise all bins
238    bin_explorer = subparsers.add_parser('explore',
239                                        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
240                                        help='explore and validate bins')
241    bin_explorer.add_argument('dbname', help="name of the database to open")
242    bin_explorer.add_argument('-b', '--bids', nargs='+', type=int, default=None, help="bin ids to plot (None for all)")
243    bin_explorer.add_argument('-c', '--cutoff', type=int, default=1000, help="cutoff contig size")
244    bin_explorer.add_argument('-m', '--mode', default="binids", help="Exploration mode [binpoints, binids, allcontigs, unbinnedcontigs, binnedcontigs, binassignments, compare, sidebyside, together]")
245    bin_explorer.add_argument('-r', '--no_transform', action="store_true", default=False, help="skip data transformation (3 stoits only)")
246    bin_explorer.add_argument('-k', '--kmers', action="store_true", default=False, help="include kmers in figure [only used when mode == together]")
247    bin_explorer.add_argument('-p', '--points', action="store_true", default=False, help="ignore contig lengths when plotting")
248    bin_explorer.add_argument('-C', '--cm', default="HSV", help="set colormap [HSV, Accent, Blues, Spectral, Grayscale, Discrete, DiscretePaired]")
249
250    #-------------------------------------------------
251    # flyover  --- usually this is basically an easter egg. If you find it then have fun
252    bin_pilot = subparsers.add_parser('flyover',
253                                      formatter_class=argparse.ArgumentDefaultsHelpFormatter,
254                                      help='create a purdy flyover plot of the bins you made')
255    bin_pilot.add_argument('dbname', help="name of the database to open")
256    bin_pilot.add_argument('-b', '--bids', nargs='+', type=int, default=None, help="bin ids to concentrate on (None for all)")
257    bin_pilot.add_argument('-c', '--cutoff', type=int, default=1000, help="cutoff contig size")
258    bin_pilot.add_argument('-p', '--points', action="store_true", default=False, help="ignore contig lengths when plotting")
259    bin_pilot.add_argument('-P', '--prefix', default="file", help="prefix to append to start of output files")
260    bin_pilot.add_argument('-t', '--title', default="", help="title to add to output images")
261    bin_pilot.add_argument('-B', '--colorbar', action="store_true", default=False, help="show the colorbar")
262    bin_pilot.add_argument('-f', '--format', default="jpeg", help="file format output images")
263    bin_pilot.add_argument('--fps', type=float, default=10, help="frames per second")
264    bin_pilot.add_argument('--totalTime', type=float, default=120., help="how long the movie should go for (seconds)")
265    bin_pilot.add_argument('--firstFade', type=float, default=0.05, help="what percentage of the movie is devoted to the unbinned contigs")
266
267    #-------------------------------------------------
268    # plot a bin/bins
269    bin_plotter = subparsers.add_parser('plot',
270                                        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
271                                        help='plot bins')
272    bin_plotter.add_argument('dbname', help="name of the database to open")
273    bin_plotter.add_argument('-b', '--bids', nargs='+', type=int, default=None, help="bin ids to plot (None for all)")
274    bin_plotter.add_argument('-t', '--tag', default="BIN", help="tag to add to output filename")
275    bin_plotter.add_argument('-f', '--folder', default="", help="save plots in folder")
276    bin_plotter.add_argument('-p', '--points', action="store_true", default=False, help="ignore contig lengths when plotting")
277    bin_plotter.add_argument('-C', '--cm', default="HSV", help="set colormap [HSV, Accent, Blues, Spectral, Grayscale, Discrete, DiscretePaired]")
278
279    #-------------------------------------------------
280    # produce fancy image for publications
281    bin_highlighter = subparsers.add_parser('highlight',
282                                            formatter_class=argparse.ArgumentDefaultsHelpFormatter,
283                                            help='highlight specific bins')
284    bin_highlighter.add_argument('dbname', help="name of the database to open")
285    bin_highlighter.add_argument('-P', '--place', action="store_true", default=False, help="use this to help work out azimuth/elevation parameters")
286    bin_highlighter.add_argument('-L', '--binlabels', default="", help="replace bin IDs with user specified labels (use 'none' to force no labels)")
287    bin_highlighter.add_argument('-C', '--contigcolors', default="", help="specify contig colors")
288    bin_highlighter.add_argument('-r', '--radius', action="store_true", default=False, help="draw placement radius to help with label moving")
289    bin_highlighter.add_argument('-c', '--cutoff', type=int, default=1000, help="cutoff contig size")
290    bin_highlighter.add_argument('-e', '--elevation', type=float, default=25.0, help="elevation in printed image")
291    bin_highlighter.add_argument('-a', '--azimuth', type=float, default=-45.0, help="azimuth in printed image")
292    bin_highlighter.add_argument('-f', '--file', default="gmview", help="name of image file to produce")
293    bin_highlighter.add_argument('-t', '--filetype', default="jpg", help="Type of file to produce")
294    bin_highlighter.add_argument('-d', '--dpi', default=300, help="Image resolution")
295    bin_highlighter.add_argument('-s', '--show', action="store_true", default=False, help="load image in viewer only")
296    bin_highlighter.add_argument('-p', '--points', action="store_true", default=False, help="ignore contig lengths when plotting")
297    bin_highlighter.add_argument('-b', '--bids', nargs='+', type=int, default=None, help="bin ids to plot (None for all)")
298
299    #-------------------------------------------------
300    # print bin information
301    bin_printer = subparsers.add_parser('print',
302                                        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
303                                        help='print bin information')
304    bin_printer.add_argument('dbname', help="name of the database to open")
305    bin_printer.add_argument('-b', '--bids', nargs='+', type=int, default=None, help="bin ids to print (None for all)")
306    bin_printer.add_argument('-o', '--outfile', default="", help="print to file not STDOUT")
307    bin_printer.add_argument('-f', '--format', default='bins', help="output format [bins, contigs]")
308    bin_printer.add_argument('-u', '--unbinned', action="store_true", default=False, help="print unbinned contig IDs too")
309
310    ##################################################
311    # Import Export
312    ##################################################
313
314    #-------------------------------------------------
315    # dump data to file
316    data_dumper = subparsers.add_parser('dump',
317                                        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
318                                        help='write database to text file')
319    data_dumper.add_argument('dbname', help="name of the database to open")
320    data_dumper.add_argument('-f', '--fields', default="names,bins", help="fields to extract: Build a comma separated list from [names, mers, gc, coverage, tcoverage, ncoverage, lengths, bins] or just use 'all']")
321    data_dumper.add_argument('-o', '--outfile', default="GMdump.csv", help="write data to this file")
322    data_dumper.add_argument('-s', '--separator', default=",", help="data separator")
323    data_dumper.add_argument('--no_headers', action="store_true", default=False, help="don't add headers")
324
325    if False:
326        #-------------------------------------------------
327        # import from file
328        data_importer = subparsers.add_parser('import',
329                                              formatter_class=argparse.ArgumentDefaultsHelpFormatter,
330                                              help='import information from ')
331        data_importer.add_argument('dbname', help="name of the database to open")
332        data_importer.add_argument('infile', help="file with data to import")
333        data_importer.add_argument('-t', '--fields', default="bins", help="data type to import. [bins]")
334        data_importer.add_argument('-s', '--separator', default=",", help="data separator")
335        data_importer.add_argument('--has_headers', action="store_true", default=False, help="file contains headers")
336
337    ##################################################
338    # System
339    ##################################################
340
341    #-------------------------------------------------
342    # get and check options
343    args = None
344    if(len(sys.argv) == 1):
345        printHelp()
346        sys.exit(0)
347    elif(sys.argv[1] == '-v' or \
348         sys.argv[1] == '--v' or \
349         sys.argv[1] == '-version' or \
350         sys.argv[1] == '--version'):
351        print "GroopM: version %s %s %s" % (__version__,
352                                            __copyright__,
353                                            __author__)
354        sys.exit(0)
355    elif(sys.argv[1] == '-h' or \
356         sys.argv[1] == '--h' or \
357         sys.argv[1] == '-help' or \
358         sys.argv[1] == '--help'):
359        printHelp()
360        sys.exit(0)
361    else:
362        args = parser.parse_args()
363
364    #-------------------------------------------------
365    # do what we came here to do
366    try:
367        GM_parser = groopm.GroopMOptionsParser(__version__)
368        if(False):
369            import cProfile
370            cProfile.run('GM_parser.parseOptions(args)', 'prof')
371            ##########################################
372            ##########################################
373            # Use this in python console!
374            #import pstats
375            #p = pstats.Stats('prof')
376            #p.sort_stats('cumulative').print_stats(10)
377            #p.sort_stats('time').print_stats(10)
378            ##########################################
379            ##########################################
380        else:
381            GM_parser.parseOptions(args)
382    except:
383        print "Unexpected error:", sys.exc_info()[0]
384        raise
385
386###############################################################################
387###############################################################################
388###############################################################################
389###############################################################################
390
391