1#!/usr/bin/env python
2
3###############################################################################
4#
5# checkm - main program entry point. See checkm/main.py for internals.
6#
7###############################################################################
8#                                                                             #
9#    This program is free software: you can redistribute it and/or modify     #
10#    it under the terms of the GNU General Public License as published by     #
11#    the Free Software Foundation, either version 3 of the License, or        #
12#    (at your option) any later version.                                      #
13#                                                                             #
14#    This program is distributed in the hope that it will be useful,          #
15#    but WITHOUT ANY WARRANTY; without even the implied warranty of           #
16#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the            #
17#    GNU General Public License for more details.                             #
18#                                                                             #
19#    You should have received a copy of the GNU General Public License        #
20#    along with this program. If not, see <http://www.gnu.org/licenses/>.     #
21#                                                                             #
22###############################################################################
23
24__author__ = "Donovan Parks, Connor Skennerton, Michael Imelfort"
25__copyright__ = "Copyright 2014"
26__credits__ = ["Donovan Parks", "Connor Skennerton", "Michael Imelfort"]
27__license__ = "GPL3"
28__maintainer__ = "Donovan Parks"
29__email__ = "donovan.parks@gmail.com"
30__status__ = "Development"
31
32import argparse
33import sys
34import os
35
36from checkm import main
37from checkm.defaultValues import DefaultValues
38from checkm.customHelpFormatter import CustomHelpFormatter
39from checkm.util.taxonomyUtils import taxonomicRanks
40from checkm.logger import logger_setup
41import tempfile
42
43
class ChangeTempAction(argparse.Action):
    """argparse action that makes a user-supplied directory the default temp dir.

    When the option is present on the command line, the module-level
    ``tempfile.tempdir`` is set to the given directory and the value is
    also stored on the parsed namespace under the option's ``dest``.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Validate the directory and install it as the temp location.

        parser        -- the ArgumentParser invoking this action (unused)
        namespace     -- namespace object receiving the parsed value
        values        -- path given on the command line
        option_string -- option used to invoke the action (e.g. '--tmpdir')

        Raises argparse.ArgumentError if `values` is not an existing directory.
        """
        if not os.path.isdir(values):
            # ArgumentError (not ArgumentTypeError) is the exception that
            # parse_args() intercepts when raised by an action, so the user
            # gets a regular usage message rather than a raw traceback.
            raise argparse.ArgumentError(self, 'The value of %s must be a valid directory' % option_string)

        tempfile.tempdir = values

        # Record the value so it is also visible on the parsed arguments.
        setattr(namespace, self.dest, values)
50
51
def version():
    """Return the CheckM version string.

    Reads the first line of the 'VERSION' file found in the first entry of
    the checkm package path and returns it with surrounding whitespace
    stripped.
    """
    import checkm

    # Use a context manager so the file handle is closed as soon as the
    # version line has been read instead of being left open.
    with open(os.path.join(checkm.__path__[0], 'VERSION')) as versionFile:
        return versionFile.readline().strip()
56
57
def printHelp():
    """Print the CheckM banner and the overview of available commands.

    Writes a blank line, a banner line containing the value returned by
    version(), and the grouped command summary to standard output.
    """
    commandSummary = '''\

  Lineage-specific marker set:
    tree         -> Place bins in the reference genome tree
    tree_qa      -> Assess phylogenetic markers found in each bin
    lineage_set  -> Infer lineage-specific marker sets for each bin

  Taxonomic-specific marker set:
    taxon_list   -> List available taxonomic-specific marker sets
    taxon_set    -> Generate taxonomic-specific marker set

  Apply marker set to genome bins:
    analyze      -> Identify marker genes in bins
    qa           -> Assess bins for contamination and completeness

  Common workflows (combines above commands):
    lineage_wf   -> Runs tree, lineage_set, analyze, qa
    taxonomy_wf  -> Runs taxon_set, analyze, qa

  Bin QA plots:
    bin_qa_plot  -> Bar plot of bin completeness, contamination, and strain heterogeneity

  Reference distribution plots:
    gc_plot      -> Create GC histogram and delta-GC plot
    coding_plot  -> Create coding density (CD) histogram and delta-CD plot
    tetra_plot   -> Create tetranucleotide distance (TD) histogram and delta-TD plot
    dist_plot    -> Create image with GC, CD, and TD distribution plots together

  General plots:
    nx_plot      -> Create Nx-plots
    len_plot     -> Cumulative sequence length plot
    len_hist     -> Sequence length histogram
    marker_plot  -> Plot position of marker genes on sequences
    par_plot     -> Parallel coordinate plot of GC and coverage
    gc_bias_plot -> Plot bin coverage as a function of GC

  Sequence subspace plots:
    cov_pca      -> PCA plot of coverage profiles
    tetra_pca    -> PCA plot of tetranucleotide signatures

  Bin exploration and modification:
    unique       -> Ensure no sequences are assigned to multiple bins
    merge        -> Identify bins with complementary sets of marker genes
    bin_compare  -> Compare two sets of bins (e.g., from alternative binning methods)
    bin_union    -> [Experimental] Merge multiple binning efforts into a single bin set
    modify       -> [Experimental] Modify sequences in a bin
    outliers     -> [Experimental] Identify outlier in bins relative to reference distributions

  Utility functions:
    unbinned     -> Identify unbinned sequences
    coverage     -> Calculate coverage of sequences
    tetra        -> Calculate tetranucleotide signature of sequences
    profile      -> Calculate percentage of reads mapped to each bin
    join_tables  -> Join tab-separated value tables containing bin information
    ssu_finder   -> Identify SSU (16S/18S) rRNAs in sequences

  Use 'checkm data setRoot <checkm_data_dir>' to specify the location of CheckM database files.

  Usage: checkm <command> -h for command specific help
    '''

    # Leading blank line, then the version banner, then the command listing.
    print('')
    print('                ...::: CheckM v%s :::...' % version())
    print(commandSummary)
121
122if __name__ == '__main__':
123    # initialize the options parser
124    parser = argparse.ArgumentParser(add_help=False)
125    subparsers = parser.add_subparsers(help="--", dest='subparser_name')
126
127    data_parser = subparsers.add_parser('data',
128                                        formatter_class=CustomHelpFormatter,
129                                        description='Set path to the CheckM database files.',
130                                        epilog='Example: checkm data setRoot')
131    data_parser.add_argument('action', nargs="+",
132            help='''
133  setRoot <PATH>  -> set the data directory to <PATH> (requires permissions)
134            ''')
135
136    # determine placement of each genome bin in the genome tree
137    tree_parser = subparsers.add_parser('tree',
138                                        formatter_class=CustomHelpFormatter,
139                                        description='Place bins in the genome tree.',
140                                        epilog='Example: checkm tree ./bins ./output')
141    tree_parser.add_argument('bin_dir', help="directory containing bins (fasta format)")
142    tree_parser.add_argument('output_dir', help="directory to write output files")
143    tree_parser.add_argument('-r', '--reduced_tree', dest='bReducedTree', action="store_true", help="use reduced tree (requires <16GB of memory) for determining lineage of each bin")
144    tree_parser.add_argument('--ali', dest='bKeepAlignment', action="store_true", default=False, help="generate HMMER alignment file for each bin")
145    tree_parser.add_argument('--nt', dest='bNucORFs', action="store_true", default=False, help="generate nucleotide gene sequences for each bin")
146    tree_parser.add_argument('-g', '--genes', dest='bCalledGenes', action="store_true", default=False, help="bins contain genes as amino acids instead of nucleotide contigs")
147    tree_parser.add_argument('-x', '--extension', default='fna', help="extension of bins (other files in directory are ignored)")
148    tree_parser.add_argument('-t', '--threads', type=int, default=1, help="number of threads")
149    tree_parser.add_argument('--pplacer_threads', type=int, default=1, help="number of threads used by pplacer (memory usage increases linearly with additional threads)")
150    tree_parser.add_argument('-q', '--quiet', dest='bQuiet', action="store_true", default=False, help="suppress console output")
151    tree_parser.add_argument('--tmpdir', action=ChangeTempAction, help="specify an alternative directory for temporary files")
152
153    # do QA on phylogenetic marker genes
154    tree_qa_parser = subparsers.add_parser('tree_qa',
155                                        formatter_class=CustomHelpFormatter,
156                                        description='Assess phylogenetic markers found in each bin.',
157                                        epilog='Example: checkm tree_qa ./output')
158    tree_qa_parser.add_argument('tree_dir', help="directory specified during tree command")
159    tree_qa_parser.add_argument('-o', '--out_format', type=int,
160                                    help='''desired output:
161  1. brief summary of genome tree placement
162  2. detailed summary of genome tree placement including lineage-specific statistics
163  3. genome tree in Newick format decorated with IMG genome ids
164  4. genome tree in Newick format decorated with taxonomy strings
165  5. multiple sequence alignment of reference genomes and bins''',
166                                    default=1, choices=[1, 2, 3, 4, 5])
167    tree_qa_parser.add_argument('-f', '--file', default='stdout', help="print results to file")
168    tree_qa_parser.add_argument('--tab_table', dest='bTabTable', action="store_true", default=False, help="print tab-separated values table")
169    tree_qa_parser.add_argument('-q', '--quiet', dest='bQuiet', action="store_true", default=False, help="suppress console output")
170    tree_qa_parser.add_argument('--tmpdir', action=ChangeTempAction, help="specify an alternative directory for temporary files")
171
172    # calculate lineage-specific marker set for genome bins
173    lineage_set_parser = subparsers.add_parser('lineage_set',
174                                        formatter_class=CustomHelpFormatter,
175                                        description='Infer lineage-specific marker sets for each bin.',
176                                        epilog='Example: checkm lineage_set ./output lineage.ms')
177    lineage_set_parser.add_argument('tree_dir', help="directory specified during tree command")
178    lineage_set_parser.add_argument('marker_file', help="output file describing marker set for each bin")
179    lineage_set_parser.add_argument('-u', '--unique', type=int, default=10, help="minimum number of unique phylogenetic markers required to use lineage-specific marker set")
180    lineage_set_parser.add_argument('-m', '--multi', type=int, default=10, help="maximum number of multi-copy phylogenetic markers before defaulting to domain-level marker set")
181    lineage_set_parser.add_argument('--force_domain', dest='bForceDomain', action="store_true", default=False, help="use domain-level sets for all bins")
182    lineage_set_parser.add_argument('--no_refinement', dest='bNoLineageSpecificRefinement', action="store_true", default=False, help="do not perform lineage-specific marker set refinement")
183    lineage_set_parser.add_argument('-q', '--quiet', dest='bQuiet', action="store_true", default=False, help="suppress console output")
184    lineage_set_parser.add_argument('--tmpdir', action=ChangeTempAction, help="specify an alternative directory for temporary files")
185
186    # list of available taxonomic-specific marker set
187    taxon_list_parser = subparsers.add_parser('taxon_list',
188                                        formatter_class=CustomHelpFormatter,
189                                        description='List available taxonomic-specific marker sets.',
190                                        epilog='Example: checkm taxon_list --rank phylum')
191    taxon_list_parser.add_argument('--rank', help="restrict list to specified taxonomic rank", choices=['ALL'] + taxonomicRanks, default='ALL')
192    taxon_list_parser.add_argument('--tmpdir', action=ChangeTempAction, help="specify an alternative directory for temporary files")
193
194    # calculate taxonomic-specific marker set
195    taxon_set_parser = subparsers.add_parser('taxon_set',
196                                        formatter_class=CustomHelpFormatter,
197                                        description='Generate taxonomic-specific marker set.',
198                                        epilog='Example: checkm taxon_set domain Bacteria bacteria.ms')
199    taxon_set_parser.add_argument('rank', help="taxonomic rank", choices=taxonomicRanks)
200    taxon_set_parser.add_argument('taxon', help="taxon of interest")
201    taxon_set_parser.add_argument('marker_file', help="output file describing taxonomic-specific marker set")
202    taxon_set_parser.add_argument('-q', '--quiet', dest='bQuiet', action="store_true", default=False, help="suppress console output")
203    taxon_set_parser.add_argument('--tmpdir', action=ChangeTempAction, help="specify an alternative directory for temporary files")
204
205    # identify marker genes within binned contigs and calculate genome statistics
206    analyze_parser = subparsers.add_parser('analyze',
207                                        formatter_class=CustomHelpFormatter,
208                                        description='Identify marker genes in bins and calculate genome statistics.',
209                                        epilog='Example: checkm analyze lineage.ms ./bins ./output')
210    analyze_parser.add_argument('marker_file', help="markers for assessing bins (marker set or HMM file)")
211    analyze_parser.add_argument('bin_dir', help="directory containing bins (fasta format)")
212    analyze_parser.add_argument('output_dir', help="directory to write output files")
213    analyze_parser.add_argument('--ali', dest='bKeepAlignment', action="store_true", default=False, help="generate HMMER alignment file for each bin")
214    analyze_parser.add_argument('--nt', dest='bNucORFs', action="store_true", default=False, help="generate nucleotide gene sequences for each bin")
215    analyze_parser.add_argument('-g', '--genes', dest='bCalledGenes', action="store_true", default=False, help="bins contain genes as amino acids instead of nucleotide contigs")
216    analyze_parser.add_argument('-x', '--extension', default='fna', help="extension of bins (other files in directory are ignored)")
217    analyze_parser.add_argument('-t', '--threads', type=int, default=1, help="number of threads")
218    analyze_parser.add_argument('-q', '--quiet', dest='bQuiet', action="store_true", default=False, help="suppress console output")
219
220    analyze_parser.add_argument('--ali_top_hits', dest='bAlignTopHit', action="store_true", default=False, help=argparse.SUPPRESS)  # [hidden argument] align top marker hits (used by genome tree database)
221    analyze_parser.add_argument('--tmpdir', action=ChangeTempAction, help="specify an alternative directory for temporary files")
222
223    # do QA on pre-processed contigs
224    qa_parser = subparsers.add_parser('qa',
225                                        formatter_class=CustomHelpFormatter,
226                                        description='Assess bins for contamination and completeness.',
227                                        epilog='''
228Note: lineage_wf and taxonomy_wf produce a marker file in the specified output directory. The
229        lineage workflow produced a marker file called lineage.ms, while the taxonomy workflow
230        produces a marker file called <taxon>.ms (e.g. Bacteria.ms).
231
232Example: checkm qa ./output/lineage.ms ./output
233                                        ''')
234    qa_parser.add_argument('marker_file', help="marker file specified during analyze command")
235    qa_parser.add_argument('analyze_dir', help="directory specified during analyze command")
236    qa_parser.add_argument('-o', '--out_format', type=int,
237                                help='''desired output:
238  1. summary of bin completeness and contamination
239  2. extended summary of bin statistics (includes GC, genome size, ...)
240  3. summary of bin quality for increasingly basal lineage-specific marker sets
241  4. list of marker genes and their counts
242  5. list of bin id, marker gene id, gene id
243  6. list of marker genes present multiple times in a bin
244  7. list of marker genes present multiple times on the same scaffold
245  8. list indicating position of each marker gene within a bin
246  9. FASTA file of marker genes identified in each bin''',
247  # 10. scaffold statistics: scaffold id, bin id, length, GC, ..., marker gene(s)''',
248                                default=1, choices=[1, 2, 3, 4, 5, 6, 7, 8, 9])
249    qa_parser.add_argument('--exclude_markers', default=None, help="file specifying markers to exclude from marker sets")
250    qa_parser.add_argument('--individual_markers', dest='bIndividualMarkers', action="store_true", default=False, help="treat marker as independent (i.e., ignore co-located set structure)")
251    qa_parser.add_argument('--skip_adj_correction', dest='bSkipAdjCorrection', action="store_true", default=False, help="do not exclude adjacent marker genes when estimating contamination")
252    qa_parser.add_argument('--skip_pseudogene_correction', dest='bSkipPseudoGeneCorrection', action="store_true", default=False, help="skip identification and filtering of pseudogenes")
253    qa_parser.add_argument('--aai_strain', type=float, default=0.9, help="AAI threshold used to identify strain heterogeneity")
254    qa_parser.add_argument('-a', '--alignment_file', default=None, help="produce file showing alignment of multi-copy genes and their AAI identity")
255    qa_parser.add_argument('--ignore_thresholds', dest='bIgnoreThresholds', action="store_true", default=False, help="ignore model-specific score thresholds")
256    qa_parser.add_argument('-e', '--e_value', type=float, default=DefaultValues.E_VAL, help="e-value cut off")
257    qa_parser.add_argument('-l', '--length', type=float, default=DefaultValues.LENGTH, help="percent overlap between target and query")
258    qa_parser.add_argument('-c', '--coverage_file', default=None, help="file containing coverage of each sequence; coverage information added to table type 2 (see coverage command)")
259    qa_parser.add_argument('-f', '--file', default='stdout', help="print results to file")
260    qa_parser.add_argument('--tab_table', dest='bTabTable', action="store_true", default=False, help="print tab-separated values table")
261    qa_parser.add_argument('-t', '--threads', type=int, default=1, help="number of threads")
262    qa_parser.add_argument('-q', '--quiet', dest='bQuiet', action="store_true", default=False, help="suppress console output")
263    qa_parser.add_argument('--tmpdir', action=ChangeTempAction, help="specify an alternative directory for temporary files")
264
265    # run lineage-specific workflow
266    lineage_wf_parser = subparsers.add_parser('lineage_wf',
267                                        formatter_class=CustomHelpFormatter,
268                                        description='Runs tree, lineage_set, analyze, qa',
269                                        epilog='Example: checkm lineage_wf ./bins ./output')
270    lineage_wf_parser.add_argument('bin_dir', help="directory containing bins (fasta format)")
271    lineage_wf_parser.add_argument('output_dir', help="directory to write output files")
272    lineage_wf_parser.add_argument('-r', '--reduced_tree', dest='bReducedTree', action="store_true", help="use reduced tree (requires <16GB of memory) for determining lineage of each bin")
273    lineage_wf_parser.add_argument('--ali', dest='bKeepAlignment', action="store_true", default=False, help="generate HMMER alignment file for each bin")
274    lineage_wf_parser.add_argument('--nt', dest='bNucORFs', action="store_true", default=False, help="generate nucleotide gene sequences for each bin")
275    lineage_wf_parser.add_argument('-g', '--genes', dest='bCalledGenes', action="store_true", default=False, help="bins contain genes as amino acids instead of nucleotide contigs")
276    lineage_wf_parser.add_argument('-u', '--unique', type=int, default=10, help="minimum number of unique phylogenetic markers required to use lineage-specific marker set")
277    lineage_wf_parser.add_argument('-m', '--multi', type=int, default=10, help="maximum number of multi-copy phylogenetic markers before defaulting to domain-level marker set")
278    lineage_wf_parser.add_argument('--force_domain', dest='bForceDomain', action="store_true", default=False, help="use domain-level sets for all bins")
279    lineage_wf_parser.add_argument('--no_refinement', dest='bNoLineageSpecificRefinement', action="store_true", default=False, help="do not perform lineage-specific marker set refinement")
280    lineage_wf_parser.add_argument('--individual_markers', dest='bIndividualMarkers', action="store_true", default=False, help="treat marker as independent (i.e., ignore co-located set structure)")
281    lineage_wf_parser.add_argument('--skip_adj_correction', dest='bSkipAdjCorrection', action="store_true", default=False, help="do not exclude adjacent marker genes when estimating contamination")
282    lineage_wf_parser.add_argument('--skip_pseudogene_correction', dest='bSkipPseudoGeneCorrection', action="store_true", default=False, help="skip identification and filtering of pseudogenes")
283    lineage_wf_parser.add_argument('--aai_strain', type=float, default=0.9, help="AAI threshold used to identify strain heterogeneity")
284    lineage_wf_parser.add_argument('-a', '--alignment_file', default=None, help="produce file showing alignment of multi-copy genes and their AAI identity")
285    lineage_wf_parser.add_argument('--ignore_thresholds', dest='bIgnoreThresholds', action="store_true", default=False, help="ignore model-specific score thresholds")
286    lineage_wf_parser.add_argument('-e', '--e_value', type=float, default=DefaultValues.E_VAL, help="e-value cut off")
287    lineage_wf_parser.add_argument('-l', '--length', type=float, default=DefaultValues.LENGTH, help="percent overlap between target and query")
288    lineage_wf_parser.add_argument('-f', '--file', default='stdout', help="print results to file")
289    lineage_wf_parser.add_argument('--tab_table', dest='bTabTable', action="store_true", default=False, help="print tab-separated values table")
290    lineage_wf_parser.add_argument('-x', '--extension', default='fna', help="extension of bins (other files in directory are ignored)")
291    lineage_wf_parser.add_argument('-t', '--threads', type=int, default=1, help="number of threads")
292    lineage_wf_parser.add_argument('--pplacer_threads', type=int, default=1, help="number of threads used by pplacer (memory usage increases linearly with additional threads)")
293    lineage_wf_parser.add_argument('-q', '--quiet', dest='bQuiet', action="store_true", default=False, help="suppress console output")
294    lineage_wf_parser.add_argument('--tmpdir', action=ChangeTempAction, help="specify an alternative directory for temporary files")
295
296    # run taxonomic-specific workflow
297    taxonomy_wf_parser = subparsers.add_parser('taxonomy_wf',
298                                        formatter_class=CustomHelpFormatter,
299                                        description='Runs taxon_set, analyze, qa',
300                                        epilog='Example: checkm taxonomy_wf domain Bacteria ./bins ./output')
301
302    taxonomy_wf_parser.add_argument('rank', help="taxonomic rank", choices=['life', 'domain', 'phylum', 'class', 'order', 'family', 'genus', 'species'])
303    taxonomy_wf_parser.add_argument('taxon', help="taxon of interest")
304    taxonomy_wf_parser.add_argument('bin_dir', help="directory containing bins (fasta format)")
305    taxonomy_wf_parser.add_argument('output_dir', help="directory to write output files")
306    taxonomy_wf_parser.add_argument('--ali', dest='bKeepAlignment', action="store_true", default=False, help="generate HMMER alignment file for each bin")
307    taxonomy_wf_parser.add_argument('--nt', dest='bNucORFs', action="store_true", default=False, help="generate nucleotide gene sequences for each bin")
308    taxonomy_wf_parser.add_argument('-g', '--genes', dest='bCalledGenes', action="store_true", default=False, help="bins contain genes as amino acids instead of nucleotide contigs")
309    taxonomy_wf_parser.add_argument('--individual_markers', dest='bIndividualMarkers', action="store_true", default=False, help="treat marker as independent (i.e., ignore co-located set structure)")
310    taxonomy_wf_parser.add_argument('--skip_adj_correction', dest='bSkipAdjCorrection', action="store_true", default=False, help="do not exclude adjacent marker genes when estimating contamination")
311    taxonomy_wf_parser.add_argument('--skip_pseudogene_correction', dest='bSkipPseudoGeneCorrection', action="store_true", default=False, help="skip identification and filtering of pseudogenes")
312    taxonomy_wf_parser.add_argument('--aai_strain', type=float, default=0.9, help="AAI threshold used to identify strain heterogeneity")
313    taxonomy_wf_parser.add_argument('-a', '--alignment_file', default=None, help="produce file showing alignment of multi-copy genes and their AAI identity")
314    taxonomy_wf_parser.add_argument('--ignore_thresholds', dest='bIgnoreThresholds', action="store_true", default=False, help="ignore model-specific score thresholds")
315    taxonomy_wf_parser.add_argument('-e', '--e_value', type=float, default=DefaultValues.E_VAL, help="e-value cut off")
316    taxonomy_wf_parser.add_argument('-l', '--length', type=float, default=DefaultValues.LENGTH, help="percent overlap between target and query")
317    taxonomy_wf_parser.add_argument('-c', '--coverage_file', default=None, help="file containing coverage of each sequence; coverage information added to table type 2 (see coverage command)")
318    taxonomy_wf_parser.add_argument('-f', '--file', default='stdout', help="print results to file")
319    taxonomy_wf_parser.add_argument('--tab_table', dest='bTabTable', action="store_true", default=False, help="print tab-separated values table")
320    taxonomy_wf_parser.add_argument('-x', '--extension', default='fna', help="extension of bins (other files in directory are ignored)")
321    taxonomy_wf_parser.add_argument('-t', '--threads', type=int, default=1, help="number of threads")
322    taxonomy_wf_parser.add_argument('-q', '--quiet', dest='bQuiet', action="store_true", default=False, help="suppress console output")
323    taxonomy_wf_parser.add_argument('--tmpdir', action=ChangeTempAction, help="specify an alternative directory for temporary files")
324
325    # generic arguments for plots
326    plot_need_qa_results_parser = argparse.ArgumentParser(add_help=False)
327    plot_need_qa_results_parser.add_argument('results_dir', help="directory specified during qa command")
328
329    plot_parser = argparse.ArgumentParser(add_help=False)
330    plot_parser.add_argument('bin_dir', help="directory containing bins to plot (fasta format)")
331    plot_parser.add_argument('output_dir', help="directory to hold plots")
332    plot_parser.add_argument('--image_type', default='png', choices=['eps', 'pdf', 'png', 'ps', 'svg'], help='desired image type')
333    plot_parser.add_argument('--dpi', type=int, default=600, help='desired DPI of output image')
334    plot_parser.add_argument('--font_size', type=int, default=8, help='Desired font size')
335    plot_parser.add_argument('-x', '--extension', default='fna', help="extension of bins (other files in directory are ignored)")
336
337    plot_single_parser = argparse.ArgumentParser('plot_single',
338                                        parents=[plot_parser], add_help=False)
339    plot_single_parser.add_argument('--width', type=float, default=6.5, help='width of output image')
340    plot_single_parser.add_argument('--height', type=float, default=6.5, help='height of output image')
341
342    plot_double_parser = argparse.ArgumentParser('plot_double',
343                                        parents=[plot_parser], add_help=False)
344    plot_double_parser.add_argument('--width', type=float, default=6.5, help='width of output image')
345    plot_double_parser.add_argument('--height', type=float, default=3.5, help='height of output image')
346
347    plot_rows_parser = argparse.ArgumentParser('plot_rows',
348                                        parents=[plot_parser], add_help=False)
349    plot_rows_parser.add_argument('--width', type=float, default=6.5, help='width of output image')
350    plot_rows_parser.add_argument('--row_height', type=float, default=0.3, help='height of each row in the output image')
351
352    # GC plot
353    plot_gc_parser = subparsers.add_parser('gc_plot',
354                                        formatter_class=CustomHelpFormatter,
355                                        help='Create GC histogram and delta-GC plot.',
356                                        parents=[plot_double_parser],
357                                        description='Create GC histogram and delta-GC plot.',
358                                        epilog='Example: checkm gc_plot ./bins ./plots 95')
359
360    plot_gc_parser.add_argument('distributions', help='reference distribution(s) to plot; integer between 0 and 100', nargs='+', type=int, choices=xrange(0, 101), default=95, metavar='dist_value')
361    plot_gc_parser.add_argument('-w', '--gc_window_size', help="window size used to calculate GC histogram", type=int, default=5000)
362    plot_gc_parser.add_argument('-b', '--gc_bin_width', help="width of GC bars in histogram", type=float, default=0.01)
363    plot_gc_parser.add_argument('-q', '--quiet', dest='bQuiet', action="store_true", default=False, help="suppress console output")
364
365    # Coding density plot
366    plot_coding_parser = subparsers.add_parser('coding_plot',
367                                        formatter_class=CustomHelpFormatter,
368                                        parents=[plot_need_qa_results_parser, plot_double_parser],
369                                        description='Create coding density (CD) histogram and delta-CD plot.',
370                                        epilog='Example: checkm coding_plot ./output ./bins ./plots 95')
371
372    plot_coding_parser.add_argument('distributions', help='reference distribution(s) to plot; integer between 0 and 100', nargs='+', type=int, choices=xrange(0, 101), default=95, metavar='dist_value')
373    plot_coding_parser.add_argument('-w', '--cd_window_size', help="window size used to calculate CD histogram", type=int, default=10000)
374    plot_coding_parser.add_argument('-b', '--cd_bin_width', help="width of CD bars in histogram", type=float, default=0.01)
375    plot_coding_parser.add_argument('-q', '--quiet', dest='bQuiet', action="store_true", default=False, help="suppress console output")
376
377    # Tetranucleotide distance  plot
378    plot_tetra_parser = subparsers.add_parser('tetra_plot',
379                                        formatter_class=CustomHelpFormatter,
380                                        parents=[plot_need_qa_results_parser, plot_double_parser],
381                                        description='Create tetranucleotide distance (TD) histogram and delta-TD plot.',
382                                        epilog='Example: checkm tetra_plot ./output ./bins ./plots tetra.tsv 95')
383    plot_tetra_parser.add_argument('tetra_profile', help='tetranucleotide profiles for each bin (see tetra command)')
384    plot_tetra_parser.add_argument('distributions', help='reference distribution(s) to plot; integer between 0 and 100', nargs='+', type=int, choices=xrange(0, 101), default=95, metavar='dist_value')
385    plot_tetra_parser.add_argument('-w', '--td_window_size', help="window size used to calculate TD histogram", type=int, default=5000)
386    plot_tetra_parser.add_argument('-b', '--td_bin_width', help="width of TD bars in histogram", type=float, default=0.01)
387    plot_tetra_parser.add_argument('-q', '--quiet', dest='bQuiet', action="store_true", default=False, help="suppress console output")
388
389    # Reference distribution  plot
390    plot_dist_parser = subparsers.add_parser('dist_plot',
391                                        formatter_class=CustomHelpFormatter,
392                                        description='Create image with GC, CD, and TD distribution plots together.',
393                                        epilog='Example: checkm dist_plot ./output ./bins ./plots tetra.tsv 95')
394    plot_dist_parser.add_argument('results_dir', help="directory specified during analyze command")
395    plot_dist_parser.add_argument('bin_dir', help="directory containing bins to plot (fasta format)")
396    plot_dist_parser.add_argument('output_dir', help="directory to hold plots")
397    plot_dist_parser.add_argument('tetra_profile', help='tetranucleotide profiles for each sequence (see tetra command)')
398    plot_dist_parser.add_argument('distributions', help='reference distribution(s) to plot; integer between 0 and 100', nargs='+', type=int, choices=xrange(0, 101), default=95, metavar='dist_value')
399
400    plot_dist_parser.add_argument('--image_type', default='png', choices=['eps', 'pdf', 'png', 'ps', 'svg'], help='desired image type')
401    plot_dist_parser.add_argument('--dpi', type=int, default=600, help='desired DPI of output image')
402    plot_dist_parser.add_argument('--font_size', type=int, default=8, help='Desired font size')
403    plot_dist_parser.add_argument('-x', '--extension', default='fna', help="extension of bins (other files in directory are ignored)")
404    plot_dist_parser.add_argument('--width', type=float, default=6.5, help='width of output image')
405    plot_dist_parser.add_argument('--height', type=float, default=8, help='height of output image')
406
407    plot_dist_parser.add_argument('-a', '--gc_window_size', help="window size used to calculate GC histogram", type=int, default=5000)
408    plot_dist_parser.add_argument('-b', '--td_window_size', help="window size used to calculate TD histogram", type=int, default=5000)
409    plot_dist_parser.add_argument('-c', '--cd_window_size', help="window size used to calculate CD histogram", type=int, default=10000)
410    plot_dist_parser.add_argument('-1', '--gc_bin_width', help="width of GC bars in histogram", type=float, default=0.01)
411    plot_dist_parser.add_argument('-2', '--td_bin_width', help="width of TD bars in histogram", type=float, default=0.01)
412    plot_dist_parser.add_argument('-3', '--cd_bin_width', help="width of CD bars in histogram", type=float, default=0.01)
413    plot_dist_parser.add_argument('-q', '--quiet', dest='bQuiet', action="store_true", default=False, help="suppress console output")
414
415    # PCA plot of tetranucleotide signatures
416    plot_tetra_pca_parser = subparsers.add_parser('tetra_pca',
417                                        formatter_class=CustomHelpFormatter,
418                                        parents=[plot_parser],
419                                        description='PCA plot of tetranucleotide signatures.',
420                                        epilog='Example: checkm tetra_pca ./bins ./plots tetra.tsv')
421    plot_tetra_pca_parser.add_argument('tetra_profile', help='tetranucleotide profiles for each sequence (see tetra command)')
422    plot_tetra_pca_parser.add_argument('--width', type=float, default=6.5, help='width of output image')
423    plot_tetra_pca_parser.add_argument('--height', type=float, default=6.5, help='height of output image')
424    plot_tetra_pca_parser.add_argument('-q', '--quiet', dest='bQuiet', action="store_true", default=False, help="suppress console output")
425
426    # GC bias plots
427    plot_gc_bias_parser = subparsers.add_parser('gc_bias_plot',
428                                        formatter_class=CustomHelpFormatter,
429                                        parents=[plot_double_parser],
430                                        description='Plot bin coverage as a function of GC.',
431                                        epilog='Example: checkm gc_bias_plot ./bins ./plots example.bam')
432    plot_gc_bias_parser.add_argument('bam_file', help="BAM file to interrogate for coverage information")
433    plot_gc_bias_parser.add_argument('-w', '--window_size', help="window size used to calculate plot statistics", type=int, default=5000)
434    plot_gc_bias_parser.add_argument('-r', '--all_reads', action='store_true', help="use all reads to estimate coverage instead of just those in proper pairs")
435    plot_gc_bias_parser.add_argument('-a', '--min_align', help='minimum alignment length as percentage of read length', type=float, default=0.98)
436    plot_gc_bias_parser.add_argument('-e', '--max_edit_dist', help='maximum edit distance as percentage of read length', type=float, default=0.02)
437    plot_gc_bias_parser.add_argument('-t', '--threads', type=int, default=1, help="number of threads")
438    plot_gc_bias_parser.add_argument('-q', '--quiet', dest='bQuiet', action="store_true", default=False, help="suppress console output")
439
440    # PCA plot of coverage profiles
441    plot_cov_pca_parser = subparsers.add_parser('cov_pca',
442                                        formatter_class=CustomHelpFormatter,
443                                        parents=[plot_parser],
444                                        description='PCA plot of coverage profiles.',
445                                        epilog='Example: checkm cov_pca ./bins ./plots coverate.tsv')
446    plot_cov_pca_parser.add_argument('coverage_file', help="file indicating coverage of each sequence (see coverage command)")
447    plot_cov_pca_parser.add_argument('--width', type=float, default=6.5, help='width of output image')
448    plot_cov_pca_parser.add_argument('--height', type=float, default=6.5, help='height of output image')
449    plot_cov_pca_parser.add_argument('-q', '--quiet', dest='bQuiet', action="store_true", default=False, help="suppress console output")
450
451    # Nx-plot
452    plot_nx_parser = subparsers.add_parser('nx_plot',
453                                        formatter_class=CustomHelpFormatter,
454                                        parents=[plot_single_parser],
455                                        description='Create Nx-plots.',
456                                        epilog='Example: checkm nx_plot ./bins ./plots')
457
458    plot_nx_parser.add_argument('-s', '--step_size', help="x step size for calculating Nx", type=float, default=0.05)
459    plot_nx_parser.add_argument('-q', '--quiet', dest='bQuiet', action="store_true", default=False, help="suppress console output")
460
461    # Cumulative sequence length plot
462    plot_len_parser = subparsers.add_parser('len_plot',
463                                        formatter_class=CustomHelpFormatter,
464                                        parents=[plot_single_parser],
465                                        description='Cumulative sequence length plot.',
466                                        epilog='Example: checkm len_plot ./bins ./plots')
467
468    plot_len_parser.add_argument('-q', '--quiet', dest='bQuiet', action="store_true", default=False, help="suppress console output")
469
470    # Sequence length distribution plot
471    hist_len_parser = subparsers.add_parser('len_hist',
472                                        formatter_class=CustomHelpFormatter,
473                                            parents=[plot_single_parser],
474                                            description='Sequence length histogram.',
475                                        epilog='Example: checkm len_hist ./bins ./plots')
476
477    hist_len_parser.add_argument('-q', '--quiet', dest='bQuiet', action="store_true", default=False, help="suppress console output")
478
479    # Marker position plot
480    marker_plot_parser = subparsers.add_parser('marker_plot',
481                                        formatter_class=CustomHelpFormatter,
482                                        parents=[plot_need_qa_results_parser, plot_single_parser],
483                                        description='Plot position of marker genes on sequences.',
484                                        epilog='Example: checkm marker_plot ./output ./bins ./plots')
485
486    marker_plot_parser.add_argument('--fig_padding', type=float, default=0.2, help='white space to place around figure (in inches)')
487    marker_plot_parser.add_argument('-q', '--quiet', dest='bQuiet', action="store_true", default=False, help="suppress console output")
488
489    # Parallel coordinate plot
490    parallel_coord_plot_parser = subparsers.add_parser('par_plot',
491                                        formatter_class=CustomHelpFormatter,
492                                        parents=[plot_need_qa_results_parser, plot_single_parser],
493                                        description='Parallel coordinate plot of GC and coverage.',
494                                        epilog='Example: checkm par_plot ./output ./bins ./plots coverage.tsv')
495    parallel_coord_plot_parser.add_argument('coverage_file', help="file indicating coverage of each sequence (see coverage command)")
496    parallel_coord_plot_parser.add_argument('-q', '--quiet', dest='bQuiet', action="store_true", default=False, help="suppress console output")
497
498    # Bin QA plot
499    bin_qa_plot_parser = subparsers.add_parser('bin_qa_plot',
500                                        formatter_class=CustomHelpFormatter,
501                                        parents=[plot_need_qa_results_parser, plot_rows_parser],
502                                        description='Bar plot of bin completeness, contamination, and strain heterogeneity.',
503                                        epilog='Example: checkm bin_qa_plot ./output ./bins ./plots')
504    bin_qa_plot_parser.add_argument('--ignore_hetero', dest='bIgnoreHetero', action="store_true", help="do not plot strain heterogeneity")
505    bin_qa_plot_parser.add_argument('--aai_strain', type=float, default=0.9, help="AAI threshold used to identify strain heterogeneity")
506    bin_qa_plot_parser.add_argument('-q', '--quiet', dest='bQuiet', action="store_true", default=False, help="suppress console output")
507
508    # Identify unbinned sequences
509    unbinned_parser = subparsers.add_parser('unbinned',
510                                            formatter_class=CustomHelpFormatter,
511                                            description='Identify unbinned sequences.',
512                                            epilog='Example: checkm unbinned ./bins seqs.fna unbinned.fna unbinned_stats.tsv')
513    unbinned_parser.add_argument('bin_dir', help="directory containing bins (fasta format)")
514    unbinned_parser.add_argument('seq_file', help="sequences used to generate bins (fasta format)")
515    unbinned_parser.add_argument('output_seq_file', help="write unbinned sequences to file")
516    unbinned_parser.add_argument('output_stats_file', help="write unbinned sequence statistics to file")
517    unbinned_parser.add_argument('-x', '--extension', default='fna', help="extension of bins (other files in directory are ignored)")
518    unbinned_parser.add_argument('-s', '--min_seq_len', type=int, default=0, help="required length of sequence")
519    unbinned_parser.add_argument('-q', '--quiet', dest='bQuiet', action="store_true", default=False, help="suppress console output")
520
521    # Calculate coverage
522    coverage_parser = subparsers.add_parser('coverage',
523                                            formatter_class=CustomHelpFormatter,
524                                            description='Calculate coverage of sequences.',
525                                            epilog='Example: checkm coverage ./bins coverage.tsv example_1.bam example_2.bam')
526
527    coverage_parser.add_argument('bin_dir', help="directory containing bins (fasta format)")
528    coverage_parser.add_argument('output_file', help="print results to file")
529    coverage_parser.add_argument('bam_files', nargs='+', help="BAM files to parse")
530    coverage_parser.add_argument('-x', '--extension', default='fna', help="extension of bins (other files in directory are ignored)")
531    coverage_parser.add_argument('-r', '--all_reads', action='store_true', help="use all reads to estimate coverage instead of just those in proper pairs")
532    coverage_parser.add_argument('-a', '--min_align', help='minimum alignment length as percentage of read length', type=float, default=0.98)
533    coverage_parser.add_argument('-e', '--max_edit_dist', help='maximum edit distance as percentage of read length', type=float, default=0.02)
534    coverage_parser.add_argument('-m', '--min_qc', help='minimum quality score (in phred)', type=int, default=15)
535    coverage_parser.add_argument('-t', '--threads', type=int, default=1, help="number of threads")
536    coverage_parser.add_argument('-q', '--quiet', dest='bQuiet', action="store_true", default=False, help="suppress console output")
537
538    # Calculate tetranucleotide signatures
539    tetra_parser = subparsers.add_parser('tetra',
540                                            formatter_class=CustomHelpFormatter,
541                                            description='Calculate tetranucleotide signature of sequences.',
542                                            epilog='Example: checkm tetra seqs.fna tetra.tsv')
543
544    tetra_parser.add_argument('seq_file', help="sequences used to generate bins (fasta format)")
545    tetra_parser.add_argument('output_file', help="print results to file")
546    tetra_parser.add_argument('-t', '--threads', type=int, default=1, help="number of threads")
547    tetra_parser.add_argument('-q', '--quiet', dest='bQuiet', action="store_true", default=False, help="suppress console output")
548
549    # Calculate community profile
550    profile_parser = subparsers.add_parser('profile',
551                                            formatter_class=CustomHelpFormatter,
552                                            description='Calculate percentage of reads mapped to each bin.',
553                                            epilog='Example: checkm profile coverage.tsv')
554    profile_parser.add_argument('coverage_file', help="file indicating coverage of each sequence (see coverage command)")
555    profile_parser.add_argument('-f', '--file', default='stdout', help="print results to file")
556    profile_parser.add_argument('--tab_table', dest='bTabTable', action="store_true", default=False, help="print tab-separated values table")
557    profile_parser.add_argument('-q', '--quiet', dest='bQuiet', action="store_true", default=False, help="suppress console output")
558
559    # Join tab-separated values file
560    join_parser = subparsers.add_parser('join_tables',
561                                            formatter_class=CustomHelpFormatter,
562                                            description='Join tab-separated value tables containing bin information.',
563                                            epilog='Example: checkm join_tables table1.tsv table2.tsv')
564    join_parser.add_argument('tables', nargs='+', help="tab-separated table files with bin ids as their primary key")
565    join_parser.add_argument('-f', '--file', default='stdout', help="print results to file")
566    join_parser.add_argument('-q', '--quiet', dest='bQuiet', action="store_true", default=False, help="suppress console output")
567
568    # Find SSU rRNAs in sequences
569    ssu_finder_parser = subparsers.add_parser('ssu_finder',
570                                              formatter_class=CustomHelpFormatter,
571                                              description='Identify SSU (16S/18S) rRNAs in sequences.',
572                                              epilog='Example: checkm ssu_finder seqs.fna ./bins ./ssu_finder')
573    ssu_finder_parser.add_argument('seq_file', help="sequences used to generate bins (fasta format)")
574    ssu_finder_parser.add_argument('bin_dir', help="directory containing bins (fasta format)")
575    ssu_finder_parser.add_argument('output_dir', help="directory to write output files")
576
577    ssu_finder_parser.add_argument('-x', '--extension', default='fna', help="extension of bins (other files in directory are ignored)")
578    ssu_finder_parser.add_argument('-e', '--evalue', help='e-value threshold for identifying hits', type=float, default=1e-5)
579    ssu_finder_parser.add_argument('-c', '--concatenate', help='concatenate hits that are within the specified number of base pairs', type=int, default=200)
580    ssu_finder_parser.add_argument('-t', '--threads', help='number of threads', type=int, default=1)
581    ssu_finder_parser.add_argument('-q', '--quiet', dest='bQuiet', action="store_true", default=False, help="suppress console output")
582
583    # Compare two sets of bins (e.g., from alternative binning methods)
584    bin_compare_parser = subparsers.add_parser('bin_compare',
585                                               formatter_class=CustomHelpFormatter,
586                                               description='Compare two sets of bins.',
587                                               epilog='Example: checkm bin_compare seqs.fna ./bins1 ./bins2 bin_comparison.tsv')
588    bin_compare_parser.add_argument('seq_file', help="sequences used to generate bins (fasta format)")
589    bin_compare_parser.add_argument('bin_dir1', help="directory containing bins (fasta format)")
590    bin_compare_parser.add_argument('bin_dir2', help="directory containing bins (fasta format)")
591    bin_compare_parser.add_argument('output_file', help="output file showing overlap between bins")
592
593    bin_compare_parser.add_argument('-x', '--extension1', default='fna', help="extension of bins in directory 1")
594    bin_compare_parser.add_argument('-y', '--extension2', default='fna', help="extension of bins in directory 2")
595    bin_compare_parser.add_argument('-q', '--quiet', dest='bQuiet', action="store_true", default=False, help="suppress console output")
596
597    # Remove redundant bins from several binning attempts
598    bin_union_parser = subparsers.add_parser('bin_union',
599                                               formatter_class=argparse.RawDescriptionHelpFormatter,
600                                               description='Create a non-redundant set of bins from multiple sets of bins',
601                                               epilog='''Example: checkm bin_union_output bins1/ checkm_qa_tab_table1.tsv bins2/ checkm_qa_tab_table2.tsv
602
603The checkm_qa_tab_table.tsv files are generated through this process for each bin.
604checkm qa --tab_table ./output/lineage.ms ./output >checkm_qa_tab_table.tsv
605
606Also note that sequences can be assigned to multiple resulting bins.
607i.e. checkm unique will now fail''')
608    bin_union_parser.add_argument('output_dir', help="directory for outputting")
609    bin_union_parser.add_argument('bin_or_checkm_qa_table', nargs='+', help="bin directories and checkm_qa_table_tables (must have at least one of each)")
610    bin_union_parser.add_argument('-x', '--extension', default='fna', help="extension of bins in bin directories")
611    bin_union_parser.add_argument('--min_completeness', help="ignore bins with less completeness than this, as a percentage e.g. '70'", type=float, default=70)
612    bin_union_parser.add_argument('--max_contamination', help="ignore bins with more contamination than this, as a percentage e.g. '10'", type=float, default=10)
613    bin_union_parser.add_argument('-q', '--quiet', dest='bQuiet', action="store_true", default=False, help="suppress console output")
614
615    # Identify bins with complementary marker sets
616    merge_parser = subparsers.add_parser('merge',
617                                            formatter_class=CustomHelpFormatter,
618                                            description='Identify bins with complementary sets of marker genes.',
619                                            epilog='Example: checkm merge bacteria.ms ./bins ./output')
620    merge_parser.add_argument('marker_file', help="marker file to use for assessing potential bin mergers (marker set or HMM file)")
621    merge_parser.add_argument('bin_dir', help="directory containing bins (fasta format)")
622    merge_parser.add_argument('output_dir', help="directory to write output files")
623    merge_parser.add_argument('-g', '--genes', dest='bCalledGenes', action="store_true", default=False, help="bins contain genes as amino acids instead of nucleotide contigs")
624    merge_parser.add_argument('--delta_comp', help="minimum increase in completeness to report pair", type=float, default=5.0)
625    merge_parser.add_argument('--delta_cont', help="maximum increase in contamination to report pair", type=float, default=10.0)
626    merge_parser.add_argument('--merged_comp', help="minimum merged completeness to report pair", type=float, default=50.0)
627    merge_parser.add_argument('--merged_cont', help="maximum merged contamination to report pair", type=float, default=20.0)
628    merge_parser.add_argument('-x', '--extension', default='fna', help="extension of bins (other files in directory are ignored)")
629    merge_parser.add_argument('-t', '--threads', type=int, default=1, help="number of threads")
630    merge_parser.add_argument('-q', '--quiet', dest='bQuiet', action="store_true", default=False, help="suppress console output")
631
632    # Identify outlier sequences
633    outlier_parser = subparsers.add_parser('outliers',
634                                            formatter_class=CustomHelpFormatter,
635                                            parents=[plot_need_qa_results_parser],
636                                            description='Identify outliers in bins relative to reference distributions.',
637                                            epilog='Example: checkm outliers ./output ./bins tetra.tsv outliers.tsv')
638    outlier_parser.add_argument('bin_dir', help="directory containing bins (fasta format)")
639    outlier_parser.add_argument('tetra_profile', help='tetranucleotide profiles for each sequence (see tetra command)')
640    outlier_parser.add_argument('output_file', help="print results to file")
641    outlier_parser.add_argument('-d', '--distributions', help='reference distribution used to identify outliers; integer between 0 and 100', type=int, choices=xrange(0, 101), default=95, metavar='dist_value')
642    outlier_parser.add_argument('-r', '--report_type', help="report sequences that are outliers in 'all' or 'any' reference distribution", choices=['any', 'all'], default='any')
643    outlier_parser.add_argument('-x', '--extension', default='fna', help="extension of bins (other files in directory are ignored)")
644    outlier_parser.add_argument('-q', '--quiet', dest='bQuiet', action="store_true", default=False, help="suppress console output")
645
646    # Modify a bin
647    modify_parser = subparsers.add_parser('modify',
648                                            formatter_class=CustomHelpFormatter,
649                                            description='Modify sequences in a bin.',
650                                            epilog='Example: checkm modify -r seq_id1 -r seq_id2 seqs.fna bin.fna new_bin.fna')
651    modify_parser.add_argument('seq_file', help="sequences used to generate bins (fasta format)")
652    modify_parser.add_argument('bin_file', help="bin to be modified")
653    modify_parser.add_argument('output_file', help="modified bin")
654    modify_parser.add_argument('-a', '--add', action='append', help="ID of sequence to add to bin (may specify multiple times)")
655    modify_parser.add_argument('-r', '--remove', action='append', help="ID of sequence to remove from bin (may specify multiple times)")
656    modify_parser.add_argument('-o', '--outlier_file', help="remove all sequences marked as outliers in the bin (see outlier command)")
657    modify_parser.add_argument('-q', '--quiet', dest='bQuiet', action="store_true", default=False, help="suppress console output")
658
659    # Ensure uniqueness of bins
660    unique_parser = subparsers.add_parser('unique',
661                                            formatter_class=CustomHelpFormatter,
662                                            description='Ensure no sequences are assigned to multiple bins.',
663                                            epilog='Example: checkm unique ./bins')
664    unique_parser.add_argument('bin_dir', help="directory containing bins (fasta format)")
665    unique_parser.add_argument('-x', '--extension', default='fna', help="extension of bins (all other files in bin directory are ignored)")
666
667    # Quick test of CheckM
668    test_parser = subparsers.add_parser('test',
669                                            formatter_class=CustomHelpFormatter,
670                                            description='Test CheckM on E. coli genome.',
671                                            epilog='Example: checkm test ~/checkm_test')
672    test_parser.add_argument('output_dir', help="output directory for test data")
673    test_parser.add_argument('--tmpdir', action=ChangeTempAction, help="specify an alternative directory for temporary files")
674
675    # debug and development
676    if False:
677        debug_parser = subparsers.add_parser('debug',
678                                            formatter_class=CustomHelpFormatter,
679                                            description='Rogue mode for use in testing new features.')
680        debug_parser.add_argument('data', help="some data")
681
682    # get and check options
683    args = None
684    if(len(sys.argv) == 1 or sys.argv[1] == '-h' or sys.argv == '--help'):
685        printHelp()
686        sys.exit(0)
687    else:
688        args = parser.parse_args()
689
690    # setup logging
691    silent = False
692    if hasattr(args, 'bQuiet') and args.bQuiet:
693        silent = True
694
695    try:
696        logger_setup(args.output_dir, "checkm.log", "CheckM", version(), silent)
697    except:
698        logger_setup(None, "checkm.log", "CheckM", version(), silent)
699
700    # do what we came here to do
701    try:
702        checkmParser = main.OptionsParser()
703        if(False):
704            # import pstats
705            # p = pstats.Stats('prof')
706            # p.sort_stats('cumulative').print_stats(10)
707            # p.sort_stats('time').print_stats(10)
708            import cProfile
709            cProfile.run('checkmParser.parseOptions(args)', 'prof')
710        elif False:
711            import pdb
712            pdb.run(checkmParser.parseOptions(args))
713        else:
714            checkmParser.parseOptions(args)
715    except SystemExit:
716        print "\n  Controlled exit resulting from an unrecoverable error or warning."
717    except:
718        print "\nUnexpected error:", sys.exc_info()[0]
719        raise
720