1 /*
2  * Copyright 2011, Ben Langmead <langmea@cs.jhu.edu>
3  *
4  * This file is part of Bowtie 2.
5  *
6  * Bowtie 2 is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * Bowtie 2 is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with Bowtie 2.  If not, see <http://www.gnu.org/licenses/>.
18  */
19 
#include <zlib.h>
#include <getopt.h>
#include <cassert>
#include <cerrno>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include "assert_helpers.h"
#include "endian_swap.h"
#include "bt2_idx.h"
#include "formats.h"
#include "sequence_io.h"
#include "tokenize.h"
#include "timer.h"
#include "ref_read.h"
#include "filebuf.h"
#include "reference.h"
#include "ds.h"
#ifdef WITH_ZSTD
#include "zstd_decompress.h"
#endif
40 
41 /**
42  * \file Driver for the bowtie-build indexing tool.
43  */
44 
45 // Build parameters
46 int verbose;
47 static int sanityCheck;
48 static int format;
49 static TIndexOffU bmax;
50 static TIndexOffU bmaxMultSqrt;
51 static uint32_t bmaxDivN;
52 static int dcv;
53 static int noDc;
54 static int entireSA;
55 static int seed;
56 static int showVersion;
57 //   Ebwt parameters
58 static int32_t lineRate;
59 static int32_t linesPerSide;
60 static int32_t offRate;
61 static int32_t ftabChars;
62 static int  bigEndian;
63 static bool nsToAs;    // convert Ns to As
64 static bool doSaFile;  // make a file with just the suffix array in it
65 static bool doBwtFile; // make a file with just the BWT string in it
66 static bool autoMem;
67 static bool packed;
68 static bool writeRef;
69 static bool justRef;
70 static bool reverseEach;
71 static int nthreads;
72 static string wrapper;
73 
resetOptions()74 static void resetOptions() {
75 	verbose      = true;  // be talkative (default)
76 	sanityCheck  = 0;     // do slow sanity checks
77 	format       = FASTA; // input sequence format
78 	bmax         = OFF_MASK; // max blockwise SA bucket size
79 	bmaxMultSqrt = OFF_MASK; // same, as multplier of sqrt(n)
80 	bmaxDivN     = 4;          // same, as divisor of n
81 	dcv          = 1024;  // bwise SA difference-cover sample sz
82 	noDc         = 0;     // disable difference-cover sample
83 	entireSA     = 0;     // 1 = disable blockwise SA
84 	seed         = 0;     // srandom seed
85 	showVersion  = 0;     // just print version and quit?
86 	//   Ebwt parameters
87 	lineRate     = Ebwt::default_lineRate; // a "line" is 64 or 128 bytes
88 	linesPerSide = 1;  // 1 64-byte line on a side
89 	offRate      = 4;  // sample 1 out of 16 SA elts
90 	ftabChars    = 10; // 10 chars in initial lookup table
91 	bigEndian    = 0;  // little endian
92 	nsToAs       = false; // convert reference Ns to As prior to indexing
93 	doSaFile     = false; // make a file with just the suffix array in it
94 	doBwtFile    = false; // make a file with just the BWT string in it
95 	autoMem      = true;  // automatically adjust memory usage parameters
96 	packed       = false; //
97 	writeRef     = true;  // write compact reference to .3.gEbwt_ext/.4.gEbwt_ext
98 	justRef      = false; // *just* write compact reference, don't index
99 	reverseEach  = false;
100 	nthreads     = 1;
101 	wrapper.clear();
102 }
103 
// Identifiers returned by getopt_long for options that only have a long
// form.  Numbering starts at 256 so that no value can collide with a
// single-character short option.
enum {
	ARG_BMAX         = 256,
	ARG_BMAX_MULT    = 257,
	ARG_BMAX_DIV     = 258,
	ARG_DCV          = 259,
	ARG_SEED         = 260,
	ARG_CUTOFF       = 261,
	ARG_PMAP         = 262,
	ARG_NTOA         = 263,
	ARG_USAGE        = 264,
	ARG_REVERSE_EACH = 265,
	ARG_SA           = 266,
	ARG_THREADS      = 267,
	ARG_WRAPPER      = 268
};
120 
121 /**
122  * Print a detailed usage message to the provided output stream.
123  */
printUsage(ostream & out)124 static void printUsage(ostream& out) {
125 	out << "Bowtie 2 version " << string(BOWTIE2_VERSION).c_str() << " by Ben Langmead (langmea@cs.jhu.edu, www.cs.jhu.edu/~langmea)" << endl;
126 
127 #ifdef BOWTIE_64BIT_INDEX
128 	string tool_name = "bowtie2-build-l";
129 #else
130 	string tool_name = "bowtie2-build-s";
131 #endif
132 	if(wrapper == "basic-0") {
133 		tool_name = "bowtie2-build";
134 	}
135 
136 	//               1         2         3         4         5         6         7         8
137 	//      12345678901234567890123456789012345678901234567890123456789012345678901234567890
138 	out << "Usage: " << tool_name << " [options]* <reference_in> <bt2_index_base>" << endl
139 	    << "    reference_in            comma-separated list of files with ref sequences" << endl
140 	    << "    bt2_index_base          write " + gEbwt_ext + " data to files with this dir/basename" << endl
141 	    << "*** Bowtie 2 indexes work only with v2 (not v1).  Likewise for v1 indexes. ***" << endl
142 	    << "Options:" << endl
143 	    << "    -f                      reference files are Fasta (default)" << endl
144 	    << "    -c                      reference sequences given on cmd line (as" << endl
145 	    << "                            <reference_in>)" << endl;
146 	if(wrapper == "basic-0") {
147 	out << "    --large-index           force generated index to be 'large', even if ref" << endl
148 		<< "                            has fewer than 4 billion nucleotides" << endl
149 		<< "    --debug                 use the debug binary; slower, assertions enabled" << endl
150 		<< "    --sanitized             use sanitized binary; slower, uses ASan and/or UBSan" << endl
151 		<< "    --verbose               log the issued command" << endl;
152 	}
153 	out << "    -a/--noauto             disable automatic -p/--bmax/--dcv memory-fitting" << endl
154 	    << "    -p/--packed             use packed strings internally; slower, less memory" << endl
155 	    << "    --bmax <int>            max bucket sz for blockwise suffix-array builder" << endl
156 	    << "    --bmaxdivn <int>        max bucket sz as divisor of ref len (default: 4)" << endl
157 	    << "    --dcv <int>             diff-cover period for blockwise (default: 1024)" << endl
158 	    << "    --nodc                  disable diff-cover (algorithm becomes quadratic)" << endl
159 	    << "    -r/--noref              don't build .3/.4 index files" << endl
160 	    << "    -3/--justref            just build .3/.4 index files" << endl
161 	    << "    -o/--offrate <int>      SA is sampled every 2^<int> BWT chars (default: 5)" << endl
162 	    << "    -t/--ftabchars <int>    # of chars consumed in initial lookup (default: 10)" << endl
163 	    << "    --threads <int>         # of threads" << endl
164 	    //<< "    --ntoa                  convert Ns in reference to As" << endl
165 	    //<< "    --big --little          endianness (default: little, this host: "
166 	    //<< (currentlyBigEndian()? "big":"little") << ")" << endl
167 	    << "    --seed <int>            seed for random number generator" << endl
168 	    << "    -q/--quiet              verbose output (for debugging)" << endl
169 	    << "    -h/--help               print detailed description of tool and its options" << endl
170 	    << "    --usage                 print this usage message" << endl
171 	    << "    --version               print version information and quit" << endl
172 	    ;
173 	if(wrapper.empty()) {
174 		cerr << endl
175 		     << "*** Warning ***" << endl
176 			 << "'" << tool_name << "' was run directly.  It is recommended "
177 			 << "that you run the wrapper script 'bowtie2-build' instead."
178 			 << endl << endl;
179 	}
180 }
181 
182 static const char *short_options = "qraph?nscfl:i:o:t:h:3C";
183 
184 static struct option long_options[] = {
185 	{(char*)"quiet",        no_argument,       0,            'q'},
186 	{(char*)"sanity",       no_argument,       0,            's'},
187 	{(char*)"packed",       no_argument,       0,            'p'},
188 	{(char*)"little",       no_argument,       &bigEndian,   0},
189 	{(char*)"big",          no_argument,       &bigEndian,   1},
190 	{(char*)"bmax",         required_argument, 0,            ARG_BMAX},
191 	{(char*)"bmaxmultsqrt", required_argument, 0,            ARG_BMAX_MULT},
192 	{(char*)"bmaxdivn",     required_argument, 0,            ARG_BMAX_DIV},
193 	{(char*)"dcv",          required_argument, 0,            ARG_DCV},
194 	{(char*)"nodc",         no_argument,       &noDc,        1},
195 	{(char*)"seed",         required_argument, 0,            ARG_SEED},
196 	{(char*)"entiresa",     no_argument,       &entireSA,    1},
197 	{(char*)"version",      no_argument,       &showVersion, 1},
198 	{(char*)"noauto",       no_argument,       0,            'a'},
199 	{(char*)"noblocks",     required_argument, 0,            'n'},
200 	{(char*)"linerate",     required_argument, 0,            'l'},
201 	{(char*)"linesperside", required_argument, 0,            'i'},
202 	{(char*)"offrate",      required_argument, 0,            'o'},
203 	{(char*)"ftabchars",    required_argument, 0,            't'},
204 	{(char*)"help",         no_argument,       0,            'h'},
205 	{(char*)"ntoa",         no_argument,       0,            ARG_NTOA},
206 	{(char*)"justref",      no_argument,       0,            '3'},
207 	{(char*)"noref",        no_argument,       0,            'r'},
208 	{(char*)"sa",           no_argument,       0,            ARG_SA},
209 	{(char*)"reverse-each", no_argument,       0,            ARG_REVERSE_EACH},
210 	{(char*)"threads",      required_argument, 0,            ARG_THREADS},
211 	{(char*)"usage",        no_argument,       0,            ARG_USAGE},
212 	{(char*)"wrapper",      required_argument, 0,            ARG_WRAPPER},
213 	{(char*)0, 0, 0, 0} // terminator
214 };
215 
216 /**
217  * Parse an int out of optarg and enforce that it be at least 'lower';
218  * if it is less than 'lower', then output the given error message and
219  * exit with an error and a usage message.
220  */
221 template<typename T>
parseNumber(T lower,const char * errmsg)222 static T parseNumber(T lower, const char *errmsg) {
223 	char *endPtr= NULL;
224 	T t = (T)strtoll(optarg, &endPtr, 10);
225 	if (endPtr != NULL) {
226 		if (t < lower) {
227 			cerr << errmsg << endl;
228 			printUsage(cerr);
229 			throw 1;
230 		}
231 		return t;
232 	}
233 	cerr << errmsg << endl;
234 	printUsage(cerr);
235 	throw 1;
236 	return -1;
237 }
238 
239 /**
240  * Read command-line arguments
241  */
parseOptions(int argc,const char ** argv)242 static bool parseOptions(int argc, const char **argv) {
243 	int option_index = 0;
244 	int next_option;
245 	bool bmaxDivNSet = false;
246 	bool abort = false;
247 	do {
248 		next_option = getopt_long(
249 			argc, const_cast<char**>(argv),
250 			short_options, long_options, &option_index);
251 		switch (next_option) {
252 			case ARG_WRAPPER:
253 				wrapper = optarg;
254 				break;
255 			case 'f': format = FASTA; break;
256 			case 'c': format = CMDLINE; break;
257 			case 'p': packed = true; break;
258 			case 'l':
259 				lineRate = parseNumber<int>(3, "-l/--lineRate arg must be at least 3");
260 				break;
261 			case 'i':
262 				linesPerSide = parseNumber<int>(1, "-i/--linesPerSide arg must be at least 1");
263 				break;
264 			case 'o':
265 				offRate = parseNumber<int>(0, "-o/--offRate arg must be at least 0");
266 				break;
267 			case '3':
268 				justRef = true;
269 				break;
270 			case 't':
271 				ftabChars = parseNumber<int>(1, "-t/--ftabChars arg must be at least 1");
272 				if (ftabChars > 16) {
273 					std::cerr << "-t/--ftabChars arg must not exceed 16" << std::endl;
274 					throw 1;
275 				}
276 				break;
277 			case 'n':
278 				// all f-s is used to mean "not set", so put 'e' on end
279 				bmax = 0xfffffffe;
280 				break;
281 			case 'h':
282 			case ARG_USAGE:
283 				printUsage(cout);
284 				abort = true;
285 				break;
286 			case ARG_BMAX:
287 				bmax = parseNumber<TIndexOffU>(1, "--bmax arg must be at least 1");
288 				bmaxMultSqrt = OFF_MASK; // don't use multSqrt
289 				bmaxDivN = 0xffffffff;     // don't use multSqrt
290 				break;
291 			case ARG_BMAX_MULT:
292 				bmaxMultSqrt = parseNumber<TIndexOffU>(1, "--bmaxmultsqrt arg must be at least 1");
293 				bmax = OFF_MASK;     // don't use bmax
294 				bmaxDivN = 0xffffffff; // don't use multSqrt
295 				break;
296 			case ARG_BMAX_DIV:
297 				bmaxDivNSet = true;
298 				bmaxDivN = parseNumber<uint32_t>(1, "--bmaxdivn arg must be at least 1");
299 				bmax = OFF_MASK;         // don't use bmax
300 				bmaxMultSqrt = OFF_MASK; // don't use multSqrt
301 				break;
302 			case ARG_DCV:
303 				dcv = parseNumber<int>(3, "--dcv arg must be at least 3");
304 				break;
305 			case ARG_SEED:
306 				seed = parseNumber<int>(0, "--seed arg must be at least 0");
307 				break;
308 			case ARG_REVERSE_EACH:
309 				reverseEach = true;
310 				break;
311 			case ARG_SA:
312 				doSaFile = true;
313 				break;
314 			case ARG_NTOA: nsToAs = true; break;
315 			case ARG_THREADS:
316 				nthreads = parseNumber<int>(0, "--threads arg must be at least 1");
317 				break;
318 			case 'a': autoMem = false; break;
319 			case 'q': verbose = false; break;
320 			case 's': sanityCheck = true; break;
321 			case 'r': writeRef = false; break;
322 
323 			case -1: /* Done with options. */
324 				break;
325 			case 0:
326 				if (long_options[option_index].flag != 0)
327 					break;
328 			default:
329 				printUsage(cerr);
330 				throw 1;
331 		}
332 	} while(next_option != -1);
333 	if(bmax < 40) {
334 		cerr << "Warning: specified bmax is very small (" << bmax << ").  This can lead to" << endl
335 		     << "extremely slow performance and memory exhaustion.  Perhaps you meant to specify" << endl
336 		     << "a small --bmaxdivn?" << endl;
337 	}
338 	if (!bmaxDivNSet) {
339 		bmaxDivN *= nthreads;
340 	}
341 	return abort;
342 }
343 
344 EList<string> filesWritten;
345 
346 /**
347  * Delete all the index files that we tried to create.  For when we had to
348  * abort the index-building process due to an error.
349  */
deleteIdxFiles(const string & outfile,bool doRef,bool justRef)350 static void deleteIdxFiles(
351 	const string& outfile,
352 	bool doRef,
353 	bool justRef)
354 {
355 
356 	for(size_t i = 0; i < filesWritten.size(); i++) {
357 		cerr << "Deleting \"" << filesWritten[i].c_str()
358 		     << "\" file written during aborted indexing attempt." << endl;
359 		remove(filesWritten[i].c_str());
360 	}
361 }
362 
namespace {
/**
 * Scope guard that deletes every FileBuf stored in a list when the guard
 * goes out of scope, so the input streams are released on every exit
 * path of driver() (normal return, early return and exceptions).
 */
struct FileBufListGuard {
	explicit FileBufListGuard(EList<FileBuf*>& l) : list_(l) { }
	~FileBufListGuard() {
		for(size_t i = 0; i < list_.size(); i++) {
			// FileBuf object closes file when deconstructed;
			// deleting a NULL entry is a no-op.
			delete list_[i];
		}
	}
private:
	// Not copyable: exactly one guard owns the streams of a list.
	FileBufListGuard(const FileBufListGuard&);
	FileBufListGuard& operator=(const FileBufListGuard&);
	EList<FileBuf*>& list_;
};
}

/**
 * Drive the index construction process and optionally sanity-check the
 * result.
 *
 * Opens one input stream per entry of 'infiles' (or a single in-memory
 * stream when sequences were given on the command line), scans the
 * reference sizes, optionally writes the compact reference files and,
 * unless only the reference was requested, builds the Ebwt index.
 *
 * Throws the int 1 if an input cannot be opened or if every input is
 * empty.
 *
 * @param infile  the raw comma-separated input argument (unused here;
 *                the individual entries come from 'infiles')
 * @param infiles input file names, or literal sequences for CMDLINE
 * @param outfile basename of the index files to write
 * @param packed  whether to use the packed string representation
 * @param reverse one of the REF_READ_* constants; 0 builds the forward
 *                index
 */
template<typename TStr>
static void driver(
	const string& infile,
	EList<string>& infiles,
	const string& outfile,
	bool packed,
	int reverse)
{
	EList<FileBuf*> is(MISC_CAT);
	// Declared right after 'is' so that it is destroyed after everything
	// that uses the streams (in particular the Ebwt object below).
	FileBufListGuard isGuard(is);
	bool bisulfite = false;
	RefReadInParams refparams(false, reverse, nsToAs, bisulfite);
	assert_gt(infiles.size(), 0);
	if(format == CMDLINE) {
		// Adapt sequence strings to stringstreams open for input
		// NOTE(review): whether FileBuf takes ownership of 'ss' is not
		// visible from this file -- confirm before freeing it here.
		stringstream *ss = new stringstream();
		for(size_t i = 0; i < infiles.size(); i++) {
			(*ss) << ">" << i << endl << infiles[i].c_str() << endl;
		}
		FileBuf *fb = new FileBuf(ss);
		assert(fb != NULL);
		assert(!fb->eof());
		assert(fb->get() == '>');
		ASSERT_ONLY(fb->reset());
		assert(!fb->eof());
		is.push_back(fb);
	} else {
		// Adapt sequence files to ifstreams
		for(size_t i = 0; i < infiles.size(); i++) {
			FileBuf *fb;

			// Choose the reader from the file extension; files without
			// an extension are opened with gzopen as well.
			size_t idx = infiles[i].find_last_of(".");
			std::string ext = (idx == std::string::npos) ? "" : infiles[i].substr(idx + 1);
			if (ext == "" || ext == "gz" || ext == "Z") {
				gzFile zFp = gzopen(infiles[i].c_str(), "rb");
				if (zFp == NULL) {
					cerr << "Error: could not open "<< infiles[i].c_str() << endl;
					throw 1;
				}
				fb = new FileBuf(zFp);
#ifdef WITH_ZSTD
			} else if (ext == "zstd" || ext == "zst") {
				zstdStrm *zstdFp = zstdOpen(infiles[i].c_str());
				if (zstdFp == NULL) {
					cerr << "Error: could not open " << infiles[i].c_str() << endl;
					throw 1;
				}
				fb = new FileBuf(zstdFp);
#endif
			} else {
				FILE *f = fopen(infiles[i].c_str(), "rb");
				if (f == NULL) {
					cerr << "Error: could not open "<< infiles[i].c_str() << endl;
					throw 1;
				}
				fb = new FileBuf(f);
			}
			assert(fb != NULL);
			if(fb->peek() == -1 || fb->eof()) {
				// Name the file that is actually empty, not the whole
				// comma-separated input list.
				cerr << "Warning: Empty fasta file: '" << infiles[i].c_str() << "'" << endl;
				// This stream never enters 'is', so release it here.
				delete fb;
				continue;
			}
			assert(!fb->eof());
			assert(fb->get() == '>');
			ASSERT_ONLY(fb->reset());
			assert(!fb->eof());
			is.push_back(fb);
		}
	}
	if(is.empty()) {
		cerr << "Warning: All fasta inputs were empty" << endl;
		throw 1;
	}
	if(!reverse) {
#ifdef BOWTIE_64BIT_INDEX
		if (verbose) cerr << "Building a LARGE index" << endl;
#else
		if (verbose) cerr << "Building a SMALL index" << endl;
#endif
	}
	// Vector for the ordered list of "records" comprising the input
	// sequences.  A record represents a stretch of unambiguous
	// characters in one of the input sequences.
	EList<RefRecord> szs(MISC_CAT);
	std::pair<size_t, size_t> sztot;
	{
		if(verbose) cout << "Reading reference sizes" << endl;
		Timer _t(cout, "  Time reading reference sizes: ", verbose);
		if(!reverse && (writeRef || justRef)) {
			// Record the files first so that they get cleaned up if
			// the build is aborted.
			filesWritten.push_back(outfile + ".3." + gEbwt_ext);
			filesWritten.push_back(outfile + ".4." + gEbwt_ext);
			sztot = BitPairReference::szsFromFasta(is, outfile, bigEndian, refparams, szs, sanityCheck);
		} else {
			sztot = BitPairReference::szsFromFasta(is, string(), bigEndian, refparams, szs, sanityCheck);
		}
	}
	// The input streams are released by isGuard on this early return too.
	if(justRef) return;
	assert_gt(sztot.first, 0);
	assert_gt(sztot.second, 0);
	assert_gt(szs.size(), 0);
	// Construct index from input strings and parameters
	filesWritten.push_back(outfile + ".1." + gEbwt_ext);
	filesWritten.push_back(outfile + ".2." + gEbwt_ext);
	Ebwt ebwt(
		TStr(),
		packed,
		0,
		1,            // TODO: maybe not?
		lineRate,
		offRate,      // suffix-array sampling rate
		ftabChars,    // number of chars in initial arrow-pair calc
		nthreads,     // number of threads
		outfile,      // basename for .?.ebwt files
		reverse == 0, // fw
		!entireSA,    // useBlockwise
		bmax,         // block size for blockwise SA builder
		bmaxMultSqrt, // block size as multiplier of sqrt(len)
		bmaxDivN,     // block size as divisor of len
		noDc? 0 : dcv,// difference-cover period
		is,           // list of input streams
		szs,          // list of reference sizes
		(TIndexOffU)sztot.first,  // total size of all unambiguous ref chars
		refparams,    // reference read-in parameters
		seed,         // pseudo-random number generator seed
		-1,           // override offRate
		doSaFile,     // make a file with just the suffix array in it
		doBwtFile,    // make a file with just the BWT string in it
		verbose,      // be talkative
		autoMem,      // pass exceptions up to the toplevel so that we can adjust memory settings automatically
		sanityCheck); // verify results and internal consistency
	// Note that the Ebwt is *not* resident in memory at this time.  To
	// load it into memory, call ebwt.loadIntoMemory()
	if(verbose) {
		// Print Ebwt's vital stats
		ebwt.eh().print(cout);
	}
	if(sanityCheck) {
		// Try restoring the original string (if there were
		// multiple texts, what we'll get back is the joined,
		// padded string, not a list)
		ebwt.loadIntoMemory(
			0,
			reverse ? (refparams.reverse == REF_READ_REVERSE) : 0,
			true,  // load SA sample?
			true,  // load ftab?
			true,  // load rstarts?
			false,
			false);
		SString<char> s2;
		ebwt.restore(s2);
		ebwt.evictFromMemory();
		{
			SString<char> joinedss = Ebwt::join<SString<char> >(
				is,          // list of input streams
				szs,         // list of reference sizes
				(TIndexOffU)sztot.first, // total size of all unambiguous ref chars
				refparams,   // reference read-in parameters
				seed);       // pseudo-random number generator seed
			if(refparams.reverse == REF_READ_REVERSE) {
				joinedss.reverse();
			}
			assert_eq(joinedss.length(), s2.length());
			assert(sstr_eq(joinedss, s2));
		}
		if(verbose) {
			if(s2.length() < 1000) {
				cout << "Passed restore check: " << s2.toZBuf() << endl;
			} else {
				cout << "Passed restore check: (" << s2.length() << " chars)" << endl;
			}
		}
	}
	// The input streams in 'is' are deleted (and thereby closed) by
	// isGuard when this function returns.
}
545 
546 static const char *argv0 = NULL;
547 
548 extern "C" {
549 /**
550  * main function.  Parses command-line arguments.
551  */
bowtie_build(int argc,const char ** argv)552 int bowtie_build(int argc, const char **argv) {
553 	string outfile;
554 	try {
555 		// Reset all global state, including getopt state
556 		opterr = optind = 1;
557 		resetOptions();
558 
559 		string infile;
560 		EList<string> infiles(MISC_CAT);
561 
562 		if(parseOptions(argc, argv)) {
563 			return 0;
564 		}
565 		argv0 = argv[0];
566 		if(showVersion) {
567 			cout << argv0 << " version " << string(BOWTIE2_VERSION).c_str() << endl;
568 			if(sizeof(void*) == 4) {
569 				cout << "32-bit" << endl;
570 			} else if(sizeof(void*) == 8) {
571 				cout << "64-bit" << endl;
572 			} else {
573 				cout << "Neither 32- nor 64-bit: sizeof(void*) = " << sizeof(void*) << endl;
574 			}
575 			cout << "Built on " << BUILD_HOST << endl;
576 			cout << BUILD_TIME << endl;
577 			cout << "Compiler: " << COMPILER_VERSION << endl;
578 			cout << "Options: " << COMPILER_OPTIONS << endl;
579 			cout << "Sizeof {int, long, long long, void*, size_t, off_t}: {"
580 				 << sizeof(int)
581 				 << ", " << sizeof(long) << ", " << sizeof(long long)
582 				 << ", " << sizeof(void *) << ", " << sizeof(size_t)
583 				 << ", " << sizeof(off_t) << "}" << endl;
584 			return 0;
585 		}
586 
587 		// Get input filename
588 		if(optind >= argc) {
589 			cerr << "No input sequence or sequence file specified!" << endl;
590 			printUsage(cerr);
591 			return 1;
592 		}
593 		infile = argv[optind++];
594 
595 		// Get output filename
596 		if(optind >= argc) {
597 			cerr << "No output file specified!" << endl;
598 			printUsage(cerr);
599 			return 1;
600 		}
601 		outfile = argv[optind++];
602 
603 		tokenize(infile, ",", infiles);
604 		if(infiles.size() < 1) {
605 			cerr << "Tokenized input file list was empty!" << endl;
606 			printUsage(cerr);
607 			return 1;
608 		}
609 
610 		// Optionally summarize
611 		if(verbose) {
612 			cout << "Settings:" << endl
613 				 << "  Output files: \"" << outfile.c_str() << ".*." + gEbwt_ext + "\"" << endl
614 				 << "  Line rate: " << lineRate << " (line is " << (1<<lineRate) << " bytes)" << endl
615 				 << "  Lines per side: " << linesPerSide << " (side is " << ((1<<lineRate)*linesPerSide) << " bytes)" << endl
616 				 << "  Offset rate: " << offRate << " (one in " << (1<<offRate) << ")" << endl
617 				 << "  FTable chars: " << ftabChars << endl
618 				 << "  Strings: " << (packed? "packed" : "unpacked") << endl
619 				 ;
620 			if(bmax == OFF_MASK) {
621 				cout << "  Max bucket size: default" << endl;
622 			} else {
623 				cout << "  Max bucket size: " << bmax << endl;
624 			}
625 			if(bmaxMultSqrt == OFF_MASK) {
626 				cout << "  Max bucket size, sqrt multiplier: default" << endl;
627 			} else {
628 				cout << "  Max bucket size, sqrt multiplier: " << bmaxMultSqrt << endl;
629 			}
630 			if(bmaxDivN == 0xffffffff) {
631 				cout << "  Max bucket size, len divisor: default" << endl;
632 			} else {
633 				cout << "  Max bucket size, len divisor: " << bmaxDivN << endl;
634 			}
635 			cout << "  Difference-cover sample period: " << dcv << endl;
636 			cout << "  Endianness: " << (bigEndian? "big":"little") << endl
637 				 << "  Actual local endianness: " << (currentlyBigEndian()? "big":"little") << endl
638 				 << "  Sanity checking: " << (sanityCheck? "enabled":"disabled") << endl;
639 #ifdef NDEBUG
640 			cout << "  Assertions: disabled" << endl;
641 #else
642 			cout << "  Assertions: enabled" << endl;
643 #endif
644 			cout << "  Random seed: " << seed << endl;
645 			cout << "  Sizeofs: void*:" << sizeof(void*) << ", int:" << sizeof(int) << ", long:" << sizeof(long) << ", size_t:" << sizeof(size_t) << endl;
646 			cout << "Input files DNA, " << file_format_names[format].c_str() << ":" << endl;
647 			for(size_t i = 0; i < infiles.size(); i++) {
648 				cout << "  " << infiles[i].c_str() << endl;
649 			}
650 		}
651 		// Seed random number generator
652 		srand(seed);
653 		{
654 			Timer timer(cout, "Total time for call to driver() for forward index: ", verbose);
655 			if(!packed) {
656 				try {
657 					driver<SString<char> >(infile, infiles, outfile, false, REF_READ_FORWARD);
658 				} catch(bad_alloc& e) {
659 					if(autoMem) {
660 						cerr << "Switching to a packed string representation." << endl;
661 						packed = true;
662 					} else {
663 						throw e;
664 					}
665 				}
666 			}
667 			if(packed) {
668 				driver<S2bDnaString>(infile, infiles, outfile, true, REF_READ_FORWARD);
669 			}
670 		}
671 		int reverseType = reverseEach ? REF_READ_REVERSE_EACH : REF_READ_REVERSE;
672 		srand(seed);
673 		Timer timer(cout, "Total time for backward call to driver() for mirror index: ", verbose);
674 		if(!packed) {
675 			try {
676 				driver<SString<char> >(infile, infiles, outfile + ".rev", false, reverseType);
677 			} catch(bad_alloc& e) {
678 				if(autoMem) {
679 					cerr << "Switching to a packed string representation." << endl;
680 					packed = true;
681 				} else {
682 					throw e;
683 				}
684 			}
685 		}
686 		if(packed) {
687 			driver<S2bDnaString>(infile, infiles, outfile + ".rev", true, reverseType);
688 		}
689 		return 0;
690 	} catch(std::exception& e) {
691 		cerr << "Error: Encountered exception: '" << e.what() << "'" << endl;
692 		cerr << "Command: ";
693 		for(int i = 0; i < argc; i++) cerr << argv[i] << " ";
694 		cerr << endl;
695 		deleteIdxFiles(outfile, writeRef || justRef, justRef);
696 		return 1;
697 	} catch(int e) {
698 		if(e != 0) {
699 			cerr << "Error: Encountered internal Bowtie 2 exception (#" << e << ")" << endl;
700 			cerr << "Command: ";
701 			for(int i = 0; i < argc; i++) cerr << argv[i] << " ";
702 			cerr << endl;
703 		}
704 		deleteIdxFiles(outfile, writeRef || justRef, justRef);
705 		return e;
706 	}
707 }
708 }
709