1 /*
2 * Copyright 2011, Ben Langmead <langmea@cs.jhu.edu>
3 *
4 * This file is part of Bowtie 2.
5 *
6 * Bowtie 2 is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * Bowtie 2 is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include <zlib.h>
21 #include <iostream>
22 #include <fstream>
23 #include <string>
24 #include <cassert>
25 #include <getopt.h>
26 #include "assert_helpers.h"
27 #include "endian_swap.h"
28 #include "bt2_idx.h"
29 #include "formats.h"
30 #include "sequence_io.h"
31 #include "tokenize.h"
32 #include "timer.h"
33 #include "ref_read.h"
34 #include "filebuf.h"
35 #include "reference.h"
36 #include "ds.h"
37 #ifdef WITH_ZSTD
38 #include "zstd_decompress.h"
39 #endif
40
41 /**
42 * \file Driver for the bowtie-build indexing tool.
43 */
44
45 // Build parameters
46 int verbose;
47 static int sanityCheck;
48 static int format;
49 static TIndexOffU bmax;
50 static TIndexOffU bmaxMultSqrt;
51 static uint32_t bmaxDivN;
52 static int dcv;
53 static int noDc;
54 static int entireSA;
55 static int seed;
56 static int showVersion;
57 // Ebwt parameters
58 static int32_t lineRate;
59 static int32_t linesPerSide;
60 static int32_t offRate;
61 static int32_t ftabChars;
62 static int bigEndian;
63 static bool nsToAs; // convert Ns to As
64 static bool doSaFile; // make a file with just the suffix array in it
65 static bool doBwtFile; // make a file with just the BWT string in it
66 static bool autoMem;
67 static bool packed;
68 static bool writeRef;
69 static bool justRef;
70 static bool reverseEach;
71 static int nthreads;
72 static string wrapper;
73
resetOptions()74 static void resetOptions() {
75 verbose = true; // be talkative (default)
76 sanityCheck = 0; // do slow sanity checks
77 format = FASTA; // input sequence format
78 bmax = OFF_MASK; // max blockwise SA bucket size
79 bmaxMultSqrt = OFF_MASK; // same, as multplier of sqrt(n)
80 bmaxDivN = 4; // same, as divisor of n
81 dcv = 1024; // bwise SA difference-cover sample sz
82 noDc = 0; // disable difference-cover sample
83 entireSA = 0; // 1 = disable blockwise SA
84 seed = 0; // srandom seed
85 showVersion = 0; // just print version and quit?
86 // Ebwt parameters
87 lineRate = Ebwt::default_lineRate; // a "line" is 64 or 128 bytes
88 linesPerSide = 1; // 1 64-byte line on a side
89 offRate = 4; // sample 1 out of 16 SA elts
90 ftabChars = 10; // 10 chars in initial lookup table
91 bigEndian = 0; // little endian
92 nsToAs = false; // convert reference Ns to As prior to indexing
93 doSaFile = false; // make a file with just the suffix array in it
94 doBwtFile = false; // make a file with just the BWT string in it
95 autoMem = true; // automatically adjust memory usage parameters
96 packed = false; //
97 writeRef = true; // write compact reference to .3.gEbwt_ext/.4.gEbwt_ext
98 justRef = false; // *just* write compact reference, don't index
99 reverseEach = false;
100 nthreads = 1;
101 wrapper.clear();
102 }
103
// Codes returned by getopt_long() for long options that have no
// single-character equivalent. Numbering starts at 256 so that no code
// can collide with a short-option character.
enum {
    ARG_BMAX         = 256, // --bmax
    ARG_BMAX_MULT    = 257, // --bmaxmultsqrt
    ARG_BMAX_DIV     = 258, // --bmaxdivn
    ARG_DCV          = 259, // --dcv
    ARG_SEED         = 260, // --seed
    ARG_CUTOFF       = 261, // no matching entry in long_options
    ARG_PMAP         = 262, // no matching entry in long_options
    ARG_NTOA         = 263, // --ntoa
    ARG_USAGE        = 264, // --usage
    ARG_REVERSE_EACH = 265, // --reverse-each
    ARG_SA           = 266, // --sa
    ARG_THREADS      = 267, // --threads
    ARG_WRAPPER      = 268  // --wrapper
};
120
121 /**
122 * Print a detailed usage message to the provided output stream.
123 */
printUsage(ostream & out)124 static void printUsage(ostream& out) {
125 out << "Bowtie 2 version " << string(BOWTIE2_VERSION).c_str() << " by Ben Langmead (langmea@cs.jhu.edu, www.cs.jhu.edu/~langmea)" << endl;
126
127 #ifdef BOWTIE_64BIT_INDEX
128 string tool_name = "bowtie2-build-l";
129 #else
130 string tool_name = "bowtie2-build-s";
131 #endif
132 if(wrapper == "basic-0") {
133 tool_name = "bowtie2-build";
134 }
135
136 // 1 2 3 4 5 6 7 8
137 // 12345678901234567890123456789012345678901234567890123456789012345678901234567890
138 out << "Usage: " << tool_name << " [options]* <reference_in> <bt2_index_base>" << endl
139 << " reference_in comma-separated list of files with ref sequences" << endl
140 << " bt2_index_base write " + gEbwt_ext + " data to files with this dir/basename" << endl
141 << "*** Bowtie 2 indexes work only with v2 (not v1). Likewise for v1 indexes. ***" << endl
142 << "Options:" << endl
143 << " -f reference files are Fasta (default)" << endl
144 << " -c reference sequences given on cmd line (as" << endl
145 << " <reference_in>)" << endl;
146 if(wrapper == "basic-0") {
147 out << " --large-index force generated index to be 'large', even if ref" << endl
148 << " has fewer than 4 billion nucleotides" << endl
149 << " --debug use the debug binary; slower, assertions enabled" << endl
150 << " --sanitized use sanitized binary; slower, uses ASan and/or UBSan" << endl
151 << " --verbose log the issued command" << endl;
152 }
153 out << " -a/--noauto disable automatic -p/--bmax/--dcv memory-fitting" << endl
154 << " -p/--packed use packed strings internally; slower, less memory" << endl
155 << " --bmax <int> max bucket sz for blockwise suffix-array builder" << endl
156 << " --bmaxdivn <int> max bucket sz as divisor of ref len (default: 4)" << endl
157 << " --dcv <int> diff-cover period for blockwise (default: 1024)" << endl
158 << " --nodc disable diff-cover (algorithm becomes quadratic)" << endl
159 << " -r/--noref don't build .3/.4 index files" << endl
160 << " -3/--justref just build .3/.4 index files" << endl
161 << " -o/--offrate <int> SA is sampled every 2^<int> BWT chars (default: 5)" << endl
162 << " -t/--ftabchars <int> # of chars consumed in initial lookup (default: 10)" << endl
163 << " --threads <int> # of threads" << endl
164 //<< " --ntoa convert Ns in reference to As" << endl
165 //<< " --big --little endianness (default: little, this host: "
166 //<< (currentlyBigEndian()? "big":"little") << ")" << endl
167 << " --seed <int> seed for random number generator" << endl
168 << " -q/--quiet verbose output (for debugging)" << endl
169 << " -h/--help print detailed description of tool and its options" << endl
170 << " --usage print this usage message" << endl
171 << " --version print version information and quit" << endl
172 ;
173 if(wrapper.empty()) {
174 cerr << endl
175 << "*** Warning ***" << endl
176 << "'" << tool_name << "' was run directly. It is recommended "
177 << "that you run the wrapper script 'bowtie2-build' instead."
178 << endl << endl;
179 }
180 }
181
182 static const char *short_options = "qraph?nscfl:i:o:t:h:3C";
183
184 static struct option long_options[] = {
185 {(char*)"quiet", no_argument, 0, 'q'},
186 {(char*)"sanity", no_argument, 0, 's'},
187 {(char*)"packed", no_argument, 0, 'p'},
188 {(char*)"little", no_argument, &bigEndian, 0},
189 {(char*)"big", no_argument, &bigEndian, 1},
190 {(char*)"bmax", required_argument, 0, ARG_BMAX},
191 {(char*)"bmaxmultsqrt", required_argument, 0, ARG_BMAX_MULT},
192 {(char*)"bmaxdivn", required_argument, 0, ARG_BMAX_DIV},
193 {(char*)"dcv", required_argument, 0, ARG_DCV},
194 {(char*)"nodc", no_argument, &noDc, 1},
195 {(char*)"seed", required_argument, 0, ARG_SEED},
196 {(char*)"entiresa", no_argument, &entireSA, 1},
197 {(char*)"version", no_argument, &showVersion, 1},
198 {(char*)"noauto", no_argument, 0, 'a'},
199 {(char*)"noblocks", required_argument, 0, 'n'},
200 {(char*)"linerate", required_argument, 0, 'l'},
201 {(char*)"linesperside", required_argument, 0, 'i'},
202 {(char*)"offrate", required_argument, 0, 'o'},
203 {(char*)"ftabchars", required_argument, 0, 't'},
204 {(char*)"help", no_argument, 0, 'h'},
205 {(char*)"ntoa", no_argument, 0, ARG_NTOA},
206 {(char*)"justref", no_argument, 0, '3'},
207 {(char*)"noref", no_argument, 0, 'r'},
208 {(char*)"sa", no_argument, 0, ARG_SA},
209 {(char*)"reverse-each", no_argument, 0, ARG_REVERSE_EACH},
210 {(char*)"threads", required_argument, 0, ARG_THREADS},
211 {(char*)"usage", no_argument, 0, ARG_USAGE},
212 {(char*)"wrapper", required_argument, 0, ARG_WRAPPER},
213 {(char*)0, 0, 0, 0} // terminator
214 };
215
216 /**
217 * Parse an int out of optarg and enforce that it be at least 'lower';
218 * if it is less than 'lower', then output the given error message and
219 * exit with an error and a usage message.
220 */
221 template<typename T>
parseNumber(T lower,const char * errmsg)222 static T parseNumber(T lower, const char *errmsg) {
223 char *endPtr= NULL;
224 T t = (T)strtoll(optarg, &endPtr, 10);
225 if (endPtr != NULL) {
226 if (t < lower) {
227 cerr << errmsg << endl;
228 printUsage(cerr);
229 throw 1;
230 }
231 return t;
232 }
233 cerr << errmsg << endl;
234 printUsage(cerr);
235 throw 1;
236 return -1;
237 }
238
239 /**
240 * Read command-line arguments
241 */
parseOptions(int argc,const char ** argv)242 static bool parseOptions(int argc, const char **argv) {
243 int option_index = 0;
244 int next_option;
245 bool bmaxDivNSet = false;
246 bool abort = false;
247 do {
248 next_option = getopt_long(
249 argc, const_cast<char**>(argv),
250 short_options, long_options, &option_index);
251 switch (next_option) {
252 case ARG_WRAPPER:
253 wrapper = optarg;
254 break;
255 case 'f': format = FASTA; break;
256 case 'c': format = CMDLINE; break;
257 case 'p': packed = true; break;
258 case 'l':
259 lineRate = parseNumber<int>(3, "-l/--lineRate arg must be at least 3");
260 break;
261 case 'i':
262 linesPerSide = parseNumber<int>(1, "-i/--linesPerSide arg must be at least 1");
263 break;
264 case 'o':
265 offRate = parseNumber<int>(0, "-o/--offRate arg must be at least 0");
266 break;
267 case '3':
268 justRef = true;
269 break;
270 case 't':
271 ftabChars = parseNumber<int>(1, "-t/--ftabChars arg must be at least 1");
272 if (ftabChars > 16) {
273 std::cerr << "-t/--ftabChars arg must not exceed 16" << std::endl;
274 throw 1;
275 }
276 break;
277 case 'n':
278 // all f-s is used to mean "not set", so put 'e' on end
279 bmax = 0xfffffffe;
280 break;
281 case 'h':
282 case ARG_USAGE:
283 printUsage(cout);
284 abort = true;
285 break;
286 case ARG_BMAX:
287 bmax = parseNumber<TIndexOffU>(1, "--bmax arg must be at least 1");
288 bmaxMultSqrt = OFF_MASK; // don't use multSqrt
289 bmaxDivN = 0xffffffff; // don't use multSqrt
290 break;
291 case ARG_BMAX_MULT:
292 bmaxMultSqrt = parseNumber<TIndexOffU>(1, "--bmaxmultsqrt arg must be at least 1");
293 bmax = OFF_MASK; // don't use bmax
294 bmaxDivN = 0xffffffff; // don't use multSqrt
295 break;
296 case ARG_BMAX_DIV:
297 bmaxDivNSet = true;
298 bmaxDivN = parseNumber<uint32_t>(1, "--bmaxdivn arg must be at least 1");
299 bmax = OFF_MASK; // don't use bmax
300 bmaxMultSqrt = OFF_MASK; // don't use multSqrt
301 break;
302 case ARG_DCV:
303 dcv = parseNumber<int>(3, "--dcv arg must be at least 3");
304 break;
305 case ARG_SEED:
306 seed = parseNumber<int>(0, "--seed arg must be at least 0");
307 break;
308 case ARG_REVERSE_EACH:
309 reverseEach = true;
310 break;
311 case ARG_SA:
312 doSaFile = true;
313 break;
314 case ARG_NTOA: nsToAs = true; break;
315 case ARG_THREADS:
316 nthreads = parseNumber<int>(0, "--threads arg must be at least 1");
317 break;
318 case 'a': autoMem = false; break;
319 case 'q': verbose = false; break;
320 case 's': sanityCheck = true; break;
321 case 'r': writeRef = false; break;
322
323 case -1: /* Done with options. */
324 break;
325 case 0:
326 if (long_options[option_index].flag != 0)
327 break;
328 default:
329 printUsage(cerr);
330 throw 1;
331 }
332 } while(next_option != -1);
333 if(bmax < 40) {
334 cerr << "Warning: specified bmax is very small (" << bmax << "). This can lead to" << endl
335 << "extremely slow performance and memory exhaustion. Perhaps you meant to specify" << endl
336 << "a small --bmaxdivn?" << endl;
337 }
338 if (!bmaxDivNSet) {
339 bmaxDivN *= nthreads;
340 }
341 return abort;
342 }
343
344 EList<string> filesWritten;
345
346 /**
347 * Delete all the index files that we tried to create. For when we had to
348 * abort the index-building process due to an error.
349 */
deleteIdxFiles(const string & outfile,bool doRef,bool justRef)350 static void deleteIdxFiles(
351 const string& outfile,
352 bool doRef,
353 bool justRef)
354 {
355
356 for(size_t i = 0; i < filesWritten.size(); i++) {
357 cerr << "Deleting \"" << filesWritten[i].c_str()
358 << "\" file written during aborted indexing attempt." << endl;
359 remove(filesWritten[i].c_str());
360 }
361 }
362
363 /**
364 * Drive the index construction process and optionally sanity-check the
365 * result.
366 */
367 template<typename TStr>
driver(const string & infile,EList<string> & infiles,const string & outfile,bool packed,int reverse)368 static void driver(
369 const string& infile,
370 EList<string>& infiles,
371 const string& outfile,
372 bool packed,
373 int reverse)
374 {
375 EList<FileBuf*> is(MISC_CAT);
376 bool bisulfite = false;
377 RefReadInParams refparams(false, reverse, nsToAs, bisulfite);
378 assert_gt(infiles.size(), 0);
379 if(format == CMDLINE) {
380 // Adapt sequence strings to stringstreams open for input
381 stringstream *ss = new stringstream();
382 for(size_t i = 0; i < infiles.size(); i++) {
383 (*ss) << ">" << i << endl << infiles[i].c_str() << endl;
384 }
385 FileBuf *fb = new FileBuf(ss);
386 assert(fb != NULL);
387 assert(!fb->eof());
388 assert(fb->get() == '>');
389 ASSERT_ONLY(fb->reset());
390 assert(!fb->eof());
391 is.push_back(fb);
392 } else {
393 // Adapt sequence files to ifstreams
394 for(size_t i = 0; i < infiles.size(); i++) {
395 FileBuf *fb;
396
397 size_t idx = infiles[i].find_last_of(".");
398 std::string ext = (idx == std::string::npos) ? "" : infiles[i].substr(idx + 1);
399 if (ext == "" || ext == "gz" || ext == "Z") {
400 gzFile zFp = gzopen(infiles[i].c_str(), "rb");
401 if (zFp == NULL) {
402 cerr << "Error: could not open "<< infiles[i].c_str() << endl;
403 throw 1;
404 }
405 fb = new FileBuf(zFp);
406 #ifdef WITH_ZSTD
407 } else if (ext == "zstd" || ext == "zst") {
408 zstdStrm *zstdFp = zstdOpen(infiles[i].c_str());
409 if (zstdFp == NULL) {
410 cerr << "Error: could not open " << infiles[i].c_str() << endl;
411 throw 1;
412 }
413 fb = new FileBuf(zstdFp);
414 #endif
415 } else {
416 FILE *f = fopen(infiles[i].c_str(), "rb");
417 if (f == NULL) {
418 cerr << "Error: could not open "<< infiles[i].c_str() << endl;
419 throw 1;
420 }
421 fb = new FileBuf(f);
422 }
423 assert(fb != NULL);
424 if(fb->peek() == -1 || fb->eof()) {
425 cerr << "Warning: Empty fasta file: '" << infile.c_str() << "'" << endl;
426 continue;
427 }
428 assert(!fb->eof());
429 assert(fb->get() == '>');
430 ASSERT_ONLY(fb->reset());
431 assert(!fb->eof());
432 is.push_back(fb);
433 }
434 }
435 if(is.empty()) {
436 cerr << "Warning: All fasta inputs were empty" << endl;
437 throw 1;
438 }
439 if(!reverse) {
440 #ifdef BOWTIE_64BIT_INDEX
441 if (verbose) cerr << "Building a LARGE index" << endl;
442 #else
443 if (verbose) cerr << "Building a SMALL index" << endl;
444 #endif
445 }
446 // Vector for the ordered list of "records" comprising the input
447 // sequences. A record represents a stretch of unambiguous
448 // characters in one of the input sequences.
449 EList<RefRecord> szs(MISC_CAT);
450 std::pair<size_t, size_t> sztot;
451 {
452 if(verbose) cout << "Reading reference sizes" << endl;
453 Timer _t(cout, " Time reading reference sizes: ", verbose);
454 if(!reverse && (writeRef || justRef)) {
455 filesWritten.push_back(outfile + ".3." + gEbwt_ext);
456 filesWritten.push_back(outfile + ".4." + gEbwt_ext);
457 sztot = BitPairReference::szsFromFasta(is, outfile, bigEndian, refparams, szs, sanityCheck);
458 } else {
459 sztot = BitPairReference::szsFromFasta(is, string(), bigEndian, refparams, szs, sanityCheck);
460 }
461 }
462 if(justRef) return;
463 assert_gt(sztot.first, 0);
464 assert_gt(sztot.second, 0);
465 assert_gt(szs.size(), 0);
466 // Construct index from input strings and parameters
467 filesWritten.push_back(outfile + ".1." + gEbwt_ext);
468 filesWritten.push_back(outfile + ".2." + gEbwt_ext);
469 Ebwt ebwt(
470 TStr(),
471 packed,
472 0,
473 1, // TODO: maybe not?
474 lineRate,
475 offRate, // suffix-array sampling rate
476 ftabChars, // number of chars in initial arrow-pair calc
477 nthreads, // number of threads
478 outfile, // basename for .?.ebwt files
479 reverse == 0, // fw
480 !entireSA, // useBlockwise
481 bmax, // block size for blockwise SA builder
482 bmaxMultSqrt, // block size as multiplier of sqrt(len)
483 bmaxDivN, // block size as divisor of len
484 noDc? 0 : dcv,// difference-cover period
485 is, // list of input streams
486 szs, // list of reference sizes
487 (TIndexOffU)sztot.first, // total size of all unambiguous ref chars
488 refparams, // reference read-in parameters
489 seed, // pseudo-random number generator seed
490 -1, // override offRate
491 doSaFile, // make a file with just the suffix array in it
492 doBwtFile, // make a file with just the BWT string in it
493 verbose, // be talkative
494 autoMem, // pass exceptions up to the toplevel so that we can adjust memory settings automatically
495 sanityCheck); // verify results and internal consistency
496 // Note that the Ebwt is *not* resident in memory at this time. To
497 // load it into memory, call ebwt.loadIntoMemory()
498 if(verbose) {
499 // Print Ebwt's vital stats
500 ebwt.eh().print(cout);
501 }
502 if(sanityCheck) {
503 // Try restoring the original string (if there were
504 // multiple texts, what we'll get back is the joined,
505 // padded string, not a list)
506 ebwt.loadIntoMemory(
507 0,
508 reverse ? (refparams.reverse == REF_READ_REVERSE) : 0,
509 true, // load SA sample?
510 true, // load ftab?
511 true, // load rstarts?
512 false,
513 false);
514 SString<char> s2;
515 ebwt.restore(s2);
516 ebwt.evictFromMemory();
517 {
518 SString<char> joinedss = Ebwt::join<SString<char> >(
519 is, // list of input streams
520 szs, // list of reference sizes
521 (TIndexOffU)sztot.first, // total size of all unambiguous ref chars
522 refparams, // reference read-in parameters
523 seed); // pseudo-random number generator seed
524 if(refparams.reverse == REF_READ_REVERSE) {
525 joinedss.reverse();
526 }
527 assert_eq(joinedss.length(), s2.length());
528 assert(sstr_eq(joinedss, s2));
529 }
530 if(verbose) {
531 if(s2.length() < 1000) {
532 cout << "Passed restore check: " << s2.toZBuf() << endl;
533 } else {
534 cout << "Passed restore check: (" << s2.length() << " chars)" << endl;
535 }
536 }
537 }
538
539 for (size_t i = 0; i < is.size(); ++i) {
540 if (is[i] != NULL)
541 // FileBuf object closes file when deconstructed
542 delete is[i];
543 }
544 }
545
546 static const char *argv0 = NULL;
547
548 extern "C" {
549 /**
550 * main function. Parses command-line arguments.
551 */
bowtie_build(int argc,const char ** argv)552 int bowtie_build(int argc, const char **argv) {
553 string outfile;
554 try {
555 // Reset all global state, including getopt state
556 opterr = optind = 1;
557 resetOptions();
558
559 string infile;
560 EList<string> infiles(MISC_CAT);
561
562 if(parseOptions(argc, argv)) {
563 return 0;
564 }
565 argv0 = argv[0];
566 if(showVersion) {
567 cout << argv0 << " version " << string(BOWTIE2_VERSION).c_str() << endl;
568 if(sizeof(void*) == 4) {
569 cout << "32-bit" << endl;
570 } else if(sizeof(void*) == 8) {
571 cout << "64-bit" << endl;
572 } else {
573 cout << "Neither 32- nor 64-bit: sizeof(void*) = " << sizeof(void*) << endl;
574 }
575 cout << "Built on " << BUILD_HOST << endl;
576 cout << BUILD_TIME << endl;
577 cout << "Compiler: " << COMPILER_VERSION << endl;
578 cout << "Options: " << COMPILER_OPTIONS << endl;
579 cout << "Sizeof {int, long, long long, void*, size_t, off_t}: {"
580 << sizeof(int)
581 << ", " << sizeof(long) << ", " << sizeof(long long)
582 << ", " << sizeof(void *) << ", " << sizeof(size_t)
583 << ", " << sizeof(off_t) << "}" << endl;
584 return 0;
585 }
586
587 // Get input filename
588 if(optind >= argc) {
589 cerr << "No input sequence or sequence file specified!" << endl;
590 printUsage(cerr);
591 return 1;
592 }
593 infile = argv[optind++];
594
595 // Get output filename
596 if(optind >= argc) {
597 cerr << "No output file specified!" << endl;
598 printUsage(cerr);
599 return 1;
600 }
601 outfile = argv[optind++];
602
603 tokenize(infile, ",", infiles);
604 if(infiles.size() < 1) {
605 cerr << "Tokenized input file list was empty!" << endl;
606 printUsage(cerr);
607 return 1;
608 }
609
610 // Optionally summarize
611 if(verbose) {
612 cout << "Settings:" << endl
613 << " Output files: \"" << outfile.c_str() << ".*." + gEbwt_ext + "\"" << endl
614 << " Line rate: " << lineRate << " (line is " << (1<<lineRate) << " bytes)" << endl
615 << " Lines per side: " << linesPerSide << " (side is " << ((1<<lineRate)*linesPerSide) << " bytes)" << endl
616 << " Offset rate: " << offRate << " (one in " << (1<<offRate) << ")" << endl
617 << " FTable chars: " << ftabChars << endl
618 << " Strings: " << (packed? "packed" : "unpacked") << endl
619 ;
620 if(bmax == OFF_MASK) {
621 cout << " Max bucket size: default" << endl;
622 } else {
623 cout << " Max bucket size: " << bmax << endl;
624 }
625 if(bmaxMultSqrt == OFF_MASK) {
626 cout << " Max bucket size, sqrt multiplier: default" << endl;
627 } else {
628 cout << " Max bucket size, sqrt multiplier: " << bmaxMultSqrt << endl;
629 }
630 if(bmaxDivN == 0xffffffff) {
631 cout << " Max bucket size, len divisor: default" << endl;
632 } else {
633 cout << " Max bucket size, len divisor: " << bmaxDivN << endl;
634 }
635 cout << " Difference-cover sample period: " << dcv << endl;
636 cout << " Endianness: " << (bigEndian? "big":"little") << endl
637 << " Actual local endianness: " << (currentlyBigEndian()? "big":"little") << endl
638 << " Sanity checking: " << (sanityCheck? "enabled":"disabled") << endl;
639 #ifdef NDEBUG
640 cout << " Assertions: disabled" << endl;
641 #else
642 cout << " Assertions: enabled" << endl;
643 #endif
644 cout << " Random seed: " << seed << endl;
645 cout << " Sizeofs: void*:" << sizeof(void*) << ", int:" << sizeof(int) << ", long:" << sizeof(long) << ", size_t:" << sizeof(size_t) << endl;
646 cout << "Input files DNA, " << file_format_names[format].c_str() << ":" << endl;
647 for(size_t i = 0; i < infiles.size(); i++) {
648 cout << " " << infiles[i].c_str() << endl;
649 }
650 }
651 // Seed random number generator
652 srand(seed);
653 {
654 Timer timer(cout, "Total time for call to driver() for forward index: ", verbose);
655 if(!packed) {
656 try {
657 driver<SString<char> >(infile, infiles, outfile, false, REF_READ_FORWARD);
658 } catch(bad_alloc& e) {
659 if(autoMem) {
660 cerr << "Switching to a packed string representation." << endl;
661 packed = true;
662 } else {
663 throw e;
664 }
665 }
666 }
667 if(packed) {
668 driver<S2bDnaString>(infile, infiles, outfile, true, REF_READ_FORWARD);
669 }
670 }
671 int reverseType = reverseEach ? REF_READ_REVERSE_EACH : REF_READ_REVERSE;
672 srand(seed);
673 Timer timer(cout, "Total time for backward call to driver() for mirror index: ", verbose);
674 if(!packed) {
675 try {
676 driver<SString<char> >(infile, infiles, outfile + ".rev", false, reverseType);
677 } catch(bad_alloc& e) {
678 if(autoMem) {
679 cerr << "Switching to a packed string representation." << endl;
680 packed = true;
681 } else {
682 throw e;
683 }
684 }
685 }
686 if(packed) {
687 driver<S2bDnaString>(infile, infiles, outfile + ".rev", true, reverseType);
688 }
689 return 0;
690 } catch(std::exception& e) {
691 cerr << "Error: Encountered exception: '" << e.what() << "'" << endl;
692 cerr << "Command: ";
693 for(int i = 0; i < argc; i++) cerr << argv[i] << " ";
694 cerr << endl;
695 deleteIdxFiles(outfile, writeRef || justRef, justRef);
696 return 1;
697 } catch(int e) {
698 if(e != 0) {
699 cerr << "Error: Encountered internal Bowtie 2 exception (#" << e << ")" << endl;
700 cerr << "Command: ";
701 for(int i = 0; i < argc; i++) cerr << argv[i] << " ";
702 cerr << endl;
703 }
704 deleteIdxFiles(outfile, writeRef || justRef, justRef);
705 return e;
706 }
707 }
708 }
709