1 /*
2 * Copyright 2013, Ben Langmead <langmea@cs.jhu.edu>
3 *
4 * This file is part of Bowtie 2.
5 *
6 * Bowtie 2 is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * Bowtie 2 is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include <getopt.h>
21 #include "assert_helpers.h"
22 #include "ds.h"
23 #include "simple_func.h"
24 #include "aligner_seed_policy.h"
25 #include "scoring.h"
26 #include "opts.h"
27 #include "aligner_sw.h"
28
29 using namespace std;
30
31 int gVerbose; // be talkative
32 int gQuiet; // print nothing but the alignments
33 static int sanityCheck; // enable expensive sanity checks
34 static int seed; // srandom() seed
35 static bool showVersion; // just print version and quit?
36 static uint64_t qUpto; // max # of queries to read
37 static int nthreads; // number of pthreads operating concurrently
38 static uint32_t skipReads; // # reads/read pairs to skip
39 int gGapBarrier; // # diags on top/bot only to be entered diagonally
40 static int bonusMatchType; // how to reward matches
41 static int bonusMatch; // constant reward if bonusMatchType=constant
42 static int penMmcType; // how to penalize mismatches
43 static int penMmcMax; // max mm penalty
44 static int penMmcMin; // min mm penalty
45 static int penNType; // how to penalize Ns in the read
46 static int penN; // constant if N pelanty is a constant
47 static bool penNCatPair; // concatenate mates before N filtering?
48 static bool localAlign; // do local alignment in DP steps
49 static int penRdGapConst; // constant cost of extending a gap in the read
50 static int penRfGapConst; // constant cost of extending a gap in the reference
51 static int penRdGapLinear; // coeff of linear term for cost of gap extension in read
52 static int penRfGapLinear; // coeff of linear term for cost of gap extension in ref
53 static SimpleFunc scoreMin; // minimum valid score as function of read len
54 static SimpleFunc nCeil; // max # Ns allowed as function of read len
55 static SimpleFunc msIval; // interval between seeds as function of read len
56 static bool enable8; // use 8-bit SSE where possible?
57 static size_t cminlen; // longer reads use checkpointing
58 static size_t cpow2; // checkpoint interval log2
59 static bool doTri; // do triangular mini-fills?
60 static bool ignoreQuals; // all mms incur same penalty, regardless of qual
61 static EList<string> queries; // list of query files
62 static string outfile; // write output to this file
63
resetOptions()64 static void resetOptions() {
65 gVerbose = 0;
66 gQuiet = false;
67 sanityCheck = 0; // enable expensive sanity checks
68 seed = 0; // srandom() seed
69 showVersion = false; // just print version and quit?
70 qUpto = 0xffffffffffffffff; // max # of queries to read
71 nthreads = 1; // number of pthreads operating concurrently
72 skipReads = 0; // # reads/read pairs to skip
73 gGapBarrier = 4; // disallow gaps within this many chars of either end of alignment
74 bonusMatchType = DEFAULT_MATCH_BONUS_TYPE;
75 bonusMatch = DEFAULT_MATCH_BONUS;
76 penMmcType = DEFAULT_MM_PENALTY_TYPE;
77 penMmcMax = DEFAULT_MM_PENALTY_MAX;
78 penMmcMin = DEFAULT_MM_PENALTY_MIN;
79 penNType = DEFAULT_N_PENALTY_TYPE;
80 penN = DEFAULT_N_PENALTY;
81 penNCatPair = DEFAULT_N_CAT_PAIR; // concatenate mates before N filtering?
82 localAlign = false; // do local alignment in DP steps
83 penRdGapConst = DEFAULT_READ_GAP_CONST;
84 penRfGapConst = DEFAULT_REF_GAP_CONST;
85 penRdGapLinear = DEFAULT_READ_GAP_LINEAR;
86 penRfGapLinear = DEFAULT_REF_GAP_LINEAR;
87 scoreMin.init (SIMPLE_FUNC_LINEAR, DEFAULT_MIN_CONST, DEFAULT_MIN_LINEAR);
88 nCeil.init (SIMPLE_FUNC_LINEAR, 0.0f, std::numeric_limits<double>::max(), 2.0f, 0.1f);
89 msIval.init (SIMPLE_FUNC_LINEAR, 1.0f, std::numeric_limits<double>::max(), DEFAULT_IVAL_B, DEFAULT_IVAL_A);
90 enable8 = true; // use 8-bit SSE where possible?
91 cminlen = 2000; // longer reads use checkpointing
92 cpow2 = 4; // checkpoint interval log2
93 doTri = false; // do triangular mini-fills?
94 ignoreQuals = false; // all mms incur same penalty, regardless of qual
95 queries.clear(); // list of query files
96 outfile.clear(); // write output to this file
97 }
98
99 static const char *short_options = "u:hp:P:S:";
100
101 static struct option long_options[] = {
102 {(char*)"verbose", no_argument, 0, ARG_VERBOSE},
103 {(char*)"quiet", no_argument, 0, ARG_QUIET},
104 {(char*)"sanity", no_argument, 0, ARG_SANITY},
105 {(char*)"qupto", required_argument, 0, 'u'},
106 {(char*)"upto", required_argument, 0, 'u'},
107 {(char*)"version", no_argument, 0, ARG_VERSION},
108 {(char*)"help", no_argument, 0, 'h'},
109 {(char*)"threads", required_argument, 0, 'p'},
110 {(char*)"usage", no_argument, 0, ARG_USAGE},
111 {(char*)"gbar", required_argument, 0, ARG_GAP_BAR},
112 {(char*)"policy", required_argument, 0, ARG_ALIGN_POLICY},
113 {(char*)"454", no_argument, 0, ARG_NOISY_HPOLY},
114 {(char*)"ion-torrent", no_argument, 0, ARG_NOISY_HPOLY},
115 {(char*)"local", no_argument, 0, ARG_LOCAL},
116 {(char*)"end-to-end", no_argument, 0, ARG_END_TO_END},
117 {(char*)"sse8", no_argument, 0, ARG_SSE8},
118 {(char*)"no-sse8", no_argument, 0, ARG_SSE8_NO},
119 {(char*)"ma", required_argument, 0, ARG_SCORE_MA},
120 {(char*)"mp", required_argument, 0, ARG_SCORE_MMP},
121 {(char*)"np", required_argument, 0, ARG_SCORE_NP},
122 {(char*)"rdg", required_argument, 0, ARG_SCORE_RDG},
123 {(char*)"rfg", required_argument, 0, ARG_SCORE_RFG},
124 {(char*)"score-min", required_argument, 0, ARG_SCORE_MIN},
125 {(char*)"min-score", required_argument, 0, ARG_SCORE_MIN},
126 {(char*)"n-ceil", required_argument, 0, ARG_N_CEIL},
127 {(char*)"ignore-quals", no_argument, 0, ARG_IGNORE_QUALS},
128 {(char*)"output", required_argument, 0, 'S'},
129 {(char*)"cp-min", required_argument, 0, ARG_CP_MIN},
130 {(char*)"cp-ival", required_argument, 0, ARG_CP_IVAL},
131 {(char*)"tri", no_argument, 0, ARG_TRI},
132 {(char*)0, 0, 0, 0} // terminator
133 };
134
135 /**
136 * Print a summary usage message to the provided output stream.
137 */
printUsage(ostream & out)138 static void printUsage(ostream& out) {
139 out << "Bowtie 2 dynamic programming engine, by Ben Langmead (langmea@cs.jhu.edu, www.cs.jhu.edu/~langmea)" << endl;
140 string tool_name = "bowtie2-dp";
141 out << "Usage: " << endl
142 << " " << tool_name.c_str() << " [options]* <in> <out>" << endl
143 << endl
144 << " <in> File with DP input problems (default: stdin)" << endl
145 << " <out> File with DP output solutions (default: stdout)" << endl
146 << endl
147 << "Options (defaults in parentheses):" << endl
148 << endl
149 << " Input:" << endl
150 << " -s/--skip <int> skip the first <int> problems in the input (none)" << endl
151 << " -u/--upto <int> stop after first <int> problems (no limit)" << endl
152 << endl
153 << " Alignment:" << endl
154 << " --n-ceil <func> func for max # non-A/C/G/Ts permitted in aln (L,0,0.15)" << endl
155 << " --gbar <int> disallow gaps within <int> nucs of read extremes (4)" << endl
156 << " --ignore-quals treat all quality values as 30 on Phred scale (off)" << endl
157 << endl
158 << " --end-to-end entire read must align; no clipping (on)" << endl
159 << " OR" << endl
160 << " --local local alignment; ends might be soft clipped (off)" << endl
161 << endl
162 << " Scoring:" << endl
163 << " --ma <int> match bonus (0 for --end-to-end, 2 for --local) " << endl
164 << " --mp <int> max penalty for mismatch; lower qual = lower penalty (6)" << endl
165 << " --np <int> penalty for non-A/C/G/Ts in read/ref (1)" << endl
166 << " --rdg <int>,<int> read gap open, extend penalties (5,3)" << endl
167 << " --rfg <int>,<int> reference gap open, extend penalties (5,3)" << endl
168 << " --score-min <func> min acceptable alignment score w/r/t read length" << endl
169 << " (G,20,8 for local, L,-0.6,-0.6 for end-to-end)" << endl
170 << " --quiet print nothing to stderr except serious errors" << endl
171 << endl
172 << " Performance:" << endl
173 << " -p/--threads <int> number of alignment threads to launch (1)" << endl
174 << endl
175 << " Other:" << endl
176 << " --version print version information and quit" << endl
177 << " -h/--help print this usage message" << endl
178 ;
179 }
180
181 /**
182 * Parse an int out of optarg and enforce that it be at least 'lower';
183 * if it is less than 'lower', than output the given error message and
184 * exit with an error and a usage message.
185 */
parseInt(int lower,int upper,const char * errmsg,const char * arg)186 static int parseInt(int lower, int upper, const char *errmsg, const char *arg) {
187 long l;
188 char *endPtr= NULL;
189 l = strtol(arg, &endPtr, 10);
190 if (endPtr != NULL) {
191 if (l < lower || l > upper) {
192 cerr << errmsg << endl;
193 printUsage(cerr);
194 throw 1;
195 }
196 return (int32_t)l;
197 }
198 cerr << errmsg << endl;
199 printUsage(cerr);
200 throw 1;
201 return -1;
202 }
203
204 /**
205 * Upper is maximum int by default.
206 */
parseInt(int lower,const char * errmsg,const char * arg)207 static int parseInt(int lower, const char *errmsg, const char *arg) {
208 return parseInt(lower, std::numeric_limits<int>::max(), errmsg, arg);
209 }
210
211 /**
212 * Parse a T string 'str'.
213 */
214 template<typename T>
parse(const char * s)215 T parse(const char *s) {
216 T tmp;
217 stringstream ss(s);
218 ss >> tmp;
219 return tmp;
220 }
221
parseFuncType(const std::string & otype)222 static int parseFuncType(const std::string& otype) {
223 string type = otype;
224 if(type == "C" || type == "Constant") {
225 return SIMPLE_FUNC_CONST;
226 } else if(type == "L" || type == "Linear") {
227 return SIMPLE_FUNC_LINEAR;
228 } else if(type == "S" || type == "Sqrt") {
229 return SIMPLE_FUNC_SQRT;
230 } else if(type == "G" || type == "Log") {
231 return SIMPLE_FUNC_LOG;
232 }
233 std::cerr << "Error: Bad function type '" << otype.c_str()
234 << "'. Should be C (constant), L (linear), "
235 << "S (square root) or G (natural log)." << std::endl;
236 throw 1;
237 }
238
239 #define PARSE_FUNC(fv) { \
240 if(args.size() >= 1) { \
241 fv.setType(parseFuncType(args[0])); \
242 } \
243 if(args.size() >= 2) { \
244 double co; \
245 istringstream tmpss(args[1]); \
246 tmpss >> co; \
247 fv.setConst(co); \
248 } \
249 if(args.size() >= 3) { \
250 double ce; \
251 istringstream tmpss(args[2]); \
252 tmpss >> ce; \
253 fv.setCoeff(ce); \
254 } \
255 if(args.size() >= 4) { \
256 double mn; \
257 istringstream tmpss(args[3]); \
258 tmpss >> mn; \
259 fv.setMin(mn); \
260 } \
261 if(args.size() >= 5) { \
262 double mx; \
263 istringstream tmpss(args[4]); \
264 tmpss >> mx; \
265 fv.setMin(mx); \
266 } \
267 }
268
269 /**
270 * TODO: Argument parsing is very, very flawed. The biggest problem is that
271 * there are two separate worlds of arguments, the ones set via polstr, and
272 * the ones set directly in variables. This makes for nasty interactions,
273 * e.g., with the -M option being resolved at an awkward time relative to
274 * the -k and -a options.
275 */
parseOption(int next_option,const char * arg)276 static void parseOption(int next_option, const char *arg) {
277 switch (next_option) {
278 case 's':
279 skipReads = (uint32_t)parseInt(0, "-s arg must be positive", arg);
280 break;
281 case ARG_GAP_BAR:
282 gGapBarrier = parseInt(1, "--gbar must be no less than 1", arg);
283 break;
284 case 'u':
285 qUpto = (uint32_t)parseInt(1, "-u/--qupto arg must be at least 1", arg);
286 break;
287 case 'p':
288 nthreads = parseInt(1, "-p/--threads arg must be at least 1", arg);
289 break;
290 case 'h': printUsage(cout); throw 0; break;
291 case ARG_USAGE: printUsage(cout); throw 0; break;
292 case ARG_VERBOSE: gVerbose = 1; break;
293 case ARG_QUIET: gQuiet = true; break;
294 case ARG_SANITY: sanityCheck = true; break;
295 case ARG_CP_MIN:
296 cminlen = parse<size_t>(arg);
297 break;
298 case ARG_CP_IVAL:
299 cpow2 = parse<size_t>(arg);
300 break;
301 case ARG_TRI:
302 doTri = true;
303 break;
304 case ARG_LOCAL: localAlign = true; break;
305 case ARG_END_TO_END: localAlign = false; break;
306 case ARG_SSE8: enable8 = true; break;
307 case ARG_SSE8_NO: enable8 = false; break;
308 case ARG_IGNORE_QUALS: ignoreQuals = true; break;
309 case ARG_N_CEIL: {
310 // Split argument by comma
311 EList<string> args;
312 tokenize(arg, ",", args);
313 if(args.size() > 3) {
314 cerr << "Error: expected 3 or fewer comma-separated "
315 << "arguments to --n-ceil option, got "
316 << args.size() << endl;
317 throw 1;
318 }
319 if(args.size() == 0) {
320 cerr << "Error: expected at least one argument to --n-ceil option" << endl;
321 throw 1;
322 }
323 PARSE_FUNC(nCeil);
324 break;
325 }
326 case ARG_SCORE_MA: {
327 // Split argument by comma
328 EList<string> args;
329 tokenize(arg, ",", args);
330 if(args.size() != 1) {
331 cerr << "Error parsing --ma; RHS must have 1 token" << endl;
332 assert(false); throw 1;
333 }
334 string tmp = args[0];
335 istringstream tmpss(tmp);
336 tmpss >> bonusMatch;
337 break;
338 }
339 case ARG_SCORE_MMP: {
340 // Split argument by comma
341 EList<string> args;
342 tokenize(arg, ",", args);
343 if(args.size() > 3) {
344 cerr << "Error parsing --mmp "
345 << "; RHS must have at most 3 tokens" << endl;
346 assert(false); throw 1;
347 }
348 if(args[0][0] == 'C') {
349 string tmp = args[0].substr(1);
350 // Parse constant penalty
351 istringstream tmpss(tmp);
352 tmpss >> penMmcMax;
353 penMmcMin = penMmcMax;
354 // Parse constant penalty
355 penMmcType = COST_MODEL_CONSTANT;
356 } else if(args[0][0] == 'Q') {
357 if(args.size() >= 2) {
358 string tmp = args[1];
359 istringstream tmpss(tmp);
360 tmpss >> penMmcMax;
361 } else {
362 penMmcMax = DEFAULT_MM_PENALTY_MAX;
363 }
364 if(args.size() >= 3) {
365 string tmp = args[2];
366 istringstream tmpss(tmp);
367 tmpss >> penMmcMin;
368 } else {
369 penMmcMin = DEFAULT_MM_PENALTY_MIN;
370 }
371 if(penMmcMin > penMmcMax) {
372 cerr << "Error: Maximum mismatch penalty (" << penMmcMax
373 << ") is less than minimum penalty (" << penMmcMin
374 << endl;
375 throw 1;
376 }
377 // Set type to =quality
378 penMmcType = COST_MODEL_QUAL;
379 } else if(args[0][0] == 'R') {
380 // Set type to=Maq-quality
381 penMmcType = COST_MODEL_ROUNDED_QUAL;
382 } else {
383 cerr << "Error parsing --mmp "
384 << "; RHS must start with C, Q or R" << endl;
385 assert(false); throw 1;
386 }
387 break;
388 }
389 case ARG_SCORE_NP: {
390 // Split argument by comma
391 EList<string> args;
392 tokenize(arg, ",", args);
393 if(args.size() != 1) {
394 cerr << "Error parsing --np "
395 << "; RHS must have 1 token" << endl;
396 assert(false); throw 1;
397 }
398 if(args[0][0] == 'C') {
399 string tmp = args[0].substr(1);
400 // Parse constant penalty
401 istringstream tmpss(tmp);
402 tmpss >> penN;
403 // Parse constant penalty
404 penNType = COST_MODEL_CONSTANT;
405 } else if(args[0][0] == 'Q') {
406 // Set type to =quality
407 penNType = COST_MODEL_QUAL;
408 } else if(args[0][0] == 'R') {
409 // Set type to=Maq-quality
410 penNType = COST_MODEL_ROUNDED_QUAL;
411 } else {
412 cerr << "Error parsing --np "
413 << "; RHS must start with C, Q or R" << endl;
414 assert(false); throw 1;
415 }
416 }
417 case ARG_SCORE_RDG: {
418 EList<string> args;
419 tokenize(arg, ",", args);
420 if(args.size() >= 1) {
421 istringstream tmpss(args[0]);
422 tmpss >> penRdGapConst;
423 } else {
424 penRdGapConst = DEFAULT_READ_GAP_CONST;
425 }
426 if(args.size() >= 2) {
427 istringstream tmpss(args[1]);
428 tmpss >> penRdGapLinear;
429 } else {
430 penRdGapLinear = DEFAULT_READ_GAP_LINEAR;
431 }
432 }
433 case ARG_SCORE_RFG: {
434 EList<string> args;
435 tokenize(arg, ",", args);
436 if(args.size() >= 1) {
437 istringstream tmpss(args[0]);
438 tmpss >> penRfGapConst;
439 } else {
440 penRfGapConst = DEFAULT_REF_GAP_CONST;
441 }
442 if(args.size() >= 2) {
443 istringstream tmpss(args[1]);
444 tmpss >> penRfGapLinear;
445 } else {
446 penRfGapLinear = DEFAULT_REF_GAP_LINEAR;
447 }
448 }
449 case ARG_SCORE_MIN: {
450 EList<string> args;
451 tokenize(arg, ",", args);
452 if(args.size() > 3 && args.size() == 0) {
453 cerr << "Error: expected 3 or fewer comma-separated "
454 << "arguments to --n-ceil option, got "
455 << args.size() << endl;
456 throw 1;
457 }
458 PARSE_FUNC(scoreMin);
459 break;
460 }
461 case 'S': outfile = arg; break;
462 case 'U': {
463 EList<string> args;
464 tokenize(arg, ",", args);
465 for(size_t i = 0; i < args.size(); i++) {
466 queries.push_back(args[i]);
467 }
468 break;
469 }
470 case ARG_VERSION: showVersion = 1; break;
471 default:
472 printUsage(cerr);
473 throw 1;
474 }
475 }
476
477 /**
478 * Read command-line arguments
479 */
parseOptions(int argc,const char ** argv)480 static void parseOptions(int argc, const char **argv) {
481 int option_index = 0;
482 int next_option;
483 while(true) {
484 next_option = getopt_long(
485 argc, const_cast<char**>(argv),
486 short_options, long_options, &option_index);
487 const char * arg = optarg;
488 if(next_option == EOF) {
489 break;
490 }
491 parseOption(next_option, arg);
492 }
493 // If both -s and -u are used, we need to adjust qUpto accordingly
494 // since it uses rdid to know if we've reached the -u limit (and
495 // rdids are all shifted up by skipReads characters)
496 if(qUpto + skipReads > qUpto) {
497 qUpto += skipReads;
498 }
499 if(gGapBarrier < 1) {
500 cerr << "Warning: --gbar was set less than 1 (=" << gGapBarrier
501 << "); setting to 1 instead" << endl;
502 gGapBarrier = 1;
503 }
504 #ifndef NDEBUG
505 if(!gQuiet) {
506 cerr << "Warning: Running in debug mode. Please use debug mode only "
507 << "for diagnosing errors, and not for typical use of Bowtie 2."
508 << endl;
509 }
510 #endif
511 }
512
513 struct DpProblem {
resetDpProblem514 void reset() {
515 ref.clear();
516 }
517
518 TRefId refidx;
519 TRefOff reflen;
520 TAlScore minsc;
521 BTString ref;
522 bool fw;
523 DPRect rect;
524 bool aligned;
525 TAlScore score;
526 };
527
528 class DpLogReader {
529
530 public:
531
DpLogReader()532 DpLogReader() { }
533
~DpLogReader()534 ~DpLogReader() { reset(); }
535
init(const string & fn)536 void init(const string& fn) {
537 reset();
538 fn_ = fn;
539 ih_.open(fn_.c_str());
540 ih_.sync_with_stdio(false);
541 }
542
reset()543 void reset() {
544 if(ih_.is_open()) {
545 ih_.close();
546 }
547 }
548
nextRead(BTDnaString & seq,BTString & qual,EList<DpProblem> & refs)549 bool nextRead(
550 BTDnaString& seq,
551 BTString& qual,
552 EList<DpProblem>& refs)
553 {
554 if(done()) {
555 return false;
556 }
557 ln_.clear();
558 getline(ih_, ln_);
559 while(ln_.empty() && ih_.good()) {
560 getline(ih_, ln_);
561 }
562 if(ln_.empty() && !ih_.good()) {
563 return false;
564 }
565 EList<string> buf;
566 tokenize(ln_, '\t', buf);
567 assert_gt(buf.size(), 2);
568 seq.install(buf[0].c_str(), true);
569 qual = buf[1];
570 for(size_t i = 2; i < buf.size(); i++) {
571 refs.expand();
572 istringstream is(buf[i]);
573 char comma, tmp;
574 // ref idx
575 is >> refs.back().refidx;
576 is >> comma; assert_eq(',', comma);
577 // ref length
578 is >> refs.back().reflen;
579 is >> comma; assert_eq(',', comma);
580 // minimum score
581 is >> refs.back().minsc;
582 is >> comma; assert_eq(',', comma);
583 // read orientation
584 is >> tmp;
585 assert(tmp == '-' || tmp == '+');
586 refs.back().fw = (tmp == '+');
587 is >> comma; assert_eq(',', comma);
588 // DP rectangle
589 is >> refs.back().rect.refl;
590 is >> comma; assert_eq(',', comma);
591 is >> refs.back().rect.refr;
592 is >> comma; assert_eq(',', comma);
593 is >> refs.back().rect.refl_pretrim;
594 is >> comma; assert_eq(',', comma);
595 is >> refs.back().rect.refr_pretrim;
596 is >> comma; assert_eq(',', comma);
597 is >> refs.back().rect.triml;
598 is >> comma; assert_eq(',', comma);
599 is >> refs.back().rect.trimr;
600 is >> comma; assert_eq(',', comma);
601 is >> refs.back().rect.corel;
602 is >> comma; assert_eq(',', comma);
603 is >> refs.back().rect.corer;
604 is >> comma; assert_eq(',', comma);
605 is >> refs.back().rect.maxgap;
606 is >> comma; assert_eq(',', comma);
607 // reference string
608 string ref;
609 while(true) {
610 char c;
611 is >> c;
612 if(c == ',') break;
613 ref.push_back(c);
614 }
615 refs.back().ref.install(ref.c_str());
616 for(size_t i = 0; i < ref.length(); i++) {
617 int m = asc2dnamask[(int)refs.back().ref[i]];
618 if(m == 15) {
619 m = 16; // N
620 }
621 refs.back().ref.set(m, i);
622 }
623 // whether the DP alignment was successful
624 int aligned;
625 is >> aligned;
626 refs.back().aligned = (aligned == 1);
627 // alignment score
628 is >> comma; assert_eq(',', comma);
629 is >> refs.back().score;
630 }
631 return true;
632 }
633
done() const634 bool done() const {
635 return !ih_.good();
636 }
637
638 protected:
639
640 string fn_; // file name
641 ifstream ih_; // file handle
642 string ln_; // line buffer
643 };
644
main(int argc,const char ** argv)645 int main(int argc, const char **argv) {
646 try {
647 // Reset all global state, including getopt state
648 opterr = optind = 1;
649 resetOptions();
650 parseOptions(argc, argv);
651 if(showVersion) {
652 if(sizeof(void*) == 4) {
653 cout << "32-bit" << endl;
654 } else if(sizeof(void*) == 8) {
655 cout << "64-bit" << endl;
656 } else {
657 cout << "Neither 32- nor 64-bit: sizeof(void*) = " << sizeof(void*) << endl;
658 }
659 cout << "Sizeof {int, long, long long, void*, size_t, off_t}: {"
660 << sizeof(int)
661 << ", " << sizeof(long) << ", " << sizeof(long long)
662 << ", " << sizeof(void *) << ", " << sizeof(size_t)
663 << ", " << sizeof(off_t) << "}" << endl;
664 return 0;
665 }
666 while(optind < argc) {
667 queries.push_back(argv[optind++]);
668 }
669 {
670 // Optionally summarize
671 if(gVerbose) {
672 cout << "DP inputs:" << endl;
673 for(size_t i = 0; i < queries.size(); i++) {
674 cout << " " << queries[i].c_str() << endl;
675 }
676 cout << "Output file: \"" << outfile.c_str() << "\"" << endl;
677 cout << "Sanity checking: " << (sanityCheck? "enabled":"disabled") << endl;
678 #ifdef NDEBUG
679 cout << "Assertions: disabled" << endl;
680 #else
681 cout << "Assertions: enabled" << endl;
682 #endif
683 }
684 }
685 // Do stuff
686 SwAligner sw(NULL);
687 DpLogReader logrd;
688 Scoring sc(
689 bonusMatch, // constant reward for match
690 penMmcType, // how to penalize mismatches
691 penMmcMax, // max mm pelanty
692 penMmcMin, // min mm pelanty
693 scoreMin, // min score as function of read len
694 nCeil, // max # Ns as function of read len
695 penNType, // how to penalize Ns in the read
696 penN, // constant if N pelanty is a constant
697 penNCatPair, // whether to concat mates before N filtering
698 penRdGapConst, // constant coeff for read gap cost
699 penRfGapConst, // constant coeff for ref gap cost
700 penRdGapLinear, // linear coeff for read gap cost
701 penRfGapLinear, // linear coeff for ref gap cost
702 gGapBarrier); // # rows at top/bot only entered diagonally
703 RandomSource rnd(seed);
704 {
705 Timer tim(std::cerr, "Alignment ", true);
706 BTDnaString seq, seqrc;
707 BTString qual, qualrc;
708 EList<DpProblem> probs;
709 size_t qid = 0;
710 size_t totnuc = 0, totcup = 0;
711 for(size_t i = 0; i < queries.size(); i++) {
712 logrd.init(queries[i]);
713 while(logrd.nextRead(seq, qual, probs)) {
714 totnuc += seq.length();
715 seqrc = seq;
716 seqrc.reverseComp();
717 qualrc = qual;
718 qualrc.reverse();
719 //cerr << "Initing read with " << probs.size() << " problems" << endl;
720 sw.initRead(seq, seqrc, qual, qualrc, 0, seq.length(), sc);
721 // Calculate minimum score
722 bool extend = true;
723 for(size_t j = 0; j < probs.size(); j++) {
724 sw.initRef(
725 probs[j].fw,
726 probs[j].refidx,
727 probs[j].rect,
728 const_cast<char *>(probs[j].ref.toZBuf()),
729 0,
730 probs[j].ref.length(),
731 probs[j].reflen,
732 sc,
733 probs[j].minsc,
734 enable8,
735 cminlen,
736 cpow2,
737 doTri,
738 extend);
739 // Now fill the dynamic programming matrix and return true iff
740 // there is at least one valid alignment
741 TAlScore bestCell = std::numeric_limits<TAlScore>::min();
742 ASSERT_ONLY(bool aligned =) sw.align(bestCell);
743 assert(aligned == probs[j].aligned);
744 assert(!aligned || bestCell == probs[j].score);
745 totcup += (seq.length() * probs[j].ref.length());
746 }
747 seq.clear(); seqrc.clear();
748 qual.clear(); qualrc.clear();
749 probs.clear();
750 qid++;
751 }
752 }
753 size_t el = (size_t)tim.elapsed();
754 double cups = 0.0;
755 double totnucps = 0.0;
756 double readps = 0.0;
757 if(el > 0) {
758 cups = totcup / (double)el;
759 totnucps = totnuc / (double)el;
760 readps = qid / (double)el;
761 }
762 cerr << qid << " reads" << endl;
763 cerr << std::setprecision(4) << " " << readps << " reads per second" << endl;
764 cerr << totnuc << " nucleotides" << endl;
765 cerr << std::setprecision(4) << " " << totnucps << " nucleotides per second" << endl;
766 cerr << totcup << " cell updates" << endl;
767 cerr << std::setprecision(4) << " " << cups << " cell updates per second (CUPS)" << endl;
768 }
769 return 0;
770 } catch(std::exception& e) {
771 cerr << "Error: Encountered exception: '" << e.what() << "'" << endl;
772 cerr << "Command: ";
773 for(int i = 0; i < argc; i++) cerr << argv[i] << " ";
774 cerr << endl;
775 return 1;
776 } catch(int e) {
777 if(e != 0) {
778 cerr << "Error: Encountered internal Bowtie 2 exception (#" << e << ")" << endl;
779 cerr << "Command: ";
780 for(int i = 0; i < argc; i++) cerr << argv[i] << " ";
781 cerr << endl;
782 }
783 return e;
784 }
785 }
786
787