1 /*
2  * Copyright 2014, Daehwan Kim <infphilo@gmail.com>
3  *
4  * This file is part of HISAT.
5  *
6  * HISAT is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * HISAT is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with HISAT.  If not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include <stdlib.h>
21 #include <iostream>
22 #include <fstream>
23 #include <string>
24 #include <cassert>
25 #include <stdexcept>
26 #include <getopt.h>
27 #include <math.h>
28 #include <utility>
29 #include <limits>
30 #include "alphabet.h"
31 #include "assert_helpers.h"
32 #include "endian_swap.h"
33 #include "bt2_idx.h"
34 #include "bt2_io.h"
35 #include "bt2_util.h"
36 #include "hier_idx.h"
37 #include "formats.h"
38 #include "sequence_io.h"
39 #include "tokenize.h"
40 #include "aln_sink.h"
41 #include "pat.h"
42 #include "threading.h"
43 #include "ds.h"
44 #include "aligner_metrics.h"
45 #include "sam.h"
46 #include "aligner_seed.h"
47 #include "splice_site.h"
48 #include "bp_aligner.h"
49 #include "aligner_seed_policy.h"
50 #include "aligner_driver.h"
51 #include "aligner_sw.h"
52 #include "aligner_sw_driver.h"
53 #include "aligner_cache.h"
54 #include "util.h"
55 #include "pe.h"
56 #include "simple_func.h"
57 #include "presets.h"
58 #include "opts.h"
59 #include "outq.h"
60 #include "aligner_seed2.h"
61 
62 using namespace std;
63 
64 static EList<string> mates1;  // mated reads (first mate)
65 static EList<string> mates2;  // mated reads (second mate)
66 static EList<string> mates12; // mated reads (1st/2nd interleaved in 1 file)
67 static string adjIdxBase;
68 bool gColor;              // colorspace (not supported)
69 int gVerbose;             // be talkative
70 static bool startVerbose; // be talkative at startup
71 int gQuiet;               // print nothing but the alignments
72 static int sanityCheck;   // enable expensive sanity checks
73 static int format;        // default read format is FASTQ
74 static string origString; // reference text, or filename(s)
75 static int seed;          // srandom() seed
76 static int timing;        // whether to report basic timing data
77 static int metricsIval;   // interval between alignment metrics messages (0 = no messages)
78 static string metricsFile;// output file to put alignment metrics in
79 static bool metricsStderr;// output file to put alignment metrics in
80 static bool metricsPerRead; // report a metrics tuple for every read
81 static bool allHits;      // for multihits, report just one
82 static bool showVersion;  // just print version and quit?
83 static int ipause;        // pause before maching?
84 static uint32_t qUpto;    // max # of queries to read
85 int gTrim5;               // amount to trim from 5' end
86 int gTrim3;               // amount to trim from 3' end
87 static int offRate;       // keep default offRate
88 static bool solexaQuals;  // quality strings are solexa quals, not phred, and subtract 64 (not 33)
89 static bool phred64Quals; // quality chars are phred, but must subtract 64 (not 33)
90 static bool integerQuals; // quality strings are space-separated strings of integers, not ASCII
91 static int nthreads;      // number of pthreads operating concurrently
92 static int outType;       // style of output
93 static bool noRefNames;   // true -> print reference indexes; not names
94 static uint32_t khits;    // number of hits per read; >1 is much slower
95 static uint32_t mhits;    // don't report any hits if there are > mhits
96 static int partitionSz;   // output a partitioning key in first field
97 static bool useSpinlock;  // false -> don't use of spinlocks even if they're #defines
98 static bool fileParallel; // separate threads read separate input files in parallel
99 static bool useShmem;     // use shared memory to hold the index
100 static bool useMm;        // use memory-mapped files to hold the index
101 static bool mmSweep;      // sweep through memory-mapped files immediately after mapping
102 int gMinInsert;           // minimum insert size
103 int gMaxInsert;           // maximum insert size
104 bool gMate1fw;            // -1 mate aligns in fw orientation on fw strand
105 bool gMate2fw;            // -2 mate aligns in rc orientation on fw strand
106 bool gFlippedMatesOK;     // allow mates to be in wrong order
107 bool gDovetailMatesOK;    // allow one mate to extend off the end of the other
108 bool gContainMatesOK;     // allow one mate to contain the other in PE alignment
109 bool gOlapMatesOK;        // allow mates to overlap in PE alignment
110 bool gExpandToFrag;       // incr max frag length to =larger mate len if necessary
111 bool gReportDiscordant;   // find and report discordant paired-end alignments
112 bool gReportMixed;        // find and report unpaired alignments for paired reads
113 static uint32_t cacheLimit;      // ranges w/ size > limit will be cached
114 static uint32_t cacheSize;       // # words per range cache
115 static uint32_t skipReads;       // # reads/read pairs to skip
116 bool gNofw; // don't align fw orientation of read
117 bool gNorc; // don't align rc orientation of read
118 static uint32_t fastaContLen;
119 static uint32_t fastaContFreq;
120 static bool hadoopOut; // print Hadoop status and summary messages
121 static bool fuzzy;
122 static bool fullRef;
123 static bool samTruncQname; // whether to truncate QNAME to 255 chars
124 static bool samOmitSecSeqQual; // omit SEQ/QUAL for 2ndary alignments?
125 static bool samNoUnal; // don't print records for unaligned reads
126 static bool samNoHead; // don't print any header lines in SAM output
127 static bool samNoSQ;   // don't print @SQ header lines
128 static bool sam_print_as;
129 static bool sam_print_xs;  // XS:i
130 static bool sam_print_xss; // Xs:i and Ys:i
131 static bool sam_print_yn;  // YN:i and Yn:i
132 static bool sam_print_xn;
133 static bool sam_print_cs;
134 static bool sam_print_cq;
135 static bool sam_print_x0;
136 static bool sam_print_x1;
137 static bool sam_print_xm;
138 static bool sam_print_xo;
139 static bool sam_print_xg;
140 static bool sam_print_nm;
141 static bool sam_print_md;
142 static bool sam_print_yf;
143 static bool sam_print_yi;
144 static bool sam_print_ym;
145 static bool sam_print_yp;
146 static bool sam_print_yt;
147 static bool sam_print_ys;
148 static bool sam_print_zs;
149 static bool sam_print_xr;
150 static bool sam_print_xt;
151 static bool sam_print_xd;
152 static bool sam_print_xu;
153 static bool sam_print_yl;
154 static bool sam_print_ye;
155 static bool sam_print_yu;
156 static bool sam_print_xp;
157 static bool sam_print_yr;
158 static bool sam_print_zb;
159 static bool sam_print_zr;
160 static bool sam_print_zf;
161 static bool sam_print_zm;
162 static bool sam_print_zi;
163 static bool sam_print_zp;
164 static bool sam_print_zu;
165 static bool sam_print_xs_a;
166 static bool bwaSwLike;
167 static float bwaSwLikeC;
168 static float bwaSwLikeT;
169 static bool qcFilter;
170 static bool sortByScore;      // prioritize alignments to report by score?
171 bool gReportOverhangs;        // false -> filter out alignments that fall off the end of a reference sequence
172 static string rgid;           // ID: setting for @RG header line
173 static string rgs;            // SAM outputs for @RG header line
174 static string rgs_optflag;    // SAM optional flag to add corresponding to @RG ID
175 static bool msample;          // whether to report a random alignment when maxed-out via -m/-M
176 int      gGapBarrier;         // # diags on top/bot only to be entered diagonally
177 static EList<string> qualities;
178 static EList<string> qualities1;
179 static EList<string> qualities2;
180 static string polstr;         // temporary holder for policy string
181 static bool  msNoCache;       // true -> disable local cache
182 static int   bonusMatchType;  // how to reward matches
183 static int   bonusMatch;      // constant reward if bonusMatchType=constant
184 static int   penMmcType;      // how to penalize mismatches
185 static int   penMmcMax;       // max mm penalty
186 static int   penMmcMin;       // min mm penalty
187 static int   penNType;        // how to penalize Ns in the read
188 static int   penN;            // constant if N pelanty is a constant
189 static bool  penNCatPair;     // concatenate mates before N filtering?
190 static bool  localAlign;      // do local alignment in DP steps
191 static bool  noisyHpolymer;   // set to true if gap penalties should be reduced to be consistent with a sequencer that under- and overcalls homopolymers
192 static int   penRdGapConst;   // constant cost of extending a gap in the read
193 static int   penRfGapConst;   // constant cost of extending a gap in the reference
194 static int   penRdGapLinear;  // coeff of linear term for cost of gap extension in read
195 static int   penRfGapLinear;  // coeff of linear term for cost of gap extension in ref
196 static SimpleFunc scoreMin;   // minimum valid score as function of read len
197 static SimpleFunc nCeil;      // max # Ns allowed as function of read len
198 static SimpleFunc msIval;     // interval between seeds as function of read len
199 static double descConsExp;    // how to adjust score minimum as we descent further into index-assisted alignment
200 static size_t descentLanding; // don't place a search root if it's within this many positions of end
201 static SimpleFunc descentTotSz;    // maximum space a DescentDriver can use in bytes
202 static SimpleFunc descentTotFmops; // maximum # FM ops a DescentDriver can perform
203 static int    multiseedMms;   // mismatches permitted in a multiseed seed
204 static int    multiseedLen;   // length of multiseed seeds
205 static size_t multiseedOff;   // offset to begin extracting seeds
206 static uint32_t seedCacheLocalMB;   // # MB to use for non-shared seed alignment cacheing
207 static uint32_t seedCacheCurrentMB; // # MB to use for current-read seed hit cacheing
208 static uint32_t exactCacheCurrentMB; // # MB to use for current-read seed hit cacheing
209 static size_t maxhalf;        // max width on one side of DP table
210 static bool seedSumm;         // print summary information about seed hits, not alignments
211 static bool doUngapped;       // do ungapped alignment
212 static size_t maxIters;       // stop after this many extend loop iterations
213 static size_t maxUg;          // stop after this many ungap extends
214 static size_t maxDp;          // stop after this many DPs
215 static size_t maxItersIncr;   // amt to add to maxIters for each -k > 1
216 static size_t maxEeStreak;    // stop after this many end-to-end fails in a row
217 static size_t maxUgStreak;    // stop after this many ungap fails in a row
218 static size_t maxDpStreak;    // stop after this many dp fails in a row
219 static size_t maxStreakIncr;  // amt to add to streak for each -k > 1
220 static size_t maxMateStreak;  // stop seed range after this many mate-find fails
221 static bool doExtend;         // extend seed hits
222 static bool enable8;          // use 8-bit SSE where possible?
223 static size_t cminlen;        // longer reads use checkpointing
224 static size_t cpow2;          // checkpoint interval log2
225 static bool doTri;            // do triangular mini-fills?
226 static string defaultPreset;  // default preset; applied immediately
227 static bool ignoreQuals;      // all mms incur same penalty, regardless of qual
228 static string wrapper;        // type of wrapper script, so we can print correct usage
229 static EList<string> queries; // list of query files
230 static string outfile;        // write SAM output to this file
231 static int mapqv;             // MAPQ calculation version
232 static int tighten;           // -M tighten mode (0=none, 1=best, 2=secbest+1)
233 static bool doExactUpFront;   // do exact search up front if seeds seem good enough
234 static bool do1mmUpFront;     // do 1mm search up front if seeds seem good enough
235 static size_t do1mmMinLen;    // length below which we disable 1mm e2e search
236 static int seedBoostThresh;   // if average non-zero position has more than this many elements
237 static size_t nSeedRounds;    // # seed rounds
238 static bool reorder;          // true -> reorder SAM recs in -p mode
239 static float sampleFrac;      // only align random fraction of input reads
240 static bool arbitraryRandom;  // pseudo-randoms no longer a function of read properties
241 static bool bowtie2p5;
242 static bool useTempSpliceSite;
243 static int penCanSplice;
244 static int penNoncanSplice;
245 static SimpleFunc penIntronLen;
246 static string knownSpliceSiteInfile;  //
247 static string novelSpliceSiteInfile;  //
248 static string novelSpliceSiteOutfile; //
249 static bool no_spliced_alignment;
250 static int rna_strandness; //
251 
252 static string bt2index;      // read Bowtie 2 index from files with this prefix
253 static EList<pair<int, string> > extra_opts;
254 static size_t extra_opts_cur;
255 
256 static EList<uint64_t> thread_rids;
257 static MUTEX_T         thread_rids_mutex;
258 static uint64_t        thread_rids_mindist;
259 
260 #define DMAX std::numeric_limits<double>::max()
261 
resetOptions()262 static void resetOptions() {
263 	mates1.clear();
264 	mates2.clear();
265 	mates12.clear();
266 	adjIdxBase	            = "";
267 	gColor                  = false;
268 	gVerbose                = 0;
269 	startVerbose			= 0;
270 	gQuiet					= false;
271 	sanityCheck				= 0;  // enable expensive sanity checks
272 	format					= FASTQ; // default read format is FASTQ
273 	origString				= ""; // reference text, or filename(s)
274 	seed					= 0; // srandom() seed
275 	timing					= 0; // whether to report basic timing data
276 	metricsIval				= 1; // interval between alignment metrics messages (0 = no messages)
277 	metricsFile             = ""; // output file to put alignment metrics in
278 	metricsStderr           = false; // print metrics to stderr (in addition to --metrics-file if it's specified
279 	metricsPerRead          = false; // report a metrics tuple for every read?
280 	allHits					= false; // for multihits, report just one
281 	showVersion				= false; // just print version and quit?
282 	ipause					= 0; // pause before maching?
283 	qUpto					= 0xffffffff; // max # of queries to read
284 	gTrim5					= 0; // amount to trim from 5' end
285 	gTrim3					= 0; // amount to trim from 3' end
286 	offRate					= -1; // keep default offRate
287 	solexaQuals				= false; // quality strings are solexa quals, not phred, and subtract 64 (not 33)
288 	phred64Quals			= false; // quality chars are phred, but must subtract 64 (not 33)
289 	integerQuals			= false; // quality strings are space-separated strings of integers, not ASCII
290 	nthreads				= 1;     // number of pthreads operating concurrently
291 	outType					= OUTPUT_SAM;  // style of output
292 	noRefNames				= false; // true -> print reference indexes; not names
293 	khits					= 5;     // number of hits per read; >1 is much slower
294 	mhits					= 0;     // stop after finding this many alignments+1
295 	partitionSz				= 0;     // output a partitioning key in first field
296 	useSpinlock				= true;  // false -> don't use of spinlocks even if they're #defines
297 	fileParallel			= false; // separate threads read separate input files in parallel
298 	useShmem				= false; // use shared memory to hold the index
299 	useMm					= false; // use memory-mapped files to hold the index
300 	mmSweep					= false; // sweep through memory-mapped files immediately after mapping
301 	gMinInsert				= 0;     // minimum insert size
302 	gMaxInsert				= 500;   // maximum insert size
303 	gMate1fw				= true;  // -1 mate aligns in fw orientation on fw strand
304 	gMate2fw				= false; // -2 mate aligns in rc orientation on fw strand
305 	gFlippedMatesOK         = false; // allow mates to be in wrong order
306 	gDovetailMatesOK        = false; // allow one mate to extend off the end of the other
307 	gContainMatesOK         = true;  // allow one mate to contain the other in PE alignment
308 	gOlapMatesOK            = true;  // allow mates to overlap in PE alignment
309 	gExpandToFrag           = true;  // incr max frag length to =larger mate len if necessary
310 	gReportDiscordant       = true;  // find and report discordant paired-end alignments
311 	gReportMixed            = true;  // find and report unpaired alignments for paired reads
312 
313 	cacheLimit				= 5;     // ranges w/ size > limit will be cached
314 	cacheSize				= 0;     // # words per range cache
315 	skipReads				= 0;     // # reads/read pairs to skip
316 	gNofw					= false; // don't align fw orientation of read
317 	gNorc					= false; // don't align rc orientation of read
318 	fastaContLen			= 0;
319 	fastaContFreq			= 0;
320 	hadoopOut				= false; // print Hadoop status and summary messages
321 	fuzzy					= false; // reads will have alternate basecalls w/ qualities
322 	fullRef					= false; // print entire reference name instead of just up to 1st space
323 	samTruncQname           = true;  // whether to truncate QNAME to 255 chars
324 	samOmitSecSeqQual       = false; // omit SEQ/QUAL for 2ndary alignments?
325 	samNoUnal               = false; // omit SAM records for unaligned reads
326 	samNoHead				= false; // don't print any header lines in SAM output
327 	samNoSQ					= false; // don't print @SQ header lines
328 	sam_print_as            = true;
329 	sam_print_xs            = true;
330 	sam_print_xss           = false; // Xs:i and Ys:i
331 	sam_print_yn            = false; // YN:i and Yn:i
332 	sam_print_xn            = true;
333 	sam_print_cs            = false;
334 	sam_print_cq            = false;
335 	sam_print_x0            = true;
336 	sam_print_x1            = true;
337 	sam_print_xm            = true;
338 	sam_print_xo            = true;
339 	sam_print_xg            = true;
340 	sam_print_nm            = true;
341 	sam_print_md            = true;
342 	sam_print_yf            = true;
343 	sam_print_yi            = false;
344 	sam_print_ym            = false;
345 	sam_print_yp            = false;
346 	sam_print_yt            = true;
347 	sam_print_ys            = true;
348 	sam_print_zs            = false;
349 	sam_print_xr            = false;
350 	sam_print_xt            = false;
351 	sam_print_xd            = false;
352 	sam_print_xu            = false;
353 	sam_print_yl            = false;
354 	sam_print_ye            = false;
355 	sam_print_yu            = false;
356 	sam_print_xp            = false;
357 	sam_print_yr            = false;
358 	sam_print_zb            = false;
359 	sam_print_zr            = false;
360 	sam_print_zf            = false;
361 	sam_print_zm            = false;
362 	sam_print_zi            = false;
363 	sam_print_zp            = false;
364 	sam_print_zu            = false;
365     sam_print_xs_a          = true;
366 	bwaSwLike               = false;
367 	bwaSwLikeC              = 5.5f;
368 	bwaSwLikeT              = 20.0f;
369 	qcFilter                = false; // don't believe upstream qc by default
370 	sortByScore             = true;  // prioritize alignments to report by score?
371 	rgid					= "";    // SAM outputs for @RG header line
372 	rgs						= "";    // SAM outputs for @RG header line
373 	rgs_optflag				= "";    // SAM optional flag to add corresponding to @RG ID
374 	msample				    = true;
375 	gGapBarrier				= 4;     // disallow gaps within this many chars of either end of alignment
376 	qualities.clear();
377 	qualities1.clear();
378 	qualities2.clear();
379 	polstr.clear();
380 	msNoCache       = true; // true -> disable local cache
381 	bonusMatchType  = DEFAULT_MATCH_BONUS_TYPE;
382 	bonusMatch      = DEFAULT_MATCH_BONUS;
383 	penMmcType      = DEFAULT_MM_PENALTY_TYPE;
384 	penMmcMax       = DEFAULT_MM_PENALTY_MAX;
385 	penMmcMin       = DEFAULT_MM_PENALTY_MIN;
386 	penNType        = DEFAULT_N_PENALTY_TYPE;
387 	penN            = DEFAULT_N_PENALTY;
388 	penNCatPair     = DEFAULT_N_CAT_PAIR; // concatenate mates before N filtering?
389 	localAlign      = false;     // do local alignment in DP steps
390 	noisyHpolymer   = false;
391 	penRdGapConst   = DEFAULT_READ_GAP_CONST;
392 	penRfGapConst   = DEFAULT_REF_GAP_CONST;
393 	penRdGapLinear  = DEFAULT_READ_GAP_LINEAR;
394 	penRfGapLinear  = DEFAULT_REF_GAP_LINEAR;
395 	// scoreMin.init  (SIMPLE_FUNC_LINEAR, DEFAULT_MIN_CONST,   DEFAULT_MIN_LINEAR);
396     scoreMin.init  (SIMPLE_FUNC_CONST, -18, 0);
397 	nCeil.init     (SIMPLE_FUNC_LINEAR, 0.0f, DMAX, 2.0f, 0.1f);
398 	msIval.init    (SIMPLE_FUNC_LINEAR, 1.0f, DMAX, DEFAULT_IVAL_B, DEFAULT_IVAL_A);
399 	descConsExp     = 2.0;
400 	descentLanding  = 20;
401 	descentTotSz.init(SIMPLE_FUNC_LINEAR, 1024.0, DMAX, 0.0, 1024.0);
402 	descentTotFmops.init(SIMPLE_FUNC_LINEAR, 100.0, DMAX, 0.0, 10.0);
403 	multiseedMms    = DEFAULT_SEEDMMS;
404 	multiseedLen    = DEFAULT_SEEDLEN;
405 	multiseedOff    = 0;
406 	seedCacheLocalMB   = 32; // # MB to use for non-shared seed alignment cacheing
407 	seedCacheCurrentMB = 20; // # MB to use for current-read seed hit cacheing
408 	exactCacheCurrentMB = 20; // # MB to use for current-read seed hit cacheing
409 	maxhalf            = 15; // max width on one side of DP table
410 	seedSumm           = false; // print summary information about seed hits, not alignments
411 	doUngapped         = true;  // do ungapped alignment
412 	maxIters           = 400;   // max iterations of extend loop
413 	maxUg              = 300;   // stop after this many ungap extends
414 	maxDp              = 300;   // stop after this many dp extends
415 	maxItersIncr       = 20;    // amt to add to maxIters for each -k > 1
416 	maxEeStreak        = 15;    // stop after this many end-to-end fails in a row
417 	maxUgStreak        = 15;    // stop after this many ungap fails in a row
418 	maxDpStreak        = 15;    // stop after this many dp fails in a row
419 	maxStreakIncr      = 10;    // amt to add to streak for each -k > 1
420 	maxMateStreak      = 10;    // in PE: abort seed range after N mate-find fails
421 	doExtend           = true;  // do seed extensions
422 	enable8            = true;  // use 8-bit SSE where possible?
423 	cminlen            = 2000;  // longer reads use checkpointing
424 	cpow2              = 4;     // checkpoint interval log2
425 	doTri              = false; // do triangular mini-fills?
426 	defaultPreset      = "sensitive%LOCAL%"; // default preset; applied immediately
427 	extra_opts.clear();
428 	extra_opts_cur = 0;
429 	bt2index.clear();        // read Bowtie 2 index from files with this prefix
430 	ignoreQuals = false;     // all mms incur same penalty, regardless of qual
431 	wrapper.clear();         // type of wrapper script, so we can print correct usage
432 	queries.clear();         // list of query files
433 	outfile.clear();         // write SAM output to this file
434 	mapqv = 2;               // MAPQ calculation version
435 	tighten = 3;             // -M tightening mode
436 	doExactUpFront = true;   // do exact search up front if seeds seem good enough
437 	do1mmUpFront = true;     // do 1mm search up front if seeds seem good enough
438 	seedBoostThresh = 300;   // if average non-zero position has more than this many elements
439 	nSeedRounds = 2;         // # rounds of seed searches to do for repetitive reads
440 	do1mmMinLen = 60;        // length below which we disable 1mm search
441 	reorder = false;         // reorder SAM records with -p > 1
442 	sampleFrac = 1.1f;       // align all reads
443 	arbitraryRandom = false; // let pseudo-random seeds be a function of read properties
444 	bowtie2p5 = false;
445     useTempSpliceSite = true;
446     penCanSplice = 0;
447     penNoncanSplice = 3;
448     penIntronLen.init(SIMPLE_FUNC_LOG, -8, 1);
449     knownSpliceSiteInfile = "";
450     novelSpliceSiteInfile = "";
451     novelSpliceSiteOutfile = "";
452     no_spliced_alignment = false;
453     rna_strandness = RNA_STRANDNESS_UNKNOWN;
454 }
455 
456 static const char *short_options = "fF:qbzhcu:rv:s:aP:t3:5:w:p:k:M:1:2:I:X:CQ:N:i:L:U:x:S:g:O:D:R:";
457 
458 static struct option long_options[] = {
459 	{(char*)"verbose",      no_argument,       0,            ARG_VERBOSE},
460 	{(char*)"startverbose", no_argument,       0,            ARG_STARTVERBOSE},
461 	{(char*)"quiet",        no_argument,       0,            ARG_QUIET},
462 	{(char*)"sanity",       no_argument,       0,            ARG_SANITY},
463 	{(char*)"pause",        no_argument,       &ipause,      1},
464 	{(char*)"orig",         required_argument, 0,            ARG_ORIG},
465 	{(char*)"all",          no_argument,       0,            'a'},
466 	{(char*)"solexa-quals", no_argument,       0,            ARG_SOLEXA_QUALS},
467 	{(char*)"integer-quals",no_argument,       0,            ARG_INTEGER_QUALS},
468 	{(char*)"int-quals",    no_argument,       0,            ARG_INTEGER_QUALS},
469 	{(char*)"metrics",      required_argument, 0,            ARG_METRIC_IVAL},
470 	{(char*)"metrics-file", required_argument, 0,            ARG_METRIC_FILE},
471 	{(char*)"metrics-stderr",no_argument,      0,            ARG_METRIC_STDERR},
472 	{(char*)"metrics-per-read", no_argument,   0,            ARG_METRIC_PER_READ},
473 	{(char*)"met-read",     no_argument,       0,            ARG_METRIC_PER_READ},
474 	{(char*)"met",          required_argument, 0,            ARG_METRIC_IVAL},
475 	{(char*)"met-file",     required_argument, 0,            ARG_METRIC_FILE},
476 	{(char*)"met-stderr",   no_argument,       0,            ARG_METRIC_STDERR},
477 	{(char*)"time",         no_argument,       0,            't'},
478 	{(char*)"trim3",        required_argument, 0,            '3'},
479 	{(char*)"trim5",        required_argument, 0,            '5'},
480 	{(char*)"seed",         required_argument, 0,            ARG_SEED},
481 	{(char*)"qupto",        required_argument, 0,            'u'},
482 	{(char*)"upto",         required_argument, 0,            'u'},
483 	{(char*)"version",      no_argument,       0,            ARG_VERSION},
484 	{(char*)"filepar",      no_argument,       0,            ARG_FILEPAR},
485 	{(char*)"help",         no_argument,       0,            'h'},
486 	{(char*)"threads",      required_argument, 0,            'p'},
487 	{(char*)"khits",        required_argument, 0,            'k'},
488 	{(char*)"minins",       required_argument, 0,            'I'},
489 	{(char*)"maxins",       required_argument, 0,            'X'},
490 	{(char*)"quals",        required_argument, 0,            'Q'},
491 	{(char*)"Q1",           required_argument, 0,            ARG_QUALS1},
492 	{(char*)"Q2",           required_argument, 0,            ARG_QUALS2},
493 	{(char*)"refidx",       no_argument,       0,            ARG_REFIDX},
494 	{(char*)"partition",    required_argument, 0,            ARG_PARTITION},
495 	{(char*)"ff",           no_argument,       0,            ARG_FF},
496 	{(char*)"fr",           no_argument,       0,            ARG_FR},
497 	{(char*)"rf",           no_argument,       0,            ARG_RF},
498 	{(char*)"cachelim",     required_argument, 0,            ARG_CACHE_LIM},
499 	{(char*)"cachesz",      required_argument, 0,            ARG_CACHE_SZ},
500 	{(char*)"nofw",         no_argument,       0,            ARG_NO_FW},
501 	{(char*)"norc",         no_argument,       0,            ARG_NO_RC},
502 	{(char*)"skip",         required_argument, 0,            's'},
503 	{(char*)"12",           required_argument, 0,            ARG_ONETWO},
504 	{(char*)"tab5",         required_argument, 0,            ARG_TAB5},
505 	{(char*)"tab6",         required_argument, 0,            ARG_TAB6},
506 	{(char*)"phred33-quals", no_argument,      0,            ARG_PHRED33},
507 	{(char*)"phred64-quals", no_argument,      0,            ARG_PHRED64},
508 	{(char*)"phred33",       no_argument,      0,            ARG_PHRED33},
509 	{(char*)"phred64",      no_argument,       0,            ARG_PHRED64},
510 	{(char*)"solexa1.3-quals", no_argument,    0,            ARG_PHRED64},
511 	{(char*)"mm",           no_argument,       0,            ARG_MM},
512 	{(char*)"shmem",        no_argument,       0,            ARG_SHMEM},
513 	{(char*)"mmsweep",      no_argument,       0,            ARG_MMSWEEP},
514 	{(char*)"hadoopout",    no_argument,       0,            ARG_HADOOPOUT},
515 	{(char*)"fuzzy",        no_argument,       0,            ARG_FUZZY},
516 	{(char*)"fullref",      no_argument,       0,            ARG_FULLREF},
517 	{(char*)"usage",        no_argument,       0,            ARG_USAGE},
518 	{(char*)"sam-no-qname-trunc", no_argument, 0,            ARG_SAM_NO_QNAME_TRUNC},
519 	{(char*)"sam-omit-sec-seq", no_argument,   0,            ARG_SAM_OMIT_SEC_SEQ},
520 	{(char*)"omit-sec-seq", no_argument,       0,            ARG_SAM_OMIT_SEC_SEQ},
521 	{(char*)"sam-no-head",  no_argument,       0,            ARG_SAM_NOHEAD},
522 	{(char*)"sam-nohead",   no_argument,       0,            ARG_SAM_NOHEAD},
523 	{(char*)"sam-noHD",     no_argument,       0,            ARG_SAM_NOHEAD},
524 	{(char*)"sam-no-hd",    no_argument,       0,            ARG_SAM_NOHEAD},
525 	{(char*)"sam-nosq",     no_argument,       0,            ARG_SAM_NOSQ},
526 	{(char*)"sam-no-sq",    no_argument,       0,            ARG_SAM_NOSQ},
527 	{(char*)"sam-noSQ",     no_argument,       0,            ARG_SAM_NOSQ},
528 	{(char*)"no-head",      no_argument,       0,            ARG_SAM_NOHEAD},
529 	{(char*)"no-hd",        no_argument,       0,            ARG_SAM_NOHEAD},
530 	{(char*)"no-sq",        no_argument,       0,            ARG_SAM_NOSQ},
531 	{(char*)"no-HD",        no_argument,       0,            ARG_SAM_NOHEAD},
532 	{(char*)"no-SQ",        no_argument,       0,            ARG_SAM_NOSQ},
533 	{(char*)"no-unal",      no_argument,       0,            ARG_SAM_NO_UNAL},
534 	{(char*)"color",        no_argument,       0,            'C'},
535 	{(char*)"sam-RG",       required_argument, 0,            ARG_SAM_RG},
536 	{(char*)"sam-rg",       required_argument, 0,            ARG_SAM_RG},
537 	{(char*)"sam-rg-id",    required_argument, 0,            ARG_SAM_RGID},
538 	{(char*)"RG",           required_argument, 0,            ARG_SAM_RG},
539 	{(char*)"rg",           required_argument, 0,            ARG_SAM_RG},
540 	{(char*)"rg-id",        required_argument, 0,            ARG_SAM_RGID},
541 	{(char*)"snpphred",     required_argument, 0,            ARG_SNPPHRED},
542 	{(char*)"snpfrac",      required_argument, 0,            ARG_SNPFRAC},
543 	{(char*)"gbar",         required_argument, 0,            ARG_GAP_BAR},
544 	{(char*)"qseq",         no_argument,       0,            ARG_QSEQ},
545 	{(char*)"policy",       required_argument, 0,            ARG_ALIGN_POLICY},
546 	{(char*)"preset",       required_argument, 0,            'P'},
547 	{(char*)"seed-summ",    no_argument,       0,            ARG_SEED_SUMM},
548 	{(char*)"seed-summary", no_argument,       0,            ARG_SEED_SUMM},
549 	{(char*)"overhang",     no_argument,       0,            ARG_OVERHANG},
550 	{(char*)"no-cache",     no_argument,       0,            ARG_NO_CACHE},
551 	{(char*)"cache",        no_argument,       0,            ARG_USE_CACHE},
552 	{(char*)"454",          no_argument,       0,            ARG_NOISY_HPOLY},
553 	{(char*)"ion-torrent",  no_argument,       0,            ARG_NOISY_HPOLY},
554 	{(char*)"no-mixed",     no_argument,       0,            ARG_NO_MIXED},
555 	{(char*)"no-discordant",no_argument,       0,            ARG_NO_DISCORDANT},
556 	{(char*)"local",        no_argument,       0,            ARG_LOCAL},
557 	{(char*)"end-to-end",   no_argument,       0,            ARG_END_TO_END},
558 	{(char*)"ungapped",     no_argument,       0,            ARG_UNGAPPED},
559 	{(char*)"no-ungapped",  no_argument,       0,            ARG_UNGAPPED_NO},
560 	{(char*)"sse8",         no_argument,       0,            ARG_SSE8},
561 	{(char*)"no-sse8",      no_argument,       0,            ARG_SSE8_NO},
562 	{(char*)"scan-narrowed",no_argument,       0,            ARG_SCAN_NARROWED},
563 	{(char*)"qc-filter",    no_argument,       0,            ARG_QC_FILTER},
564 	{(char*)"bwa-sw-like",  no_argument,       0,            ARG_BWA_SW_LIKE},
565 	{(char*)"multiseed",        required_argument, 0,        ARG_MULTISEED_IVAL},
566 	{(char*)"ma",               required_argument, 0,        ARG_SCORE_MA},
567 	{(char*)"mp",               required_argument, 0,        ARG_SCORE_MMP},
568 	{(char*)"np",               required_argument, 0,        ARG_SCORE_NP},
569 	{(char*)"rdg",              required_argument, 0,        ARG_SCORE_RDG},
570 	{(char*)"rfg",              required_argument, 0,        ARG_SCORE_RFG},
571 	{(char*)"score-min",        required_argument, 0,        ARG_SCORE_MIN},
572 	{(char*)"min-score",        required_argument, 0,        ARG_SCORE_MIN},
573 	{(char*)"n-ceil",           required_argument, 0,        ARG_N_CEIL},
574 	{(char*)"dpad",             required_argument, 0,        ARG_DPAD},
575 	{(char*)"mapq-print-inputs",no_argument,       0,        ARG_SAM_PRINT_YI},
576 	{(char*)"very-fast",        no_argument,       0,        ARG_PRESET_VERY_FAST},
577 	{(char*)"fast",             no_argument,       0,        ARG_PRESET_FAST},
578 	{(char*)"sensitive",        no_argument,       0,        ARG_PRESET_SENSITIVE},
579 	{(char*)"very-sensitive",   no_argument,       0,        ARG_PRESET_VERY_SENSITIVE},
580 	{(char*)"very-fast-local",      no_argument,   0,        ARG_PRESET_VERY_FAST_LOCAL},
581 	{(char*)"fast-local",           no_argument,   0,        ARG_PRESET_FAST_LOCAL},
582 	{(char*)"sensitive-local",      no_argument,   0,        ARG_PRESET_SENSITIVE_LOCAL},
583 	{(char*)"very-sensitive-local", no_argument,   0,        ARG_PRESET_VERY_SENSITIVE_LOCAL},
584 	{(char*)"no-score-priority",no_argument,       0,        ARG_NO_SCORE_PRIORITY},
585 	{(char*)"seedlen",          required_argument, 0,        'L'},
586 	{(char*)"seedmms",          required_argument, 0,        'N'},
587 	{(char*)"seedival",         required_argument, 0,        'i'},
588 	{(char*)"ignore-quals",     no_argument,       0,        ARG_IGNORE_QUALS},
589 	{(char*)"index",            required_argument, 0,        'x'},
590 	{(char*)"arg-desc",         no_argument,       0,        ARG_DESC},
591 	{(char*)"wrapper",          required_argument, 0,        ARG_WRAPPER},
592 	{(char*)"unpaired",         required_argument, 0,        'U'},
593 	{(char*)"output",           required_argument, 0,        'S'},
594 	{(char*)"mapq-v",           required_argument, 0,        ARG_MAPQ_V},
595 	{(char*)"dovetail",         no_argument,       0,        ARG_DOVETAIL},
596 	{(char*)"no-dovetail",      no_argument,       0,        ARG_NO_DOVETAIL},
597 	{(char*)"contain",          no_argument,       0,        ARG_CONTAIN},
598 	{(char*)"no-contain",       no_argument,       0,        ARG_NO_CONTAIN},
599 	{(char*)"overlap",          no_argument,       0,        ARG_OVERLAP},
600 	{(char*)"no-overlap",       no_argument,       0,        ARG_NO_OVERLAP},
601 	{(char*)"tighten",          required_argument, 0,        ARG_TIGHTEN},
602 	{(char*)"exact-upfront",    no_argument,       0,        ARG_EXACT_UPFRONT},
603 	{(char*)"1mm-upfront",      no_argument,       0,        ARG_1MM_UPFRONT},
604 	{(char*)"no-exact-upfront", no_argument,       0,        ARG_EXACT_UPFRONT_NO},
605 	{(char*)"no-1mm-upfront",   no_argument,       0,        ARG_1MM_UPFRONT_NO},
606 	{(char*)"1mm-minlen",       required_argument, 0,        ARG_1MM_MINLEN},
607 	{(char*)"seed-off",         required_argument, 0,        'O'},
608 	{(char*)"seed-boost",       required_argument, 0,        ARG_SEED_BOOST_THRESH},
609 	{(char*)"read-times",       no_argument,       0,        ARG_READ_TIMES},
610 	{(char*)"show-rand-seed",   no_argument,       0,        ARG_SHOW_RAND_SEED},
611 	{(char*)"dp-fail-streak",   required_argument, 0,        ARG_DP_FAIL_STREAK_THRESH},
612 	{(char*)"ee-fail-streak",   required_argument, 0,        ARG_EE_FAIL_STREAK_THRESH},
613 	{(char*)"ug-fail-streak",   required_argument, 0,        ARG_UG_FAIL_STREAK_THRESH},
614 	{(char*)"fail-streak",      required_argument, 0,        'D'},
615 	{(char*)"dp-fails",         required_argument, 0,        ARG_DP_FAIL_THRESH},
616 	{(char*)"ug-fails",         required_argument, 0,        ARG_UG_FAIL_THRESH},
617 	{(char*)"extends",          required_argument, 0,        ARG_EXTEND_ITERS},
618 	{(char*)"no-extend",        no_argument,       0,        ARG_NO_EXTEND},
619 	{(char*)"mapq-extra",       no_argument,       0,        ARG_MAPQ_EX},
620 	{(char*)"seed-rounds",      required_argument, 0,        'R'},
621 	{(char*)"reorder",          no_argument,       0,        ARG_REORDER},
622 	{(char*)"passthrough",      no_argument,       0,        ARG_READ_PASSTHRU},
623 	{(char*)"sample",           required_argument, 0,        ARG_SAMPLE},
624 	{(char*)"cp-min",           required_argument, 0,        ARG_CP_MIN},
625 	{(char*)"cp-ival",          required_argument, 0,        ARG_CP_IVAL},
626 	{(char*)"tri",              no_argument,       0,        ARG_TRI},
627 	{(char*)"nondeterministic", no_argument,       0,        ARG_NON_DETERMINISTIC},
628 	{(char*)"non-deterministic", no_argument,      0,        ARG_NON_DETERMINISTIC},
629 	{(char*)"local-seed-cache-sz", required_argument, 0,     ARG_LOCAL_SEED_CACHE_SZ},
630 	{(char*)"seed-cache-sz",       required_argument, 0,     ARG_CURRENT_SEED_CACHE_SZ},
631 	{(char*)"no-unal",          no_argument,       0,        ARG_SAM_NO_UNAL},
632 	{(char*)"test-25",          no_argument,       0,        ARG_TEST_25},
633 	// TODO: following should be a function of read length?
634 	{(char*)"desc-kb",          required_argument, 0,        ARG_DESC_KB},
635 	{(char*)"desc-landing",     required_argument, 0,        ARG_DESC_LANDING},
636 	{(char*)"desc-exp",         required_argument, 0,        ARG_DESC_EXP},
637 	{(char*)"desc-fmops",       required_argument, 0,        ARG_DESC_FMOPS},
638     {(char*)"no-temp-splicesite",  no_argument, 0,     ARG_NO_TEMPSPLICESITE},
639     {(char*)"pen-cansplice",  required_argument, 0,        ARG_PEN_CANSPLICE},
640     {(char*)"pen-noncansplice",  required_argument, 0,     ARG_PEN_NONCANSPLICE},
641     {(char*)"pen-intronlen",  required_argument, 0,     ARG_PEN_INTRONLEN},
642     {(char*)"known-splicesite-infile",       required_argument, 0,        ARG_KNOWN_SPLICESITE_INFILE},
643     {(char*)"novel-splicesite-infile",       required_argument, 0,        ARG_NOVEL_SPLICESITE_INFILE},
644     {(char*)"novel-splicesite-outfile",      required_argument, 0,        ARG_NOVEL_SPLICESITE_OUTFILE},
645     {(char*)"no-spliced-alignment",   no_argument, 0,        ARG_NO_SPLICED_ALIGNMENT},
646     {(char*)"rna-strandness",   required_argument, 0,        ARG_RNA_STRANDNESS},
647 	{(char*)0, 0, 0, 0} // terminator
648 };
649 
650 /**
651  * Print out a concise description of what options are taken and whether they
652  * take an argument.
653  */
printArgDesc(ostream & out)654 static void printArgDesc(ostream& out) {
655 	// struct option {
656 	//   const char *name;
657 	//   int has_arg;
658 	//   int *flag;
659 	//   int val;
660 	// };
661 	size_t i = 0;
662 	while(long_options[i].name != 0) {
663 		out << long_options[i].name << "\t"
664 		    << (long_options[i].has_arg == no_argument ? 0 : 1)
665 		    << endl;
666 		i++;
667 	}
668 	size_t solen = strlen(short_options);
669 	for(i = 0; i < solen; i++) {
670 		// Has an option?  Does if next char is :
671 		if(i == solen-1) {
672 			assert_neq(':', short_options[i]);
673 			cout << (char)short_options[i] << "\t" << 0 << endl;
674 		} else {
675 			if(short_options[i+1] == ':') {
676 				// Option with argument
677 				cout << (char)short_options[i] << "\t" << 1 << endl;
678 				i++; // skip the ':'
679 			} else {
680 				// Option with no argument
681 				cout << (char)short_options[i] << "\t" << 0 << endl;
682 			}
683 		}
684 	}
685 }
686 
687 /**
688  * Print a summary usage message to the provided output stream.
689  */
printUsage(ostream & out)690 static void printUsage(ostream& out) {
691 	out << "HISAT version " << string(HISAT_VERSION).c_str() << " by Daehwan Kim (infphilo@gmail.com, www.ccb.jhu.edu/people/infphilo)" << endl;
692 	string tool_name = "hisat2-align";
693 	if(wrapper == "basic-0") {
694 		tool_name = "hisat";
695 	}
696 	out << "Usage: " << endl
697 	    << "  " << tool_name.c_str() << " [options]* -x <ht2-idx> {-1 <m1> -2 <m2> | -U <r>} [-S <sam>]" << endl
698 	    << endl
699 		<<     "  <ht2-idx>  Index filename prefix (minus trailing .X." << gfm_ext << ")." << endl
700 	    <<     "  <m1>       Files with #1 mates, paired with files in <m2>." << endl;
701 	if(wrapper == "basic-0") {
702 		out << "             Could be gzip'ed (extension: .gz) or bzip2'ed (extension: .bz2)." << endl;
703 	}
704 	out <<     "  <m2>       Files with #2 mates, paired with files in <m1>." << endl;
705 	if(wrapper == "basic-0") {
706 		out << "             Could be gzip'ed (extension: .gz) or bzip2'ed (extension: .bz2)." << endl;
707 	}
708 	out <<     "  <r>        Files with unpaired reads." << endl;
709 	if(wrapper == "basic-0") {
710 		out << "             Could be gzip'ed (extension: .gz) or bzip2'ed (extension: .bz2)." << endl;
711 	}
712 	out <<     "  <sam>      File for SAM output (default: stdout)" << endl
713 	    << endl
714 	    << "  <m1>, <m2>, <r> can be comma-separated lists (no whitespace) and can be" << endl
715 		<< "  specified many times.  E.g. '-U file1.fq,file2.fq -U file3.fq'." << endl
716 		// Wrapper script should write <bam> line next
717 		<< endl
718 	    << "Options (defaults in parentheses):" << endl
719 		<< endl
720 	    << " Input:" << endl
721 	    << "  -q                 query input files are FASTQ .fq/.fastq (default)" << endl
722 	    << "  --qseq             query input files are in Illumina's qseq format" << endl
723 	    << "  -f                 query input files are (multi-)FASTA .fa/.mfa" << endl
724 	    << "  -r                 query input files are raw one-sequence-per-line" << endl
725 	    << "  -c                 <m1>, <m2>, <r> are sequences themselves, not files" << endl
726 	    << "  -s/--skip <int>    skip the first <int> reads/pairs in the input (none)" << endl
727 	    << "  -u/--upto <int>    stop after first <int> reads/pairs (no limit)" << endl
728 	    << "  -5/--trim5 <int>   trim <int> bases from 5'/left end of reads (0)" << endl
729 	    << "  -3/--trim3 <int>   trim <int> bases from 3'/right end of reads (0)" << endl
730 	    << "  --phred33          qualities are Phred+33 (default)" << endl
731 	    << "  --phred64          qualities are Phred+64" << endl
732 	    << "  --int-quals        qualities encoded as space-delimited integers" << endl
733 		<< endl
734 	    << " Presets:                 Same as:" << endl
735 		<< "  For --end-to-end:" << endl
736 		<< "   --very-fast            -D 5 -R 1 -N 0 -L 22 -i S,0,2.50" << endl
737 		<< "   --fast                 -D 10 -R 2 -N 0 -L 22 -i S,0,2.50" << endl
738 		<< "   --sensitive            -D 15 -R 2 -N 0 -L 22 -i S,1,1.15 (default)" << endl
739 		<< "   --very-sensitive       -D 20 -R 3 -N 0 -L 20 -i S,1,0.50" << endl
740 		<< endl
741 		<< "  For --local:" << endl
742 		<< "   --very-fast-local      -D 5 -R 1 -N 0 -L 25 -i S,1,2.00" << endl
743 		<< "   --fast-local           -D 10 -R 2 -N 0 -L 22 -i S,1,1.75" << endl
744 		<< "   --sensitive-local      -D 15 -R 2 -N 0 -L 20 -i S,1,0.75 (default)" << endl
745 		<< "   --very-sensitive-local -D 20 -R 3 -N 0 -L 20 -i S,1,0.50" << endl
746 		<< endl
747 	    << " Alignment:" << endl
748 		<< "  -N <int>           max # mismatches in seed alignment; can be 0 or 1 (0)" << endl
749 		<< "  -L <int>           length of seed substrings; must be >3, <32 (22)" << endl
750 		<< "  -i <func>          interval between seed substrings w/r/t read len (S,1,1.15)" << endl
751 		<< "  --n-ceil <func>    func for max # non-A/C/G/Ts permitted in aln (L,0,0.15)" << endl
752 		<< "  --dpad <int>       include <int> extra ref chars on sides of DP table (15)" << endl
753 		<< "  --gbar <int>       disallow gaps within <int> nucs of read extremes (4)" << endl
754 		<< "  --ignore-quals     treat all quality values as 30 on Phred scale (off)" << endl
755 	    << "  --nofw             do not align forward (original) version of read (off)" << endl
756 	    << "  --norc             do not align reverse-complement version of read (off)" << endl
757 		<< endl
758         << " Spliced Alignment:" << endl
759         << "  --pen-cansplice <int>              penalty for a canonical splice site (0)" << endl
760         << "  --pen-noncansplice <int>           penalty for a non-canonical splice site (3)" << endl
761         << "  --pen-intronlen <func>             penalty for long introns (G,-8,1)" << endl
762         << "  --known-splicesite-infile <path>   provide a list of known splice sites" << endl
763         << "  --novel-splicesite-outfile <path>  report a list of splice sites" << endl
764         << "  --novel-splicesite-infile <path>   provide a list of novel splice sites" << endl
765         << "  --no-temp-splicesite               disable the use of splice sites found" << endl
766         << "  --no-spliced-alignment             disable spliced alignment" << endl
767         << "  --rna-strandness <string>          Specify strand-specific information (unstranded)" << endl
768         << endl
769 		<< " Scoring:" << endl
770 		<< "  --ma <int>         match bonus (0 for --end-to-end, 2 for --local) " << endl
771 		<< "  --mp <int>         max penalty for mismatch; lower qual = lower penalty (6)" << endl
772 		<< "  --np <int>         penalty for non-A/C/G/Ts in read/ref (1)" << endl
773 		<< "  --rdg <int>,<int>  read gap open, extend penalties (5,3)" << endl
774 		<< "  --rfg <int>,<int>  reference gap open, extend penalties (5,3)" << endl
775 		<< "  --score-min <func> min acceptable alignment score w/r/t read length" << endl
776 		<< "                     (G,20,8 for local, L,-0.6,-0.6 for end-to-end)" << endl
777 		<< endl
778 	    << " Reporting:" << endl
779 	    << "  (default)          look for multiple alignments, report best, with MAPQ" << endl
780 		<< "   OR" << endl
781 	    << "  -k <int>           report up to <int> alns per read; MAPQ not meaningful" << endl
782 		<< "   OR" << endl
783 	    << "  -a/--all           report all alignments; very slow, MAPQ not meaningful" << endl
784 		<< endl
785 	    << " Effort:" << endl
786 	    << "  -D <int>           give up extending after <int> failed extends in a row (15)" << endl
787 	    << "  -R <int>           for reads w/ repetitive seeds, try <int> sets of seeds (2)" << endl
788 		<< endl
789 		<< " Paired-end:" << endl
790 	    << "  -I/--minins <int>  minimum fragment length (0)" << endl
791 	    << "  -X/--maxins <int>  maximum fragment length (500)" << endl
792 	    << "  --fr/--rf/--ff     -1, -2 mates align fw/rev, rev/fw, fw/fw (--fr)" << endl
793 		<< "  --no-mixed         suppress unpaired alignments for paired reads" << endl
794 		<< "  --no-discordant    suppress discordant alignments for paired reads" << endl
795 		<< "  --no-dovetail      not concordant when mates extend past each other" << endl
796 		<< "  --no-contain       not concordant when one mate alignment contains other" << endl
797 		<< "  --no-overlap       not concordant when mates overlap at all" << endl
798 		<< endl
799 	    << " Output:" << endl;
800 	//if(wrapper == "basic-0") {
801 	//	out << "  --bam              output directly to BAM (by piping through 'samtools view')" << endl;
802 	//}
803 	out << "  -t/--time          print wall-clock time taken by search phases" << endl;
804 	if(wrapper == "basic-0") {
805 	out << "  --un <path>           write unpaired reads that didn't align to <path>" << endl
806 	    << "  --al <path>           write unpaired reads that aligned at least once to <path>" << endl
807 	    << "  --un-conc <path>      write pairs that didn't align concordantly to <path>" << endl
808 	    << "  --al-conc <path>      write pairs that aligned concordantly at least once to <path>" << endl
809 	    << "  (Note: for --un, --al, --un-conc, or --al-conc, add '-gz' to the option name, e.g." << endl
810 		<< "  --un-gz <path>, to gzip compress output, or add '-bz2' to bzip2 compress output.)" << endl;
811 	}
812 	out << "  --quiet            print nothing to stderr except serious errors" << endl
813 	//  << "  --refidx           refer to ref. seqs by 0-based index rather than name" << endl
814 		<< "  --met-file <path>  send metrics to file at <path> (off)" << endl
815 		<< "  --met-stderr       send metrics to stderr (off)" << endl
816 		<< "  --met <int>        report internal counters & metrics every <int> secs (1)" << endl
817 	// Following is supported in the wrapper instead
818 	//  << "  --no-unal          supppress SAM records for unaligned reads" << endl
819 	    << "  --no-head          supppress header lines, i.e. lines starting with @" << endl
820 	    << "  --no-sq            supppress @SQ header lines" << endl
821 	    << "  --rg-id <text>     set read group id, reflected in @RG line and RG:Z: opt field" << endl
822 	    << "  --rg <text>        add <text> (\"lab:value\") to @RG line of SAM header." << endl
823 	    << "                     Note: @RG line only printed when --rg-id is set." << endl
824 	    << "  --omit-sec-seq     put '*' in SEQ and QUAL fields for secondary alignments." << endl
825 		<< endl
826 	    << " Performance:" << endl
827 	    << "  -o/--offrate <int> override offrate of index; must be >= index's offrate" << endl
828 	    << "  -p/--threads <int> number of alignment threads to launch (1)" << endl
829 	    << "  --reorder          force SAM output order to match order of input reads" << endl
830 #ifdef BOWTIE_MM
831 	    << "  --mm               use memory-mapped I/O for index; many 'hisat2's can share" << endl
832 #endif
833 #ifdef BOWTIE_SHARED_MEM
834 		//<< "  --shmem            use shared mem for index; many 'hisat2's can share" << endl
835 #endif
836 		<< endl
837 	    << " Other:" << endl
838 		<< "  --qc-filter        filter out reads that are bad according to QSEQ filter" << endl
839 	    << "  --seed <int>       seed for random number generator (0)" << endl
840 	    << "  --non-deterministic seed rand. gen. arbitrarily instead of using read attributes" << endl
841 	//  << "  --verbose          verbose output for debugging" << endl
842 	    << "  --version          print version information and quit" << endl
843 	    << "  -h/--help          print this usage message" << endl
844 	    ;
845 	if(wrapper.empty()) {
846 		cerr << endl
847 		     << "*** Warning ***" << endl
848 			 << "'hisat2-align' was run directly.  It is recommended that you run the wrapper script 'hisat2' instead." << endl
849 			 << endl;
850 	}
851 }
852 
853 /**
854  * Parse an int out of optarg and enforce that it be at least 'lower';
855  * if it is less than 'lower', than output the given error message and
856  * exit with an error and a usage message.
857  */
parseInt(int lower,int upper,const char * errmsg,const char * arg)858 static int parseInt(int lower, int upper, const char *errmsg, const char *arg) {
859 	long l;
860 	char *endPtr= NULL;
861 	l = strtol(arg, &endPtr, 10);
862 	if (endPtr != NULL) {
863 		if (l < lower || l > upper) {
864 			cerr << errmsg << endl;
865 			printUsage(cerr);
866 			throw 1;
867 		}
868 		return (int32_t)l;
869 	}
870 	cerr << errmsg << endl;
871 	printUsage(cerr);
872 	throw 1;
873 	return -1;
874 }
875 
876 /**
877  * Upper is maximum int by default.
878  */
parseInt(int lower,const char * errmsg,const char * arg)879 static int parseInt(int lower, const char *errmsg, const char *arg) {
880 	return parseInt(lower, std::numeric_limits<int>::max(), errmsg, arg);
881 }
882 
883 /**
884  * Parse a T string 'str'.
885  */
886 template<typename T>
parse(const char * s)887 T parse(const char *s) {
888 	T tmp;
889 	stringstream ss(s);
890 	ss >> tmp;
891 	return tmp;
892 }
893 
894 /**
895  * Parse a pair of Ts from a string, 'str', delimited with 'delim'.
896  */
897 template<typename T>
parsePair(const char * str,char delim)898 pair<T, T> parsePair(const char *str, char delim) {
899 	string s(str);
900 	EList<string> ss;
901 	tokenize(s, delim, ss);
902 	pair<T, T> ret;
903 	ret.first = parse<T>(ss[0].c_str());
904 	ret.second = parse<T>(ss[1].c_str());
905 	return ret;
906 }
907 
908 /**
909  * Parse a pair of Ts from a string, 'str', delimited with 'delim'.
910  */
911 template<typename T>
parseTuple(const char * str,char delim,EList<T> & ret)912 void parseTuple(const char *str, char delim, EList<T>& ret) {
913 	string s(str);
914 	EList<string> ss;
915 	tokenize(s, delim, ss);
916 	for(size_t i = 0; i < ss.size(); i++) {
917 		ret.push_back(parse<T>(ss[i].c_str()));
918 	}
919 }
920 
applyPreset(const string & sorig,Presets & presets)921 static string applyPreset(const string& sorig, Presets& presets) {
922 	string s = sorig;
923 	size_t found = s.find("%LOCAL%");
924 	if(found != string::npos) {
925 		s.replace(found, strlen("%LOCAL%"), localAlign ? "-local" : "");
926 	}
927 	if(gVerbose) {
928 		cerr << "Applying preset: '" << s.c_str() << "' using preset menu '"
929 			 << presets.name() << "'" << endl;
930 	}
931 	string pol;
932 	presets.apply(s, pol, extra_opts);
933 	return pol;
934 }
935 
936 static bool saw_M;
937 static bool saw_a;
938 static bool saw_k;
939 static EList<string> presetList;
940 
941 /**
942  * TODO: Argument parsing is very, very flawed.  The biggest problem is that
943  * there are two separate worlds of arguments, the ones set via polstr, and
944  * the ones set directly in variables.  This makes for nasty interactions,
945  * e.g., with the -M option being resolved at an awkward time relative to
946  * the -k and -a options.
947  */
parseOption(int next_option,const char * arg)948 static void parseOption(int next_option, const char *arg) {
949 	switch (next_option) {
950 		case ARG_TEST_25: bowtie2p5 = true; break;
951 		case ARG_DESC_KB: descentTotSz = SimpleFunc::parse(arg, 0.0, 1024.0, 1024.0, DMAX); break;
952 		case ARG_DESC_FMOPS: descentTotFmops = SimpleFunc::parse(arg, 0.0, 10.0, 100.0, DMAX); break;
953 		case ARG_DESC_LANDING: descentLanding = parse<int>(arg); break;
954 		case ARG_DESC_EXP: {
955 			descConsExp = parse<double>(arg);
956 			if(descConsExp < 0.0) {
957 				cerr << "Error: --desc-exp must be greater than or equal to 0" << endl;
958 				throw 1;
959 			}
960 			break;
961 		}
962 		case '1': tokenize(arg, ",", mates1); break;
963 		case '2': tokenize(arg, ",", mates2); break;
964 		case ARG_ONETWO: tokenize(arg, ",", mates12); format = TAB_MATE5; break;
965 		case ARG_TAB5:   tokenize(arg, ",", mates12); format = TAB_MATE5; break;
966 		case ARG_TAB6:   tokenize(arg, ",", mates12); format = TAB_MATE6; break;
967 		case 'f': format = FASTA; break;
968 		case 'F': {
969 			format = FASTA_CONT;
970 			pair<uint32_t, uint32_t> p = parsePair<uint32_t>(arg, ',');
971 			fastaContLen = p.first;
972 			fastaContFreq = p.second;
973 			break;
974 		}
975 		case ARG_BWA_SW_LIKE: {
976 			bwaSwLikeC = 5.5f;
977 			bwaSwLikeT = 30;
978 			bwaSwLike = true;
979 			localAlign = true;
980 			// -a INT   Score of a match [1]
981 			// -b INT   Mismatch penalty [3]
982 			// -q INT   Gap open penalty [5]
983 			// -r INT   Gap extension penalty. The penalty for a contiguous
984 			//          gap of size k is q+k*r. [2]
985 			polstr += ";MA=1;MMP=C3;RDG=5,2;RFG=5,2";
986 			break;
987 		}
988 		case 'q': format = FASTQ; break;
989 		case 'r': format = RAW; break;
990 		case 'c': format = CMDLINE; break;
991 		case ARG_QSEQ: format = QSEQ; break;
992 		case 'C': {
993 			cerr << "Error: -C specified but Bowtie 2 does not support colorspace input." << endl;
994 			throw 1;
995 			break;
996 		}
997 		case 'I':
998 			gMinInsert = parseInt(0, "-I arg must be positive", arg);
999 			break;
1000 		case 'X':
1001 			gMaxInsert = parseInt(1, "-X arg must be at least 1", arg);
1002 			break;
1003 		case ARG_NO_DISCORDANT: gReportDiscordant = false; break;
1004 		case ARG_NO_MIXED: gReportMixed = false; break;
1005 		case 's':
1006 			skipReads = (uint32_t)parseInt(0, "-s arg must be positive", arg);
1007 			break;
1008 		case ARG_FF: gMate1fw = true;  gMate2fw = true;  break;
1009 		case ARG_RF: gMate1fw = false; gMate2fw = true;  break;
1010 		case ARG_FR: gMate1fw = true;  gMate2fw = false; break;
1011 		case ARG_SHMEM: useShmem = true; break;
1012 		case ARG_SEED_SUMM: seedSumm = true; break;
1013 		case ARG_MM: {
1014 #ifdef BOWTIE_MM
1015 			useMm = true;
1016 			break;
1017 #else
1018 			cerr << "Memory-mapped I/O mode is disabled because bowtie was not compiled with" << endl
1019 				 << "BOWTIE_MM defined.  Memory-mapped I/O is not supported under Windows.  If you" << endl
1020 				 << "would like to use memory-mapped I/O on a platform that supports it, please" << endl
1021 				 << "refrain from specifying BOWTIE_MM=0 when compiling Bowtie." << endl;
1022 			throw 1;
1023 #endif
1024 		}
1025 		case ARG_MMSWEEP: mmSweep = true; break;
1026 		case ARG_HADOOPOUT: hadoopOut = true; break;
1027 		case ARG_SOLEXA_QUALS: solexaQuals = true; break;
1028 		case ARG_INTEGER_QUALS: integerQuals = true; break;
1029 		case ARG_PHRED64: phred64Quals = true; break;
1030 		case ARG_PHRED33: solexaQuals = false; phred64Quals = false; break;
1031 		case ARG_OVERHANG: gReportOverhangs = true; break;
1032 		case ARG_NO_CACHE: msNoCache = true; break;
1033 		case ARG_USE_CACHE: msNoCache = false; break;
1034 		case ARG_LOCAL_SEED_CACHE_SZ:
1035 			seedCacheLocalMB = (uint32_t)parseInt(1, "--local-seed-cache-sz arg must be at least 1", arg);
1036 			break;
1037 		case ARG_CURRENT_SEED_CACHE_SZ:
1038 			seedCacheCurrentMB = (uint32_t)parseInt(1, "--seed-cache-sz arg must be at least 1", arg);
1039 			break;
1040 		case ARG_REFIDX: noRefNames = true; break;
1041 		case ARG_FUZZY: fuzzy = true; break;
1042 		case ARG_FULLREF: fullRef = true; break;
1043 		case ARG_GAP_BAR:
1044 			gGapBarrier = parseInt(1, "--gbar must be no less than 1", arg);
1045 			break;
1046 		case ARG_SEED:
1047 			seed = parseInt(0, "--seed arg must be at least 0", arg);
1048 			break;
1049 		case ARG_NON_DETERMINISTIC:
1050 			arbitraryRandom = true;
1051 			break;
1052 		case 'u':
1053 			qUpto = (uint32_t)parseInt(1, "-u/--qupto arg must be at least 1", arg);
1054 			break;
1055 		case 'Q':
1056 			tokenize(arg, ",", qualities);
1057 			integerQuals = true;
1058 			break;
1059 		case ARG_QUALS1:
1060 			tokenize(arg, ",", qualities1);
1061 			integerQuals = true;
1062 			break;
1063 		case ARG_QUALS2:
1064 			tokenize(arg, ",", qualities2);
1065 			integerQuals = true;
1066 			break;
1067 		case ARG_CACHE_LIM:
1068 			cacheLimit = (uint32_t)parseInt(1, "--cachelim arg must be at least 1", arg);
1069 			break;
1070 		case ARG_CACHE_SZ:
1071 			cacheSize = (uint32_t)parseInt(1, "--cachesz arg must be at least 1", arg);
1072 			cacheSize *= (1024 * 1024); // convert from MB to B
1073 			break;
1074 		case ARG_WRAPPER: wrapper = arg; break;
1075 		case 'p':
1076 			nthreads = parseInt(1, "-p/--threads arg must be at least 1", arg);
1077 			break;
1078 		case ARG_FILEPAR:
1079 			fileParallel = true;
1080 			break;
1081 		case '3': gTrim3 = parseInt(0, "-3/--trim3 arg must be at least 0", arg); break;
1082 		case '5': gTrim5 = parseInt(0, "-5/--trim5 arg must be at least 0", arg); break;
1083 		case 'h': printUsage(cout); throw 0; break;
1084 		case ARG_USAGE: printUsage(cout); throw 0; break;
1085 		//
1086 		// NOTE that unlike in Bowtie 1, -M, -a and -k are mutually
1087 		// exclusive here.
1088 		//
1089 		case 'M': {
1090 			msample = true;
1091 			mhits = parse<uint32_t>(arg);
1092 			if(saw_a || saw_k) {
1093 				cerr << "Warning: -M, -k and -a are mutually exclusive. "
1094 					 << "-M will override" << endl;
1095 				khits = 1;
1096 			}
1097 			assert_eq(1, khits);
1098 			saw_M = true;
1099 			cerr << "Warning: -M is deprecated.  Use -D and -R to adjust " <<
1100 			        "effort instead." << endl;
1101 			break;
1102 		}
1103 		case ARG_EXTEND_ITERS: {
1104 			maxIters = parse<size_t>(arg);
1105 			break;
1106 		}
1107 		case ARG_NO_EXTEND: {
1108 			doExtend = false;
1109 			break;
1110 		}
1111 		case 'R': { polstr += ";ROUNDS="; polstr += arg; break; }
1112 		case 'D': { polstr += ";DPS=";    polstr += arg; break; }
1113 		case ARG_DP_MATE_STREAK_THRESH: {
1114 			maxMateStreak = parse<size_t>(arg);
1115 			break;
1116 		}
1117 		case ARG_DP_FAIL_STREAK_THRESH: {
1118 			maxDpStreak = parse<size_t>(arg);
1119 			break;
1120 		}
1121 		case ARG_EE_FAIL_STREAK_THRESH: {
1122 			maxEeStreak = parse<size_t>(arg);
1123 			break;
1124 		}
1125 		case ARG_UG_FAIL_STREAK_THRESH: {
1126 			maxUgStreak = parse<size_t>(arg);
1127 			break;
1128 		}
1129 		case ARG_DP_FAIL_THRESH: {
1130 			maxDp = parse<size_t>(arg);
1131 			break;
1132 		}
1133 		case ARG_UG_FAIL_THRESH: {
1134 			maxUg = parse<size_t>(arg);
1135 			break;
1136 		}
1137 		case ARG_SEED_BOOST_THRESH: {
1138 			seedBoostThresh = parse<int>(arg);
1139 			break;
1140 		}
1141 		case 'a': {
1142 			msample = false;
1143 			allHits = true;
1144 			mhits = 0; // disable -M
1145 			if(saw_M || saw_k) {
1146 				cerr << "Warning: -M, -k and -a are mutually exclusive. "
1147 					 << "-a will override" << endl;
1148 			}
1149 			saw_a = true;
1150 			break;
1151 		}
1152 		case 'k': {
1153 			msample = false;
1154 			khits = (uint32_t)parseInt(1, "-k arg must be at least 1", arg);
1155 			mhits = 0; // disable -M
1156 			if(saw_M || saw_a) {
1157 				cerr << "Warning: -M, -k and -a are mutually exclusive. "
1158 					 << "-k will override" << endl;
1159 			}
1160 			saw_k = true;
1161 			break;
1162 		}
1163 		case ARG_VERBOSE: gVerbose = 1; break;
1164 		case ARG_STARTVERBOSE: startVerbose = true; break;
1165 		case ARG_QUIET: gQuiet = true; break;
1166 		case ARG_SANITY: sanityCheck = true; break;
1167 		case 't': timing = true; break;
1168 		case ARG_METRIC_IVAL: {
1169 			metricsIval = parseInt(1, "--metrics arg must be at least 1", arg);
1170 			break;
1171 		}
1172 		case ARG_METRIC_FILE: metricsFile = arg; break;
1173 		case ARG_METRIC_STDERR: metricsStderr = true; break;
1174 		case ARG_METRIC_PER_READ: metricsPerRead = true; break;
1175 		case ARG_NO_FW: gNofw = true; break;
1176 		case ARG_NO_RC: gNorc = true; break;
1177 		case ARG_SAM_NO_QNAME_TRUNC: samTruncQname = false; break;
1178 		case ARG_SAM_OMIT_SEC_SEQ: samOmitSecSeqQual = true; break;
1179 		case ARG_SAM_NO_UNAL: samNoUnal = true; break;
1180 		case ARG_SAM_NOHEAD: samNoHead = true; break;
1181 		case ARG_SAM_NOSQ: samNoSQ = true; break;
1182 		case ARG_SAM_PRINT_YI: sam_print_yi = true; break;
1183 		case ARG_REORDER: reorder = true; break;
1184 		case ARG_MAPQ_EX: {
1185 			sam_print_zp = true;
1186 			sam_print_zu = true;
1187 			sam_print_xp = true;
1188 			sam_print_xss = true;
1189 			sam_print_yn = true;
1190 			break;
1191 		}
1192 		case ARG_SHOW_RAND_SEED: {
1193 			sam_print_zs = true;
1194 			break;
1195 		}
1196 		case ARG_SAMPLE:
1197 			sampleFrac = parse<float>(arg);
1198 			break;
1199 		case ARG_CP_MIN:
1200 			cminlen = parse<size_t>(arg);
1201 			break;
1202 		case ARG_CP_IVAL:
1203 			cpow2 = parse<size_t>(arg);
1204 			break;
1205 		case ARG_TRI:
1206 			doTri = true;
1207 			break;
1208 		case ARG_READ_PASSTHRU: {
1209 			sam_print_xr = true;
1210 			break;
1211 		}
1212 		case ARG_READ_TIMES: {
1213 			sam_print_xt = true;
1214 			sam_print_xd = true;
1215 			sam_print_xu = true;
1216 			sam_print_yl = true;
1217 			sam_print_ye = true;
1218 			sam_print_yu = true;
1219 			sam_print_yr = true;
1220 			sam_print_zb = true;
1221 			sam_print_zr = true;
1222 			sam_print_zf = true;
1223 			sam_print_zm = true;
1224 			sam_print_zi = true;
1225 			break;
1226 		}
1227 		case ARG_SAM_RG: {
1228 			string argstr = arg;
1229 			if(argstr.substr(0, 3) == "ID:") {
1230 				rgid = "\t";
1231 				rgid += argstr;
1232 				rgs_optflag = "RG:Z:" + argstr.substr(3);
1233 			} else {
1234 				rgs += '\t';
1235 				rgs += argstr;
1236 			}
1237 			break;
1238 		}
1239 		case ARG_SAM_RGID: {
1240 			string argstr = arg;
1241 			rgid = "\t";
1242 			rgid = "\tID:" + argstr;
1243 			rgs_optflag = "RG:Z:" + argstr;
1244 			break;
1245 		}
1246 		case ARG_PARTITION: partitionSz = parse<int>(arg); break;
1247 		case ARG_DPAD:
1248 			maxhalf = parseInt(0, "--dpad must be no less than 0", arg);
1249 			break;
1250 		case ARG_ORIG:
1251 			if(arg == NULL || strlen(arg) == 0) {
1252 				cerr << "--orig arg must be followed by a string" << endl;
1253 				printUsage(cerr);
1254 				throw 1;
1255 			}
1256 			origString = arg;
1257 			break;
1258 		case ARG_LOCAL: localAlign = true; break;
1259 		case ARG_END_TO_END: localAlign = false; break;
1260 		case ARG_SSE8: enable8 = true; break;
1261 		case ARG_SSE8_NO: enable8 = false; break;
1262 		case ARG_UNGAPPED: doUngapped = true; break;
1263 		case ARG_UNGAPPED_NO: doUngapped = false; break;
1264 		case ARG_NO_DOVETAIL: gDovetailMatesOK = false; break;
1265 		case ARG_NO_CONTAIN:  gContainMatesOK  = false; break;
1266 		case ARG_NO_OVERLAP:  gOlapMatesOK     = false; break;
1267 		case ARG_DOVETAIL:    gDovetailMatesOK = true;  break;
1268 		case ARG_CONTAIN:     gContainMatesOK  = true;  break;
1269 		case ARG_OVERLAP:     gOlapMatesOK     = true;  break;
1270 		case ARG_QC_FILTER: qcFilter = true; break;
1271 		case ARG_NO_SCORE_PRIORITY: sortByScore = false; break;
1272 		case ARG_IGNORE_QUALS: ignoreQuals = true; break;
1273 		case ARG_MAPQ_V: mapqv = parse<int>(arg); break;
1274 		case ARG_TIGHTEN: tighten = parse<int>(arg); break;
1275 		case ARG_EXACT_UPFRONT:    doExactUpFront = true; break;
1276 		case ARG_1MM_UPFRONT:      do1mmUpFront   = true; break;
1277 		case ARG_EXACT_UPFRONT_NO: doExactUpFront = false; break;
1278 		case ARG_1MM_UPFRONT_NO:   do1mmUpFront   = false; break;
1279 		case ARG_1MM_MINLEN:       do1mmMinLen = parse<size_t>(arg); break;
1280 		case ARG_NOISY_HPOLY: noisyHpolymer = true; break;
1281 		case 'x': bt2index = arg; break;
1282 		case ARG_PRESET_VERY_FAST_LOCAL: localAlign = true;
1283 		case ARG_PRESET_VERY_FAST: {
1284 			presetList.push_back("very-fast%LOCAL%"); break;
1285 		}
1286 		case ARG_PRESET_FAST_LOCAL: localAlign = true;
1287 		case ARG_PRESET_FAST: {
1288 			presetList.push_back("fast%LOCAL%"); break;
1289 		}
1290 		case ARG_PRESET_SENSITIVE_LOCAL: localAlign = true;
1291 		case ARG_PRESET_SENSITIVE: {
1292 			presetList.push_back("sensitive%LOCAL%"); break;
1293 		}
1294 		case ARG_PRESET_VERY_SENSITIVE_LOCAL: localAlign = true;
1295 		case ARG_PRESET_VERY_SENSITIVE: {
1296 			presetList.push_back("very-sensitive%LOCAL%"); break;
1297 		}
1298 		case 'P': { presetList.push_back(arg); break; }
1299 		case ARG_ALIGN_POLICY: {
1300 			if(strlen(arg) > 0) {
1301 				polstr += ";"; polstr += arg;
1302 			}
1303 			break;
1304 		}
1305 		case 'N': { polstr += ";SEED="; polstr += arg; break; }
1306 		case 'L': {
1307 			int64_t len = parse<size_t>(arg);
1308 			if(len < 0) {
1309 				cerr << "Error: -L argument must be >= 0; was " << arg << endl;
1310 				throw 1;
1311 			}
1312 			if(len > 32) {
1313 				cerr << "Error: -L argument must be <= 32; was" << arg << endl;
1314 				throw 1;
1315 			}
1316 			polstr += ";SEEDLEN="; polstr += arg; break;
1317 		}
1318 		case 'O':
1319 			multiseedOff = parse<size_t>(arg);
1320 			break;
1321 		case 'i': {
1322 			EList<string> args;
1323 			tokenize(arg, ",", args);
1324 			if(args.size() > 3 || args.size() == 0) {
1325 				cerr << "Error: expected 3 or fewer comma-separated "
1326 					 << "arguments to -i option, got "
1327 					 << args.size() << endl;
1328 				throw 1;
1329 			}
1330 			// Interval-settings arguments
1331 			polstr += (";IVAL=" + args[0]); // Function type
1332 			if(args.size() > 1) {
1333 				polstr += ("," + args[1]);  // Constant term
1334 			}
1335 			if(args.size() > 2) {
1336 				polstr += ("," + args[2]);  // Coefficient
1337 			}
1338 			break;
1339 		}
1340 		case ARG_MULTISEED_IVAL: {
1341 			polstr += ";";
1342 			// Split argument by comma
1343 			EList<string> args;
1344 			tokenize(arg, ",", args);
1345 			if(args.size() > 5 || args.size() == 0) {
1346 				cerr << "Error: expected 5 or fewer comma-separated "
1347 					 << "arguments to --multiseed option, got "
1348 					 << args.size() << endl;
1349 				throw 1;
1350 			}
1351 			// Seed mm and length arguments
1352 			polstr += "SEED=";
1353 			polstr += (args[0]); // # mismatches
1354 			if(args.size() >  1) polstr += ("," + args[ 1]); // length
1355 			if(args.size() >  2) polstr += (";IVAL=" + args[2]); // Func type
1356 			if(args.size() >  3) polstr += ("," + args[ 3]); // Constant term
1357 			if(args.size() >  4) polstr += ("," + args[ 4]); // Coefficient
1358 			break;
1359 		}
1360 		case ARG_N_CEIL: {
1361 			// Split argument by comma
1362 			EList<string> args;
1363 			tokenize(arg, ",", args);
1364 			if(args.size() > 3) {
1365 				cerr << "Error: expected 3 or fewer comma-separated "
1366 					 << "arguments to --n-ceil option, got "
1367 					 << args.size() << endl;
1368 				throw 1;
1369 			}
1370 			if(args.size() == 0) {
1371 				cerr << "Error: expected at least one argument to --n-ceil option" << endl;
1372 				throw 1;
1373 			}
1374 			polstr += ";NCEIL=";
1375 			if(args.size() == 3) {
1376 				polstr += (args[0] + "," + args[1] + "," + args[2]);
1377 			} else {
1378                 if(args.size() == 1) {
1379                     polstr += ("C," + args[0]);
1380                 } else {
1381 					polstr += (args[0] + "," + args[1]);
1382 				}
1383 			}
1384 			break;
1385 		}
1386 		case ARG_SCORE_MA:  polstr += ";MA=";    polstr += arg; break;
1387 		case ARG_SCORE_MMP: {
1388 			EList<string> args;
1389 			tokenize(arg, ",", args);
1390 			if(args.size() > 2 || args.size() == 0) {
1391 				cerr << "Error: expected 1 or 2 comma-separated "
1392 					 << "arguments to --mmp option, got " << args.size() << endl;
1393 				throw 1;
1394 			}
1395 			if(args.size() >= 1) {
1396 				polstr += ";MMP=Q,";
1397 				polstr += args[0];
1398 				if(args.size() >= 2) {
1399 					polstr += ",";
1400 					polstr += args[1];
1401 				}
1402 			}
1403 			break;
1404 		}
1405 		case ARG_SCORE_NP:  polstr += ";NP=C";   polstr += arg; break;
1406 		case ARG_SCORE_RDG: polstr += ";RDG=";   polstr += arg; break;
1407 		case ARG_SCORE_RFG: polstr += ";RFG=";   polstr += arg; break;
1408 		case ARG_SCORE_MIN: {
1409 			polstr += ";";
1410 			EList<string> args;
1411 			tokenize(arg, ",", args);
1412 			if(args.size() > 3 && args.size() == 0) {
1413 				cerr << "Error: expected 3 or fewer comma-separated "
1414 					 << "arguments to --n-ceil option, got "
1415 					 << args.size() << endl;
1416 				throw 1;
1417 			}
1418 			polstr += ("MIN=" + args[0]);
1419 			if(args.size() > 1) {
1420 				polstr += ("," + args[1]);
1421 			}
1422 			if(args.size() > 2) {
1423 				polstr += ("," + args[2]);
1424 			}
1425 			break;
1426 		}
1427 		case ARG_DESC: printArgDesc(cout); throw 0;
1428 		case 'S': outfile = arg; break;
1429 		case 'U': {
1430 			EList<string> args;
1431 			tokenize(arg, ",", args);
1432 			for(size_t i = 0; i < args.size(); i++) {
1433 				queries.push_back(args[i]);
1434 			}
1435 			break;
1436 		}
1437 		case ARG_VERSION: showVersion = 1; break;
1438         case ARG_NO_TEMPSPLICESITE: useTempSpliceSite = false; break;
1439         case ARG_PEN_CANSPLICE: {
1440             penCanSplice = parseInt(0, "-k arg must be at least 0", arg);
1441             break;
1442         }
1443         case ARG_PEN_NONCANSPLICE: {
1444             penNoncanSplice = parseInt(0, "-k arg must be at least 0", arg);
1445             break;
1446         }
1447         case ARG_PEN_INTRONLEN: {
1448 			polstr += ";";
1449 			EList<string> args;
1450 			tokenize(arg, ",", args);
1451 			if(args.size() > 3 && args.size() == 0) {
1452 				cerr << "Error: expected 3 or fewer comma-separated "
1453                 << "arguments to --n-ceil option, got "
1454                 << args.size() << endl;
1455 				throw 1;
1456 			}
1457 			polstr += ("INTRONLEN=" + args[0]);
1458 			if(args.size() > 1) {
1459 				polstr += ("," + args[1]);
1460 			}
1461 			if(args.size() > 2) {
1462 				polstr += ("," + args[2]);
1463 			}
1464 			break;
1465 		}
1466         case ARG_KNOWN_SPLICESITE_INFILE: knownSpliceSiteInfile = arg; break;
1467         case ARG_NOVEL_SPLICESITE_INFILE: novelSpliceSiteInfile = arg; break;
1468         case ARG_NOVEL_SPLICESITE_OUTFILE: novelSpliceSiteOutfile = arg; break;
1469         case ARG_NO_SPLICED_ALIGNMENT: no_spliced_alignment = true; break;
1470         case ARG_RNA_STRANDNESS: {
1471             string strandness = arg;
1472             if(strandness == "F")       rna_strandness = RNA_STRANDNESS_F;
1473             else if(strandness == "R")  rna_strandness = RNA_STRANDNESS_R;
1474             else if(strandness == "FR") rna_strandness = RNA_STRANDNESS_FR;
1475             else if(strandness == "RF") rna_strandness = RNA_STRANDNESS_RF;
1476             else {
1477                 // daehwan - throw exception with details
1478                 cerr << "Error: should be one of F, R, FR, or RF " << endl;
1479 				throw 1;
1480             }
1481             break;
1482         }
1483 		default:
1484 			printUsage(cerr);
1485 			throw 1;
1486 	}
1487 }
1488 
1489 /**
1490  * Read command-line arguments
1491  */
parseOptions(int argc,const char ** argv)1492 static void parseOptions(int argc, const char **argv) {
1493 	int option_index = 0;
1494 	int next_option;
1495 	saw_M = false;
1496 	saw_a = false;
1497 	saw_k = true;
1498 	presetList.clear();
1499 	if(startVerbose) { cerr << "Parsing options: "; logTime(cerr, true); }
1500 	while(true) {
1501 		next_option = getopt_long(
1502 			argc, const_cast<char**>(argv),
1503 			short_options, long_options, &option_index);
1504 		const char * arg = optarg;
1505 		if(next_option == EOF) {
1506 			if(extra_opts_cur < extra_opts.size()) {
1507 				next_option = extra_opts[extra_opts_cur].first;
1508 				arg = extra_opts[extra_opts_cur].second.c_str();
1509 				extra_opts_cur++;
1510 			} else {
1511 				break;
1512 			}
1513 		}
1514 		parseOption(next_option, arg);
1515 	}
1516 	// Now parse all the presets.  Might want to pick which presets version to
1517 	// use according to other parameters.
1518 	auto_ptr<Presets> presets(new PresetsV0());
1519 	// Apply default preset
1520 	if(!defaultPreset.empty()) {
1521 		polstr = applyPreset(defaultPreset, *presets.get()) + polstr;
1522 	}
1523 	// Apply specified presets
1524 	for(size_t i = 0; i < presetList.size(); i++) {
1525 		polstr += applyPreset(presetList[i], *presets.get());
1526 	}
1527 	for(size_t i = 0; i < extra_opts.size(); i++) {
1528 		next_option = extra_opts[extra_opts_cur].first;
1529 		const char *arg = extra_opts[extra_opts_cur].second.c_str();
1530 		parseOption(next_option, arg);
1531 	}
1532 	// Remove initial semicolons
1533 	while(!polstr.empty() && polstr[0] == ';') {
1534 		polstr = polstr.substr(1);
1535 	}
1536 	if(gVerbose) {
1537 		cerr << "Final policy string: '" << polstr.c_str() << "'" << endl;
1538 	}
1539 	size_t failStreakTmp = 0;
1540 	SeedAlignmentPolicy::parseString(
1541 		polstr,
1542 		localAlign,
1543 		noisyHpolymer,
1544 		ignoreQuals,
1545 		bonusMatchType,
1546 		bonusMatch,
1547 		penMmcType,
1548 		penMmcMax,
1549 		penMmcMin,
1550 		penNType,
1551 		penN,
1552 		penRdGapConst,
1553 		penRfGapConst,
1554 		penRdGapLinear,
1555 		penRfGapLinear,
1556 		scoreMin,
1557 		nCeil,
1558 		penNCatPair,
1559 		multiseedMms,
1560 		multiseedLen,
1561 		msIval,
1562 		failStreakTmp,
1563 		nSeedRounds,
1564         &penIntronLen);
1565 	if(failStreakTmp > 0) {
1566 		maxEeStreak = failStreakTmp;
1567 		maxUgStreak = failStreakTmp;
1568 		maxDpStreak = failStreakTmp;
1569 	}
1570 	if(saw_a || saw_k) {
1571 		msample = false;
1572 		mhits = 0;
1573 	} else {
1574 		assert_gt(mhits, 0);
1575 		msample = true;
1576 	}
1577 	if(mates1.size() != mates2.size()) {
1578 		cerr << "Error: " << mates1.size() << " mate files/sequences were specified with -1, but " << mates2.size() << endl
1579 		     << "mate files/sequences were specified with -2.  The same number of mate files/" << endl
1580 		     << "sequences must be specified with -1 and -2." << endl;
1581 		throw 1;
1582 	}
1583 	if(qualities.size() && format != FASTA) {
1584 		cerr << "Error: one or more quality files were specified with -Q but -f was not" << endl
1585 		     << "enabled.  -Q works only in combination with -f and -C." << endl;
1586 		throw 1;
1587 	}
1588 	if(qualities1.size() && format != FASTA) {
1589 		cerr << "Error: one or more quality files were specified with --Q1 but -f was not" << endl
1590 		     << "enabled.  --Q1 works only in combination with -f and -C." << endl;
1591 		throw 1;
1592 	}
1593 	if(qualities2.size() && format != FASTA) {
1594 		cerr << "Error: one or more quality files were specified with --Q2 but -f was not" << endl
1595 		     << "enabled.  --Q2 works only in combination with -f and -C." << endl;
1596 		throw 1;
1597 	}
1598 	if(qualities1.size() > 0 && mates1.size() != qualities1.size()) {
1599 		cerr << "Error: " << mates1.size() << " mate files/sequences were specified with -1, but " << qualities1.size() << endl
1600 		     << "quality files were specified with --Q1.  The same number of mate and quality" << endl
1601 		     << "files must sequences must be specified with -1 and --Q1." << endl;
1602 		throw 1;
1603 	}
1604 	if(qualities2.size() > 0 && mates2.size() != qualities2.size()) {
1605 		cerr << "Error: " << mates2.size() << " mate files/sequences were specified with -2, but " << qualities2.size() << endl
1606 		     << "quality files were specified with --Q2.  The same number of mate and quality" << endl
1607 		     << "files must sequences must be specified with -2 and --Q2." << endl;
1608 		throw 1;
1609 	}
1610 	if(!rgs.empty() && rgid.empty()) {
1611 		cerr << "Warning: --rg was specified without --rg-id also "
1612 		     << "being specified.  @RG line is not printed unless --rg-id "
1613 			 << "is specified." << endl;
1614 	}
1615 	// Check for duplicate mate input files
1616 	if(format != CMDLINE) {
1617 		for(size_t i = 0; i < mates1.size(); i++) {
1618 			for(size_t j = 0; j < mates2.size(); j++) {
1619 				if(mates1[i] == mates2[j] && !gQuiet) {
1620 					cerr << "Warning: Same mate file \"" << mates1[i].c_str() << "\" appears as argument to both -1 and -2" << endl;
1621 				}
1622 			}
1623 		}
1624 	}
1625 	// If both -s and -u are used, we need to adjust qUpto accordingly
1626 	// since it uses rdid to know if we've reached the -u limit (and
1627 	// rdids are all shifted up by skipReads characters)
1628 	if(qUpto + skipReads > qUpto) {
1629 		qUpto += skipReads;
1630 	}
1631 	if(useShmem && useMm && !gQuiet) {
1632 		cerr << "Warning: --shmem overrides --mm..." << endl;
1633 		useMm = false;
1634 	}
1635 	if(gGapBarrier < 1) {
1636 		cerr << "Warning: --gbar was set less than 1 (=" << gGapBarrier
1637 		     << "); setting to 1 instead" << endl;
1638 		gGapBarrier = 1;
1639 	}
1640 	if(multiseedMms >= multiseedLen) {
1641 		assert_gt(multiseedLen, 0);
1642 		cerr << "Warning: seed mismatches (" << multiseedMms
1643 		     << ") is less than seed length (" << multiseedLen
1644 			 << "); setting mismatches to " << (multiseedMms-1)
1645 			 << " instead" << endl;
1646 		multiseedMms = multiseedLen-1;
1647 	}
1648 	sam_print_zm = sam_print_zm && bowtie2p5;
1649 #ifndef NDEBUG
1650 	if(!gQuiet) {
1651 		cerr << "Warning: Running in debug mode.  Please use debug mode only "
1652 			 << "for diagnosing errors, and not for typical use of HISAT."
1653 			 << endl;
1654 	}
1655 #endif
1656 }
1657 
1658 static const char *argv0 = NULL;
1659 
1660 /// Create a PatternSourcePerThread for the current thread according
1661 /// to the global params and return a pointer to it
1662 static PatternSourcePerThreadFactory*
createPatsrcFactory(PairedPatternSource & _patsrc,int tid)1663 createPatsrcFactory(PairedPatternSource& _patsrc, int tid) {
1664 	PatternSourcePerThreadFactory *patsrcFact;
1665 	patsrcFact = new WrappedPatternSourcePerThreadFactory(_patsrc);
1666 	assert(patsrcFact != NULL);
1667 	return patsrcFact;
1668 }
1669 
1670 #define PTHREAD_ATTRS (PTHREAD_CREATE_JOINABLE | PTHREAD_CREATE_DETACHED)
1671 
1672 typedef TIndexOffU index_t;
1673 typedef uint16_t local_index_t;
1674 static PairedPatternSource*              multiseed_patsrc;
1675 static HierEbwt<index_t>*                multiseed_ebwtFw;
1676 static HierEbwt<index_t>*                multiseed_ebwtBw;
1677 static Scoring*                          multiseed_sc;
1678 static BitPairReference*                 multiseed_refs;
1679 static AlignmentCache<index_t>*          multiseed_ca; // seed cache
1680 static AlnSink<index_t>*                 multiseed_msink;
1681 static EList<string>                     multiseed_refnames;
1682 static OutFileBuf*                       multiseed_metricsOfb;
1683 static SpliceSiteDB*                     ssdb;
1684 static MUTEX_T                           multiseed_mutex;
1685 
1686 /**
1687  * Metrics for measuring the work done by the outer read alignment
1688  * loop.
1689  */
1690 struct OuterLoopMetrics {
1691 
OuterLoopMetricsOuterLoopMetrics1692 	OuterLoopMetrics() {
1693 	    reset();
1694 	}
1695 
1696 	/**
1697 	 * Set all counters to 0.
1698 	 */
resetOuterLoopMetrics1699 	void reset() {
1700 		reads = bases = srreads = srbases =
1701 		freads = fbases = ureads = ubases = 0;
1702 	}
1703 
1704 	/**
1705 	 * Sum the counters in m in with the conters in this object.  This
1706 	 * is the only safe way to update an OuterLoopMetrics that's shared
1707 	 * by multiple threads.
1708 	 */
mergeOuterLoopMetrics1709 	void merge(
1710 		const OuterLoopMetrics& m,
1711 		bool getLock = false)
1712 	{
1713 		ThreadSafe ts(&mutex_m, getLock);
1714 		reads += m.reads;
1715 		bases += m.bases;
1716 		srreads += m.srreads;
1717 		srbases += m.srbases;
1718 		freads += m.freads;
1719 		fbases += m.fbases;
1720 		ureads += m.ureads;
1721 		ubases += m.ubases;
1722 	}
1723 
1724 	uint64_t reads;   // total reads
1725 	uint64_t bases;   // total bases
1726 	uint64_t srreads; // same-read reads
1727 	uint64_t srbases; // same-read bases
1728 	uint64_t freads;  // filtered reads
1729 	uint64_t fbases;  // filtered bases
1730 	uint64_t ureads;  // unfiltered reads
1731 	uint64_t ubases;  // unfiltered bases
1732 	MUTEX_T mutex_m;
1733 };
1734 
1735 /**
1736  * Collection of all relevant performance metrics when aligning in
1737  * multiseed mode.
1738  */
1739 struct PerfMetrics {
1740 
PerfMetricsPerfMetrics1741 	PerfMetrics() : first(true) { reset(); }
1742 
1743 	/**
1744 	 * Set all counters to 0.
1745 	 */
resetPerfMetrics1746 	void reset() {
1747 		olm.reset();
1748 		sdm.reset();
1749 		wlm.reset();
1750 		swmSeed.reset();
1751 		swmMate.reset();
1752 		rpm.reset();
1753 		dpSse8Seed.reset();   // 8-bit SSE seed extensions
1754 		dpSse8Mate.reset();   // 8-bit SSE mate finds
1755 		dpSse16Seed.reset();  // 16-bit SSE seed extensions
1756 		dpSse16Mate.reset();  // 16-bit SSE mate finds
1757 		nbtfiltst = 0;
1758 		nbtfiltsc = 0;
1759 		nbtfiltdo = 0;
1760 
1761 		olmu.reset();
1762 		sdmu.reset();
1763 		wlmu.reset();
1764 		swmuSeed.reset();
1765 		swmuMate.reset();
1766 		rpmu.reset();
1767 		dpSse8uSeed.reset();  // 8-bit SSE seed extensions
1768 		dpSse8uMate.reset();  // 8-bit SSE mate finds
1769 		dpSse16uSeed.reset(); // 16-bit SSE seed extensions
1770 		dpSse16uMate.reset(); // 16-bit SSE mate finds
1771 		nbtfiltst_u = 0;
1772 		nbtfiltsc_u = 0;
1773 		nbtfiltdo_u = 0;
1774 
1775         him.reset();
1776 	}
1777 
1778 	/**
1779 	 * Merge a set of specific metrics into this object.
1780 	 */
mergePerfMetrics1781 	void merge(
1782 		const OuterLoopMetrics *ol,
1783 		const SeedSearchMetrics *sd,
1784 		const WalkMetrics *wl,
1785 		const SwMetrics *swSeed,
1786 		const SwMetrics *swMate,
1787 		const ReportingMetrics *rm,
1788 		const SSEMetrics *dpSse8Ex,
1789 		const SSEMetrics *dpSse8Ma,
1790 		const SSEMetrics *dpSse16Ex,
1791 		const SSEMetrics *dpSse16Ma,
1792 		uint64_t nbtfiltst_,
1793 		uint64_t nbtfiltsc_,
1794 		uint64_t nbtfiltdo_,
1795         const HIMetrics *hi,
1796 		bool getLock)
1797 	{
1798 		ThreadSafe ts(&mutex_m, getLock);
1799 		if(ol != NULL) {
1800 			olmu.merge(*ol, false);
1801 		}
1802 		if(sd != NULL) {
1803 			sdmu.merge(*sd, false);
1804 		}
1805 		if(wl != NULL) {
1806 			wlmu.merge(*wl, false);
1807 		}
1808 		if(swSeed != NULL) {
1809 			swmuSeed.merge(*swSeed, false);
1810 		}
1811 		if(swMate != NULL) {
1812 			swmuMate.merge(*swMate, false);
1813 		}
1814 		if(rm != NULL) {
1815 			rpmu.merge(*rm, false);
1816 		}
1817 		if(dpSse8Ex != NULL) {
1818 			dpSse8uSeed.merge(*dpSse8Ex, false);
1819 		}
1820 		if(dpSse8Ma != NULL) {
1821 			dpSse8uMate.merge(*dpSse8Ma, false);
1822 		}
1823 		if(dpSse16Ex != NULL) {
1824 			dpSse16uSeed.merge(*dpSse16Ex, false);
1825 		}
1826 		if(dpSse16Ma != NULL) {
1827 			dpSse16uMate.merge(*dpSse16Ma, false);
1828 		}
1829 		nbtfiltst_u += nbtfiltst_;
1830 		nbtfiltsc_u += nbtfiltsc_;
1831 		nbtfiltdo_u += nbtfiltdo_;
1832         if(hi != NULL) {
1833             him.merge(*hi, false);
1834         }
1835 	}
1836 
1837 	/**
1838 	 * Reports a matrix of results, incl. column labels, to an OutFileBuf.
1839 	 * Optionally also sends results to stderr (unbuffered).  Can optionally
1840 	 * print a per-read record with the read name at the beginning.
1841 	 */
reportIntervalPerfMetrics1842 	void reportInterval(
1843 		OutFileBuf* o,        // file to send output to
1844 		bool metricsStderr,   // additionally output to stderr?
1845 		bool total,           // true -> report total, otherwise incremental
1846 		bool sync,            //  synchronize output
1847 		const BTString *name) // non-NULL name pointer if is per-read record
1848 	{
1849 		ThreadSafe ts(&mutex_m, sync);
1850 		ostringstream stderrSs;
1851 		time_t curtime = time(0);
1852 		char buf[1024];
1853 		if(first) {
1854 			const char *str =
1855 				/*  1 */ "Time"           "\t"
1856 				/*  2 */ "Read"           "\t"
1857 				/*  3 */ "Base"           "\t"
1858 				/*  4 */ "SameRead"       "\t"
1859 				/*  5 */ "SameReadBase"   "\t"
1860 				/*  6 */ "UnfilteredRead" "\t"
1861 				/*  7 */ "UnfilteredBase" "\t"
1862 
1863 				/*  8 */ "Paired"         "\t"
1864 				/*  9 */ "Unpaired"       "\t"
1865 				/* 10 */ "AlConUni"       "\t"
1866 				/* 11 */ "AlConRep"       "\t"
1867 				/* 12 */ "AlConFail"      "\t"
1868 				/* 13 */ "AlDis"          "\t"
1869 				/* 14 */ "AlConFailUni"   "\t"
1870 				/* 15 */ "AlConFailRep"   "\t"
1871 				/* 16 */ "AlConFailFail"  "\t"
1872 				/* 17 */ "AlConRepUni"    "\t"
1873 				/* 18 */ "AlConRepRep"    "\t"
1874 				/* 19 */ "AlConRepFail"   "\t"
1875 				/* 20 */ "AlUnpUni"       "\t"
1876 				/* 21 */ "AlUnpRep"       "\t"
1877 				/* 22 */ "AlUnpFail"      "\t"
1878 
1879 				/* 23 */ "SeedSearch"     "\t"
1880 				/* 24 */ "IntraSCacheHit" "\t"
1881 				/* 25 */ "InterSCacheHit" "\t"
1882 				/* 26 */ "OutOfMemory"    "\t"
1883 				/* 27 */ "AlBWOp"         "\t"
1884 				/* 28 */ "AlBWBranch"     "\t"
1885 				/* 29 */ "ResBWOp"        "\t"
1886 				/* 30 */ "ResBWBranch"    "\t"
1887 				/* 31 */ "ResResolve"     "\t"
1888 				/* 34 */ "ResReport"      "\t"
1889 				/* 35 */ "RedundantSHit"  "\t"
1890 
1891 				/* 36 */ "BestMinEdit0"   "\t"
1892 				/* 37 */ "BestMinEdit1"   "\t"
1893 				/* 38 */ "BestMinEdit2"   "\t"
1894 
1895 				/* 39 */ "ExactAttempts"  "\t"
1896 				/* 40 */ "ExactSucc"      "\t"
1897 				/* 41 */ "ExactRanges"    "\t"
1898 				/* 42 */ "ExactRows"      "\t"
1899 				/* 43 */ "ExactOOMs"      "\t"
1900 
1901 				/* 44 */ "1mmAttempts"    "\t"
1902 				/* 45 */ "1mmSucc"        "\t"
1903 				/* 46 */ "1mmRanges"      "\t"
1904 				/* 47 */ "1mmRows"        "\t"
1905 				/* 48 */ "1mmOOMs"        "\t"
1906 
1907 				/* 49 */ "UngappedSucc"   "\t"
1908 				/* 50 */ "UngappedFail"   "\t"
1909 				/* 51 */ "UngappedNoDec"  "\t"
1910 
1911 				/* 52 */ "DPExLt10Gaps"   "\t"
1912 				/* 53 */ "DPExLt5Gaps"    "\t"
1913 				/* 54 */ "DPExLt3Gaps"    "\t"
1914 
1915 				/* 55 */ "DPMateLt10Gaps" "\t"
1916 				/* 56 */ "DPMateLt5Gaps"  "\t"
1917 				/* 57 */ "DPMateLt3Gaps"  "\t"
1918 
1919 				/* 58 */ "DP16ExDps"      "\t"
1920 				/* 59 */ "DP16ExDpSat"    "\t"
1921 				/* 60 */ "DP16ExDpFail"   "\t"
1922 				/* 61 */ "DP16ExDpSucc"   "\t"
1923 				/* 62 */ "DP16ExCol"      "\t"
1924 				/* 63 */ "DP16ExCell"     "\t"
1925 				/* 64 */ "DP16ExInner"    "\t"
1926 				/* 65 */ "DP16ExFixup"    "\t"
1927 				/* 66 */ "DP16ExGathSol"  "\t"
1928 				/* 67 */ "DP16ExBt"       "\t"
1929 				/* 68 */ "DP16ExBtFail"   "\t"
1930 				/* 69 */ "DP16ExBtSucc"   "\t"
1931 				/* 70 */ "DP16ExBtCell"   "\t"
1932 				/* 71 */ "DP16ExCoreRej"  "\t"
1933 				/* 72 */ "DP16ExNRej"     "\t"
1934 
1935 				/* 73 */ "DP8ExDps"       "\t"
1936 				/* 74 */ "DP8ExDpSat"     "\t"
1937 				/* 75 */ "DP8ExDpFail"    "\t"
1938 				/* 76 */ "DP8ExDpSucc"    "\t"
1939 				/* 77 */ "DP8ExCol"       "\t"
1940 				/* 78 */ "DP8ExCell"      "\t"
1941 				/* 79 */ "DP8ExInner"     "\t"
1942 				/* 80 */ "DP8ExFixup"     "\t"
1943 				/* 81 */ "DP8ExGathSol"   "\t"
1944 				/* 82 */ "DP8ExBt"        "\t"
1945 				/* 83 */ "DP8ExBtFail"    "\t"
1946 				/* 84 */ "DP8ExBtSucc"    "\t"
1947 				/* 85 */ "DP8ExBtCell"    "\t"
1948 				/* 86 */ "DP8ExCoreRej"   "\t"
1949 				/* 87 */ "DP8ExNRej"      "\t"
1950 
1951 				/* 88 */ "DP16MateDps"     "\t"
1952 				/* 89 */ "DP16MateDpSat"   "\t"
1953 				/* 90 */ "DP16MateDpFail"  "\t"
1954 				/* 91 */ "DP16MateDpSucc"  "\t"
1955 				/* 92 */ "DP16MateCol"     "\t"
1956 				/* 93 */ "DP16MateCell"    "\t"
1957 				/* 94 */ "DP16MateInner"   "\t"
1958 				/* 95 */ "DP16MateFixup"   "\t"
1959 				/* 96 */ "DP16MateGathSol" "\t"
1960 				/* 97 */ "DP16MateBt"      "\t"
1961 				/* 98 */ "DP16MateBtFail"  "\t"
1962 				/* 99 */ "DP16MateBtSucc"  "\t"
1963 				/* 100 */ "DP16MateBtCell"  "\t"
1964 				/* 101 */ "DP16MateCoreRej" "\t"
1965 				/* 102 */ "DP16MateNRej"    "\t"
1966 
1967 				/* 103 */ "DP8MateDps"     "\t"
1968 				/* 104 */ "DP8MateDpSat"   "\t"
1969 				/* 105 */ "DP8MateDpFail"  "\t"
1970 				/* 106 */ "DP8MateDpSucc"  "\t"
1971 				/* 107 */ "DP8MateCol"     "\t"
1972 				/* 108 */ "DP8MateCell"    "\t"
1973 				/* 109 */ "DP8MateInner"   "\t"
1974 				/* 110 */ "DP8MateFixup"   "\t"
1975 				/* 111 */ "DP8MateGathSol" "\t"
1976 				/* 112 */ "DP8MateBt"      "\t"
1977 				/* 113 */ "DP8MateBtFail"  "\t"
1978 				/* 114 */ "DP8MateBtSucc"  "\t"
1979 				/* 115 */ "DP8MateBtCell"  "\t"
1980 				/* 116 */ "DP8MateCoreRej" "\t"
1981 				/* 117 */ "DP8MateNRej"    "\t"
1982 
1983 				/* 118 */ "DPBtFiltStart"  "\t"
1984 				/* 119 */ "DPBtFiltScore"  "\t"
1985 				/* 120 */ "DpBtFiltDom"    "\t"
1986 
1987 				/* 121 */ "MemPeak"        "\t"
1988 				/* 122 */ "UncatMemPeak"   "\t" // 0
1989 				/* 123 */ "EbwtMemPeak"    "\t" // EBWT_CAT
1990 				/* 124 */ "CacheMemPeak"   "\t" // CA_CAT
1991 				/* 125 */ "ResolveMemPeak" "\t" // GW_CAT
1992 				/* 126 */ "AlignMemPeak"   "\t" // AL_CAT
1993 				/* 127 */ "DPMemPeak"      "\t" // DP_CAT
1994 				/* 128 */ "MiscMemPeak"    "\t" // MISC_CAT
1995 				/* 129 */ "DebugMemPeak"   "\t" // DEBUG_CAT
1996 
1997                 /* 130 */ "LocalSearch"         "\t"
1998                 /* 131 */ "AnchorSearch"        "\t"
1999                 /* 132 */ "LocalIndexSearch"    "\t"
2000                 /* 133 */ "LocalExtSearch"      "\t"
2001                 /* 134 */ "LocalSearchRecur"    "\t"
2002                 /* 135 */ "GlobalGenomeCoords"  "\t"
2003                 /* 136 */ "LocalGenomeCoords"   "\t"
2004 
2005 
2006 				"\n";
2007 
2008 			if(name != NULL) {
2009 				if(o != NULL) o->writeChars("Name\t");
2010 				if(metricsStderr) stderrSs << "Name\t";
2011 			}
2012 
2013 			if(o != NULL) o->writeChars(str);
2014 			if(metricsStderr) stderrSs << str;
2015 			first = false;
2016 		}
2017 
2018 		if(total) mergeIncrementals();
2019 
2020 		// 0. Read name, if needed
2021 		if(name != NULL) {
2022 			if(o != NULL) {
2023 				o->writeChars(name->toZBuf());
2024 				o->write('\t');
2025 			}
2026 			if(metricsStderr) {
2027 				stderrSs << (*name) << '\t';
2028 			}
2029 		}
2030 
2031 		// 1. Current time in secs
2032 		itoa10<time_t>(curtime, buf);
2033 		if(metricsStderr) stderrSs << buf << '\t';
2034 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2035 
2036 		const OuterLoopMetrics& ol = total ? olm : olmu;
2037 
2038 		// 2. Reads
2039 		itoa10<uint64_t>(ol.reads, buf);
2040 		if(metricsStderr) stderrSs << buf << '\t';
2041 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2042 		// 3. Bases
2043 		itoa10<uint64_t>(ol.bases, buf);
2044 		if(metricsStderr) stderrSs << buf << '\t';
2045 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2046 		// 4. Same-read reads
2047 		itoa10<uint64_t>(ol.srreads, buf);
2048 		if(metricsStderr) stderrSs << buf << '\t';
2049 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2050 		// 5. Same-read bases
2051 		itoa10<uint64_t>(ol.srbases, buf);
2052 		if(metricsStderr) stderrSs << buf << '\t';
2053 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2054 		// 6. Unfiltered reads
2055 		itoa10<uint64_t>(ol.ureads, buf);
2056 		if(metricsStderr) stderrSs << buf << '\t';
2057 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2058 		// 7. Unfiltered bases
2059 		itoa10<uint64_t>(ol.ubases, buf);
2060 		if(metricsStderr) stderrSs << buf << '\t';
2061 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2062 
2063 		const ReportingMetrics& rp = total ? rpm : rpmu;
2064 
2065 		// 8. Paired reads
2066 		itoa10<uint64_t>(rp.npaired, buf);
2067 		if(metricsStderr) stderrSs << buf << '\t';
2068 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2069 		// 9. Unpaired reads
2070 		itoa10<uint64_t>(rp.nunpaired, buf);
2071 		if(metricsStderr) stderrSs << buf << '\t';
2072 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2073 		// 10. Pairs with unique concordant alignments
2074 		itoa10<uint64_t>(rp.nconcord_uni, buf);
2075 		if(metricsStderr) stderrSs << buf << '\t';
2076 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2077 		// 11. Pairs with repetitive concordant alignments
2078 		itoa10<uint64_t>(rp.nconcord_rep, buf);
2079 		if(metricsStderr) stderrSs << buf << '\t';
2080 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2081 		// 12. Pairs with 0 concordant alignments
2082 		itoa10<uint64_t>(rp.nconcord_0, buf);
2083 		if(metricsStderr) stderrSs << buf << '\t';
2084 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2085 		// 13. Pairs with 1 discordant alignment
2086 		itoa10<uint64_t>(rp.ndiscord, buf);
2087 		if(metricsStderr) stderrSs << buf << '\t';
2088 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2089 		// 14. Mates from unaligned pairs that align uniquely
2090 		itoa10<uint64_t>(rp.nunp_0_uni, buf);
2091 		if(metricsStderr) stderrSs << buf << '\t';
2092 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2093 		// 15. Mates from unaligned pairs that align repetitively
2094 		itoa10<uint64_t>(rp.nunp_0_rep, buf);
2095 		if(metricsStderr) stderrSs << buf << '\t';
2096 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2097 		// 16. Mates from unaligned pairs that fail to align
2098 		itoa10<uint64_t>(rp.nunp_0_0, buf);
2099 		if(metricsStderr) stderrSs << buf << '\t';
2100 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2101 		// 17. Mates from repetitive pairs that align uniquely
2102 		itoa10<uint64_t>(rp.nunp_rep_uni, buf);
2103 		if(metricsStderr) stderrSs << buf << '\t';
2104 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2105 		// 18. Mates from repetitive pairs that align repetitively
2106 		itoa10<uint64_t>(rp.nunp_rep_rep, buf);
2107 		if(metricsStderr) stderrSs << buf << '\t';
2108 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2109 		// 19. Mates from repetitive pairs that fail to align
2110 		itoa10<uint64_t>(rp.nunp_rep_0, buf);
2111 		if(metricsStderr) stderrSs << buf << '\t';
2112 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2113 		// 20. Unpaired reads that align uniquely
2114 		itoa10<uint64_t>(rp.nunp_uni, buf);
2115 		if(metricsStderr) stderrSs << buf << '\t';
2116 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2117 		// 21. Unpaired reads that align repetitively
2118 		itoa10<uint64_t>(rp.nunp_rep, buf);
2119 		if(metricsStderr) stderrSs << buf << '\t';
2120 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2121 		// 22. Unpaired reads that fail to align
2122 		itoa10<uint64_t>(rp.nunp_0, buf);
2123 		if(metricsStderr) stderrSs << buf << '\t';
2124 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2125 
2126 		const SeedSearchMetrics& sd = total ? sdm : sdmu;
2127 
2128 		// 23. Seed searches
2129 		itoa10<uint64_t>(sd.seedsearch, buf);
2130 		if(metricsStderr) stderrSs << buf << '\t';
2131 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2132 		// 24. Hits in 'current' cache
2133 		itoa10<uint64_t>(sd.intrahit, buf);
2134 		if(metricsStderr) stderrSs << buf << '\t';
2135 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2136 		// 25. Hits in 'local' cache
2137 		itoa10<uint64_t>(sd.interhit, buf);
2138 		if(metricsStderr) stderrSs << buf << '\t';
2139 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2140 		// 26. Out of memory
2141 		itoa10<uint64_t>(sd.ooms, buf);
2142 		if(metricsStderr) stderrSs << buf << '\t';
2143 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2144 		// 27. Burrows-Wheeler ops in aligner
2145 		itoa10<uint64_t>(sd.bwops, buf);
2146 		if(metricsStderr) stderrSs << buf << '\t';
2147 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2148 		// 28. Burrows-Wheeler branches (edits) in aligner
2149 		itoa10<uint64_t>(sd.bweds, buf);
2150 		if(metricsStderr) stderrSs << buf << '\t';
2151 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2152 
2153 		const WalkMetrics& wl = total ? wlm : wlmu;
2154 
2155 		// 29. Burrows-Wheeler ops in resolver
2156 		itoa10<uint64_t>(wl.bwops, buf);
2157 		if(metricsStderr) stderrSs << buf << '\t';
2158 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2159 		// 30. Burrows-Wheeler branches in resolver
2160 		itoa10<uint64_t>(wl.branches, buf);
2161 		if(metricsStderr) stderrSs << buf << '\t';
2162 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2163 		// 31. Burrows-Wheeler offset resolutions
2164 		itoa10<uint64_t>(wl.resolves, buf);
2165 		if(metricsStderr) stderrSs << buf << '\t';
2166 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2167 		// 34. Offset reports
2168 		itoa10<uint64_t>(wl.reports, buf);
2169 		if(metricsStderr) stderrSs << buf << '\t';
2170 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2171 
2172 		// 35. Redundant seed hit
2173 		itoa10<uint64_t>(total ? swmSeed.rshit : swmuSeed.rshit, buf);
2174 		if(metricsStderr) stderrSs << buf << '\t';
2175 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2176 
2177 		// 36. # times the best (out of fw/rc) minimum # edits was 0
2178 		itoa10<uint64_t>(total ? sdm.bestmin0 : sdmu.bestmin0, buf);
2179 		if(metricsStderr) stderrSs << buf << '\t';
2180 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2181 		// 37. # times the best (out of fw/rc) minimum # edits was 1
2182 		itoa10<uint64_t>(total ? sdm.bestmin1 : sdmu.bestmin1, buf);
2183 		if(metricsStderr) stderrSs << buf << '\t';
2184 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2185 		// 38. # times the best (out of fw/rc) minimum # edits was 2
2186 		itoa10<uint64_t>(total ? sdm.bestmin2 : sdmu.bestmin2, buf);
2187 		if(metricsStderr) stderrSs << buf << '\t';
2188 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2189 
2190 		// 39. Exact aligner attempts
2191 		itoa10<uint64_t>(total ? swmSeed.exatts : swmuSeed.exatts, buf);
2192 		if(metricsStderr) stderrSs << buf << '\t';
2193 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2194 		// 40. Exact aligner successes
2195 		itoa10<uint64_t>(total ? swmSeed.exsucc : swmuSeed.exsucc, buf);
2196 		if(metricsStderr) stderrSs << buf << '\t';
2197 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2198 		// 41. Exact aligner ranges
2199 		itoa10<uint64_t>(total ? swmSeed.exranges : swmuSeed.exranges, buf);
2200 		if(metricsStderr) stderrSs << buf << '\t';
2201 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2202 		// 42. Exact aligner rows
2203 		itoa10<uint64_t>(total ? swmSeed.exrows : swmuSeed.exrows, buf);
2204 		if(metricsStderr) stderrSs << buf << '\t';
2205 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2206 		// 43. Exact aligner OOMs
2207 		itoa10<uint64_t>(total ? swmSeed.exooms : swmuSeed.exooms, buf);
2208 		if(metricsStderr) stderrSs << buf << '\t';
2209 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2210 
2211 		// 44. 1mm aligner attempts
2212 		itoa10<uint64_t>(total ? swmSeed.mm1atts : swmuSeed.mm1atts, buf);
2213 		if(metricsStderr) stderrSs << buf << '\t';
2214 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2215 		// 45. 1mm aligner successes
2216 		itoa10<uint64_t>(total ? swmSeed.mm1succ : swmuSeed.mm1succ, buf);
2217 		if(metricsStderr) stderrSs << buf << '\t';
2218 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2219 		// 46. 1mm aligner ranges
2220 		itoa10<uint64_t>(total ? swmSeed.mm1ranges : swmuSeed.mm1ranges, buf);
2221 		if(metricsStderr) stderrSs << buf << '\t';
2222 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2223 		// 47. 1mm aligner rows
2224 		itoa10<uint64_t>(total ? swmSeed.mm1rows : swmuSeed.mm1rows, buf);
2225 		if(metricsStderr) stderrSs << buf << '\t';
2226 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2227 		// 48. 1mm aligner OOMs
2228 		itoa10<uint64_t>(total ? swmSeed.mm1ooms : swmuSeed.mm1ooms, buf);
2229 		if(metricsStderr) stderrSs << buf << '\t';
2230 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2231 
2232 		// 49 Ungapped aligner success
2233 		itoa10<uint64_t>(total ? swmSeed.ungapsucc : swmuSeed.ungapsucc, buf);
2234 		if(metricsStderr) stderrSs << buf << '\t';
2235 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2236 		// 50. Ungapped aligner fail
2237 		itoa10<uint64_t>(total ? swmSeed.ungapfail : swmuSeed.ungapfail, buf);
2238 		if(metricsStderr) stderrSs << buf << '\t';
2239 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2240 		// 51. Ungapped aligner no decision
2241 		itoa10<uint64_t>(total ? swmSeed.ungapnodec : swmuSeed.ungapnodec, buf);
2242 		if(metricsStderr) stderrSs << buf << '\t';
2243 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2244 
2245 		// 52. # seed-extend DPs with < 10 gaps
2246 		itoa10<uint64_t>(total ? swmSeed.sws10 : swmuSeed.sws10, buf);
2247 		if(metricsStderr) stderrSs << buf << '\t';
2248 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2249 		// 53. # seed-extend DPs with < 5 gaps
2250 		itoa10<uint64_t>(total ? swmSeed.sws5 : swmuSeed.sws5, buf);
2251 		if(metricsStderr) stderrSs << buf << '\t';
2252 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2253 		// 54. # seed-extend DPs with < 3 gaps
2254 		itoa10<uint64_t>(total ? swmSeed.sws3 : swmuSeed.sws3, buf);
2255 		if(metricsStderr) stderrSs << buf << '\t';
2256 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2257 
2258 		// 55. # seed-extend DPs with < 10 gaps
2259 		itoa10<uint64_t>(total ? swmMate.sws10 : swmuMate.sws10, buf);
2260 		if(metricsStderr) stderrSs << buf << '\t';
2261 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2262 		// 56. # seed-extend DPs with < 5 gaps
2263 		itoa10<uint64_t>(total ? swmMate.sws5 : swmuMate.sws5, buf);
2264 		if(metricsStderr) stderrSs << buf << '\t';
2265 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2266 		// 57. # seed-extend DPs with < 3 gaps
2267 		itoa10<uint64_t>(total ? swmMate.sws3 : swmuMate.sws3, buf);
2268 		if(metricsStderr) stderrSs << buf << '\t';
2269 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2270 
2271 		const SSEMetrics& dpSse16s = total ? dpSse16Seed : dpSse16uSeed;
2272 
2273 		// 58. 16-bit SSE seed-extend DPs tried
2274 		itoa10<uint64_t>(dpSse16s.dp, buf);
2275 		if(metricsStderr) stderrSs << buf << '\t';
2276 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2277 		// 59. 16-bit SSE seed-extend DPs saturated
2278 		itoa10<uint64_t>(dpSse16s.dpsat, buf);
2279 		if(metricsStderr) stderrSs << buf << '\t';
2280 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2281 		// 60. 16-bit SSE seed-extend DPs failed
2282 		itoa10<uint64_t>(dpSse16s.dpfail, buf);
2283 		if(metricsStderr) stderrSs << buf << '\t';
2284 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2285 		// 61. 16-bit SSE seed-extend DPs succeeded
2286 		itoa10<uint64_t>(dpSse16s.dpsucc, buf);
2287 		if(metricsStderr) stderrSs << buf << '\t';
2288 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2289 		// 62. 16-bit SSE seed-extend DP columns completed
2290 		itoa10<uint64_t>(dpSse16s.col, buf);
2291 		if(metricsStderr) stderrSs << buf << '\t';
2292 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2293 		// 63. 16-bit SSE seed-extend DP cells completed
2294 		itoa10<uint64_t>(dpSse16s.cell, buf);
2295 		if(metricsStderr) stderrSs << buf << '\t';
2296 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2297 		// 64. 16-bit SSE seed-extend DP inner loop iters completed
2298 		itoa10<uint64_t>(dpSse16s.inner, buf);
2299 		if(metricsStderr) stderrSs << buf << '\t';
2300 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2301 		// 65. 16-bit SSE seed-extend DP fixup loop iters completed
2302 		itoa10<uint64_t>(dpSse16s.fixup, buf);
2303 		if(metricsStderr) stderrSs << buf << '\t';
2304 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2305 		// 66. 16-bit SSE seed-extend DP gather, cells with potential solutions
2306 		itoa10<uint64_t>(dpSse16s.gathsol, buf);
2307 		if(metricsStderr) stderrSs << buf << '\t';
2308 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2309 		// 67. 16-bit SSE seed-extend DP backtrace attempts
2310 		itoa10<uint64_t>(dpSse16s.bt, buf);
2311 		if(metricsStderr) stderrSs << buf << '\t';
2312 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2313 		// 68. 16-bit SSE seed-extend DP failed backtrace attempts
2314 		itoa10<uint64_t>(dpSse16s.btfail, buf);
2315 		if(metricsStderr) stderrSs << buf << '\t';
2316 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2317 		// 69. 16-bit SSE seed-extend DP succesful backtrace attempts
2318 		itoa10<uint64_t>(dpSse16s.btsucc, buf);
2319 		if(metricsStderr) stderrSs << buf << '\t';
2320 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2321 		// 70. 16-bit SSE seed-extend DP backtrace cells
2322 		itoa10<uint64_t>(dpSse16s.btcell, buf);
2323 		if(metricsStderr) stderrSs << buf << '\t';
2324 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2325 		// 71. 16-bit SSE seed-extend DP core-diag rejections
2326 		itoa10<uint64_t>(dpSse16s.corerej, buf);
2327 		if(metricsStderr) stderrSs << buf << '\t';
2328 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2329 		// 72. 16-bit SSE seed-extend DP N rejections
2330 		itoa10<uint64_t>(dpSse16s.nrej, buf);
2331 		if(metricsStderr) stderrSs << buf << '\t';
2332 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2333 
2334 		const SSEMetrics& dpSse8s = total ? dpSse8Seed : dpSse8uSeed;
2335 
2336 		// 73. 8-bit SSE seed-extend DPs tried
2337 		itoa10<uint64_t>(dpSse8s.dp, buf);
2338 		if(metricsStderr) stderrSs << buf << '\t';
2339 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2340 		// 74. 8-bit SSE seed-extend DPs saturated
2341 		itoa10<uint64_t>(dpSse8s.dpsat, buf);
2342 		if(metricsStderr) stderrSs << buf << '\t';
2343 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2344 		// 75. 8-bit SSE seed-extend DPs failed
2345 		itoa10<uint64_t>(dpSse8s.dpfail, buf);
2346 		if(metricsStderr) stderrSs << buf << '\t';
2347 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2348 		// 76. 8-bit SSE seed-extend DPs succeeded
2349 		itoa10<uint64_t>(dpSse8s.dpsucc, buf);
2350 		if(metricsStderr) stderrSs << buf << '\t';
2351 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2352 		// 77. 8-bit SSE seed-extend DP columns completed
2353 		itoa10<uint64_t>(dpSse8s.col, buf);
2354 		if(metricsStderr) stderrSs << buf << '\t';
2355 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2356 		// 78. 8-bit SSE seed-extend DP cells completed
2357 		itoa10<uint64_t>(dpSse8s.cell, buf);
2358 		if(metricsStderr) stderrSs << buf << '\t';
2359 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2360 		// 79. 8-bit SSE seed-extend DP inner loop iters completed
2361 		itoa10<uint64_t>(dpSse8s.inner, buf);
2362 		if(metricsStderr) stderrSs << buf << '\t';
2363 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2364 		// 80. 8-bit SSE seed-extend DP fixup loop iters completed
2365 		itoa10<uint64_t>(dpSse8s.fixup, buf);
2366 		if(metricsStderr) stderrSs << buf << '\t';
2367 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2368 		// 81. 16-bit SSE seed-extend DP gather, cells with potential solutions
2369 		itoa10<uint64_t>(dpSse8s.gathsol, buf);
2370 		if(metricsStderr) stderrSs << buf << '\t';
2371 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2372 		// 82. 16-bit SSE seed-extend DP backtrace attempts
2373 		itoa10<uint64_t>(dpSse8s.bt, buf);
2374 		if(metricsStderr) stderrSs << buf << '\t';
2375 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2376 		// 83. 16-bit SSE seed-extend DP failed backtrace attempts
2377 		itoa10<uint64_t>(dpSse8s.btfail, buf);
2378 		if(metricsStderr) stderrSs << buf << '\t';
2379 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2380 		// 84. 16-bit SSE seed-extend DP succesful backtrace attempts
2381 		itoa10<uint64_t>(dpSse8s.btsucc, buf);
2382 		if(metricsStderr) stderrSs << buf << '\t';
2383 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2384 		// 85. 16-bit SSE seed-extend DP backtrace cells
2385 		itoa10<uint64_t>(dpSse8s.btcell, buf);
2386 		if(metricsStderr) stderrSs << buf << '\t';
2387 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2388 		// 86. 16-bit SSE seed-extend DP core-diag rejections
2389 		itoa10<uint64_t>(dpSse8s.corerej, buf);
2390 		if(metricsStderr) stderrSs << buf << '\t';
2391 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2392 		// 87. 16-bit SSE seed-extend DP N rejections
2393 		itoa10<uint64_t>(dpSse8s.nrej, buf);
2394 		if(metricsStderr) stderrSs << buf << '\t';
2395 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2396 
2397 		const SSEMetrics& dpSse16m = total ? dpSse16Mate : dpSse16uMate;
2398 
2399 		// 88. 16-bit SSE mate-finding DPs tried
2400 		itoa10<uint64_t>(dpSse16m.dp, buf);
2401 		if(metricsStderr) stderrSs << buf << '\t';
2402 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2403 		// 89. 16-bit SSE mate-finding DPs saturated
2404 		itoa10<uint64_t>(dpSse16m.dpsat, buf);
2405 		if(metricsStderr) stderrSs << buf << '\t';
2406 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2407 		// 90. 16-bit SSE mate-finding DPs failed
2408 		itoa10<uint64_t>(dpSse16m.dpfail, buf);
2409 		if(metricsStderr) stderrSs << buf << '\t';
2410 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2411 		// 91. 16-bit SSE mate-finding DPs succeeded
2412 		itoa10<uint64_t>(dpSse16m.dpsucc, buf);
2413 		if(metricsStderr) stderrSs << buf << '\t';
2414 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2415 		// 92. 16-bit SSE mate-finding DP columns completed
2416 		itoa10<uint64_t>(dpSse16m.col, buf);
2417 		if(metricsStderr) stderrSs << buf << '\t';
2418 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2419 		// 93. 16-bit SSE mate-finding DP cells completed
2420 		itoa10<uint64_t>(dpSse16m.cell, buf);
2421 		if(metricsStderr) stderrSs << buf << '\t';
2422 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2423 		// 94. 16-bit SSE mate-finding DP inner loop iters completed
2424 		itoa10<uint64_t>(dpSse16m.inner, buf);
2425 		if(metricsStderr) stderrSs << buf << '\t';
2426 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2427 		// 95. 16-bit SSE mate-finding DP fixup loop iters completed
2428 		itoa10<uint64_t>(dpSse16m.fixup, buf);
2429 		if(metricsStderr) stderrSs << buf << '\t';
2430 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2431 		// 96. 16-bit SSE mate-finding DP gather, cells with potential solutions
2432 		itoa10<uint64_t>(dpSse16m.gathsol, buf);
2433 		if(metricsStderr) stderrSs << buf << '\t';
2434 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2435 		// 97. 16-bit SSE mate-finding DP backtrace attempts
2436 		itoa10<uint64_t>(dpSse16m.bt, buf);
2437 		if(metricsStderr) stderrSs << buf << '\t';
2438 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2439 		// 98. 16-bit SSE mate-finding DP failed backtrace attempts
2440 		itoa10<uint64_t>(dpSse16m.btfail, buf);
2441 		if(metricsStderr) stderrSs << buf << '\t';
2442 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2443 		// 99. 16-bit SSE mate-finding DP succesful backtrace attempts
2444 		itoa10<uint64_t>(dpSse16m.btsucc, buf);
2445 		if(metricsStderr) stderrSs << buf << '\t';
2446 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2447 		// 100. 16-bit SSE mate-finding DP backtrace cells
2448 		itoa10<uint64_t>(dpSse16m.btcell, buf);
2449 		if(metricsStderr) stderrSs << buf << '\t';
2450 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2451 		// 101. 16-bit SSE mate-finding DP core-diag rejections
2452 		itoa10<uint64_t>(dpSse16m.corerej, buf);
2453 		if(metricsStderr) stderrSs << buf << '\t';
2454 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2455 		// 102. 16-bit SSE mate-finding DP N rejections
2456 		itoa10<uint64_t>(dpSse16m.nrej, buf);
2457 		if(metricsStderr) stderrSs << buf << '\t';
2458 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2459 
2460 		const SSEMetrics& dpSse8m = total ? dpSse8Mate : dpSse8uMate;
2461 
2462 		// 103. 8-bit SSE mate-finding DPs tried
2463 		itoa10<uint64_t>(dpSse8m.dp, buf);
2464 		if(metricsStderr) stderrSs << buf << '\t';
2465 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2466 		// 104. 8-bit SSE mate-finding DPs saturated
2467 		itoa10<uint64_t>(dpSse8m.dpsat, buf);
2468 		if(metricsStderr) stderrSs << buf << '\t';
2469 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2470 		// 105. 8-bit SSE mate-finding DPs failed
2471 		itoa10<uint64_t>(dpSse8m.dpfail, buf);
2472 		if(metricsStderr) stderrSs << buf << '\t';
2473 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2474 		// 106. 8-bit SSE mate-finding DPs succeeded
2475 		itoa10<uint64_t>(dpSse8m.dpsucc, buf);
2476 		if(metricsStderr) stderrSs << buf << '\t';
2477 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2478 		// 107. 8-bit SSE mate-finding DP columns completed
2479 		itoa10<uint64_t>(dpSse8m.col, buf);
2480 		if(metricsStderr) stderrSs << buf << '\t';
2481 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2482 		// 108. 8-bit SSE mate-finding DP cells completed
2483 		itoa10<uint64_t>(dpSse8m.cell, buf);
2484 		if(metricsStderr) stderrSs << buf << '\t';
2485 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2486 		// 109. 8-bit SSE mate-finding DP inner loop iters completed
2487 		itoa10<uint64_t>(dpSse8m.inner, buf);
2488 		if(metricsStderr) stderrSs << buf << '\t';
2489 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2490 		// 110. 8-bit SSE mate-finding DP fixup loop iters completed
2491 		itoa10<uint64_t>(dpSse8m.fixup, buf);
2492 		if(metricsStderr) stderrSs << buf << '\t';
2493 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2494 		// 111. 16-bit SSE mate-finding DP gather, cells with potential solutions
2495 		itoa10<uint64_t>(dpSse8m.gathsol, buf);
2496 		if(metricsStderr) stderrSs << buf << '\t';
2497 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2498 		// 112. 16-bit SSE mate-finding DP backtrace attempts
2499 		itoa10<uint64_t>(dpSse8m.bt, buf);
2500 		if(metricsStderr) stderrSs << buf << '\t';
2501 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2502 		// 113. 16-bit SSE mate-finding DP failed backtrace attempts
2503 		itoa10<uint64_t>(dpSse8m.btfail, buf);
2504 		if(metricsStderr) stderrSs << buf << '\t';
2505 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2506 		// 114. 16-bit SSE mate-finding DP succesful backtrace attempts
2507 		itoa10<uint64_t>(dpSse8m.btsucc, buf);
2508 		if(metricsStderr) stderrSs << buf << '\t';
2509 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2510 		// 115. 16-bit SSE mate-finding DP backtrace cells
2511 		itoa10<uint64_t>(dpSse8m.btcell, buf);
2512 		if(metricsStderr) stderrSs << buf << '\t';
2513 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2514 		// 116. 16-bit SSE mate-finding DP core rejections
2515 		itoa10<uint64_t>(dpSse8m.corerej, buf);
2516 		if(metricsStderr) stderrSs << buf << '\t';
2517 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2518 		// 117. 16-bit SSE mate-finding N rejections
2519 		itoa10<uint64_t>(dpSse8m.nrej, buf);
2520 		if(metricsStderr) stderrSs << buf << '\t';
2521 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2522 
2523 		// 118. Backtrace candidates filtered due to starting cell
2524 		itoa10<uint64_t>(total ? nbtfiltst : nbtfiltst_u, buf);
2525 		if(metricsStderr) stderrSs << buf << '\t';
2526 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2527 		// 119. Backtrace candidates filtered due to low score
2528 		itoa10<uint64_t>(total ? nbtfiltsc : nbtfiltsc_u, buf);
2529 		if(metricsStderr) stderrSs << buf << '\t';
2530 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2531 		// 120. Backtrace candidates filtered due to domination
2532 		itoa10<uint64_t>(total ? nbtfiltdo : nbtfiltdo_u, buf);
2533 		if(metricsStderr) stderrSs << buf << '\t';
2534 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2535 
2536 		// 121. Overall memory peak
2537 		itoa10<size_t>(gMemTally.peak() >> 20, buf);
2538 		if(metricsStderr) stderrSs << buf << '\t';
2539 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2540 		// 122. Uncategorized memory peak
2541 		itoa10<size_t>(gMemTally.peak(0) >> 20, buf);
2542 		if(metricsStderr) stderrSs << buf << '\t';
2543 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2544 		// 123. Ebwt memory peak
2545 		itoa10<size_t>(gMemTally.peak(EBWT_CAT) >> 20, buf);
2546 		if(metricsStderr) stderrSs << buf << '\t';
2547 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2548 		// 124. Cache memory peak
2549 		itoa10<size_t>(gMemTally.peak(CA_CAT) >> 20, buf);
2550 		if(metricsStderr) stderrSs << buf << '\t';
2551 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2552 		// 125. Resolver memory peak
2553 		itoa10<size_t>(gMemTally.peak(GW_CAT) >> 20, buf);
2554 		if(metricsStderr) stderrSs << buf << '\t';
2555 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2556 		// 126. Seed aligner memory peak
2557 		itoa10<size_t>(gMemTally.peak(AL_CAT) >> 20, buf);
2558 		if(metricsStderr) stderrSs << buf << '\t';
2559 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2560 		// 127. Dynamic programming aligner memory peak
2561 		itoa10<size_t>(gMemTally.peak(DP_CAT) >> 20, buf);
2562 		if(metricsStderr) stderrSs << buf << '\t';
2563 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2564 		// 128. Miscellaneous memory peak
2565 		itoa10<size_t>(gMemTally.peak(MISC_CAT) >> 20, buf);
2566 		if(metricsStderr) stderrSs << buf << '\t';
2567 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2568 		// 129. Debug memory peak
2569 		itoa10<size_t>(gMemTally.peak(DEBUG_CAT) >> 20, buf);
2570         if(metricsStderr) stderrSs << buf << '\t';
2571 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2572 
2573         // 130
2574         itoa10<size_t>(him.localatts, buf);
2575 		if(metricsStderr) stderrSs << buf << '\t';
2576 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2577         // 131
2578         itoa10<size_t>(him.anchoratts, buf);
2579 		if(metricsStderr) stderrSs << buf << '\t';
2580 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2581         // 132
2582         itoa10<size_t>(him.localindexatts, buf);
2583 		if(metricsStderr) stderrSs << buf << '\t';
2584 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2585         // 133
2586         itoa10<size_t>(him.localextatts, buf);
2587         if(metricsStderr) stderrSs << buf << '\t';
2588 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2589         // 134
2590         itoa10<size_t>(him.localsearchrecur, buf);
2591         if(metricsStderr) stderrSs << buf << '\t';
2592 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2593         // 135
2594         itoa10<size_t>(him.globalgenomecoords, buf);
2595         if(metricsStderr) stderrSs << buf << '\t';
2596 		if(o != NULL) { o->writeChars(buf); o->write('\t'); }
2597         // 136
2598         itoa10<size_t>(him.localgenomecoords, buf);
2599         if(metricsStderr) stderrSs << buf;
2600 		if(o != NULL) { o->writeChars(buf); }
2601 
2602 		if(o != NULL) { o->write('\n'); }
2603 		if(metricsStderr) cerr << stderrSs.str().c_str() << endl;
2604 		if(!total) mergeIncrementals();
2605 	}
2606 
mergeIncrementalsPerfMetrics2607 	void mergeIncrementals() {
2608 		olm.merge(olmu, false);
2609 		sdm.merge(sdmu, false);
2610 		wlm.merge(wlmu, false);
2611 		swmSeed.merge(swmuSeed, false);
2612 		swmMate.merge(swmuMate, false);
2613 		dpSse8Seed.merge(dpSse8uSeed, false);
2614 		dpSse8Mate.merge(dpSse8uMate, false);
2615 		dpSse16Seed.merge(dpSse16uSeed, false);
2616 		dpSse16Mate.merge(dpSse16uMate, false);
2617 		nbtfiltst_u += nbtfiltst;
2618 		nbtfiltsc_u += nbtfiltsc;
2619 		nbtfiltdo_u += nbtfiltdo;
2620 
2621 		olmu.reset();
2622 		sdmu.reset();
2623 		wlmu.reset();
2624 		swmuSeed.reset();
2625 		swmuMate.reset();
2626 		rpmu.reset();
2627 		dpSse8uSeed.reset();
2628 		dpSse8uMate.reset();
2629 		dpSse16uSeed.reset();
2630 		dpSse16uMate.reset();
2631 		nbtfiltst_u = 0;
2632 		nbtfiltsc_u = 0;
2633 		nbtfiltdo_u = 0;
2634 	}
2635 
2636 	// Total over the whole job
2637 	OuterLoopMetrics  olm;   // overall metrics
2638 	SeedSearchMetrics sdm;   // metrics related to seed alignment
2639 	WalkMetrics       wlm;   // metrics related to walking left (i.e. resolving reference offsets)
2640 	SwMetrics         swmSeed;  // metrics related to DP seed-extend alignment
2641 	SwMetrics         swmMate;  // metrics related to DP mate-finding alignment
2642 	ReportingMetrics  rpm;   // metrics related to reporting
2643 	SSEMetrics        dpSse8Seed;  // 8-bit SSE seed extensions
2644 	SSEMetrics        dpSse8Mate;    // 8-bit SSE mate finds
2645 	SSEMetrics        dpSse16Seed; // 16-bit SSE seed extensions
2646 	SSEMetrics        dpSse16Mate;   // 16-bit SSE mate finds
2647 	uint64_t          nbtfiltst;
2648 	uint64_t          nbtfiltsc;
2649 	uint64_t          nbtfiltdo;
2650 
2651 	// Just since the last update
2652 	OuterLoopMetrics  olmu;  // overall metrics
2653 	SeedSearchMetrics sdmu;  // metrics related to seed alignment
2654 	WalkMetrics       wlmu;  // metrics related to walking left (i.e. resolving reference offsets)
2655 	SwMetrics         swmuSeed; // metrics related to DP seed-extend alignment
2656 	SwMetrics         swmuMate; // metrics related to DP mate-finding alignment
2657 	ReportingMetrics  rpmu;  // metrics related to reporting
2658 	SSEMetrics        dpSse8uSeed;  // 8-bit SSE seed extensions
2659 	SSEMetrics        dpSse8uMate;  // 8-bit SSE mate finds
2660 	SSEMetrics        dpSse16uSeed; // 16-bit SSE seed extensions
2661 	SSEMetrics        dpSse16uMate; // 16-bit SSE mate finds
2662 	uint64_t          nbtfiltst_u;
2663 	uint64_t          nbtfiltsc_u;
2664 	uint64_t          nbtfiltdo_u;
2665 
2666     //
2667     HIMetrics         him;
2668 
2669 	MUTEX_T           mutex_m;  // lock for when one ob
2670 	bool              first; // yet to print first line?
2671 	time_t            lastElapsed; // used in reportInterval to measure time since last call
2672 };
2673 
2674 static PerfMetrics metrics;
2675 
2676 // Cyclic rotations
2677 #define ROTL(n, x) (((x) << (n)) | ((x) >> (32-n)))
2678 #define ROTR(n, x) (((x) >> (n)) | ((x) << (32-n)))
2679 
printMmsSkipMsg(const PatternSourcePerThread & ps,bool paired,bool mate1,int seedmms)2680 static inline void printMmsSkipMsg(
2681 	const PatternSourcePerThread& ps,
2682 	bool paired,
2683 	bool mate1,
2684 	int seedmms)
2685 {
2686 	ostringstream os;
2687 	if(paired) {
2688 		os << "Warning: skipping mate #" << (mate1 ? '1' : '2')
2689 		   << " of read '" << (mate1 ? ps.bufa().name : ps.bufb().name)
2690 		   << "' because length (" << (mate1 ? ps.bufa().patFw.length() : ps.bufb().patFw.length())
2691 		   << ") <= # seed mismatches (" << seedmms << ")" << endl;
2692 	} else {
2693 		os << "Warning: skipping read '" << (mate1 ? ps.bufa().name : ps.bufb().name)
2694 		   << "' because length (" << (mate1 ? ps.bufa().patFw.length() : ps.bufb().patFw.length())
2695 		   << ") <= # seed mismatches (" << seedmms << ")" << endl;
2696 	}
2697 	cerr << os.str().c_str();
2698 }
2699 
printLenSkipMsg(const PatternSourcePerThread & ps,bool paired,bool mate1)2700 static inline void printLenSkipMsg(
2701 	const PatternSourcePerThread& ps,
2702 	bool paired,
2703 	bool mate1)
2704 {
2705 	ostringstream os;
2706 	if(paired) {
2707 		os << "Warning: skipping mate #" << (mate1 ? '1' : '2')
2708 		   << " of read '" << (mate1 ? ps.bufa().name : ps.bufb().name)
2709 		   << "' because it was < 2 characters long" << endl;
2710 	} else {
2711 		os << "Warning: skipping read '" << (mate1 ? ps.bufa().name : ps.bufb().name)
2712 		   << "' because it was < 2 characters long" << endl;
2713 	}
2714 	cerr << os.str().c_str();
2715 }
2716 
printLocalScoreMsg(const PatternSourcePerThread & ps,bool paired,bool mate1)2717 static inline void printLocalScoreMsg(
2718 	const PatternSourcePerThread& ps,
2719 	bool paired,
2720 	bool mate1)
2721 {
2722 	ostringstream os;
2723 	if(paired) {
2724 		os << "Warning: minimum score function gave negative number in "
2725 		   << "--local mode for mate #" << (mate1 ? '1' : '2')
2726 		   << " of read '" << (mate1 ? ps.bufa().name : ps.bufb().name)
2727 		   << "; setting to 0 instead" << endl;
2728 	} else {
2729 		os << "Warning: minimum score function gave negative number in "
2730 		   << "--local mode for read '" << (mate1 ? ps.bufa().name : ps.bufb().name)
2731 		   << "; setting to 0 instead" << endl;
2732 	}
2733 	cerr << os.str().c_str();
2734 }
2735 
printEEScoreMsg(const PatternSourcePerThread & ps,bool paired,bool mate1)2736 static inline void printEEScoreMsg(
2737 	const PatternSourcePerThread& ps,
2738 	bool paired,
2739 	bool mate1)
2740 {
2741 	ostringstream os;
2742 	if(paired) {
2743 		os << "Warning: minimum score function gave positive number in "
2744 		   << "--end-to-end mode for mate #" << (mate1 ? '1' : '2')
2745 		   << " of read '" << (mate1 ? ps.bufa().name : ps.bufb().name)
2746 		   << "; setting to 0 instead" << endl;
2747 	} else {
2748 		os << "Warning: minimum score function gave positive number in "
2749 		   << "--end-to-end mode for read '" << (mate1 ? ps.bufa().name : ps.bufb().name)
2750 		   << "; setting to 0 instead" << endl;
2751 	}
2752 	cerr << os.str().c_str();
2753 }
2754 
2755 
2756 #define MERGE_METRICS(met, sync) { \
2757 	msink.mergeMetrics(rpm); \
2758 	met.merge( \
2759 		&olm, \
2760 		&sdm, \
2761 		&wlm, \
2762 		&swmSeed, \
2763 		&swmMate, \
2764 		&rpm, \
2765 		&sseU8ExtendMet, \
2766 		&sseU8MateMet, \
2767 		&sseI16ExtendMet, \
2768 		&sseI16MateMet, \
2769 		nbtfiltst, \
2770 		nbtfiltsc, \
2771 		nbtfiltdo, \
2772         &him, \
2773 		sync); \
2774 	olm.reset(); \
2775 	sdm.reset(); \
2776 	wlm.reset(); \
2777 	swmSeed.reset(); \
2778 	swmMate.reset(); \
2779 	rpm.reset(); \
2780 	sseU8ExtendMet.reset(); \
2781 	sseU8MateMet.reset(); \
2782 	sseI16ExtendMet.reset(); \
2783 	sseI16MateMet.reset(); \
2784     him.reset(); \
2785 }
2786 
2787 #define MERGE_SW(x) { \
2788 	x.merge( \
2789 		sseU8ExtendMet, \
2790 		sseU8MateMet, \
2791 		sseI16ExtendMet, \
2792 		sseI16MateMet, \
2793 		nbtfiltst, \
2794 		nbtfiltsc, \
2795 		nbtfiltdo); \
2796 	x.resetCounters(); \
2797 }
2798 
2799 /**
2800  * Called once per thread.  Sets up per-thread pointers to the shared global
2801  * data structures, creates per-thread structures, then enters the alignment
2802  * loop.  The general flow of the alignment loop is:
2803  *
2804  * - If it's been a while and we're the master thread, report some alignment
2805  *   metrics
2806  * - Get the next read/pair
2807  * - Check if this read/pair is identical to the previous
2808  *   + If identical, check whether we can skip any or all alignment stages.  If
2809  *     we can skip all stages, report the result immediately and move to next
2810  *     read/pair
2811  *   + If not identical, continue
2812  * -
2813  */
multiseedSearchWorker_hisat_bp(void * vp)2814 static void multiseedSearchWorker_hisat_bp(void *vp) {
2815 	int tid = *((int*)vp);
2816 	assert(multiseed_ebwtFw != NULL);
2817 	assert(multiseedMms == 0 || multiseed_ebwtBw != NULL);
2818 	PairedPatternSource&             patsrc   = *multiseed_patsrc;
2819 	const HierEbwt<index_t>&         ebwtFw   = *multiseed_ebwtFw;
2820 	const HierEbwt<index_t>&         ebwtBw   = *multiseed_ebwtBw;
2821 	const Scoring&                   sc       = *multiseed_sc;
2822 	const BitPairReference&          ref      = *multiseed_refs;
2823 	AlignmentCache<index_t>&         scShared = *multiseed_ca;
2824 	AlnSink<index_t>&                msink    = *multiseed_msink;
2825 	OutFileBuf*                      metricsOfb = multiseed_metricsOfb;
2826 
2827 	// Sinks: these are so that we can print tables encoding counts for
2828 	// events of interest on a per-read, per-seed, per-join, or per-SW
2829 	// level.  These in turn can be used to diagnose performance
2830 	// problems, or generally characterize performance.
2831 
2832 	//const BitPairReference& refs   = *multiseed_refs;
2833 	auto_ptr<PatternSourcePerThreadFactory> patsrcFact(createPatsrcFactory(patsrc, tid));
2834 	auto_ptr<PatternSourcePerThread> ps(patsrcFact->create());
2835 
2836 	// Thread-local cache for seed alignments
2837 	PtrWrap<AlignmentCache<index_t> > scLocal;
2838 	if(!msNoCache) {
2839 		scLocal.init(new AlignmentCache<index_t>(seedCacheLocalMB * 1024 * 1024, false));
2840 	}
2841 	AlignmentCache<index_t> scCurrent(seedCacheCurrentMB * 1024 * 1024, false);
2842 	// Thread-local cache for current seed alignments
2843 
2844 	// Interfaces for alignment and seed caches
2845 	AlignmentCacheIface<index_t> ca(
2846                                     &scCurrent,
2847                                     scLocal.get(),
2848                                     msNoCache ? NULL : &scShared);
2849 
2850 	// Instantiate an object for holding reporting-related parameters.
2851     ReportingParams rp(
2852                        (allHits ? std::numeric_limits<THitInt>::max() : khits), // -k
2853                        mhits,             // -m/-M
2854                        0,                 // penalty gap (not used now)
2855                        msample,           // true -> -M was specified, otherwise assume -m
2856                        gReportDiscordant, // report discordang paired-end alignments?
2857                        gReportMixed);     // report unpaired alignments for paired reads?
2858 
2859 	// Instantiate a mapping quality calculator
2860 	auto_ptr<Mapq> bmapq(new_mapq(mapqv, scoreMin, sc));
2861 
2862 	// Make a per-thread wrapper for the global MHitSink object.
2863 	AlnSinkWrap<index_t> msinkwrap(
2864                                    msink,         // global sink
2865                                    rp,            // reporting parameters
2866                                    *bmapq.get(),  // MAPQ calculator
2867                                    (size_t)tid);  // thread id
2868 
2869     BP_Aligner<index_t, local_index_t> bp_Aligner(
2870                                                   ebwtFw,
2871                                                   multiseed_refnames,
2872                                                   &multiseed_mutex,
2873                                                   thread_rids_mindist,
2874                                                   no_spliced_alignment);
2875 	SwAligner sw;
2876 	OuterLoopMetrics olm;
2877 	SeedSearchMetrics sdm;
2878 	WalkMetrics wlm;
2879 	SwMetrics swmSeed, swmMate;
2880 	ReportingMetrics rpm;
2881 	RandomSource rnd, rndArb;
2882 	SSEMetrics sseU8ExtendMet;
2883 	SSEMetrics sseU8MateMet;
2884 	SSEMetrics sseI16ExtendMet;
2885 	SSEMetrics sseI16MateMet;
2886    	DescentMetrics descm;
2887 	uint64_t nbtfiltst = 0; // TODO: find a new home for these
2888 	uint64_t nbtfiltsc = 0; // TODO: find a new home for these
2889 	uint64_t nbtfiltdo = 0; // TODO: find a new home for these
2890     HIMetrics him;
2891 
2892 	ASSERT_ONLY(BTDnaString tmp);
2893 
2894 	int pepolFlag;
2895 	if(gMate1fw && gMate2fw) {
2896 		pepolFlag = PE_POLICY_FF;
2897 	} else if(gMate1fw && !gMate2fw) {
2898 		pepolFlag = PE_POLICY_FR;
2899 	} else if(!gMate1fw && gMate2fw) {
2900 		pepolFlag = PE_POLICY_RF;
2901 	} else {
2902 		pepolFlag = PE_POLICY_RR;
2903 	}
2904 	assert_geq(gMaxInsert, gMinInsert);
2905 	assert_geq(gMinInsert, 0);
2906 	PairedEndPolicy pepol(
2907                           pepolFlag,
2908                           gMaxInsert,
2909                           gMinInsert,
2910                           localAlign,
2911                           gFlippedMatesOK,
2912                           gDovetailMatesOK,
2913                           gContainMatesOK,
2914                           gOlapMatesOK,
2915                           gExpandToFrag);
2916 
2917   	PerfMetrics metricsPt; // per-thread metrics object; for read-level metrics
2918 	BTString nametmp;
2919 
2920 	PerReadMetrics prm;
2921 
2922 	// Used by thread with threadid == 1 to measure time elapsed
2923 	time_t iTime = time(0);
2924 
2925 	// Keep track of whether last search was exhaustive for mates 1 and 2
2926 	bool exhaustive[2] = { false, false };
2927 	// Keep track of whether mates 1/2 were filtered out last time through
2928 	bool filt[2]    = { true, true };
2929 	// Keep track of whether mates 1/2 were filtered out due Ns last time
2930 	bool nfilt[2]   = { true, true };
2931 	// Keep track of whether mates 1/2 were filtered out due to not having
2932 	// enough characters to rise about the score threshold.
2933 	bool scfilt[2]  = { true, true };
2934 	// Keep track of whether mates 1/2 were filtered out due to not having
2935 	// more characters than the number of mismatches permitted in a seed.
2936 	bool lenfilt[2] = { true, true };
2937 	// Keep track of whether mates 1/2 were filtered out by upstream qc
2938 	bool qcfilt[2]  = { true, true };
2939 
2940 	rndArb.init((uint32_t)time(0));
2941 	int mergei = 0;
2942 	int mergeival = 16;
2943 	while(true) {
2944 		bool success = false, done = false, paired = false;
2945 		ps->nextReadPair(success, done, paired, outType != OUTPUT_SAM);
2946 		if(!success && done) {
2947 			break;
2948 		} else if(!success) {
2949 			continue;
2950 		}
2951 		TReadId rdid = ps->rdid();
2952 
2953         if(nthreads > 1 && useTempSpliceSite) {
2954             while(true) {
2955                 uint64_t min_rdid = 0;
2956                 {
2957                     // ThreadSafe t(&thread_rids_mutex, nthreads > 1);
2958                     assert_gt(thread_rids.size(), 0);
2959                     min_rdid = thread_rids[0];
2960                     for(size_t i = 1; i < thread_rids.size(); i++) {
2961                         if(thread_rids[i] < min_rdid) {
2962                             min_rdid = thread_rids[i];
2963                         }
2964                     }
2965                 }
2966 
2967                 if(min_rdid + thread_rids_mindist < rdid) {
2968 #if defined(_TTHREAD_WIN32_)
2969                     Sleep(0);
2970 #elif defined(_TTHREAD_POSIX_)
2971                     sched_yield();
2972 #endif
2973                 } else break;
2974             }
2975         }
2976 
2977 		bool sample = true;
2978 		if(arbitraryRandom) {
2979 			ps->bufa().seed = rndArb.nextU32();
2980 			ps->bufb().seed = rndArb.nextU32();
2981 		}
2982 		if(sampleFrac < 1.0f) {
2983 			rnd.init(ROTL(ps->bufa().seed, 2));
2984 			sample = rnd.nextFloat() < sampleFrac;
2985 		}
2986 		if(rdid >= skipReads && rdid < qUpto && sample) {
2987 			// Align this read/pair
2988 			bool retry = true;
2989 			//
2990 			// Check if there is metrics reporting for us to do.
2991 			//
2992 			if(metricsIval > 0 &&
2993 			   (metricsOfb != NULL || metricsStderr) &&
2994 			   !metricsPerRead &&
2995 			   ++mergei == mergeival)
2996 			{
2997 				// Do a periodic merge.  Update global metrics, in a
2998 				// synchronized manner if needed.
2999 				MERGE_METRICS(metrics, nthreads > 1);
3000 				mergei = 0;
3001 				// Check if a progress message should be printed
3002 				if(tid == 0) {
3003 					// Only thread 1 prints progress messages
3004 					time_t curTime = time(0);
3005 					if(curTime - iTime >= metricsIval) {
3006 						metrics.reportInterval(metricsOfb, metricsStderr, false, true, NULL);
3007 						iTime = curTime;
3008 					}
3009 				}
3010 			}
3011 			prm.reset(); // per-read metrics
3012 			prm.doFmString = false;
3013 			if(sam_print_xt) {
3014 				gettimeofday(&prm.tv_beg, &prm.tz_beg);
3015 			}
3016 			// Try to align this read
3017 			while(retry) {
3018 				retry = false;
3019 				assert_eq(ps->bufa().color, false);
3020 				ca.nextRead(); // clear the cache
3021 				olm.reads++;
3022 				assert(!ca.aligning());
3023 				bool pair = paired;
3024 				const size_t rdlen1 = ps->bufa().length();
3025 				const size_t rdlen2 = pair ? ps->bufb().length() : 0;
3026 				olm.bases += (rdlen1 + rdlen2);
3027 #if 0
3028 				msinkwrap.nextRead(
3029                                    &ps->bufa(),
3030                                    pair ? &ps->bufb() : NULL,
3031                                    rdid,
3032                                    sc.qualitiesMatter());
3033 				assert(msinkwrap.inited());
3034 #endif
3035 				size_t rdlens[2] = { rdlen1, rdlen2 };
3036 				// Calculate the minimum valid score threshold for the read
3037 				TAlScore minsc[2], maxpen[2];
3038 				maxpen[0] = maxpen[1] = 0;
3039 				minsc[0] = minsc[1] = std::numeric_limits<TAlScore>::max();
3040 				if(bwaSwLike) {
3041 					// From BWA-SW manual: "Given an l-long query, the
3042 					// threshold for a hit to be retained is
3043 					// a*max{T,c*log(l)}."  We try to recreate that here.
3044 					float a = (float)sc.match(30);
3045 					float T = bwaSwLikeT, c = bwaSwLikeC;
3046 					minsc[0] = (TAlScore)max<float>(a*T, a*c*log(rdlens[0]));
3047 					if(paired) {
3048 						minsc[1] = (TAlScore)max<float>(a*T, a*c*log(rdlens[1]));
3049 					}
3050 				} else {
3051 					minsc[0] = scoreMin.f<TAlScore>(rdlens[0]);
3052 					if(paired) minsc[1] = scoreMin.f<TAlScore>(rdlens[1]);
3053 					if(localAlign) {
3054 						if(minsc[0] < 0) {
3055 							if(!gQuiet) printLocalScoreMsg(*ps, paired, true);
3056 							minsc[0] = 0;
3057 						}
3058 						if(paired && minsc[1] < 0) {
3059 							if(!gQuiet) printLocalScoreMsg(*ps, paired, false);
3060 							minsc[1] = 0;
3061 						}
3062 					} else {
3063 						if(minsc[0] > 0) {
3064 							if(!gQuiet) printEEScoreMsg(*ps, paired, true);
3065 							minsc[0] = 0;
3066 						}
3067 						if(paired && minsc[1] > 0) {
3068 							if(!gQuiet) printEEScoreMsg(*ps, paired, false);
3069 							minsc[1] = 0;
3070 						}
3071 					}
3072 				}
3073 
3074 				// N filter; does the read have too many Ns?
3075 				size_t readns[2] = {0, 0};
3076 				sc.nFilterPair(
3077                                &ps->bufa().patFw,
3078                                pair ? &ps->bufb().patFw : NULL,
3079                                readns[0],
3080                                readns[1],
3081                                nfilt[0],
3082                                nfilt[1]);
3083 				// Score filter; does the read enough character to rise above
3084 				// the score threshold?
3085 				scfilt[0] = sc.scoreFilter(minsc[0], rdlens[0]);
3086 				scfilt[1] = sc.scoreFilter(minsc[1], rdlens[1]);
3087 				lenfilt[0] = lenfilt[1] = true;
3088 				if(rdlens[0] <= (size_t)multiseedMms || rdlens[0] < 2) {
3089 					if(!gQuiet) printMmsSkipMsg(*ps, paired, true, multiseedMms);
3090 					lenfilt[0] = false;
3091 				}
3092 				if((rdlens[1] <= (size_t)multiseedMms || rdlens[1] < 2) && paired) {
3093 					if(!gQuiet) printMmsSkipMsg(*ps, paired, false, multiseedMms);
3094 					lenfilt[1] = false;
3095 				}
3096 				if(rdlens[0] < 2) {
3097 					if(!gQuiet) printLenSkipMsg(*ps, paired, true);
3098 					lenfilt[0] = false;
3099 				}
3100 				if(rdlens[1] < 2 && paired) {
3101 					if(!gQuiet) printLenSkipMsg(*ps, paired, false);
3102 					lenfilt[1] = false;
3103 				}
3104 				qcfilt[0] = qcfilt[1] = true;
3105 				if(qcFilter) {
3106 					qcfilt[0] = (ps->bufa().filter != '0');
3107 					qcfilt[1] = (ps->bufb().filter != '0');
3108 				}
3109 				filt[0] = (nfilt[0] && scfilt[0] && lenfilt[0] && qcfilt[0]);
3110 				filt[1] = (nfilt[1] && scfilt[1] && lenfilt[1] && qcfilt[1]);
3111 				prm.nFilt += (filt[0] ? 0 : 1) + (filt[1] ? 0 : 1);
3112 				Read* rds[2] = { &ps->bufa(), &ps->bufb() };
3113 				// For each mate...
3114 				assert(msinkwrap.empty());
3115 				//size_t minedfw[2] = { 0, 0 };
3116 				//size_t minedrc[2] = { 0, 0 };
3117 				// Calcualte nofw / no rc
3118 				bool nofw[2] = { false, false };
3119 				bool norc[2] = { false, false };
3120 				nofw[0] = paired ? (gMate1fw ? gNofw : gNorc) : gNofw;
3121 				norc[0] = paired ? (gMate1fw ? gNorc : gNofw) : gNorc;
3122 				nofw[1] = paired ? (gMate2fw ? gNofw : gNorc) : gNofw;
3123 				norc[1] = paired ? (gMate2fw ? gNorc : gNofw) : gNorc;
3124 				// Calculate nceil
3125 				int nceil[2] = { 0, 0 };
3126 				nceil[0] = nCeil.f<int>((double)rdlens[0]);
3127 				nceil[0] = min(nceil[0], (int)rdlens[0]);
3128 				if(paired) {
3129 					nceil[1] = nCeil.f<int>((double)rdlens[1]);
3130 					nceil[1] = min(nceil[1], (int)rdlens[1]);
3131 				}
3132 				exhaustive[0] = exhaustive[1] = false;
3133 				//size_t matemap[2] = { 0, 1 };
3134 				bool pairPostFilt = filt[0] && filt[1];
3135 				if(pairPostFilt) {
3136 					rnd.init(ps->bufa().seed ^ ps->bufb().seed);
3137 				} else {
3138 					rnd.init(ps->bufa().seed);
3139 				}
3140 				// Calculate interval length for both mates
3141 				int interval[2] = { 0, 0 };
3142 				for(size_t mate = 0; mate < (pair ? 2:1); mate++) {
3143 					interval[mate] = msIval.f<int>((double)rdlens[mate]);
3144 					if(filt[0] && filt[1]) {
3145 						// Boost interval length by 20% for paired-end reads
3146 						interval[mate] = (int)(interval[mate] * 1.2 + 0.5);
3147 					}
3148 					interval[mate] = max(interval[mate], 1);
3149 				}
3150 				// Calculate streak length
3151 				size_t streak[2]    = { maxDpStreak,   maxDpStreak };
3152 				size_t mtStreak[2]  = { maxMateStreak, maxMateStreak };
3153 				size_t mxDp[2]      = { maxDp,         maxDp       };
3154 				size_t mxUg[2]      = { maxUg,         maxUg       };
3155 				size_t mxIter[2]    = { maxIters,      maxIters    };
3156 				if(allHits) {
3157 					streak[0]   = streak[1]   = std::numeric_limits<size_t>::max();
3158 					mtStreak[0] = mtStreak[1] = std::numeric_limits<size_t>::max();
3159 					mxDp[0]     = mxDp[1]     = std::numeric_limits<size_t>::max();
3160 					mxUg[0]     = mxUg[1]     = std::numeric_limits<size_t>::max();
3161 					mxIter[0]   = mxIter[1]   = std::numeric_limits<size_t>::max();
3162 				} else if(khits > 1) {
3163 					for(size_t mate = 0; mate < 2; mate++) {
3164 						streak[mate]   += (khits-1) * maxStreakIncr;
3165 						mtStreak[mate] += (khits-1) * maxStreakIncr;
3166 						mxDp[mate]     += (khits-1) * maxItersIncr;
3167 						mxUg[mate]     += (khits-1) * maxItersIncr;
3168 						mxIter[mate]   += (khits-1) * maxItersIncr;
3169 					}
3170 				}
3171 				if(filt[0] && filt[1]) {
3172 					streak[0] = (size_t)ceil((double)streak[0] / 2.0);
3173 					streak[1] = (size_t)ceil((double)streak[1] / 2.0);
3174 					assert_gt(streak[1], 0);
3175 				}
3176 				assert_gt(streak[0], 0);
3177 				// Calculate # seed rounds for each mate
3178 				size_t nrounds[2] = { nSeedRounds, nSeedRounds };
3179 				if(filt[0] && filt[1]) {
3180 					nrounds[0] = (size_t)ceil((double)nrounds[0] / 2.0);
3181 					nrounds[1] = (size_t)ceil((double)nrounds[1] / 2.0);
3182 					assert_gt(nrounds[1], 0);
3183 				}
3184 				assert_gt(nrounds[0], 0);
3185 				// Increment counters according to what got filtered
3186 				for(size_t mate = 0; mate < (pair ? 2:1); mate++) {
3187 					if(!filt[mate]) {
3188 						// Mate was rejected by N filter
3189 						olm.freads++;               // reads filtered out
3190 						olm.fbases += rdlens[mate]; // bases filtered out
3191 					} else {
3192 						//shs[mate].clear();
3193 						//shs[mate].nextRead(mate == 0 ? ps->bufa() : ps->bufb());
3194 						//assert(shs[mate].empty());
3195 						olm.ureads++;               // reads passing filter
3196 						olm.ubases += rdlens[mate]; // bases passing filter
3197 					}
3198 				}
3199 				//size_t eePeEeltLimit = std::numeric_limits<size_t>::max();
3200 				// Whether we're done with mate1 / mate2
3201                 bool done[2] = { !filt[0], !filt[1] };
3202 				// size_t nelt[2] = {0, 0};
3203                 if(filt[0] && filt[1]) {
3204                     bp_Aligner.initReads(rds, nofw, norc, minsc, maxpen);
3205                 } else if(filt[0] || filt[1]) {
3206                     bp_Aligner.initRead(rds[0], nofw[0], norc[0], minsc[0], maxpen[0], filt[1]);
3207                 }
3208                 if(filt[0] || filt[1]) {
3209                     int ret = bp_Aligner.go(sc, ebwtFw, ebwtBw, ref, sw, *ssdb, wlm, prm, swmSeed, him, rnd, msinkwrap);
3210                     MERGE_SW(sw);
3211                     // daehwan
3212                     size_t mate = 0;
3213 
3214                     assert_gt(ret, 0);
3215                     // Clear out the exact hits so that we don't try to
3216                     // extend them again later!
3217                     if(ret == EXTEND_EXHAUSTED_CANDIDATES) {
3218                         // Not done yet
3219                     } else if(ret == EXTEND_POLICY_FULFILLED) {
3220                         // Policy is satisfied for this mate at least
3221                         if(msinkwrap.state().doneWithMate(mate == 0)) {
3222                             done[mate] = true;
3223                         }
3224                         if(msinkwrap.state().doneWithMate(mate == 1)) {
3225                             done[mate^1] = true;
3226                         }
3227                     } else if(ret == EXTEND_PERFECT_SCORE) {
3228                         // We exhausted this mode at least
3229                         done[mate] = true;
3230                     } else if(ret == EXTEND_EXCEEDED_HARD_LIMIT) {
3231                         // We exceeded a per-read limit
3232                         done[mate] = true;
3233                     } else if(ret == EXTEND_EXCEEDED_SOFT_LIMIT) {
3234                         // Not done yet
3235                     } else {
3236                         //
3237                         cerr << "Bad return value: " << ret << endl;
3238                         throw 1;
3239                     }
3240                     if(!done[mate]) {
3241                         TAlScore perfectScore = sc.perfectScore(rdlens[mate]);
3242                         if(!done[mate] && minsc[mate] == perfectScore) {
3243                             done[mate] = true;
3244                         }
3245                     }
3246                 }
3247 
3248                 for(size_t i = 0; i < 2; i++) {
3249                     assert_leq(prm.nExIters, mxIter[i]);
3250                     assert_leq(prm.nExDps,   mxDp[i]);
3251                     assert_leq(prm.nMateDps, mxDp[i]);
3252                     assert_leq(prm.nExUgs,   mxUg[i]);
3253                     assert_leq(prm.nMateUgs, mxUg[i]);
3254                     assert_leq(prm.nDpFail,  streak[i]);
3255                     assert_leq(prm.nUgFail,  streak[i]);
3256                     assert_leq(prm.nEeFail,  streak[i]);
3257                 }
3258 
3259 #if 0
3260 				// Commit and report paired-end/unpaired alignments
3261 				msinkwrap.finishRead(
3262                                      NULL,
3263                                      NULL,
3264                                      exhaustive[0],        // exhausted seed hits for mate 1?
3265                                      exhaustive[1],        // exhausted seed hits for mate 2?
3266                                      nfilt[0],
3267                                      nfilt[1],
3268                                      scfilt[0],
3269                                      scfilt[1],
3270                                      lenfilt[0],
3271                                      lenfilt[1],
3272                                      qcfilt[0],
3273                                      qcfilt[1],
3274                                      sortByScore,          // prioritize by alignment score
3275                                      rnd,                  // pseudo-random generator
3276                                      rpm,                  // reporting metrics
3277                                      prm,                  // per-read metrics
3278                                      sc,                   // scoring scheme
3279                                      !seedSumm,            // suppress seed summaries?
3280                                      seedSumm);            // suppress alignments?
3281 				assert(!retry || msinkwrap.empty());
3282 #endif
3283 
3284                 if(nthreads > 1 && useTempSpliceSite) {
3285                     // ThreadSafe t(&thread_rids_mutex, nthreads > 1);
3286                     assert_gt(tid, 0);
3287                     assert_leq(tid, thread_rids.size());
3288                     assert(thread_rids[tid - 1] == 0 || rdid > thread_rids[tid - 1]);
3289                     thread_rids[tid - 1] = rdid;
3290                 }
3291 			} // while(retry)
3292 		} // if(rdid >= skipReads && rdid < qUpto)
3293 		else if(rdid >= qUpto) {
3294 			break;
3295 		}
3296 		if(metricsPerRead) {
3297 			MERGE_METRICS(metricsPt, nthreads > 1);
3298 			nametmp = ps->bufa().name;
3299 			metricsPt.reportInterval(
3300                                      metricsOfb, metricsStderr, true, true, &nametmp);
3301 			metricsPt.reset();
3302 		}
3303 	} // while(true)
3304 
3305 	// One last metrics merge
3306 	MERGE_METRICS(metrics, nthreads > 1);
3307 
3308 	return;
3309 }
3310 
3311 /**
3312  * Called once per alignment job.  Sets up global pointers to the
3313  * shared global data structures, creates per-thread structures, then
3314  * enters the search loop.
3315  */
multiseedSearch(Scoring & sc,PairedPatternSource & patsrc,AlnSink<index_t> & msink,HierEbwt<index_t> & ebwtFw,HierEbwt<index_t> & ebwtBw,BitPairReference * refs,const EList<string> & refnames,OutFileBuf * metricsOfb)3316 static void multiseedSearch(
3317 	Scoring& sc,
3318 	PairedPatternSource& patsrc,  // pattern source
3319 	AlnSink<index_t>& msink,             // hit sink
3320 	HierEbwt<index_t>& ebwtFw,                 // index of original text
3321 	HierEbwt<index_t>& ebwtBw,                 // index of mirror text
3322     BitPairReference* refs,
3323     const EList<string>& refnames,
3324 	OutFileBuf *metricsOfb)
3325 {
3326     multiseed_patsrc = &patsrc;
3327 	multiseed_msink  = &msink;
3328 	multiseed_ebwtFw = &ebwtFw;
3329 	multiseed_ebwtBw = &ebwtBw;
3330 	multiseed_sc     = &sc;
3331     multiseed_refnames = refnames;
3332 	multiseed_metricsOfb      = metricsOfb;
3333 	multiseed_refs = refs;
3334 	AutoArray<tthread::thread*> threads(nthreads);
3335 	AutoArray<int> tids(nthreads);
3336 	{
3337 		// Load the other half of the index into memory
3338 		assert(!ebwtFw.isInMemory());
3339 		Timer _t(cerr, "Time loading forward index: ", timing);
3340 		ebwtFw.loadIntoMemory(
3341 			0,  // colorspace?
3342 			-1, // not the reverse index
3343 			true,         // load SA samp? (yes, need forward index's SA samp)
3344 			true,         // load ftab (in forward index)
3345 			true,         // load rstarts (in forward index)
3346 			!noRefNames,  // load names?
3347 			startVerbose);
3348 	}
3349 #if 0
3350 	if(multiseedMms > 0 || do1mmUpFront) {
3351 		// Load the other half of the index into memory
3352 		assert(!ebwtBw.isInMemory());
3353 		Timer _t(cerr, "Time loading mirror index: ", timing);
3354 		ebwtBw.loadIntoMemory(
3355 			0, // colorspace?
3356 			// It's bidirectional search, so we need the reverse to be
3357 			// constructed as the reverse of the concatenated strings.
3358 			1,
3359 			true,        // load SA samp in reverse index
3360 			true,         // yes, need ftab in reverse index
3361 			true,        // load rstarts in reverse index
3362 			!noRefNames,  // load names?
3363 			startVerbose);
3364 	}
3365 #endif
3366 	// Start the metrics thread
3367 	{
3368 		Timer _t(cerr, "Multiseed full-index search: ", timing);
3369 
3370         thread_rids.resize(nthreads);
3371         thread_rids.fill(0);
3372         thread_rids_mindist = (nthreads == 1 || !useTempSpliceSite ? 0 : 1000 * nthreads);
3373 
3374 		for(int i = 0; i < nthreads; i++) {
3375 			// Thread IDs start at 1
3376 			tids[i] = i+1;
3377             threads[i] = new tthread::thread(multiseedSearchWorker_hisat_bp, (void*)&tids[i]);
3378 		}
3379 
3380         for (int i = 0; i < nthreads; i++)
3381             threads[i]->join();
3382 
3383 	}
3384 	if(!metricsPerRead && (metricsOfb != NULL || metricsStderr)) {
3385 		metrics.reportInterval(metricsOfb, metricsStderr, true, false, NULL);
3386 	}
3387 }
3388 
3389 static string argstr;
3390 
3391 extern void initializeCntLut();
3392 extern void initializeCntBit();
3393 
3394 template<typename TStr>
driver(const char * type,const string & bt2indexBase,const string & outfile)3395 static void driver(
3396 	const char * type,
3397 	const string& bt2indexBase,
3398 	const string& outfile)
3399 {
3400 	if(gVerbose || startVerbose)  {
3401 		cerr << "Entered driver(): "; logTime(cerr, true);
3402 	}
3403 
3404     initializeCntLut();
3405     initializeCntBit();
3406 
3407 	// Vector of the reference sequences; used for sanity-checking
3408 	EList<SString<char> > names, os;
3409 	EList<size_t> nameLens, seqLens;
3410 	// Read reference sequences from the command-line or from a FASTA file
3411 	if(!origString.empty()) {
3412 		// Read fasta file(s)
3413 		EList<string> origFiles;
3414 		tokenize(origString, ",", origFiles);
3415 		parseFastas(origFiles, names, nameLens, os, seqLens);
3416 	}
3417 	PatternParams pp(
3418 		format,        // file format
3419 		fileParallel,  // true -> wrap files with separate PairedPatternSources
3420 		seed,          // pseudo-random seed
3421 		useSpinlock,   // use spin locks instead of pthreads
3422 		solexaQuals,   // true -> qualities are on solexa64 scale
3423 		phred64Quals,  // true -> qualities are on phred64 scale
3424 		integerQuals,  // true -> qualities are space-separated numbers
3425 		fuzzy,         // true -> try to parse fuzzy fastq
3426 		fastaContLen,  // length of sampled reads for FastaContinuous...
3427 		fastaContFreq, // frequency of sampled reads for FastaContinuous...
3428 		skipReads      // skip the first 'skip' patterns
3429 	);
3430 	if(gVerbose || startVerbose) {
3431 		cerr << "Creating PatternSource: "; logTime(cerr, true);
3432 	}
3433 	PairedPatternSource *patsrc = PairedPatternSource::setupPatternSources(
3434 		queries,     // singles, from argv
3435 		mates1,      // mate1's, from -1 arg
3436 		mates2,      // mate2's, from -2 arg
3437 		mates12,     // both mates on each line, from --12 arg
3438 		qualities,   // qualities associated with singles
3439 		qualities1,  // qualities associated with m1
3440 		qualities2,  // qualities associated with m2
3441 		pp,          // read read-in parameters
3442 		gVerbose || startVerbose); // be talkative
3443 	// Open hit output file
3444 	if(gVerbose || startVerbose) {
3445 		cerr << "Opening hit output file: "; logTime(cerr, true);
3446 	}
3447 	OutFileBuf *fout;
3448 	if(!outfile.empty()) {
3449 		fout = new OutFileBuf(outfile.c_str(), false);
3450 	} else {
3451 		fout = new OutFileBuf();
3452 	}
3453 	// Initialize Ebwt object and read in header
3454 	if(gVerbose || startVerbose) {
3455 		cerr << "About to initialize fw Ebwt: "; logTime(cerr, true);
3456 	}
3457 	adjIdxBase = adjustEbwtBase(argv0, bt2indexBase, gVerbose);
3458 	HierEbwt<index_t, local_index_t> ebwt(
3459 		adjIdxBase,
3460 	    0,        // index is colorspace
3461 		-1,       // fw index
3462 	    true,     // index is for the forward direction
3463 	    /* overriding: */ offRate,
3464 		0, // amount to add to index offrate or <= 0 to do nothing
3465 	    useMm,    // whether to use memory-mapped files
3466 	    useShmem, // whether to use shared memory
3467 	    mmSweep,  // sweep memory-mapped files
3468 	    !noRefNames, // load names?
3469 		true,        // load SA sample?
3470 		true,        // load ftab?
3471 		true,        // load rstarts?
3472 	    gVerbose, // whether to be talkative
3473 	    startVerbose, // talkative during initialization
3474 	    false /*passMemExc*/,
3475 	    sanityCheck);
3476 	HierEbwt<index_t, local_index_t>* ebwtBw = NULL;
3477 #if 0
3478 	// We need the mirror index if mismatches are allowed
3479 	if(multiseedMms > 0 || do1mmUpFront) {
3480 		if(gVerbose || startVerbose) {
3481 			cerr << "About to initialize rev Ebwt: "; logTime(cerr, true);
3482 		}
3483 		ebwtBw = new HierEbwt<index_t, local_index_t>(
3484 			adjIdxBase + ".rev",
3485 			0,       // index is colorspace
3486 			1,       // TODO: maybe not
3487 		    false, // index is for the reverse direction
3488 		    /* overriding: */ offRate,
3489 			0, // amount to add to index offrate or <= 0 to do nothing
3490 		    useMm,    // whether to use memory-mapped files
3491 		    useShmem, // whether to use shared memory
3492 		    mmSweep,  // sweep memory-mapped files
3493 		    !noRefNames, // load names?
3494 			true,        // load SA sample?
3495 			true,        // load ftab?
3496 			true,        // load rstarts?
3497 		    gVerbose,    // whether to be talkative
3498 		    startVerbose, // talkative during initialization
3499 		    false /*passMemExc*/,
3500 		    sanityCheck);
3501 	}
3502 #endif
3503 	if(sanityCheck && !os.empty()) {
3504 		// Sanity check number of patterns and pattern lengths in Ebwt
3505 		// against original strings
3506 		assert_eq(os.size(), ebwt.nPat());
3507 		for(size_t i = 0; i < os.size(); i++) {
3508 			assert_eq(os[i].length(), ebwt.plen()[i]);
3509 		}
3510 	}
3511 	// Sanity-check the restored version of the Ebwt
3512 	if(sanityCheck && !os.empty()) {
3513 		ebwt.loadIntoMemory(
3514 			0,
3515 			-1, // fw index
3516 			true, // load SA sample
3517 			true, // load ftab
3518 			true, // load rstarts
3519 			!noRefNames,
3520 			startVerbose);
3521 		ebwt.checkOrigs(os, false, false);
3522 		ebwt.evictFromMemory();
3523 	}
3524 	OutputQueue oq(
3525 		*fout,                   // out file buffer
3526 		reorder && nthreads > 1, // whether to reorder when there's >1 thread
3527 		nthreads,                // # threads
3528 		nthreads > 1,            // whether to be thread-safe
3529 		skipReads);              // first read will have this rdid
3530 	{
3531 		Timer _t(cerr, "Time searching: ", timing);
3532 		// Set up penalities
3533 		if(bonusMatch > 0 && !localAlign) {
3534 			cerr << "Warning: Match bonus always = 0 in --end-to-end mode; ignoring user setting" << endl;
3535 			bonusMatch = 0;
3536 		}
3537 		Scoring sc(
3538 			bonusMatch,     // constant reward for match
3539 			penMmcType,     // how to penalize mismatches
3540 			penMmcMax,      // max mm pelanty
3541 			penMmcMin,      // min mm pelanty
3542 			scoreMin,       // min score as function of read len
3543 			nCeil,          // max # Ns as function of read len
3544 			penNType,       // how to penalize Ns in the read
3545 			penN,           // constant if N pelanty is a constant
3546 			penNCatPair,    // whether to concat mates before N filtering
3547 			penRdGapConst,  // constant coeff for read gap cost
3548 			penRfGapConst,  // constant coeff for ref gap cost
3549 			penRdGapLinear, // linear coeff for read gap cost
3550 			penRfGapLinear, // linear coeff for ref gap cost
3551 			gGapBarrier,    // # rows at top/bot only entered diagonally
3552             penCanSplice,   // canonical splicing penalty
3553             penNoncanSplice,// non-canonical splicing penalty
3554             &penIntronLen);  // penalty as to intron length
3555 		EList<size_t> reflens;
3556 		for(size_t i = 0; i < ebwt.nPat(); i++) {
3557 			reflens.push_back(ebwt.plen()[i]);
3558 		}
3559 		EList<string> refnames;
3560 		readEbwtRefnames<index_t>(adjIdxBase, refnames);
3561 		SamConfig samc(
3562 			refnames,               // reference sequence names
3563 			reflens,                // reference sequence lengths
3564 			samTruncQname,          // whether to truncate QNAME to 255 chars
3565 			samOmitSecSeqQual,      // omit SEQ/QUAL for 2ndary alignments?
3566 			samNoUnal,              // omit unaligned-read records?
3567 			string("hisat"),      // program id
3568 			string("hisat"),      // program name
3569 			string(HISAT_VERSION), // program version
3570 			argstr,                 // command-line
3571 			rgs_optflag,            // read-group string
3572             rna_strandness,
3573 			sam_print_as,
3574 			sam_print_xs,
3575 			sam_print_xss,
3576 			sam_print_yn,
3577 			sam_print_xn,
3578 			sam_print_cs,
3579 			sam_print_cq,
3580 			sam_print_x0,
3581 			sam_print_x1,
3582 			sam_print_xm,
3583 			sam_print_xo,
3584 			sam_print_xg,
3585 			sam_print_nm,
3586 			sam_print_md,
3587 			sam_print_yf,
3588 			sam_print_yi,
3589 			sam_print_ym,
3590 			sam_print_yp,
3591 			sam_print_yt,
3592 			sam_print_ys,
3593 			sam_print_zs,
3594 			sam_print_xr,
3595 			sam_print_xt,
3596 			sam_print_xd,
3597 			sam_print_xu,
3598 			sam_print_yl,
3599 			sam_print_ye,
3600 			sam_print_yu,
3601 			sam_print_xp,
3602 			sam_print_yr,
3603 			sam_print_zb,
3604 			sam_print_zr,
3605 			sam_print_zf,
3606 			sam_print_zm,
3607 			sam_print_zi,
3608 			sam_print_zp,
3609 			sam_print_zu,
3610             sam_print_xs_a);
3611 		// Set up hit sink; if sanityCheck && !os.empty() is true,
3612 		// then instruct the sink to "retain" hits in a vector in
3613 		// memory so that we can easily sanity check them later on
3614 		AlnSink<index_t> *mssink = NULL;
3615         Timer *_tRef = new Timer(cerr, "Time loading reference: ", timing);
3616         auto_ptr<BitPairReference> refs(
3617                                         new BitPairReference(
3618                                                              adjIdxBase,
3619                                                              false,
3620                                                              sanityCheck,
3621                                                              NULL,
3622                                                              NULL,
3623                                                              false,
3624                                                              useMm,
3625                                                              useShmem,
3626                                                              mmSweep,
3627                                                              gVerbose,
3628                                                              startVerbose)
3629                                         );
3630         delete _tRef;
3631         if(!refs->loaded()) throw 1;
3632 
3633         init_junction_prob();
3634         bool write = novelSpliceSiteOutfile != "" || useTempSpliceSite;
3635         bool read = knownSpliceSiteInfile != "" || novelSpliceSiteInfile != "" || useTempSpliceSite;
3636         ssdb = new SpliceSiteDB(
3637                                 *(refs.get()),
3638                                 refnames,
3639                                 nthreads > 1, // thread-safe
3640                                 write, // write?
3641                                 read);  // read?
3642         if(ssdb != NULL) {
3643             if(knownSpliceSiteInfile != "") {
3644                 ifstream ssdb_file(knownSpliceSiteInfile.c_str(), ios::in);
3645                 if(ssdb_file.is_open()) {
3646                     ssdb->read(ssdb_file, false); // known splice sites
3647                     ssdb_file.close();
3648                 }
3649             }
3650             if(novelSpliceSiteInfile != "") {
3651                 ifstream ssdb_file(novelSpliceSiteInfile.c_str(), ios::in);
3652                 if(ssdb_file.is_open()) {
3653                     ssdb->read(ssdb_file, true); // novel splice sites
3654                     ssdb_file.close();
3655                 }
3656             }
3657         }
3658 		switch(outType) {
3659 			case OUTPUT_SAM: {
3660 				mssink = new AlnSinkSam<index_t>(
3661 					oq,           // output queue
3662 					samc,         // settings & routines for SAM output
3663 					refnames,     // reference names
3664 					gQuiet,       // don't print alignment summary at end
3665                     ssdb);
3666 #if 0
3667 				if(!samNoHead) {
3668 					bool printHd = true, printSq = true;
3669 					BTString buf;
3670 					samc.printHeader(buf, rgid, rgs, printHd, !samNoSQ, printSq);
3671 					fout->writeString(buf);
3672 				}
3673 #endif
3674 				break;
3675 			}
3676 			default:
3677 				cerr << "Invalid output type: " << outType << endl;
3678 				throw 1;
3679 		}
3680 		if(gVerbose || startVerbose) {
3681 			cerr << "Dispatching to search driver: "; logTime(cerr, true);
3682 		}
3683 		// Set up global constraint
3684 		OutFileBuf *metricsOfb = NULL;
3685 		if(!metricsFile.empty() && metricsIval > 0) {
3686 			metricsOfb = new OutFileBuf(metricsFile);
3687 		}
3688 		// Do the search for all input reads
3689 		assert(patsrc != NULL);
3690 		assert(mssink != NULL);
3691 		multiseedSearch(
3692 			sc,      // scoring scheme
3693 			*patsrc, // pattern source
3694 			*mssink, // hit sink
3695 			ebwt,    // BWT
3696 			*ebwtBw, // BWT'
3697             refs.get(),
3698                         refnames,
3699 			metricsOfb);
3700 		// Evict any loaded indexes from memory
3701 		if(ebwt.isInMemory()) {
3702 			ebwt.evictFromMemory();
3703 		}
3704 		if(ebwtBw != NULL) {
3705 			delete ebwtBw;
3706 		}
3707 		if(!gQuiet && !seedSumm) {
3708 			size_t repThresh = mhits;
3709 			if(repThresh == 0) {
3710 				repThresh = std::numeric_limits<size_t>::max();
3711 			}
3712 #if 0
3713 			mssink->finish(
3714 				repThresh,
3715 				gReportDiscordant,
3716 				gReportMixed,
3717 				hadoopOut);
3718 #endif
3719 		}
3720         if(ssdb != NULL) {
3721             if(novelSpliceSiteOutfile != "") {
3722                 ofstream ssdb_file(novelSpliceSiteOutfile.c_str(), ios::out);
3723                 if(ssdb_file.is_open()) {
3724                     ssdb->print(ssdb_file);
3725                     ssdb_file.close();
3726                 }
3727             }
3728         }
3729 		oq.flush(true);
3730 		assert_eq(oq.numStarted(), oq.numFinished());
3731 		assert_eq(oq.numStarted(), oq.numFlushed());
3732 		delete patsrc;
3733 		delete mssink;
3734         delete ssdb;
3735 		delete metricsOfb;
3736 		if(fout != NULL) {
3737 			delete fout;
3738 		}
3739 	}
3740 }
3741 
3742 // C++ name mangling is disabled for the bowtie() function to make it
3743 // easier to use Bowtie as a library.
3744 extern "C" {
3745 
3746 /**
3747  * Main bowtie entry function.  Parses argc/argv style command-line
3748  * options, sets global configuration variables, and calls the driver()
3749  * function.
3750  */
hisat(int argc,const char ** argv)3751 int hisat(int argc, const char **argv) {
3752 	try {
3753 		// Reset all global state, including getopt state
3754 		opterr = optind = 1;
3755 		resetOptions();
3756 		for(int i = 0; i < argc; i++) {
3757 			argstr += argv[i];
3758 			if(i < argc-1) argstr += " ";
3759 		}
3760 		if(startVerbose) { cerr << "Entered main(): "; logTime(cerr, true); }
3761 		parseOptions(argc, argv);
3762 		argv0 = argv[0];
3763 		if(showVersion) {
3764 			cout << argv0 << " version " << HISAT_VERSION << endl;
3765 			if(sizeof(void*) == 4) {
3766 				cout << "32-bit" << endl;
3767 			} else if(sizeof(void*) == 8) {
3768 				cout << "64-bit" << endl;
3769 			} else {
3770 				cout << "Neither 32- nor 64-bit: sizeof(void*) = " << sizeof(void*) << endl;
3771 			}
3772 			cout << "Built on " << BUILD_HOST << endl;
3773 			cout << BUILD_TIME << endl;
3774 			cout << "Compiler: " << COMPILER_VERSION << endl;
3775 			cout << "Options: " << COMPILER_OPTIONS << endl;
3776 			cout << "Sizeof {int, long, long long, void*, size_t, off_t}: {"
3777 				 << sizeof(int)
3778 				 << ", " << sizeof(long) << ", " << sizeof(long long)
3779 				 << ", " << sizeof(void *) << ", " << sizeof(size_t)
3780 				 << ", " << sizeof(off_t) << "}" << endl;
3781 			return 0;
3782 		}
3783 		{
3784 			Timer _t(cerr, "Overall time: ", timing);
3785 			if(startVerbose) {
3786 				cerr << "Parsing index and read arguments: "; logTime(cerr, true);
3787 			}
3788 
3789 			// Get index basename (but only if it wasn't specified via --index)
3790 			if(bt2index.empty()) {
3791 				if(optind >= argc) {
3792 					cerr << "No index, query, or output file specified!" << endl;
3793 					printUsage(cerr);
3794 					return 1;
3795 				}
3796 				bt2index = argv[optind++];
3797 			}
3798 
3799 			// Get query filename
3800 			bool got_reads = !queries.empty() || !mates1.empty() || !mates12.empty();
3801 			if(optind >= argc) {
3802 				if(!got_reads) {
3803 					printUsage(cerr);
3804 					cerr << "***" << endl
3805 					     << "Error: Must specify at least one read input with -U/-1/-2" << endl;
3806 					return 1;
3807 				}
3808 			} else if(!got_reads) {
3809 				// Tokenize the list of query files
3810 				tokenize(argv[optind++], ",", queries);
3811 				if(queries.empty()) {
3812 					cerr << "Tokenized query file list was empty!" << endl;
3813 					printUsage(cerr);
3814 					return 1;
3815 				}
3816 			}
3817 
3818 			// Get output filename
3819 			if(optind < argc && outfile.empty()) {
3820 				outfile = argv[optind++];
3821 				cerr << "Warning: Output file '" << outfile.c_str()
3822 				     << "' was specified without -S.  This will not work in "
3823 					 << "future HISAT 2 versions.  Please use -S instead."
3824 					 << endl;
3825 			}
3826 
3827 			// Extra parametesr?
3828 			if(optind < argc) {
3829 				cerr << "Extra parameter(s) specified: ";
3830 				for(int i = optind; i < argc; i++) {
3831 					cerr << "\"" << argv[i] << "\"";
3832 					if(i < argc-1) cerr << ", ";
3833 				}
3834 				cerr << endl;
3835 				if(mates1.size() > 0) {
3836 					cerr << "Note that if <mates> files are specified using -1/-2, a <singles> file cannot" << endl
3837 						 << "also be specified.  Please run HISAT2 separately for mates and singles." << endl;
3838 				}
3839 				throw 1;
3840 			}
3841 
3842 			// Optionally summarize
3843 			if(gVerbose) {
3844 				cout << "Input bt2 file: \"" << bt2index.c_str() << "\"" << endl;
3845 				cout << "Query inputs (DNA, " << file_format_names[format].c_str() << "):" << endl;
3846 				for(size_t i = 0; i < queries.size(); i++) {
3847 					cout << "  " << queries[i].c_str() << endl;
3848 				}
3849 				cout << "Quality inputs:" << endl;
3850 				for(size_t i = 0; i < qualities.size(); i++) {
3851 					cout << "  " << qualities[i].c_str() << endl;
3852 				}
3853 				cout << "Output file: \"" << outfile.c_str() << "\"" << endl;
3854 				cout << "Local endianness: " << (currentlyBigEndian()? "big":"little") << endl;
3855 				cout << "Sanity checking: " << (sanityCheck? "enabled":"disabled") << endl;
3856 			#ifdef NDEBUG
3857 				cout << "Assertions: disabled" << endl;
3858 			#else
3859 				cout << "Assertions: enabled" << endl;
3860 			#endif
3861 			}
3862 			if(ipause) {
3863 				cout << "Press key to continue..." << endl;
3864 				getchar();
3865 			}
3866 			driver<SString<char> >("DNA", bt2index, outfile);
3867 		}
3868 		return 0;
3869 	} catch(std::exception& e) {
3870 		cerr << "Error: Encountered exception: '" << e.what() << "'" << endl;
3871 		cerr << "Command: ";
3872 		for(int i = 0; i < argc; i++) cerr << argv[i] << " ";
3873 		cerr << endl;
3874 		return 1;
3875 	} catch(int e) {
3876 		if(e != 0) {
3877 			cerr << "Error: Encountered internal HISAT2 exception (#" << e << ")" << endl;
3878 			cerr << "Command: ";
3879 			for(int i = 0; i < argc; i++) cerr << argv[i] << " ";
3880 			cerr << endl;
3881 		}
3882 		return e;
3883 	}
3884 } // bowtie()
3885 } // extern "C"
3886