1 /*
2  * Copyright 2011, Ben Langmead <langmea@cs.jhu.edu>
3  *
4  * This file is part of Bowtie 2.
5  *
6  * Bowtie 2 is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * Bowtie 2 is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with Bowtie 2.  If not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include <string>
21 #include <iostream>
22 #include <sstream>
23 #include <limits>
24 #include "ds.h"
25 #include "aligner_seed_policy.h"
26 #include "mem_ids.h"
27 
28 using namespace std;
29 
parseFuncType(const std::string & otype)30 static int parseFuncType(const std::string& otype) {
31 	string type = otype;
32 	if(type == "C" || type == "Constant") {
33 		return SIMPLE_FUNC_CONST;
34 	} else if(type == "L" || type == "Linear") {
35 		return SIMPLE_FUNC_LINEAR;
36 	} else if(type == "S" || type == "Sqrt") {
37 		return SIMPLE_FUNC_SQRT;
38 	} else if(type == "G" || type == "Log") {
39 		return SIMPLE_FUNC_LOG;
40 	}
41 	std::cerr << "Error: Bad function type '" << otype.c_str()
42 	          << "'.  Should be C (constant), L (linear), "
43 	          << "S (square root) or G (natural log)." << std::endl;
44 	throw 1;
45 }
46 
47 #define PARSE_FUNC(fv) { \
48 	if(ctoks.size() >= 1) { \
49 		fv.setType(parseFuncType(ctoks[0])); \
50 	} \
51 	if(ctoks.size() >= 2) { \
52 		double co; \
53 		istringstream tmpss(ctoks[1]); \
54 		tmpss >> co; \
55 		fv.setConst(co); \
56 	} \
57 	if(ctoks.size() >= 3) { \
58 		double ce; \
59 		istringstream tmpss(ctoks[2]); \
60 		tmpss >> ce; \
61 		fv.setCoeff(ce); \
62 	} \
63 	if(ctoks.size() >= 4) { \
64 		double mn; \
65 		istringstream tmpss(ctoks[3]); \
66 		tmpss >> mn; \
67 		fv.setMin(mn); \
68 	} \
69 	if(ctoks.size() >= 5) { \
70 		double mx; \
71 		istringstream tmpss(ctoks[4]); \
72 		tmpss >> mx; \
73 		fv.setMin(mx); \
74 	} \
75 }
76 
77 /**
78  * Parse alignment policy when provided in this format:
79  * <lab>=<val>;<lab>=<val>;<lab>=<val>...
80  *
81  * And label=value possibilities are:
82  *
83  * Bonus for a match
84  * -----------------
85  *
86  * MA=xx (default: MA=0, or MA=2 if --local is set)
87  *
88  *    xx = Each position where equal read and reference characters match up
89  *         in the alignment contriubtes this amount to the total score.
90  *
91  * Penalty for a mismatch
92  * ----------------------
93  *
94  * MMP={Cxx|Q|RQ} (default: MMP=C6)
95  *
96  *   Cxx = Each mismatch costs xx.  If MMP=Cxx is specified, quality
97  *         values are ignored when assessing penalities for mismatches.
98  *   Q   = Each mismatch incurs a penalty equal to the mismatched base's
99  *         value.
100  *   R   = Each mismatch incurs a penalty equal to the mismatched base's
101  *         rounded quality value.  Qualities are rounded off to the
102  *         nearest 10, and qualities greater than 30 are rounded to 30.
103  *
104  * Penalty for position with N (in either read or reference)
105  * ---------------------------------------------------------
106  *
107  * NP={Cxx|Q|RQ} (default: NP=C1)
108  *
109  *   Cxx = Each alignment position with an N in either the read or the
110  *         reference costs xx.  If NP=Cxx is specified, quality values are
111  *         ignored when assessing penalities for Ns.
112  *   Q   = Each alignment position with an N in either the read or the
113  *         reference incurs a penalty equal to the read base's quality
114  *         value.
115  *   R   = Each alignment position with an N in either the read or the
116  *         reference incurs a penalty equal to the read base's rounded
117  *         quality value.  Qualities are rounded off to the nearest 10,
118  *         and qualities greater than 30 are rounded to 30.
119  *
120  * Penalty for a read gap
121  * ----------------------
122  *
123  * RDG=xx,yy (default: RDG=5,3)
124  *
125  *   xx    = Read gap open penalty.
126  *   yy    = Read gap extension penalty.
127  *
128  * Total cost incurred by a read gap = xx + (yy * gap length)
129  *
130  * Penalty for a reference gap
131  * ---------------------------
132  *
133  * RFG=xx,yy (default: RFG=5,3)
134  *
135  *   xx    = Reference gap open penalty.
136  *   yy    = Reference gap extension penalty.
137  *
138  * Total cost incurred by a reference gap = xx + (yy * gap length)
139  *
140  * Minimum score for valid alignment
141  * ---------------------------------
142  *
143  * MIN=xx,yy (defaults: MIN=-0.6,-0.6, or MIN=0.0,0.66 if --local is set)
144  *
145  *   xx,yy = For a read of length N, the total score must be at least
146  *           xx + (read length * yy) for the alignment to be valid.  The
147  *           total score is the sum of all negative penalties (from
148  *           mismatches and gaps) and all positive bonuses.  The minimum
149  *           can be negative (and is by default in global alignment mode).
150  *
151  * Score floor for local alignment
152  * -------------------------------
153  *
154  * FL=xx,yy (defaults: FL=-Infinity,0.0, or FL=0.0,0.0 if --local is set)
155  *
156  *   xx,yy = If a cell in the dynamic programming table has a score less
157  *           than xx + (read length * yy), then no valid alignment can go
158  *           through it.  Defaults are highly recommended.
159  *
160  * N ceiling
161  * ---------
162  *
163  * NCEIL=xx,yy (default: NCEIL=0.0,0.15)
164  *
165  *   xx,yy = For a read of length N, the number of alignment
166  *           positions with an N in either the read or the
167  *           reference cannot exceed
168  *           ceiling = xx + (read length * yy).  If the ceiling is
169  *           exceeded, the alignment is considered invalid.
170  *
171  * Seeds
172  * -----
173  *
174  * SEED=mm,len,ival (default: SEED=0,22)
175  *
176  *   mm   = Maximum number of mismatches allowed within a seed.
177  *          Must be >= 0 and <= 2.  Note that 2-mismatch mode is
178  *          not fully sensitive; i.e. some 2-mismatch seed
179  *          alignments may be missed.
180  *   len  = Length of seed.
181  *   ival = Interval between seeds.  If not specified, seed
182  *          interval is determined by IVAL.
183  *
184  * Seed interval
185  * -------------
186  *
187  * IVAL={L|S|C},xx,yy (default: IVAL=S,1.0,0.0)
188  *
189  *   L  = let interval between seeds be a linear function of the
190  *        read length.  xx and yy are the constant and linear
191  *        coefficients respectively.  In other words, the interval
192  *        equals a * len + b, where len is the read length.
193  *        Intervals less than 1 are rounded up to 1.
194  *   S  = let interval between seeds be a function of the sqaure
195  *        root of the  read length.  xx and yy are the
196  *        coefficients.  In other words, the interval equals
197  *        a * sqrt(len) + b, where len is the read length.
198  *        Intervals less than 1 are rounded up to 1.
199  *   C  = Like S but uses cube root of length instead of square
200  *        root.
201  *
202  * Example 1:
203  *
204  *  SEED=1,10,5 and read sequence is TGCTATCGTACGATCGTAC:
205  *
206  *  The following seeds are extracted from the forward
207  *  representation of the read and aligned to the reference
208  *  allowing up to 1 mismatch:
209  *
210  *  Read:    TGCTATCGTACGATCGTACA
211  *
212  *  Seed 1+: TGCTATCGTA
213  *  Seed 2+:      TCGTACGATC
214  *  Seed 3+:           CGATCGTACA
215  *
216  *  ...and the following are extracted from the reverse-complement
217  *  representation of the read and align to the reference allowing
218  *  up to 1 mismatch:
219  *
220  *  Seed 1-: TACGATAGCA
221  *  Seed 2-:      GATCGTACGA
222  *  Seed 3-:           TGTACGATCG
223  *
224  * Example 2:
225  *
226  *  SEED=1,20,20 and read sequence is TGCTATCGTACGATC.  The seed
227  *  length is 20 but the read is only 15 characters long.  In this
228  *  case, Bowtie2 automatically shrinks the seed length to be equal
229  *  to the read length.
230  *
231  *  Read:    TGCTATCGTACGATC
232  *
233  *  Seed 1+: TGCTATCGTACGATC
234  *  Seed 1-: GATCGTACGATAGCA
235  *
236  * Example 3:
237  *
238  *  SEED=1,10,10 and read sequence is TGCTATCGTACGATC.  Only one seed
239  *  fits on the read; a second seed would overhang the end of the read
240  *  by 5 positions.  In this case, Bowtie2 extracts one seed.
241  *
242  *  Read:    TGCTATCGTACGATC
243  *
244  *  Seed 1+: TGCTATCGTA
245  *  Seed 1-: TACGATAGCA
246  */
parseString(const std::string & s,bool local,bool noisyHpolymer,bool ignoreQuals,int & bonusMatchType,int & bonusMatch,int & penMmcType,int & penMmcMax,int & penMmcMin,int & penNType,int & penN,int & penRdExConst,int & penRfExConst,int & penRdExLinear,int & penRfExLinear,SimpleFunc & costMin,SimpleFunc & nCeil,bool & nCatPair,int & multiseedMms,int & multiseedLen,SimpleFunc & multiseedIval,size_t & failStreak,size_t & seedRounds)247 void SeedAlignmentPolicy::parseString(
248 	const       std::string& s,
249 	bool        local,
250 	bool        noisyHpolymer,
251 	bool        ignoreQuals,
252 	int&        bonusMatchType,
253 	int&        bonusMatch,
254 	int&        penMmcType,
255 	int&        penMmcMax,
256 	int&        penMmcMin,
257 	int&        penNType,
258 	int&        penN,
259 	int&        penRdExConst,
260 	int&        penRfExConst,
261 	int&        penRdExLinear,
262 	int&        penRfExLinear,
263 	SimpleFunc& costMin,
264 	SimpleFunc& nCeil,
265 	bool&       nCatPair,
266 	int&        multiseedMms,
267 	int&        multiseedLen,
268 	SimpleFunc& multiseedIval,
269 	size_t&     failStreak,
270 	size_t&     seedRounds)
271 {
272 
273 	bonusMatchType    = local ? DEFAULT_MATCH_BONUS_TYPE_LOCAL : DEFAULT_MATCH_BONUS_TYPE;
274 	bonusMatch        = local ? DEFAULT_MATCH_BONUS_LOCAL : DEFAULT_MATCH_BONUS;
275 	penMmcType        = ignoreQuals ? DEFAULT_MM_PENALTY_TYPE_IGNORE_QUALS :
276 	                                  DEFAULT_MM_PENALTY_TYPE;
277 	penMmcMax         = DEFAULT_MM_PENALTY_MAX;
278 	penMmcMin         = DEFAULT_MM_PENALTY_MIN;
279 	penNType          = DEFAULT_N_PENALTY_TYPE;
280 	penN              = DEFAULT_N_PENALTY;
281 
282 	const double DMAX = std::numeric_limits<double>::max();
283 	costMin.init(
284 		local ? SIMPLE_FUNC_LOG : SIMPLE_FUNC_LINEAR,
285 		local ? DEFAULT_MIN_CONST_LOCAL  : DEFAULT_MIN_CONST,
286 		local ? DEFAULT_MIN_LINEAR_LOCAL : DEFAULT_MIN_LINEAR);
287 	nCeil.init(
288 		SIMPLE_FUNC_LINEAR, 0.0f, DMAX,
289 		DEFAULT_N_CEIL_CONST, DEFAULT_N_CEIL_LINEAR);
290 	multiseedIval.init(
291 		DEFAULT_IVAL, 1.0f, DMAX,
292 		DEFAULT_IVAL_B, DEFAULT_IVAL_A);
293 	nCatPair          = DEFAULT_N_CAT_PAIR;
294 
295 	if(!noisyHpolymer) {
296 		penRdExConst  = DEFAULT_READ_GAP_CONST;
297 		penRdExLinear = DEFAULT_READ_GAP_LINEAR;
298 		penRfExConst  = DEFAULT_REF_GAP_CONST;
299 		penRfExLinear = DEFAULT_REF_GAP_LINEAR;
300 	} else {
301 		penRdExConst  = DEFAULT_READ_GAP_CONST_BADHPOLY;
302 		penRdExLinear = DEFAULT_READ_GAP_LINEAR_BADHPOLY;
303 		penRfExConst  = DEFAULT_REF_GAP_CONST_BADHPOLY;
304 		penRfExLinear = DEFAULT_REF_GAP_LINEAR_BADHPOLY;
305 	}
306 
307 	multiseedMms      = DEFAULT_SEEDMMS;
308 	multiseedLen      = gDefaultSeedLen;
309 
310 	EList<string> toks(MISC_CAT);
311 	string tok;
312 	istringstream ss(s);
313 	int setting = 0;
314 	// Get each ;-separated token
315 	while(getline(ss, tok, ';')) {
316 		setting++;
317 		EList<string> etoks(MISC_CAT);
318 		string etok;
319 		// Divide into tokens on either side of =
320 		istringstream ess(tok);
321 		while(getline(ess, etok, '=')) {
322 			etoks.push_back(etok);
323 		}
324 		// Must be exactly 1 =
325 		if(etoks.size() != 2) {
326 			cerr << "Error parsing alignment policy setting " << setting
327 			     << "; must be bisected by = sign" << endl
328 				 << "Policy: " << s.c_str() << endl;
329 			assert(false); throw 1;
330 		}
331 		// LHS is tag, RHS value
332 		string tag = etoks[0], val = etoks[1];
333 		// Separate value into comma-separated tokens
334 		EList<string> ctoks(MISC_CAT);
335 		string ctok;
336 		istringstream css(val);
337 		while(getline(css, ctok, ',')) {
338 			ctoks.push_back(ctok);
339 		}
340 		if(ctoks.size() == 0) {
341 			cerr << "Error parsing alignment policy setting " << setting
342 			     << "; RHS must have at least 1 token" << endl
343 				 << "Policy: " << s.c_str() << endl;
344 			assert(false); throw 1;
345 		}
346 		for(size_t i = 0; i < ctoks.size(); i++) {
347 			if(ctoks[i].length() == 0) {
348 				cerr << "Error parsing alignment policy setting " << setting
349 				     << "; token " << i+1 << " on RHS had length=0" << endl
350 					 << "Policy: " << s.c_str() << endl;
351 				assert(false); throw 1;
352 			}
353 		}
354 		// Bonus for a match
355 		// MA=xx (default: MA=0, or MA=10 if --local is set)
356 		if(tag == "MA") {
357 			if(ctoks.size() != 1) {
358 				cerr << "Error parsing alignment policy setting " << setting
359 				     << "; RHS must have 1 token" << endl
360 					 << "Policy: " << s.c_str() << endl;
361 				assert(false); throw 1;
362 			}
363 			string tmp = ctoks[0];
364 			istringstream tmpss(tmp);
365 			tmpss >> bonusMatch;
366 		}
367 		// Scoring for mismatches
368 		// MMP={Cxx|Q|RQ}
369 		//        Cxx = constant, where constant is integer xx
370 		//        Qxx = equal to quality, scaled
371 		//        R   = equal to maq-rounded quality value (rounded to nearest
372 		//              10, can't be greater than 30)
373 		else if(tag == "MMP") {
374 			if(ctoks.size() > 3) {
375 				cerr << "Error parsing alignment policy setting "
376 				     << "'" << tag.c_str() << "'"
377 				     << "; RHS must have at most 3 tokens" << endl
378 					 << "Policy: '" << s.c_str() << "'" << endl;
379 				assert(false); throw 1;
380 			}
381 			if(ctoks[0][0] == 'C') {
382 				string tmp = ctoks[0].substr(1);
383 				// Parse constant penalty
384 				istringstream tmpss(tmp);
385 				tmpss >> penMmcMax;
386 				penMmcMin = penMmcMax;
387 				// Parse constant penalty
388 				penMmcType = COST_MODEL_CONSTANT;
389 			} else if(ctoks[0][0] == 'Q') {
390 				if(ctoks.size() >= 2) {
391 					string tmp = ctoks[1];
392 					istringstream tmpss(tmp);
393 					tmpss >> penMmcMax;
394 				} else {
395 					penMmcMax = DEFAULT_MM_PENALTY_MAX;
396 				}
397 				if(ctoks.size() >= 3) {
398 					string tmp = ctoks[2];
399 					istringstream tmpss(tmp);
400 					tmpss >> penMmcMin;
401 				} else {
402 					penMmcMin = DEFAULT_MM_PENALTY_MIN;
403 				}
404 				if(penMmcMin > penMmcMax) {
405 					cerr << "Error: Maximum mismatch penalty (" << penMmcMax
406 					     << ") is less than minimum penalty (" << penMmcMin
407 						 << endl;
408 					throw 1;
409 				}
410 				// Set type to =quality
411 				penMmcType = COST_MODEL_QUAL;
412 				/* The match penalty may get set before
413 				 * the --ignore-quals parameter gets parsed.
414 				 * Since the match penalty policy depends on
415 				 * the whether or not --ignore-quals was set
416 				 * we do a check here and adjust the parameter
417 				 * accordingly.
418 				 */
419 				if (ignoreQuals) {
420 					if (gVerbose)
421 						cerr << "Changing MMP=Q," << penMmcMax << " to ";
422 					penMmcMin = penMmcMax;
423 					penMmcType = COST_MODEL_CONSTANT;
424 					if (gVerbose) {
425 						cerr << "MMP=C," << penMmcMax
426 						     << " because of --ignore-quals"
427 						     << endl;
428 					}
429 
430 				}
431 			} else if(ctoks[0][0] == 'R') {
432 				// Set type to=Maq-quality
433 				penMmcType = COST_MODEL_ROUNDED_QUAL;
434 			} else {
435 				cerr << "Error parsing alignment policy setting "
436 				     << "'" << tag.c_str() << "'"
437 				     << "; RHS must start with C, Q or R" << endl
438 					 << "Policy: '" << s.c_str() << "'" << endl;
439 				assert(false); throw 1;
440 			}
441 		}
442 		// Scoring for mismatches where read char=N
443 		// NP={Cxx|Q|RQ}
444 		//        Cxx = constant, where constant is integer xx
445 		//        Q   = equal to quality
446 		//        R   = equal to maq-rounded quality value (rounded to nearest
447 		//              10, can't be greater than 30)
448 		else if(tag == "NP") {
449 			if(ctoks.size() != 1) {
450 				cerr << "Error parsing alignment policy setting "
451 				     << "'" << tag.c_str() << "'"
452 				     << "; RHS must have 1 token" << endl
453 					 << "Policy: '" << s.c_str() << "'" << endl;
454 				assert(false); throw 1;
455 			}
456 			if(ctoks[0][0] == 'C') {
457 				string tmp = ctoks[0].substr(1);
458 				// Parse constant penalty
459 				istringstream tmpss(tmp);
460 				tmpss >> penN;
461 				// Parse constant penalty
462 				penNType = COST_MODEL_CONSTANT;
463 			} else if(ctoks[0][0] == 'Q') {
464 				// Set type to =quality
465 				penNType = COST_MODEL_QUAL;
466 			} else if(ctoks[0][0] == 'R') {
467 				// Set type to=Maq-quality
468 				penNType = COST_MODEL_ROUNDED_QUAL;
469 			} else {
470 				cerr << "Error parsing alignment policy setting "
471 				     << "'" << tag.c_str() << "'"
472 				     << "; RHS must start with C, Q or R" << endl
473 					 << "Policy: '" << s.c_str() << "'" << endl;
474 				assert(false); throw 1;
475 			}
476 		}
477 		// Scoring for read gaps
478 		// RDG=xx,yy,zz
479 		//        xx = read gap open penalty
480 		//        yy = read gap extension penalty constant coefficient
481 		//             (defaults to open penalty)
482 		//        zz = read gap extension penalty linear coefficient
483 		//             (defaults to 0)
484 		else if(tag == "RDG") {
485 			if(ctoks.size() >= 1) {
486 				istringstream tmpss(ctoks[0]);
487 				tmpss >> penRdExConst;
488 			} else {
489 				penRdExConst = noisyHpolymer ?
490 					DEFAULT_READ_GAP_CONST_BADHPOLY :
491 					DEFAULT_READ_GAP_CONST;
492 			}
493 			if(ctoks.size() >= 2) {
494 				istringstream tmpss(ctoks[1]);
495 				tmpss >> penRdExLinear;
496 			} else {
497 				penRdExLinear = noisyHpolymer ?
498 					DEFAULT_READ_GAP_LINEAR_BADHPOLY :
499 					DEFAULT_READ_GAP_LINEAR;
500 			}
501 		}
502 		// Scoring for reference gaps
503 		// RFG=xx,yy,zz
504 		//        xx = ref gap open penalty
505 		//        yy = ref gap extension penalty constant coefficient
506 		//             (defaults to open penalty)
507 		//        zz = ref gap extension penalty linear coefficient
508 		//             (defaults to 0)
509 		else if(tag == "RFG") {
510 			if(ctoks.size() >= 1) {
511 				istringstream tmpss(ctoks[0]);
512 				tmpss >> penRfExConst;
513 			} else {
514 				penRfExConst = noisyHpolymer ?
515 					DEFAULT_REF_GAP_CONST_BADHPOLY :
516 					DEFAULT_REF_GAP_CONST;
517 			}
518 			if(ctoks.size() >= 2) {
519 				istringstream tmpss(ctoks[1]);
520 				tmpss >> penRfExLinear;
521 			} else {
522 				penRfExLinear = noisyHpolymer ?
523 					DEFAULT_REF_GAP_LINEAR_BADHPOLY :
524 					DEFAULT_REF_GAP_LINEAR;
525 			}
526 		}
527 		// Minimum score as a function of read length
528 		// MIN=xx,yy
529 		//        xx = constant coefficient
530 		//        yy = linear coefficient
531 		else if(tag == "MIN") {
532 			PARSE_FUNC(costMin);
533 		}
534 		// Per-read N ceiling as a function of read length
535 		// NCEIL=xx,yy
536 		//        xx = N ceiling constant coefficient
537 		//        yy = N ceiling linear coefficient (set to 0 if unspecified)
538 		else if(tag == "NCEIL") {
539 			PARSE_FUNC(nCeil);
540 		}
541 		/*
542 		 * Seeds
543 		 * -----
544 		 *
545 		 * SEED=mm,len,ival (default: SEED=0,22)
546 		 *
547 		 *   mm   = Maximum number of mismatches allowed within a seed.
548 		 *          Must be >= 0 and <= 2.  Note that 2-mismatch mode is
549 		 *          not fully sensitive; i.e. some 2-mismatch seed
550 		 *          alignments may be missed.
551 		 *   len  = Length of seed.
552 		 *   ival = Interval between seeds.  If not specified, seed
553 		 *          interval is determined by IVAL.
554 		 */
555 		else if(tag == "SEED") {
556 			if(ctoks.size() > 1) {
557 				cerr << "Error parsing alignment policy setting "
558 				     << "'" << tag.c_str() << "'; RHS must have 1 token, "
559 					 << "had " << ctoks.size() << ".  "
560 					 << "Policy: '" << s.c_str() << "'" << endl;
561 				assert(false); throw 1;
562 			}
563 			if(ctoks.size() >= 1) {
564 				istringstream tmpss(ctoks[0]);
565 				tmpss >> multiseedMms;
566 				if(multiseedMms > 1) {
567 					cerr << "Error: -N was set to " << multiseedMms << ", but cannot be set greater than 1" << endl;
568 					throw 1;
569 				}
570 				if(multiseedMms < 0) {
571 					cerr << "Error: -N was set to a number less than 0 (" << multiseedMms << ")" << endl;
572 					throw 1;
573 				}
574 			}
575 		}
576 		else if(tag == "SEEDLEN") {
577 			if(ctoks.size() > 1) {
578 				cerr << "Error parsing alignment policy setting "
579 				     << "'" << tag.c_str() << "'; RHS must have 1 token, "
580 					 << "had " << ctoks.size() << ".  "
581 					 << "Policy: '" << s.c_str() << "'" << endl;
582 				assert(false); throw 1;
583 			}
584 			if(ctoks.size() >= 1) {
585 				istringstream tmpss(ctoks[0]);
586 				tmpss >> multiseedLen;
587 			}
588 		}
589 		else if(tag == "DPS") {
590 			if(ctoks.size() > 1) {
591 				cerr << "Error parsing alignment policy setting "
592 				     << "'" << tag.c_str() << "'; RHS must have 1 token, "
593 					 << "had " << ctoks.size() << ".  "
594 					 << "Policy: '" << s.c_str() << "'" << endl;
595 				assert(false); throw 1;
596 			}
597 			if(ctoks.size() >= 1) {
598 				istringstream tmpss(ctoks[0]);
599 				tmpss >> failStreak;
600 			}
601 		}
602 		else if(tag == "ROUNDS") {
603 			if(ctoks.size() > 1) {
604 				cerr << "Error parsing alignment policy setting "
605 				     << "'" << tag.c_str() << "'; RHS must have 1 token, "
606 					 << "had " << ctoks.size() << ".  "
607 					 << "Policy: '" << s.c_str() << "'" << endl;
608 				assert(false); throw 1;
609 			}
610 			if(ctoks.size() >= 1) {
611 				istringstream tmpss(ctoks[0]);
612 				tmpss >> seedRounds;
613 			}
614 		}
615 		/*
616 		 * Seed interval
617 		 * -------------
618 		 *
619 		 * IVAL={L|S|C},a,b (default: IVAL=S,1.0,0.0)
620 		 *
621 		 *   L  = let interval between seeds be a linear function of the
622 		 *        read length.  xx and yy are the constant and linear
623 		 *        coefficients respectively.  In other words, the interval
624 		 *        equals a * len + b, where len is the read length.
625 		 *        Intervals less than 1 are rounded up to 1.
626 		 *   S  = let interval between seeds be a function of the sqaure
627 		 *        root of the  read length.  xx and yy are the
628 		 *        coefficients.  In other words, the interval equals
629 		 *        a * sqrt(len) + b, where len is the read length.
630 		 *        Intervals less than 1 are rounded up to 1.
631 		 *   C  = Like S but uses cube root of length instead of square
632 		 *        root.
633 		 */
634 		else if(tag == "IVAL") {
635 			PARSE_FUNC(multiseedIval);
636 		}
637 		else {
638 			// Unknown tag
639 			cerr << "Unexpected alignment policy setting "
640 				 << "'" << tag.c_str() << "'" << endl
641 				 << "Policy: '" << s.c_str() << "'" << endl;
642 			assert(false); throw 1;
643 		}
644 	}
645 }
646 
647 #ifdef ALIGNER_SEED_POLICY_MAIN
main()648 int main() {
649 
650 	int bonusMatchType;
651 	int bonusMatch;
652 	int penMmcType;
653 	int penMmc;
654 	int penNType;
655 	int penN;
656 	int penRdExConst;
657 	int penRfExConst;
658 	int penRdExLinear;
659 	int penRfExLinear;
660 	SimpleFunc costMin;
661 	SimpleFunc costFloor;
662 	SimpleFunc nCeil;
663 	bool nCatPair;
664 	int multiseedMms;
665 	int multiseedLen;
666 	SimpleFunc msIval;
667 	SimpleFunc posfrac;
668 	SimpleFunc rowmult;
669 	uint32_t mhits;
670 
671 	{
672 		cout << "Case 1: Defaults 1 ... ";
673 		const char *pol = "";
674 		SeedAlignmentPolicy::parseString(
675 			string(pol),
676 			false,              // --local?
677 			false,              // noisy homopolymers a la 454?
678 			false,              // ignore qualities?
679 			bonusMatchType,
680 			bonusMatch,
681 			penMmcType,
682 			penMmc,
683 			penNType,
684 			penN,
685 			penRdExConst,
686 			penRfExConst,
687 			penRdExLinear,
688 			penRfExLinear,
689 			costMin,
690 			costFloor,
691 			nCeil,
692 			nCatPair,
693 			multiseedMms,
694 			multiseedLen,
695 			msIval,
696 			mhits);
697 
698 		assert_eq(DEFAULT_MATCH_BONUS_TYPE,   bonusMatchType);
699 		assert_eq(DEFAULT_MATCH_BONUS,        bonusMatch);
700 		assert_eq(DEFAULT_MM_PENALTY_TYPE,    penMmcType);
701 		assert_eq(DEFAULT_MM_PENALTY_MAX,     penMmcMax);
702 		assert_eq(DEFAULT_MM_PENALTY_MIN,     penMmcMin);
703 		assert_eq(DEFAULT_N_PENALTY_TYPE,     penNType);
704 		assert_eq(DEFAULT_N_PENALTY,          penN);
705 		assert_eq(DEFAULT_MIN_CONST,          costMin.getConst());
706 		assert_eq(DEFAULT_MIN_LINEAR,         costMin.getCoeff());
707 		assert_eq(DEFAULT_FLOOR_CONST,        costFloor.getConst());
708 		assert_eq(DEFAULT_FLOOR_LINEAR,       costFloor.getCoeff());
709 		assert_eq(DEFAULT_N_CEIL_CONST,       nCeil.getConst());
710 		assert_eq(DEFAULT_N_CAT_PAIR,         nCatPair);
711 
712 		assert_eq(DEFAULT_READ_GAP_CONST,     penRdExConst);
713 		assert_eq(DEFAULT_READ_GAP_LINEAR,    penRdExLinear);
714 		assert_eq(DEFAULT_REF_GAP_CONST,      penRfExConst);
715 		assert_eq(DEFAULT_REF_GAP_LINEAR,     penRfExLinear);
716 		assert_eq(DEFAULT_SEEDMMS,            multiseedMms);
717 		assert_eq(DEFAULT_IVAL,               msIval.getType());
718 		assert_eq(DEFAULT_IVAL_A,             msIval.getCoeff());
719 		assert_eq(DEFAULT_IVAL_B,             msIval.getConst());
720 
721 		cout << "PASSED" << endl;
722 	}
723 
724 	{
725 		cout << "Case 2: Defaults 2 ... ";
726 		const char *pol = "";
727 		SeedAlignmentPolicy::parseString(
728 			string(pol),
729 			false,              // --local?
730 			true,               // noisy homopolymers a la 454?
731 			false,              // ignore qualities?
732 			bonusMatchType,
733 			bonusMatch,
734 			penMmcType,
735 			penMmc,
736 			penNType,
737 			penN,
738 			penRdExConst,
739 			penRfExConst,
740 			penRdExLinear,
741 			penRfExLinear,
742 			costMin,
743 			costFloor,
744 			nCeil,
745 			nCatPair,
746 			multiseedMms,
747 			multiseedLen,
748 			msIval,
749 			mhits);
750 
751 		assert_eq(DEFAULT_MATCH_BONUS_TYPE,   bonusMatchType);
752 		assert_eq(DEFAULT_MATCH_BONUS,        bonusMatch);
753 		assert_eq(DEFAULT_MM_PENALTY_TYPE,    penMmcType);
754 		assert_eq(DEFAULT_MM_PENALTY_MAX,     penMmc);
755 		assert_eq(DEFAULT_MM_PENALTY_MIN,     penMmc);
756 		assert_eq(DEFAULT_N_PENALTY_TYPE,     penNType);
757 		assert_eq(DEFAULT_N_PENALTY,          penN);
758 		assert_eq(DEFAULT_MIN_CONST,          costMin.getConst());
759 		assert_eq(DEFAULT_MIN_LINEAR,         costMin.getCoeff());
760 		assert_eq(DEFAULT_FLOOR_CONST,        costFloor.getConst());
761 		assert_eq(DEFAULT_FLOOR_LINEAR,       costFloor.getCoeff());
762 		assert_eq(DEFAULT_N_CEIL_CONST,       nCeil.getConst());
763 		assert_eq(DEFAULT_N_CAT_PAIR,         nCatPair);
764 
765 		assert_eq(DEFAULT_READ_GAP_CONST_BADHPOLY,  penRdExConst);
766 		assert_eq(DEFAULT_READ_GAP_LINEAR_BADHPOLY, penRdExLinear);
767 		assert_eq(DEFAULT_REF_GAP_CONST_BADHPOLY,   penRfExConst);
768 		assert_eq(DEFAULT_REF_GAP_LINEAR_BADHPOLY,  penRfExLinear);
769 		assert_eq(DEFAULT_SEEDMMS,            multiseedMms);
770 		assert_eq(DEFAULT_IVAL,               msIval.getType());
771 		assert_eq(DEFAULT_IVAL_A,             msIval.getCoeff());
772 		assert_eq(DEFAULT_IVAL_B,             msIval.getConst());
773 
774 		cout << "PASSED" << endl;
775 	}
776 
777 	{
778 		cout << "Case 3: Defaults 3 ... ";
779 		const char *pol = "";
780 		SeedAlignmentPolicy::parseString(
781 			string(pol),
782 			true,               // --local?
783 			false,              // noisy homopolymers a la 454?
784 			false,              // ignore qualities?
785 			bonusMatchType,
786 			bonusMatch,
787 			penMmcType,
788 			penMmc,
789 			penNType,
790 			penN,
791 			penRdExConst,
792 			penRfExConst,
793 			penRdExLinear,
794 			penRfExLinear,
795 			costMin,
796 			costFloor,
797 			nCeil,
798 			nCatPair,
799 			multiseedMms,
800 			multiseedLen,
801 			msIval,
802 			mhits);
803 
804 		assert_eq(DEFAULT_MATCH_BONUS_TYPE_LOCAL,   bonusMatchType);
805 		assert_eq(DEFAULT_MATCH_BONUS_LOCAL,        bonusMatch);
806 		assert_eq(DEFAULT_MM_PENALTY_TYPE,    penMmcType);
807 		assert_eq(DEFAULT_MM_PENALTY_MAX,     penMmcMax);
808 		assert_eq(DEFAULT_MM_PENALTY_MIN,     penMmcMin);
809 		assert_eq(DEFAULT_N_PENALTY_TYPE,     penNType);
810 		assert_eq(DEFAULT_N_PENALTY,          penN);
811 		assert_eq(DEFAULT_MIN_CONST_LOCAL,    costMin.getConst());
812 		assert_eq(DEFAULT_MIN_LINEAR_LOCAL,   costMin.getCoeff());
813 		assert_eq(DEFAULT_FLOOR_CONST_LOCAL,  costFloor.getConst());
814 		assert_eq(DEFAULT_FLOOR_LINEAR_LOCAL, costFloor.getCoeff());
815 		assert_eq(DEFAULT_N_CEIL_CONST,       nCeil.getConst());
816 		assert_eq(DEFAULT_N_CEIL_LINEAR,      nCeil.getCoeff());
817 		assert_eq(DEFAULT_N_CAT_PAIR,         nCatPair);
818 
819 		assert_eq(DEFAULT_READ_GAP_CONST,     penRdExConst);
820 		assert_eq(DEFAULT_READ_GAP_LINEAR,    penRdExLinear);
821 		assert_eq(DEFAULT_REF_GAP_CONST,      penRfExConst);
822 		assert_eq(DEFAULT_REF_GAP_LINEAR,     penRfExLinear);
823 		assert_eq(DEFAULT_SEEDMMS,            multiseedMms);
824 		assert_eq(DEFAULT_IVAL,               msIval.getType());
825 		assert_eq(DEFAULT_IVAL_A,             msIval.getCoeff());
826 		assert_eq(DEFAULT_IVAL_B,             msIval.getConst());
827 
828 		cout << "PASSED" << endl;
829 	}
830 
831 	{
832 		cout << "Case 4: Simple string 1 ... ";
833 		const char *pol = "MMP=C44;MA=4;RFG=24,12;FL=C,8;RDG=2;NP=C4;MIN=C,7";
834 		SeedAlignmentPolicy::parseString(
835 			string(pol),
836 			true,               // --local?
837 			false,              // noisy homopolymers a la 454?
838 			false,              // ignore qualities?
839 			bonusMatchType,
840 			bonusMatch,
841 			penMmcType,
842 			penMmc,
843 			penNType,
844 			penN,
845 			penRdExConst,
846 			penRfExConst,
847 			penRdExLinear,
848 			penRfExLinear,
849 			costMin,
850 			costFloor,
851 			nCeil,
852 			nCatPair,
853 			multiseedMms,
854 			multiseedLen,
855 			msIval,
856 			mhits);
857 
858 		assert_eq(COST_MODEL_CONSTANT,        bonusMatchType);
859 		assert_eq(4,                          bonusMatch);
860 		assert_eq(COST_MODEL_CONSTANT,        penMmcType);
861 		assert_eq(44,                         penMmc);
862 		assert_eq(COST_MODEL_CONSTANT,        penNType);
863 		assert_eq(4.0f,                       penN);
864 		assert_eq(7,                          costMin.getConst());
865 		assert_eq(DEFAULT_MIN_LINEAR_LOCAL,   costMin.getCoeff());
866 		assert_eq(8,                          costFloor.getConst());
867 		assert_eq(DEFAULT_FLOOR_LINEAR_LOCAL, costFloor.getCoeff());
868 		assert_eq(DEFAULT_N_CEIL_CONST,       nCeil.getConst());
869 		assert_eq(DEFAULT_N_CEIL_LINEAR,      nCeil.getCoeff());
870 		assert_eq(DEFAULT_N_CAT_PAIR,         nCatPair);
871 
872 		assert_eq(2.0f,                       penRdExConst);
873 		assert_eq(DEFAULT_READ_GAP_LINEAR,    penRdExLinear);
874 		assert_eq(24.0f,                      penRfExConst);
875 		assert_eq(12.0f,                      penRfExLinear);
876 		assert_eq(DEFAULT_SEEDMMS,            multiseedMms);
877 		assert_eq(DEFAULT_IVAL,               msIval.getType());
878 		assert_eq(DEFAULT_IVAL_A,             msIval.getCoeff());
879 		assert_eq(DEFAULT_IVAL_B,             msIval.getConst());
880 
881 		cout << "PASSED" << endl;
882 	}
883 }
884 #endif /*def ALIGNER_SEED_POLICY_MAIN*/
885