1 /*
2 * Copyright 2011, Ben Langmead <langmea@cs.jhu.edu>
3 *
4 * This file is part of Bowtie 2.
5 *
6 * Bowtie 2 is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * Bowtie 2 is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include <string>
21 #include <iostream>
22 #include <sstream>
23 #include <limits>
24 #include "ds.h"
25 #include "aligner_seed_policy.h"
26 #include "mem_ids.h"
27
28 using namespace std;
29
parseFuncType(const std::string & otype)30 static int parseFuncType(const std::string& otype) {
31 string type = otype;
32 if(type == "C" || type == "Constant") {
33 return SIMPLE_FUNC_CONST;
34 } else if(type == "L" || type == "Linear") {
35 return SIMPLE_FUNC_LINEAR;
36 } else if(type == "S" || type == "Sqrt") {
37 return SIMPLE_FUNC_SQRT;
38 } else if(type == "G" || type == "Log") {
39 return SIMPLE_FUNC_LOG;
40 }
41 std::cerr << "Error: Bad function type '" << otype.c_str()
42 << "'. Should be C (constant), L (linear), "
43 << "S (square root) or G (natural log)." << std::endl;
44 throw 1;
45 }
46
47 #define PARSE_FUNC(fv) { \
48 if(ctoks.size() >= 1) { \
49 fv.setType(parseFuncType(ctoks[0])); \
50 } \
51 if(ctoks.size() >= 2) { \
52 double co; \
53 istringstream tmpss(ctoks[1]); \
54 tmpss >> co; \
55 fv.setConst(co); \
56 } \
57 if(ctoks.size() >= 3) { \
58 double ce; \
59 istringstream tmpss(ctoks[2]); \
60 tmpss >> ce; \
61 fv.setCoeff(ce); \
62 } \
63 if(ctoks.size() >= 4) { \
64 double mn; \
65 istringstream tmpss(ctoks[3]); \
66 tmpss >> mn; \
67 fv.setMin(mn); \
68 } \
69 if(ctoks.size() >= 5) { \
70 double mx; \
71 istringstream tmpss(ctoks[4]); \
72 tmpss >> mx; \
73 fv.setMin(mx); \
74 } \
75 }
76
77 /**
78 * Parse alignment policy when provided in this format:
79 * <lab>=<val>;<lab>=<val>;<lab>=<val>...
80 *
81 * And label=value possibilities are:
82 *
83 * Bonus for a match
84 * -----------------
85 *
86 * MA=xx (default: MA=0, or MA=2 if --local is set)
87 *
88 * xx = Each position where equal read and reference characters match up
89 * in the alignment contriubtes this amount to the total score.
90 *
91 * Penalty for a mismatch
92 * ----------------------
93 *
94 * MMP={Cxx|Q|RQ} (default: MMP=C6)
95 *
96 * Cxx = Each mismatch costs xx. If MMP=Cxx is specified, quality
97 * values are ignored when assessing penalities for mismatches.
98 * Q = Each mismatch incurs a penalty equal to the mismatched base's
99 * value.
100 * R = Each mismatch incurs a penalty equal to the mismatched base's
101 * rounded quality value. Qualities are rounded off to the
102 * nearest 10, and qualities greater than 30 are rounded to 30.
103 *
104 * Penalty for position with N (in either read or reference)
105 * ---------------------------------------------------------
106 *
107 * NP={Cxx|Q|RQ} (default: NP=C1)
108 *
109 * Cxx = Each alignment position with an N in either the read or the
110 * reference costs xx. If NP=Cxx is specified, quality values are
111 * ignored when assessing penalities for Ns.
112 * Q = Each alignment position with an N in either the read or the
113 * reference incurs a penalty equal to the read base's quality
114 * value.
115 * R = Each alignment position with an N in either the read or the
116 * reference incurs a penalty equal to the read base's rounded
117 * quality value. Qualities are rounded off to the nearest 10,
118 * and qualities greater than 30 are rounded to 30.
119 *
120 * Penalty for a read gap
121 * ----------------------
122 *
123 * RDG=xx,yy (default: RDG=5,3)
124 *
125 * xx = Read gap open penalty.
126 * yy = Read gap extension penalty.
127 *
128 * Total cost incurred by a read gap = xx + (yy * gap length)
129 *
130 * Penalty for a reference gap
131 * ---------------------------
132 *
133 * RFG=xx,yy (default: RFG=5,3)
134 *
135 * xx = Reference gap open penalty.
136 * yy = Reference gap extension penalty.
137 *
138 * Total cost incurred by a reference gap = xx + (yy * gap length)
139 *
140 * Minimum score for valid alignment
141 * ---------------------------------
142 *
143 * MIN=xx,yy (defaults: MIN=-0.6,-0.6, or MIN=0.0,0.66 if --local is set)
144 *
145 * xx,yy = For a read of length N, the total score must be at least
146 * xx + (read length * yy) for the alignment to be valid. The
147 * total score is the sum of all negative penalties (from
148 * mismatches and gaps) and all positive bonuses. The minimum
149 * can be negative (and is by default in global alignment mode).
150 *
151 * Score floor for local alignment
152 * -------------------------------
153 *
154 * FL=xx,yy (defaults: FL=-Infinity,0.0, or FL=0.0,0.0 if --local is set)
155 *
156 * xx,yy = If a cell in the dynamic programming table has a score less
157 * than xx + (read length * yy), then no valid alignment can go
158 * through it. Defaults are highly recommended.
159 *
160 * N ceiling
161 * ---------
162 *
163 * NCEIL=xx,yy (default: NCEIL=0.0,0.15)
164 *
165 * xx,yy = For a read of length N, the number of alignment
166 * positions with an N in either the read or the
167 * reference cannot exceed
168 * ceiling = xx + (read length * yy). If the ceiling is
169 * exceeded, the alignment is considered invalid.
170 *
171 * Seeds
172 * -----
173 *
174 * SEED=mm,len,ival (default: SEED=0,22)
175 *
176 * mm = Maximum number of mismatches allowed within a seed.
177 * Must be >= 0 and <= 2. Note that 2-mismatch mode is
178 * not fully sensitive; i.e. some 2-mismatch seed
179 * alignments may be missed.
180 * len = Length of seed.
181 * ival = Interval between seeds. If not specified, seed
182 * interval is determined by IVAL.
183 *
184 * Seed interval
185 * -------------
186 *
187 * IVAL={L|S|C},xx,yy (default: IVAL=S,1.0,0.0)
188 *
189 * L = let interval between seeds be a linear function of the
190 * read length. xx and yy are the constant and linear
191 * coefficients respectively. In other words, the interval
192 * equals a * len + b, where len is the read length.
193 * Intervals less than 1 are rounded up to 1.
194 * S = let interval between seeds be a function of the sqaure
195 * root of the read length. xx and yy are the
196 * coefficients. In other words, the interval equals
197 * a * sqrt(len) + b, where len is the read length.
198 * Intervals less than 1 are rounded up to 1.
199 * C = Like S but uses cube root of length instead of square
200 * root.
201 *
202 * Example 1:
203 *
204 * SEED=1,10,5 and read sequence is TGCTATCGTACGATCGTAC:
205 *
206 * The following seeds are extracted from the forward
207 * representation of the read and aligned to the reference
208 * allowing up to 1 mismatch:
209 *
210 * Read: TGCTATCGTACGATCGTACA
211 *
212 * Seed 1+: TGCTATCGTA
213 * Seed 2+: TCGTACGATC
214 * Seed 3+: CGATCGTACA
215 *
216 * ...and the following are extracted from the reverse-complement
217 * representation of the read and align to the reference allowing
218 * up to 1 mismatch:
219 *
220 * Seed 1-: TACGATAGCA
221 * Seed 2-: GATCGTACGA
222 * Seed 3-: TGTACGATCG
223 *
224 * Example 2:
225 *
226 * SEED=1,20,20 and read sequence is TGCTATCGTACGATC. The seed
227 * length is 20 but the read is only 15 characters long. In this
228 * case, Bowtie2 automatically shrinks the seed length to be equal
229 * to the read length.
230 *
231 * Read: TGCTATCGTACGATC
232 *
233 * Seed 1+: TGCTATCGTACGATC
234 * Seed 1-: GATCGTACGATAGCA
235 *
236 * Example 3:
237 *
238 * SEED=1,10,10 and read sequence is TGCTATCGTACGATC. Only one seed
239 * fits on the read; a second seed would overhang the end of the read
240 * by 5 positions. In this case, Bowtie2 extracts one seed.
241 *
242 * Read: TGCTATCGTACGATC
243 *
244 * Seed 1+: TGCTATCGTA
245 * Seed 1-: TACGATAGCA
246 */
parseString(const std::string & s,bool local,bool noisyHpolymer,bool ignoreQuals,int & bonusMatchType,int & bonusMatch,int & penMmcType,int & penMmcMax,int & penMmcMin,int & penNType,int & penN,int & penRdExConst,int & penRfExConst,int & penRdExLinear,int & penRfExLinear,SimpleFunc & costMin,SimpleFunc & nCeil,bool & nCatPair,int & multiseedMms,int & multiseedLen,SimpleFunc & multiseedIval,size_t & failStreak,size_t & seedRounds)247 void SeedAlignmentPolicy::parseString(
248 const std::string& s,
249 bool local,
250 bool noisyHpolymer,
251 bool ignoreQuals,
252 int& bonusMatchType,
253 int& bonusMatch,
254 int& penMmcType,
255 int& penMmcMax,
256 int& penMmcMin,
257 int& penNType,
258 int& penN,
259 int& penRdExConst,
260 int& penRfExConst,
261 int& penRdExLinear,
262 int& penRfExLinear,
263 SimpleFunc& costMin,
264 SimpleFunc& nCeil,
265 bool& nCatPair,
266 int& multiseedMms,
267 int& multiseedLen,
268 SimpleFunc& multiseedIval,
269 size_t& failStreak,
270 size_t& seedRounds)
271 {
272
273 bonusMatchType = local ? DEFAULT_MATCH_BONUS_TYPE_LOCAL : DEFAULT_MATCH_BONUS_TYPE;
274 bonusMatch = local ? DEFAULT_MATCH_BONUS_LOCAL : DEFAULT_MATCH_BONUS;
275 penMmcType = ignoreQuals ? DEFAULT_MM_PENALTY_TYPE_IGNORE_QUALS :
276 DEFAULT_MM_PENALTY_TYPE;
277 penMmcMax = DEFAULT_MM_PENALTY_MAX;
278 penMmcMin = DEFAULT_MM_PENALTY_MIN;
279 penNType = DEFAULT_N_PENALTY_TYPE;
280 penN = DEFAULT_N_PENALTY;
281
282 const double DMAX = std::numeric_limits<double>::max();
283 costMin.init(
284 local ? SIMPLE_FUNC_LOG : SIMPLE_FUNC_LINEAR,
285 local ? DEFAULT_MIN_CONST_LOCAL : DEFAULT_MIN_CONST,
286 local ? DEFAULT_MIN_LINEAR_LOCAL : DEFAULT_MIN_LINEAR);
287 nCeil.init(
288 SIMPLE_FUNC_LINEAR, 0.0f, DMAX,
289 DEFAULT_N_CEIL_CONST, DEFAULT_N_CEIL_LINEAR);
290 multiseedIval.init(
291 DEFAULT_IVAL, 1.0f, DMAX,
292 DEFAULT_IVAL_B, DEFAULT_IVAL_A);
293 nCatPair = DEFAULT_N_CAT_PAIR;
294
295 if(!noisyHpolymer) {
296 penRdExConst = DEFAULT_READ_GAP_CONST;
297 penRdExLinear = DEFAULT_READ_GAP_LINEAR;
298 penRfExConst = DEFAULT_REF_GAP_CONST;
299 penRfExLinear = DEFAULT_REF_GAP_LINEAR;
300 } else {
301 penRdExConst = DEFAULT_READ_GAP_CONST_BADHPOLY;
302 penRdExLinear = DEFAULT_READ_GAP_LINEAR_BADHPOLY;
303 penRfExConst = DEFAULT_REF_GAP_CONST_BADHPOLY;
304 penRfExLinear = DEFAULT_REF_GAP_LINEAR_BADHPOLY;
305 }
306
307 multiseedMms = DEFAULT_SEEDMMS;
308 multiseedLen = gDefaultSeedLen;
309
310 EList<string> toks(MISC_CAT);
311 string tok;
312 istringstream ss(s);
313 int setting = 0;
314 // Get each ;-separated token
315 while(getline(ss, tok, ';')) {
316 setting++;
317 EList<string> etoks(MISC_CAT);
318 string etok;
319 // Divide into tokens on either side of =
320 istringstream ess(tok);
321 while(getline(ess, etok, '=')) {
322 etoks.push_back(etok);
323 }
324 // Must be exactly 1 =
325 if(etoks.size() != 2) {
326 cerr << "Error parsing alignment policy setting " << setting
327 << "; must be bisected by = sign" << endl
328 << "Policy: " << s.c_str() << endl;
329 assert(false); throw 1;
330 }
331 // LHS is tag, RHS value
332 string tag = etoks[0], val = etoks[1];
333 // Separate value into comma-separated tokens
334 EList<string> ctoks(MISC_CAT);
335 string ctok;
336 istringstream css(val);
337 while(getline(css, ctok, ',')) {
338 ctoks.push_back(ctok);
339 }
340 if(ctoks.size() == 0) {
341 cerr << "Error parsing alignment policy setting " << setting
342 << "; RHS must have at least 1 token" << endl
343 << "Policy: " << s.c_str() << endl;
344 assert(false); throw 1;
345 }
346 for(size_t i = 0; i < ctoks.size(); i++) {
347 if(ctoks[i].length() == 0) {
348 cerr << "Error parsing alignment policy setting " << setting
349 << "; token " << i+1 << " on RHS had length=0" << endl
350 << "Policy: " << s.c_str() << endl;
351 assert(false); throw 1;
352 }
353 }
354 // Bonus for a match
355 // MA=xx (default: MA=0, or MA=10 if --local is set)
356 if(tag == "MA") {
357 if(ctoks.size() != 1) {
358 cerr << "Error parsing alignment policy setting " << setting
359 << "; RHS must have 1 token" << endl
360 << "Policy: " << s.c_str() << endl;
361 assert(false); throw 1;
362 }
363 string tmp = ctoks[0];
364 istringstream tmpss(tmp);
365 tmpss >> bonusMatch;
366 }
367 // Scoring for mismatches
368 // MMP={Cxx|Q|RQ}
369 // Cxx = constant, where constant is integer xx
370 // Qxx = equal to quality, scaled
371 // R = equal to maq-rounded quality value (rounded to nearest
372 // 10, can't be greater than 30)
373 else if(tag == "MMP") {
374 if(ctoks.size() > 3) {
375 cerr << "Error parsing alignment policy setting "
376 << "'" << tag.c_str() << "'"
377 << "; RHS must have at most 3 tokens" << endl
378 << "Policy: '" << s.c_str() << "'" << endl;
379 assert(false); throw 1;
380 }
381 if(ctoks[0][0] == 'C') {
382 string tmp = ctoks[0].substr(1);
383 // Parse constant penalty
384 istringstream tmpss(tmp);
385 tmpss >> penMmcMax;
386 penMmcMin = penMmcMax;
387 // Parse constant penalty
388 penMmcType = COST_MODEL_CONSTANT;
389 } else if(ctoks[0][0] == 'Q') {
390 if(ctoks.size() >= 2) {
391 string tmp = ctoks[1];
392 istringstream tmpss(tmp);
393 tmpss >> penMmcMax;
394 } else {
395 penMmcMax = DEFAULT_MM_PENALTY_MAX;
396 }
397 if(ctoks.size() >= 3) {
398 string tmp = ctoks[2];
399 istringstream tmpss(tmp);
400 tmpss >> penMmcMin;
401 } else {
402 penMmcMin = DEFAULT_MM_PENALTY_MIN;
403 }
404 if(penMmcMin > penMmcMax) {
405 cerr << "Error: Maximum mismatch penalty (" << penMmcMax
406 << ") is less than minimum penalty (" << penMmcMin
407 << endl;
408 throw 1;
409 }
410 // Set type to =quality
411 penMmcType = COST_MODEL_QUAL;
412 /* The match penalty may get set before
413 * the --ignore-quals parameter gets parsed.
414 * Since the match penalty policy depends on
415 * the whether or not --ignore-quals was set
416 * we do a check here and adjust the parameter
417 * accordingly.
418 */
419 if (ignoreQuals) {
420 if (gVerbose)
421 cerr << "Changing MMP=Q," << penMmcMax << " to ";
422 penMmcMin = penMmcMax;
423 penMmcType = COST_MODEL_CONSTANT;
424 if (gVerbose) {
425 cerr << "MMP=C," << penMmcMax
426 << " because of --ignore-quals"
427 << endl;
428 }
429
430 }
431 } else if(ctoks[0][0] == 'R') {
432 // Set type to=Maq-quality
433 penMmcType = COST_MODEL_ROUNDED_QUAL;
434 } else {
435 cerr << "Error parsing alignment policy setting "
436 << "'" << tag.c_str() << "'"
437 << "; RHS must start with C, Q or R" << endl
438 << "Policy: '" << s.c_str() << "'" << endl;
439 assert(false); throw 1;
440 }
441 }
442 // Scoring for mismatches where read char=N
443 // NP={Cxx|Q|RQ}
444 // Cxx = constant, where constant is integer xx
445 // Q = equal to quality
446 // R = equal to maq-rounded quality value (rounded to nearest
447 // 10, can't be greater than 30)
448 else if(tag == "NP") {
449 if(ctoks.size() != 1) {
450 cerr << "Error parsing alignment policy setting "
451 << "'" << tag.c_str() << "'"
452 << "; RHS must have 1 token" << endl
453 << "Policy: '" << s.c_str() << "'" << endl;
454 assert(false); throw 1;
455 }
456 if(ctoks[0][0] == 'C') {
457 string tmp = ctoks[0].substr(1);
458 // Parse constant penalty
459 istringstream tmpss(tmp);
460 tmpss >> penN;
461 // Parse constant penalty
462 penNType = COST_MODEL_CONSTANT;
463 } else if(ctoks[0][0] == 'Q') {
464 // Set type to =quality
465 penNType = COST_MODEL_QUAL;
466 } else if(ctoks[0][0] == 'R') {
467 // Set type to=Maq-quality
468 penNType = COST_MODEL_ROUNDED_QUAL;
469 } else {
470 cerr << "Error parsing alignment policy setting "
471 << "'" << tag.c_str() << "'"
472 << "; RHS must start with C, Q or R" << endl
473 << "Policy: '" << s.c_str() << "'" << endl;
474 assert(false); throw 1;
475 }
476 }
477 // Scoring for read gaps
478 // RDG=xx,yy,zz
479 // xx = read gap open penalty
480 // yy = read gap extension penalty constant coefficient
481 // (defaults to open penalty)
482 // zz = read gap extension penalty linear coefficient
483 // (defaults to 0)
484 else if(tag == "RDG") {
485 if(ctoks.size() >= 1) {
486 istringstream tmpss(ctoks[0]);
487 tmpss >> penRdExConst;
488 } else {
489 penRdExConst = noisyHpolymer ?
490 DEFAULT_READ_GAP_CONST_BADHPOLY :
491 DEFAULT_READ_GAP_CONST;
492 }
493 if(ctoks.size() >= 2) {
494 istringstream tmpss(ctoks[1]);
495 tmpss >> penRdExLinear;
496 } else {
497 penRdExLinear = noisyHpolymer ?
498 DEFAULT_READ_GAP_LINEAR_BADHPOLY :
499 DEFAULT_READ_GAP_LINEAR;
500 }
501 }
502 // Scoring for reference gaps
503 // RFG=xx,yy,zz
504 // xx = ref gap open penalty
505 // yy = ref gap extension penalty constant coefficient
506 // (defaults to open penalty)
507 // zz = ref gap extension penalty linear coefficient
508 // (defaults to 0)
509 else if(tag == "RFG") {
510 if(ctoks.size() >= 1) {
511 istringstream tmpss(ctoks[0]);
512 tmpss >> penRfExConst;
513 } else {
514 penRfExConst = noisyHpolymer ?
515 DEFAULT_REF_GAP_CONST_BADHPOLY :
516 DEFAULT_REF_GAP_CONST;
517 }
518 if(ctoks.size() >= 2) {
519 istringstream tmpss(ctoks[1]);
520 tmpss >> penRfExLinear;
521 } else {
522 penRfExLinear = noisyHpolymer ?
523 DEFAULT_REF_GAP_LINEAR_BADHPOLY :
524 DEFAULT_REF_GAP_LINEAR;
525 }
526 }
527 // Minimum score as a function of read length
528 // MIN=xx,yy
529 // xx = constant coefficient
530 // yy = linear coefficient
531 else if(tag == "MIN") {
532 PARSE_FUNC(costMin);
533 }
534 // Per-read N ceiling as a function of read length
535 // NCEIL=xx,yy
536 // xx = N ceiling constant coefficient
537 // yy = N ceiling linear coefficient (set to 0 if unspecified)
538 else if(tag == "NCEIL") {
539 PARSE_FUNC(nCeil);
540 }
541 /*
542 * Seeds
543 * -----
544 *
545 * SEED=mm,len,ival (default: SEED=0,22)
546 *
547 * mm = Maximum number of mismatches allowed within a seed.
548 * Must be >= 0 and <= 2. Note that 2-mismatch mode is
549 * not fully sensitive; i.e. some 2-mismatch seed
550 * alignments may be missed.
551 * len = Length of seed.
552 * ival = Interval between seeds. If not specified, seed
553 * interval is determined by IVAL.
554 */
555 else if(tag == "SEED") {
556 if(ctoks.size() > 1) {
557 cerr << "Error parsing alignment policy setting "
558 << "'" << tag.c_str() << "'; RHS must have 1 token, "
559 << "had " << ctoks.size() << ". "
560 << "Policy: '" << s.c_str() << "'" << endl;
561 assert(false); throw 1;
562 }
563 if(ctoks.size() >= 1) {
564 istringstream tmpss(ctoks[0]);
565 tmpss >> multiseedMms;
566 if(multiseedMms > 1) {
567 cerr << "Error: -N was set to " << multiseedMms << ", but cannot be set greater than 1" << endl;
568 throw 1;
569 }
570 if(multiseedMms < 0) {
571 cerr << "Error: -N was set to a number less than 0 (" << multiseedMms << ")" << endl;
572 throw 1;
573 }
574 }
575 }
576 else if(tag == "SEEDLEN") {
577 if(ctoks.size() > 1) {
578 cerr << "Error parsing alignment policy setting "
579 << "'" << tag.c_str() << "'; RHS must have 1 token, "
580 << "had " << ctoks.size() << ". "
581 << "Policy: '" << s.c_str() << "'" << endl;
582 assert(false); throw 1;
583 }
584 if(ctoks.size() >= 1) {
585 istringstream tmpss(ctoks[0]);
586 tmpss >> multiseedLen;
587 }
588 }
589 else if(tag == "DPS") {
590 if(ctoks.size() > 1) {
591 cerr << "Error parsing alignment policy setting "
592 << "'" << tag.c_str() << "'; RHS must have 1 token, "
593 << "had " << ctoks.size() << ". "
594 << "Policy: '" << s.c_str() << "'" << endl;
595 assert(false); throw 1;
596 }
597 if(ctoks.size() >= 1) {
598 istringstream tmpss(ctoks[0]);
599 tmpss >> failStreak;
600 }
601 }
602 else if(tag == "ROUNDS") {
603 if(ctoks.size() > 1) {
604 cerr << "Error parsing alignment policy setting "
605 << "'" << tag.c_str() << "'; RHS must have 1 token, "
606 << "had " << ctoks.size() << ". "
607 << "Policy: '" << s.c_str() << "'" << endl;
608 assert(false); throw 1;
609 }
610 if(ctoks.size() >= 1) {
611 istringstream tmpss(ctoks[0]);
612 tmpss >> seedRounds;
613 }
614 }
615 /*
616 * Seed interval
617 * -------------
618 *
619 * IVAL={L|S|C},a,b (default: IVAL=S,1.0,0.0)
620 *
621 * L = let interval between seeds be a linear function of the
622 * read length. xx and yy are the constant and linear
623 * coefficients respectively. In other words, the interval
624 * equals a * len + b, where len is the read length.
625 * Intervals less than 1 are rounded up to 1.
626 * S = let interval between seeds be a function of the sqaure
627 * root of the read length. xx and yy are the
628 * coefficients. In other words, the interval equals
629 * a * sqrt(len) + b, where len is the read length.
630 * Intervals less than 1 are rounded up to 1.
631 * C = Like S but uses cube root of length instead of square
632 * root.
633 */
634 else if(tag == "IVAL") {
635 PARSE_FUNC(multiseedIval);
636 }
637 else {
638 // Unknown tag
639 cerr << "Unexpected alignment policy setting "
640 << "'" << tag.c_str() << "'" << endl
641 << "Policy: '" << s.c_str() << "'" << endl;
642 assert(false); throw 1;
643 }
644 }
645 }
646
647 #ifdef ALIGNER_SEED_POLICY_MAIN
main()648 int main() {
649
650 int bonusMatchType;
651 int bonusMatch;
652 int penMmcType;
653 int penMmc;
654 int penNType;
655 int penN;
656 int penRdExConst;
657 int penRfExConst;
658 int penRdExLinear;
659 int penRfExLinear;
660 SimpleFunc costMin;
661 SimpleFunc costFloor;
662 SimpleFunc nCeil;
663 bool nCatPair;
664 int multiseedMms;
665 int multiseedLen;
666 SimpleFunc msIval;
667 SimpleFunc posfrac;
668 SimpleFunc rowmult;
669 uint32_t mhits;
670
671 {
672 cout << "Case 1: Defaults 1 ... ";
673 const char *pol = "";
674 SeedAlignmentPolicy::parseString(
675 string(pol),
676 false, // --local?
677 false, // noisy homopolymers a la 454?
678 false, // ignore qualities?
679 bonusMatchType,
680 bonusMatch,
681 penMmcType,
682 penMmc,
683 penNType,
684 penN,
685 penRdExConst,
686 penRfExConst,
687 penRdExLinear,
688 penRfExLinear,
689 costMin,
690 costFloor,
691 nCeil,
692 nCatPair,
693 multiseedMms,
694 multiseedLen,
695 msIval,
696 mhits);
697
698 assert_eq(DEFAULT_MATCH_BONUS_TYPE, bonusMatchType);
699 assert_eq(DEFAULT_MATCH_BONUS, bonusMatch);
700 assert_eq(DEFAULT_MM_PENALTY_TYPE, penMmcType);
701 assert_eq(DEFAULT_MM_PENALTY_MAX, penMmcMax);
702 assert_eq(DEFAULT_MM_PENALTY_MIN, penMmcMin);
703 assert_eq(DEFAULT_N_PENALTY_TYPE, penNType);
704 assert_eq(DEFAULT_N_PENALTY, penN);
705 assert_eq(DEFAULT_MIN_CONST, costMin.getConst());
706 assert_eq(DEFAULT_MIN_LINEAR, costMin.getCoeff());
707 assert_eq(DEFAULT_FLOOR_CONST, costFloor.getConst());
708 assert_eq(DEFAULT_FLOOR_LINEAR, costFloor.getCoeff());
709 assert_eq(DEFAULT_N_CEIL_CONST, nCeil.getConst());
710 assert_eq(DEFAULT_N_CAT_PAIR, nCatPair);
711
712 assert_eq(DEFAULT_READ_GAP_CONST, penRdExConst);
713 assert_eq(DEFAULT_READ_GAP_LINEAR, penRdExLinear);
714 assert_eq(DEFAULT_REF_GAP_CONST, penRfExConst);
715 assert_eq(DEFAULT_REF_GAP_LINEAR, penRfExLinear);
716 assert_eq(DEFAULT_SEEDMMS, multiseedMms);
717 assert_eq(DEFAULT_IVAL, msIval.getType());
718 assert_eq(DEFAULT_IVAL_A, msIval.getCoeff());
719 assert_eq(DEFAULT_IVAL_B, msIval.getConst());
720
721 cout << "PASSED" << endl;
722 }
723
724 {
725 cout << "Case 2: Defaults 2 ... ";
726 const char *pol = "";
727 SeedAlignmentPolicy::parseString(
728 string(pol),
729 false, // --local?
730 true, // noisy homopolymers a la 454?
731 false, // ignore qualities?
732 bonusMatchType,
733 bonusMatch,
734 penMmcType,
735 penMmc,
736 penNType,
737 penN,
738 penRdExConst,
739 penRfExConst,
740 penRdExLinear,
741 penRfExLinear,
742 costMin,
743 costFloor,
744 nCeil,
745 nCatPair,
746 multiseedMms,
747 multiseedLen,
748 msIval,
749 mhits);
750
751 assert_eq(DEFAULT_MATCH_BONUS_TYPE, bonusMatchType);
752 assert_eq(DEFAULT_MATCH_BONUS, bonusMatch);
753 assert_eq(DEFAULT_MM_PENALTY_TYPE, penMmcType);
754 assert_eq(DEFAULT_MM_PENALTY_MAX, penMmc);
755 assert_eq(DEFAULT_MM_PENALTY_MIN, penMmc);
756 assert_eq(DEFAULT_N_PENALTY_TYPE, penNType);
757 assert_eq(DEFAULT_N_PENALTY, penN);
758 assert_eq(DEFAULT_MIN_CONST, costMin.getConst());
759 assert_eq(DEFAULT_MIN_LINEAR, costMin.getCoeff());
760 assert_eq(DEFAULT_FLOOR_CONST, costFloor.getConst());
761 assert_eq(DEFAULT_FLOOR_LINEAR, costFloor.getCoeff());
762 assert_eq(DEFAULT_N_CEIL_CONST, nCeil.getConst());
763 assert_eq(DEFAULT_N_CAT_PAIR, nCatPair);
764
765 assert_eq(DEFAULT_READ_GAP_CONST_BADHPOLY, penRdExConst);
766 assert_eq(DEFAULT_READ_GAP_LINEAR_BADHPOLY, penRdExLinear);
767 assert_eq(DEFAULT_REF_GAP_CONST_BADHPOLY, penRfExConst);
768 assert_eq(DEFAULT_REF_GAP_LINEAR_BADHPOLY, penRfExLinear);
769 assert_eq(DEFAULT_SEEDMMS, multiseedMms);
770 assert_eq(DEFAULT_IVAL, msIval.getType());
771 assert_eq(DEFAULT_IVAL_A, msIval.getCoeff());
772 assert_eq(DEFAULT_IVAL_B, msIval.getConst());
773
774 cout << "PASSED" << endl;
775 }
776
777 {
778 cout << "Case 3: Defaults 3 ... ";
779 const char *pol = "";
780 SeedAlignmentPolicy::parseString(
781 string(pol),
782 true, // --local?
783 false, // noisy homopolymers a la 454?
784 false, // ignore qualities?
785 bonusMatchType,
786 bonusMatch,
787 penMmcType,
788 penMmc,
789 penNType,
790 penN,
791 penRdExConst,
792 penRfExConst,
793 penRdExLinear,
794 penRfExLinear,
795 costMin,
796 costFloor,
797 nCeil,
798 nCatPair,
799 multiseedMms,
800 multiseedLen,
801 msIval,
802 mhits);
803
804 assert_eq(DEFAULT_MATCH_BONUS_TYPE_LOCAL, bonusMatchType);
805 assert_eq(DEFAULT_MATCH_BONUS_LOCAL, bonusMatch);
806 assert_eq(DEFAULT_MM_PENALTY_TYPE, penMmcType);
807 assert_eq(DEFAULT_MM_PENALTY_MAX, penMmcMax);
808 assert_eq(DEFAULT_MM_PENALTY_MIN, penMmcMin);
809 assert_eq(DEFAULT_N_PENALTY_TYPE, penNType);
810 assert_eq(DEFAULT_N_PENALTY, penN);
811 assert_eq(DEFAULT_MIN_CONST_LOCAL, costMin.getConst());
812 assert_eq(DEFAULT_MIN_LINEAR_LOCAL, costMin.getCoeff());
813 assert_eq(DEFAULT_FLOOR_CONST_LOCAL, costFloor.getConst());
814 assert_eq(DEFAULT_FLOOR_LINEAR_LOCAL, costFloor.getCoeff());
815 assert_eq(DEFAULT_N_CEIL_CONST, nCeil.getConst());
816 assert_eq(DEFAULT_N_CEIL_LINEAR, nCeil.getCoeff());
817 assert_eq(DEFAULT_N_CAT_PAIR, nCatPair);
818
819 assert_eq(DEFAULT_READ_GAP_CONST, penRdExConst);
820 assert_eq(DEFAULT_READ_GAP_LINEAR, penRdExLinear);
821 assert_eq(DEFAULT_REF_GAP_CONST, penRfExConst);
822 assert_eq(DEFAULT_REF_GAP_LINEAR, penRfExLinear);
823 assert_eq(DEFAULT_SEEDMMS, multiseedMms);
824 assert_eq(DEFAULT_IVAL, msIval.getType());
825 assert_eq(DEFAULT_IVAL_A, msIval.getCoeff());
826 assert_eq(DEFAULT_IVAL_B, msIval.getConst());
827
828 cout << "PASSED" << endl;
829 }
830
831 {
832 cout << "Case 4: Simple string 1 ... ";
833 const char *pol = "MMP=C44;MA=4;RFG=24,12;FL=C,8;RDG=2;NP=C4;MIN=C,7";
834 SeedAlignmentPolicy::parseString(
835 string(pol),
836 true, // --local?
837 false, // noisy homopolymers a la 454?
838 false, // ignore qualities?
839 bonusMatchType,
840 bonusMatch,
841 penMmcType,
842 penMmc,
843 penNType,
844 penN,
845 penRdExConst,
846 penRfExConst,
847 penRdExLinear,
848 penRfExLinear,
849 costMin,
850 costFloor,
851 nCeil,
852 nCatPair,
853 multiseedMms,
854 multiseedLen,
855 msIval,
856 mhits);
857
858 assert_eq(COST_MODEL_CONSTANT, bonusMatchType);
859 assert_eq(4, bonusMatch);
860 assert_eq(COST_MODEL_CONSTANT, penMmcType);
861 assert_eq(44, penMmc);
862 assert_eq(COST_MODEL_CONSTANT, penNType);
863 assert_eq(4.0f, penN);
864 assert_eq(7, costMin.getConst());
865 assert_eq(DEFAULT_MIN_LINEAR_LOCAL, costMin.getCoeff());
866 assert_eq(8, costFloor.getConst());
867 assert_eq(DEFAULT_FLOOR_LINEAR_LOCAL, costFloor.getCoeff());
868 assert_eq(DEFAULT_N_CEIL_CONST, nCeil.getConst());
869 assert_eq(DEFAULT_N_CEIL_LINEAR, nCeil.getCoeff());
870 assert_eq(DEFAULT_N_CAT_PAIR, nCatPair);
871
872 assert_eq(2.0f, penRdExConst);
873 assert_eq(DEFAULT_READ_GAP_LINEAR, penRdExLinear);
874 assert_eq(24.0f, penRfExConst);
875 assert_eq(12.0f, penRfExLinear);
876 assert_eq(DEFAULT_SEEDMMS, multiseedMms);
877 assert_eq(DEFAULT_IVAL, msIval.getType());
878 assert_eq(DEFAULT_IVAL_A, msIval.getCoeff());
879 assert_eq(DEFAULT_IVAL_B, msIval.getConst());
880
881 cout << "PASSED" << endl;
882 }
883 }
884 #endif /*def ALIGNER_SEED_POLICY_MAIN*/
885