1 /*
2 * Copyright 2011, Ben Langmead <langmea@cs.jhu.edu>
3 *
4 * This file is part of Bowtie 2.
5 *
6 * Bowtie 2 is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * Bowtie 2 is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include <string>
21 #include <iostream>
22 #include <sstream>
23 #include <limits>
24 #include "ds.h"
25 #include "aligner_seed_policy.h"
26 #include "mem_ids.h"
27
28 using namespace std;
29
parseFuncType(const std::string & otype)30 static int parseFuncType(const std::string& otype) {
31 string type = otype;
32 if(type == "C" || type == "Constant") {
33 return SIMPLE_FUNC_CONST;
34 } else if(type == "L" || type == "Linear") {
35 return SIMPLE_FUNC_LINEAR;
36 } else if(type == "S" || type == "Sqrt") {
37 return SIMPLE_FUNC_SQRT;
38 } else if(type == "G" || type == "Log") {
39 return SIMPLE_FUNC_LOG;
40 }
41 std::cerr << "Error: Bad function type '" << otype.c_str()
42 << "'. Should be C (constant), L (linear), "
43 << "S (square root) or G (natural log)." << std::endl;
44 throw 1;
45 }
46
47 #define PARSE_FUNC(fv) { \
48 if(ctoks.size() >= 1) { \
49 fv.setType(parseFuncType(ctoks[0])); \
50 } \
51 if(ctoks.size() >= 2) { \
52 double co; \
53 istringstream tmpss(ctoks[1]); \
54 tmpss >> co; \
55 fv.setConst(co); \
56 } \
57 if(ctoks.size() >= 3) { \
58 double ce; \
59 istringstream tmpss(ctoks[2]); \
60 tmpss >> ce; \
61 fv.setCoeff(ce); \
62 } \
63 if(ctoks.size() >= 4) { \
64 double mn; \
65 istringstream tmpss(ctoks[3]); \
66 tmpss >> mn; \
67 fv.setMin(mn); \
68 } \
69 if(ctoks.size() >= 5) { \
70 double mx; \
71 istringstream tmpss(ctoks[4]); \
72 tmpss >> mx; \
73 fv.setMin(mx); \
74 } \
75 }
76
77 /**
78 * Parse alignment policy when provided in this format:
79 * <lab>=<val>;<lab>=<val>;<lab>=<val>...
80 *
81 * And label=value possibilities are:
82 *
83 * Bonus for a match
84 * -----------------
85 *
86 * MA=xx (default: MA=0, or MA=2 if --local is set)
87 *
88 * xx = Each position where equal read and reference characters match up
89 * in the alignment contriubtes this amount to the total score.
90 *
91 * Penalty for a mismatch
92 * ----------------------
93 *
94 * MMP={Cxx|Q|RQ} (default: MMP=C6)
95 *
96 * Cxx = Each mismatch costs xx. If MMP=Cxx is specified, quality
97 * values are ignored when assessing penalities for mismatches.
98 * Q = Each mismatch incurs a penalty equal to the mismatched base's
99 * value.
100 * R = Each mismatch incurs a penalty equal to the mismatched base's
101 * rounded quality value. Qualities are rounded off to the
102 * nearest 10, and qualities greater than 30 are rounded to 30.
103 *
104 * Penalty for position with N (in either read or reference)
105 * ---------------------------------------------------------
106 *
107 * NP={Cxx|Q|RQ} (default: NP=C1)
108 *
109 * Cxx = Each alignment position with an N in either the read or the
110 * reference costs xx. If NP=Cxx is specified, quality values are
111 * ignored when assessing penalities for Ns.
112 * Q = Each alignment position with an N in either the read or the
113 * reference incurs a penalty equal to the read base's quality
114 * value.
115 * R = Each alignment position with an N in either the read or the
116 * reference incurs a penalty equal to the read base's rounded
117 * quality value. Qualities are rounded off to the nearest 10,
118 * and qualities greater than 30 are rounded to 30.
119 *
120 * Penalty for a read gap
121 * ----------------------
122 *
123 * RDG=xx,yy (default: RDG=5,3)
124 *
125 * xx = Read gap open penalty.
126 * yy = Read gap extension penalty.
127 *
128 * Total cost incurred by a read gap = xx + (yy * gap length)
129 *
130 * Penalty for a reference gap
131 * ---------------------------
132 *
133 * RFG=xx,yy (default: RFG=5,3)
134 *
135 * xx = Reference gap open penalty.
136 * yy = Reference gap extension penalty.
137 *
138 * Total cost incurred by a reference gap = xx + (yy * gap length)
139 *
140 * Minimum score for valid alignment
141 * ---------------------------------
142 *
143 * MIN=xx,yy (defaults: MIN=-0.6,-0.6, or MIN=0.0,0.66 if --local is set)
144 *
145 * xx,yy = For a read of length N, the total score must be at least
146 * xx + (read length * yy) for the alignment to be valid. The
147 * total score is the sum of all negative penalties (from
148 * mismatches and gaps) and all positive bonuses. The minimum
149 * can be negative (and is by default in global alignment mode).
150 *
151 * Score floor for local alignment
152 * -------------------------------
153 *
154 * FL=xx,yy (defaults: FL=-Infinity,0.0, or FL=0.0,0.0 if --local is set)
155 *
156 * xx,yy = If a cell in the dynamic programming table has a score less
157 * than xx + (read length * yy), then no valid alignment can go
158 * through it. Defaults are highly recommended.
159 *
160 * N ceiling
161 * ---------
162 *
163 * NCEIL=xx,yy (default: NCEIL=0.0,0.15)
164 *
165 * xx,yy = For a read of length N, the number of alignment
166 * positions with an N in either the read or the
167 * reference cannot exceed
168 * ceiling = xx + (read length * yy). If the ceiling is
169 * exceeded, the alignment is considered invalid.
170 *
171 * Seeds
172 * -----
173 *
174 * SEED=mm,len,ival (default: SEED=0,22)
175 *
176 * mm = Maximum number of mismatches allowed within a seed.
177 * Must be >= 0 and <= 2. Note that 2-mismatch mode is
178 * not fully sensitive; i.e. some 2-mismatch seed
179 * alignments may be missed.
180 * len = Length of seed.
181 * ival = Interval between seeds. If not specified, seed
182 * interval is determined by IVAL.
183 *
184 * Seed interval
185 * -------------
186 *
187 * IVAL={L|S|C},xx,yy (default: IVAL=S,1.0,0.0)
188 *
189 * L = let interval between seeds be a linear function of the
190 * read length. xx and yy are the constant and linear
191 * coefficients respectively. In other words, the interval
192 * equals a * len + b, where len is the read length.
193 * Intervals less than 1 are rounded up to 1.
194 * S = let interval between seeds be a function of the sqaure
195 * root of the read length. xx and yy are the
196 * coefficients. In other words, the interval equals
197 * a * sqrt(len) + b, where len is the read length.
198 * Intervals less than 1 are rounded up to 1.
199 * C = Like S but uses cube root of length instead of square
200 * root.
201 *
202 * Example 1:
203 *
204 * SEED=1,10,5 and read sequence is TGCTATCGTACGATCGTAC:
205 *
206 * The following seeds are extracted from the forward
207 * representation of the read and aligned to the reference
208 * allowing up to 1 mismatch:
209 *
210 * Read: TGCTATCGTACGATCGTACA
211 *
212 * Seed 1+: TGCTATCGTA
213 * Seed 2+: TCGTACGATC
214 * Seed 3+: CGATCGTACA
215 *
216 * ...and the following are extracted from the reverse-complement
217 * representation of the read and align to the reference allowing
218 * up to 1 mismatch:
219 *
220 * Seed 1-: TACGATAGCA
221 * Seed 2-: GATCGTACGA
222 * Seed 3-: TGTACGATCG
223 *
224 * Example 2:
225 *
226 * SEED=1,20,20 and read sequence is TGCTATCGTACGATC. The seed
227 * length is 20 but the read is only 15 characters long. In this
228 * case, Bowtie2 automatically shrinks the seed length to be equal
229 * to the read length.
230 *
231 * Read: TGCTATCGTACGATC
232 *
233 * Seed 1+: TGCTATCGTACGATC
234 * Seed 1-: GATCGTACGATAGCA
235 *
236 * Example 3:
237 *
238 * SEED=1,10,10 and read sequence is TGCTATCGTACGATC. Only one seed
239 * fits on the read; a second seed would overhang the end of the read
240 * by 5 positions. In this case, Bowtie2 extracts one seed.
241 *
242 * Read: TGCTATCGTACGATC
243 *
244 * Seed 1+: TGCTATCGTA
245 * Seed 1-: TACGATAGCA
246 */
parseString(const std::string & s,bool local,bool noisyHpolymer,bool ignoreQuals,int & bonusMatchType,int & bonusMatch,int & penMmcType,int & penMmcMax,int & penMmcMin,int & penScMax,int & penScMin,int & penNType,int & penN,int & penRdExConst,int & penRfExConst,int & penRdExLinear,int & penRfExLinear,SimpleFunc & costMin,SimpleFunc & nCeil,bool & nCatPair,int & multiseedMms,int & multiseedLen,SimpleFunc & multiseedIval,size_t & failStreak,size_t & seedRounds,SimpleFunc * penCanIntronLen,SimpleFunc * penNoncanIntronLen)247 void SeedAlignmentPolicy::parseString(
248 const std::string& s,
249 bool local,
250 bool noisyHpolymer,
251 bool ignoreQuals,
252 int& bonusMatchType,
253 int& bonusMatch,
254 int& penMmcType,
255 int& penMmcMax,
256 int& penMmcMin,
257 int& penScMax,
258 int& penScMin,
259 int& penNType,
260 int& penN,
261 int& penRdExConst,
262 int& penRfExConst,
263 int& penRdExLinear,
264 int& penRfExLinear,
265 SimpleFunc& costMin,
266 SimpleFunc& nCeil,
267 bool& nCatPair,
268 int& multiseedMms,
269 int& multiseedLen,
270 SimpleFunc& multiseedIval,
271 size_t& failStreak,
272 size_t& seedRounds,
273 SimpleFunc* penCanIntronLen,
274 SimpleFunc* penNoncanIntronLen)
275 {
276
277 bonusMatchType = local ? DEFAULT_MATCH_BONUS_TYPE_LOCAL : DEFAULT_MATCH_BONUS_TYPE;
278 bonusMatch = local ? DEFAULT_MATCH_BONUS_LOCAL : DEFAULT_MATCH_BONUS;
279 penMmcType = ignoreQuals ? DEFAULT_MM_PENALTY_TYPE_IGNORE_QUALS :
280 DEFAULT_MM_PENALTY_TYPE;
281 penMmcMax = DEFAULT_MM_PENALTY_MAX;
282 penMmcMin = DEFAULT_MM_PENALTY_MIN;
283 penNType = DEFAULT_N_PENALTY_TYPE;
284 penN = DEFAULT_N_PENALTY;
285
286 penScMax = DEFAULT_SC_PENALTY_MAX;
287 penScMin = DEFAULT_SC_PENALTY_MIN;
288
289 const double DMAX = std::numeric_limits<double>::max();
290 costMin.init(
291 local ? SIMPLE_FUNC_LOG : SIMPLE_FUNC_LINEAR,
292 local ? DEFAULT_MIN_CONST_LOCAL : 0.0f,
293 local ? DEFAULT_MIN_LINEAR_LOCAL : -0.2f);
294 nCeil.init(
295 SIMPLE_FUNC_LINEAR, 0.0f, DMAX,
296 DEFAULT_N_CEIL_CONST, DEFAULT_N_CEIL_LINEAR);
297 multiseedIval.init(
298 DEFAULT_IVAL, 1.0f, DMAX,
299 DEFAULT_IVAL_B, DEFAULT_IVAL_A);
300 nCatPair = DEFAULT_N_CAT_PAIR;
301
302 if(!noisyHpolymer) {
303 penRdExConst = DEFAULT_READ_GAP_CONST;
304 penRdExLinear = DEFAULT_READ_GAP_LINEAR;
305 penRfExConst = DEFAULT_REF_GAP_CONST;
306 penRfExLinear = DEFAULT_REF_GAP_LINEAR;
307 } else {
308 penRdExConst = DEFAULT_READ_GAP_CONST_BADHPOLY;
309 penRdExLinear = DEFAULT_READ_GAP_LINEAR_BADHPOLY;
310 penRfExConst = DEFAULT_REF_GAP_CONST_BADHPOLY;
311 penRfExLinear = DEFAULT_REF_GAP_LINEAR_BADHPOLY;
312 }
313
314 multiseedMms = DEFAULT_SEEDMMS;
315 multiseedLen = DEFAULT_SEEDLEN;
316
317 EList<string> toks(MISC_CAT);
318 string tok;
319 istringstream ss(s);
320 int setting = 0;
321 // Get each ;-separated token
322 while(getline(ss, tok, ';')) {
323 setting++;
324 EList<string> etoks(MISC_CAT);
325 string etok;
326 // Divide into tokens on either side of =
327 istringstream ess(tok);
328 while(getline(ess, etok, '=')) {
329 etoks.push_back(etok);
330 }
331 // Must be exactly 1 =
332 if(etoks.size() != 2) {
333 cerr << "Error parsing alignment policy setting " << setting
334 << "; must be bisected by = sign" << endl
335 << "Policy: " << s.c_str() << endl;
336 assert(false); throw 1;
337 }
338 // LHS is tag, RHS value
339 string tag = etoks[0], val = etoks[1];
340 // Separate value into comma-separated tokens
341 EList<string> ctoks(MISC_CAT);
342 string ctok;
343 istringstream css(val);
344 while(getline(css, ctok, ',')) {
345 ctoks.push_back(ctok);
346 }
347 if(ctoks.size() == 0) {
348 cerr << "Error parsing alignment policy setting " << setting
349 << "; RHS must have at least 1 token" << endl
350 << "Policy: " << s.c_str() << endl;
351 assert(false); throw 1;
352 }
353 for(size_t i = 0; i < ctoks.size(); i++) {
354 if(ctoks[i].length() == 0) {
355 cerr << "Error parsing alignment policy setting " << setting
356 << "; token " << i+1 << " on RHS had length=0" << endl
357 << "Policy: " << s.c_str() << endl;
358 assert(false); throw 1;
359 }
360 }
361 // Bonus for a match
362 // MA=xx (default: MA=0, or MA=10 if --local is set)
363 if(tag == "MA") {
364 if(ctoks.size() != 1) {
365 cerr << "Error parsing alignment policy setting " << setting
366 << "; RHS must have 1 token" << endl
367 << "Policy: " << s.c_str() << endl;
368 assert(false); throw 1;
369 }
370 string tmp = ctoks[0];
371 istringstream tmpss(tmp);
372 tmpss >> bonusMatch;
373 }
374 // Scoring for mismatches
375 // MMP={Cxx|Q|RQ}
376 // Cxx = constant, where constant is integer xx
377 // Qxx = equal to quality, scaled
378 // R = equal to maq-rounded quality value (rounded to nearest
379 // 10, can't be greater than 30)
380 else if(tag == "MMP") {
381 if(ctoks.size() > 3) {
382 cerr << "Error parsing alignment policy setting "
383 << "'" << tag.c_str() << "'"
384 << "; RHS must have at most 3 tokens" << endl
385 << "Policy: '" << s.c_str() << "'" << endl;
386 assert(false); throw 1;
387 }
388 if(ctoks[0][0] == 'C') {
389 string tmp = ctoks[0].substr(1);
390 // Parse constant penalty
391 istringstream tmpss(tmp);
392 tmpss >> penMmcMax;
393 penMmcMin = penMmcMax;
394 // Parse constant penalty
395 penMmcType = COST_MODEL_CONSTANT;
396 } else if(ctoks[0][0] == 'Q') {
397 if(ctoks.size() >= 2) {
398 string tmp = ctoks[1];
399 istringstream tmpss(tmp);
400 tmpss >> penMmcMax;
401 } else {
402 penMmcMax = DEFAULT_MM_PENALTY_MAX;
403 }
404 if(ctoks.size() >= 3) {
405 string tmp = ctoks[2];
406 istringstream tmpss(tmp);
407 tmpss >> penMmcMin;
408 } else {
409 penMmcMin = DEFAULT_MM_PENALTY_MIN;
410 }
411 if(penMmcMin > penMmcMax) {
412 cerr << "Error: Maximum mismatch penalty (" << penMmcMax
413 << ") is less than minimum penalty (" << penMmcMin
414 << endl;
415 throw 1;
416 }
417 // Set type to =quality
418 penMmcType = COST_MODEL_QUAL;
419 } else if(ctoks[0][0] == 'R') {
420 // Set type to=Maq-quality
421 penMmcType = COST_MODEL_ROUNDED_QUAL;
422 } else {
423 cerr << "Error parsing alignment policy setting "
424 << "'" << tag.c_str() << "'"
425 << "; RHS must start with C, Q or R" << endl
426 << "Policy: '" << s.c_str() << "'" << endl;
427 assert(false); throw 1;
428 }
429 }
430 else if(tag == "SCP") {
431 if(ctoks.size() > 3) {
432 cerr << "Error parsing alignment policy setting "
433 << "'" << tag.c_str() << "'"
434 << "; SCP must have at most 3 tokens" << endl
435 << "Policy: '" << s.c_str() << "'" << endl;
436 assert(false); throw 1;
437 }
438 istringstream tmpMax(ctoks[1]);
439 tmpMax >> penScMax;
440 istringstream tmpMin(ctoks[1]);
441 tmpMin >> penScMin;
442 if(penScMin > penScMax) {
443 cerr << "max (" << penScMax << ") should be >= min (" << penScMin << ")" << endl;
444 assert(false); throw 1;
445 }
446 if(penScMin < 1) {
447 cerr << "min (" << penScMin << ") should be greater than 0" << endl;
448 assert(false); throw 1;
449 }
450 }
451 // Scoring for mismatches where read char=N
452 // NP={Cxx|Q|RQ}
453 // Cxx = constant, where constant is integer xx
454 // Q = equal to quality
455 // R = equal to maq-rounded quality value (rounded to nearest
456 // 10, can't be greater than 30)
457 else if(tag == "NP") {
458 if(ctoks.size() != 1) {
459 cerr << "Error parsing alignment policy setting "
460 << "'" << tag.c_str() << "'"
461 << "; RHS must have 1 token" << endl
462 << "Policy: '" << s.c_str() << "'" << endl;
463 assert(false); throw 1;
464 }
465 if(ctoks[0][0] == 'C') {
466 string tmp = ctoks[0].substr(1);
467 // Parse constant penalty
468 istringstream tmpss(tmp);
469 tmpss >> penN;
470 // Parse constant penalty
471 penNType = COST_MODEL_CONSTANT;
472 } else if(ctoks[0][0] == 'Q') {
473 // Set type to =quality
474 penNType = COST_MODEL_QUAL;
475 } else if(ctoks[0][0] == 'R') {
476 // Set type to=Maq-quality
477 penNType = COST_MODEL_ROUNDED_QUAL;
478 } else {
479 cerr << "Error parsing alignment policy setting "
480 << "'" << tag.c_str() << "'"
481 << "; RHS must start with C, Q or R" << endl
482 << "Policy: '" << s.c_str() << "'" << endl;
483 assert(false); throw 1;
484 }
485 }
486 // Scoring for read gaps
487 // RDG=xx,yy,zz
488 // xx = read gap open penalty
489 // yy = read gap extension penalty constant coefficient
490 // (defaults to open penalty)
491 // zz = read gap extension penalty linear coefficient
492 // (defaults to 0)
493 else if(tag == "RDG") {
494 if(ctoks.size() >= 1) {
495 istringstream tmpss(ctoks[0]);
496 tmpss >> penRdExConst;
497 } else {
498 penRdExConst = noisyHpolymer ?
499 DEFAULT_READ_GAP_CONST_BADHPOLY :
500 DEFAULT_READ_GAP_CONST;
501 }
502 if(ctoks.size() >= 2) {
503 istringstream tmpss(ctoks[1]);
504 tmpss >> penRdExLinear;
505 } else {
506 penRdExLinear = noisyHpolymer ?
507 DEFAULT_READ_GAP_LINEAR_BADHPOLY :
508 DEFAULT_READ_GAP_LINEAR;
509 }
510 }
511 // Scoring for reference gaps
512 // RFG=xx,yy,zz
513 // xx = ref gap open penalty
514 // yy = ref gap extension penalty constant coefficient
515 // (defaults to open penalty)
516 // zz = ref gap extension penalty linear coefficient
517 // (defaults to 0)
518 else if(tag == "RFG") {
519 if(ctoks.size() >= 1) {
520 istringstream tmpss(ctoks[0]);
521 tmpss >> penRfExConst;
522 } else {
523 penRfExConst = noisyHpolymer ?
524 DEFAULT_REF_GAP_CONST_BADHPOLY :
525 DEFAULT_REF_GAP_CONST;
526 }
527 if(ctoks.size() >= 2) {
528 istringstream tmpss(ctoks[1]);
529 tmpss >> penRfExLinear;
530 } else {
531 penRfExLinear = noisyHpolymer ?
532 DEFAULT_REF_GAP_LINEAR_BADHPOLY :
533 DEFAULT_REF_GAP_LINEAR;
534 }
535 }
536 // Minimum score as a function of read length
537 // MIN=xx,yy
538 // xx = constant coefficient
539 // yy = linear coefficient
540 else if(tag == "MIN") {
541 PARSE_FUNC(costMin);
542 }
543 // Per-read N ceiling as a function of read length
544 // NCEIL=xx,yy
545 // xx = N ceiling constant coefficient
546 // yy = N ceiling linear coefficient (set to 0 if unspecified)
547 else if(tag == "NCEIL") {
548 PARSE_FUNC(nCeil);
549 }
550 /*
551 * Seeds
552 * -----
553 *
554 * SEED=mm,len,ival (default: SEED=0,22)
555 *
556 * mm = Maximum number of mismatches allowed within a seed.
557 * Must be >= 0 and <= 2. Note that 2-mismatch mode is
558 * not fully sensitive; i.e. some 2-mismatch seed
559 * alignments may be missed.
560 * len = Length of seed.
561 * ival = Interval between seeds. If not specified, seed
562 * interval is determined by IVAL.
563 */
564 else if(tag == "SEED") {
565 if(ctoks.size() > 2) {
566 cerr << "Error parsing alignment policy setting "
567 << "'" << tag.c_str() << "'; RHS must have 1 or 2 tokens, "
568 << "had " << ctoks.size() << ". "
569 << "Policy: '" << s.c_str() << "'" << endl;
570 assert(false); throw 1;
571 }
572 if(ctoks.size() >= 1) {
573 istringstream tmpss(ctoks[0]);
574 tmpss >> multiseedMms;
575 if(multiseedMms > 1) {
576 cerr << "Error: -N was set to " << multiseedMms << ", but cannot be set greater than 1" << endl;
577 throw 1;
578 }
579 if(multiseedMms < 0) {
580 cerr << "Error: -N was set to a number less than 0 (" << multiseedMms << ")" << endl;
581 throw 1;
582 }
583 }
584 if(ctoks.size() >= 2) {
585 istringstream tmpss(ctoks[1]);
586 tmpss >> multiseedLen;
587 } else {
588 multiseedLen = DEFAULT_SEEDLEN;
589 }
590 }
591 else if(tag == "SEEDLEN") {
592 if(ctoks.size() > 1) {
593 cerr << "Error parsing alignment policy setting "
594 << "'" << tag.c_str() << "'; RHS must have 1 token, "
595 << "had " << ctoks.size() << ". "
596 << "Policy: '" << s.c_str() << "'" << endl;
597 assert(false); throw 1;
598 }
599 if(ctoks.size() >= 1) {
600 istringstream tmpss(ctoks[0]);
601 tmpss >> multiseedLen;
602 }
603 }
604 else if(tag == "DPS") {
605 if(ctoks.size() > 1) {
606 cerr << "Error parsing alignment policy setting "
607 << "'" << tag.c_str() << "'; RHS must have 1 token, "
608 << "had " << ctoks.size() << ". "
609 << "Policy: '" << s.c_str() << "'" << endl;
610 assert(false); throw 1;
611 }
612 if(ctoks.size() >= 1) {
613 istringstream tmpss(ctoks[0]);
614 tmpss >> failStreak;
615 }
616 }
617 else if(tag == "ROUNDS") {
618 if(ctoks.size() > 1) {
619 cerr << "Error parsing alignment policy setting "
620 << "'" << tag.c_str() << "'; RHS must have 1 token, "
621 << "had " << ctoks.size() << ". "
622 << "Policy: '" << s.c_str() << "'" << endl;
623 assert(false); throw 1;
624 }
625 if(ctoks.size() >= 1) {
626 istringstream tmpss(ctoks[0]);
627 tmpss >> seedRounds;
628 }
629 }
630 /*
631 * Seed interval
632 * -------------
633 *
634 * IVAL={L|S|C},a,b (default: IVAL=S,1.0,0.0)
635 *
636 * L = let interval between seeds be a linear function of the
637 * read length. xx and yy are the constant and linear
638 * coefficients respectively. In other words, the interval
639 * equals a * len + b, where len is the read length.
640 * Intervals less than 1 are rounded up to 1.
641 * S = let interval between seeds be a function of the sqaure
642 * root of the read length. xx and yy are the
643 * coefficients. In other words, the interval equals
644 * a * sqrt(len) + b, where len is the read length.
645 * Intervals less than 1 are rounded up to 1.
646 * C = Like S but uses cube root of length instead of square
647 * root.
648 */
649 else if(tag == "IVAL") {
650 PARSE_FUNC(multiseedIval);
651 }
652 else if(tag == "CANINTRONLEN") {
653 assert(penCanIntronLen != NULL);
654 PARSE_FUNC((*penCanIntronLen));
655 }
656 else if(tag == "NONCANINTRONLEN") {
657 assert(penNoncanIntronLen != NULL);
658 PARSE_FUNC((*penNoncanIntronLen));
659 }
660 else {
661 // Unknown tag
662 cerr << "Unexpected alignment policy setting "
663 << "'" << tag.c_str() << "'" << endl
664 << "Policy: '" << s.c_str() << "'" << endl;
665 assert(false); throw 1;
666 }
667 }
668 }
669
670 #ifdef ALIGNER_SEED_POLICY_MAIN
main()671 int main() {
672
673 int bonusMatchType;
674 int bonusMatch;
675 int penMmcType;
676 int penMmc;
677 int penScMax;
678 int penScMin;
679 int penNType;
680 int penN;
681 int penRdExConst;
682 int penRfExConst;
683 int penRdExLinear;
684 int penRfExLinear;
685 SimpleFunc costMin;
686 SimpleFunc costFloor;
687 SimpleFunc nCeil;
688 bool nCatPair;
689 int multiseedMms;
690 int multiseedLen;
691 SimpleFunc msIval;
692 SimpleFunc posfrac;
693 SimpleFunc rowmult;
694 uint32_t mhits;
695
696 {
697 cout << "Case 1: Defaults 1 ... ";
698 const char *pol = "";
699 SeedAlignmentPolicy::parseString(
700 string(pol),
701 false, // --local?
702 false, // noisy homopolymers a la 454?
703 false, // ignore qualities?
704 bonusMatchType,
705 bonusMatch,
706 penMmcType,
707 penMmc,
708 penScMax,
709 penScMin,
710 penNType,
711 penN,
712 penRdExConst,
713 penRfExConst,
714 penRdExLinear,
715 penRfExLinear,
716 costMin,
717 costFloor,
718 nCeil,
719 nCatPair,
720 multiseedMms,
721 multiseedLen,
722 msIval,
723 mhits);
724
725 assert_eq(DEFAULT_MATCH_BONUS_TYPE, bonusMatchType);
726 assert_eq(DEFAULT_MATCH_BONUS, bonusMatch);
727 assert_eq(DEFAULT_MM_PENALTY_TYPE, penMmcType);
728 assert_eq(DEFAULT_MM_PENALTY_MAX, penMmcMax);
729 assert_eq(DEFAULT_MM_PENALTY_MIN, penMmcMin);
730 assert_eq(DEFAULT_N_PENALTY_TYPE, penNType);
731 assert_eq(DEFAULT_N_PENALTY, penN);
732 assert_eq(DEFAULT_MIN_CONST, costMin.getConst());
733 assert_eq(DEFAULT_MIN_LINEAR, costMin.getCoeff());
734 assert_eq(DEFAULT_FLOOR_CONST, costFloor.getConst());
735 assert_eq(DEFAULT_FLOOR_LINEAR, costFloor.getCoeff());
736 assert_eq(DEFAULT_N_CEIL_CONST, nCeil.getConst());
737 assert_eq(DEFAULT_N_CAT_PAIR, nCatPair);
738
739 assert_eq(DEFAULT_READ_GAP_CONST, penRdExConst);
740 assert_eq(DEFAULT_READ_GAP_LINEAR, penRdExLinear);
741 assert_eq(DEFAULT_REF_GAP_CONST, penRfExConst);
742 assert_eq(DEFAULT_REF_GAP_LINEAR, penRfExLinear);
743 assert_eq(DEFAULT_SEEDMMS, multiseedMms);
744 assert_eq(DEFAULT_SEEDLEN, multiseedLen);
745 assert_eq(DEFAULT_IVAL, msIval.getType());
746 assert_eq(DEFAULT_IVAL_A, msIval.getCoeff());
747 assert_eq(DEFAULT_IVAL_B, msIval.getConst());
748
749 cout << "PASSED" << endl;
750 }
751
752 {
753 cout << "Case 2: Defaults 2 ... ";
754 const char *pol = "";
755 SeedAlignmentPolicy::parseString(
756 string(pol),
757 false, // --local?
758 true, // noisy homopolymers a la 454?
759 false, // ignore qualities?
760 bonusMatchType,
761 bonusMatch,
762 penMmcType,
763 penMmc,
764
765 penNType,
766 penN,
767 penRdExConst,
768 penRfExConst,
769 penRdExLinear,
770 penRfExLinear,
771 costMin,
772 costFloor,
773 nCeil,
774 nCatPair,
775 multiseedMms,
776 multiseedLen,
777 msIval,
778 mhits);
779
780 assert_eq(DEFAULT_MATCH_BONUS_TYPE, bonusMatchType);
781 assert_eq(DEFAULT_MATCH_BONUS, bonusMatch);
782 assert_eq(DEFAULT_MM_PENALTY_TYPE, penMmcType);
783 assert_eq(DEFAULT_MM_PENALTY_MAX, penMmc);
784 assert_eq(DEFAULT_MM_PENALTY_MIN, penMmc);
785 assert_eq(DEFAULT_N_PENALTY_TYPE, penNType);
786 assert_eq(DEFAULT_N_PENALTY, penN);
787 assert_eq(DEFAULT_MIN_CONST, costMin.getConst());
788 assert_eq(DEFAULT_MIN_LINEAR, costMin.getCoeff());
789 assert_eq(DEFAULT_FLOOR_CONST, costFloor.getConst());
790 assert_eq(DEFAULT_FLOOR_LINEAR, costFloor.getCoeff());
791 assert_eq(DEFAULT_N_CEIL_CONST, nCeil.getConst());
792 assert_eq(DEFAULT_N_CAT_PAIR, nCatPair);
793
794 assert_eq(DEFAULT_READ_GAP_CONST_BADHPOLY, penRdExConst);
795 assert_eq(DEFAULT_READ_GAP_LINEAR_BADHPOLY, penRdExLinear);
796 assert_eq(DEFAULT_REF_GAP_CONST_BADHPOLY, penRfExConst);
797 assert_eq(DEFAULT_REF_GAP_LINEAR_BADHPOLY, penRfExLinear);
798 assert_eq(DEFAULT_SEEDMMS, multiseedMms);
799 assert_eq(DEFAULT_SEEDLEN, multiseedLen);
800 assert_eq(DEFAULT_IVAL, msIval.getType());
801 assert_eq(DEFAULT_IVAL_A, msIval.getCoeff());
802 assert_eq(DEFAULT_IVAL_B, msIval.getConst());
803
804 cout << "PASSED" << endl;
805 }
806
807 {
808 cout << "Case 3: Defaults 3 ... ";
809 const char *pol = "";
810 SeedAlignmentPolicy::parseString(
811 string(pol),
812 true, // --local?
813 false, // noisy homopolymers a la 454?
814 false, // ignore qualities?
815 bonusMatchType,
816 bonusMatch,
817 penMmcType,
818 penMmc,
819 penNType,
820 penN,
821 penRdExConst,
822 penRfExConst,
823 penRdExLinear,
824 penRfExLinear,
825 costMin,
826 costFloor,
827 nCeil,
828 nCatPair,
829 multiseedMms,
830 multiseedLen,
831 msIval,
832 mhits);
833
834 assert_eq(DEFAULT_MATCH_BONUS_TYPE_LOCAL, bonusMatchType);
835 assert_eq(DEFAULT_MATCH_BONUS_LOCAL, bonusMatch);
836 assert_eq(DEFAULT_MM_PENALTY_TYPE, penMmcType);
837 assert_eq(DEFAULT_MM_PENALTY_MAX, penMmcMax);
838 assert_eq(DEFAULT_MM_PENALTY_MIN, penMmcMin);
839 assert_eq(DEFAULT_N_PENALTY_TYPE, penNType);
840 assert_eq(DEFAULT_N_PENALTY, penN);
841 assert_eq(DEFAULT_MIN_CONST_LOCAL, costMin.getConst());
842 assert_eq(DEFAULT_MIN_LINEAR_LOCAL, costMin.getCoeff());
843 assert_eq(DEFAULT_FLOOR_CONST_LOCAL, costFloor.getConst());
844 assert_eq(DEFAULT_FLOOR_LINEAR_LOCAL, costFloor.getCoeff());
845 assert_eq(DEFAULT_N_CEIL_CONST, nCeil.getConst());
846 assert_eq(DEFAULT_N_CEIL_LINEAR, nCeil.getCoeff());
847 assert_eq(DEFAULT_N_CAT_PAIR, nCatPair);
848
849 assert_eq(DEFAULT_READ_GAP_CONST, penRdExConst);
850 assert_eq(DEFAULT_READ_GAP_LINEAR, penRdExLinear);
851 assert_eq(DEFAULT_REF_GAP_CONST, penRfExConst);
852 assert_eq(DEFAULT_REF_GAP_LINEAR, penRfExLinear);
853 assert_eq(DEFAULT_SEEDMMS, multiseedMms);
854 assert_eq(DEFAULT_SEEDLEN, multiseedLen);
855 assert_eq(DEFAULT_IVAL, msIval.getType());
856 assert_eq(DEFAULT_IVAL_A, msIval.getCoeff());
857 assert_eq(DEFAULT_IVAL_B, msIval.getConst());
858
859 cout << "PASSED" << endl;
860 }
861
862 {
863 cout << "Case 4: Simple string 1 ... ";
864 const char *pol = "MMP=C44;MA=4;RFG=24,12;FL=C,8;RDG=2;NP=C4;MIN=C,7";
865 SeedAlignmentPolicy::parseString(
866 string(pol),
867 true, // --local?
868 false, // noisy homopolymers a la 454?
869 false, // ignore qualities?
870 bonusMatchType,
871 bonusMatch,
872 penMmcType,
873 penMmc,
874 penNType,
875 penN,
876 penRdExConst,
877 penRfExConst,
878 penRdExLinear,
879 penRfExLinear,
880 costMin,
881 costFloor,
882 nCeil,
883 nCatPair,
884 multiseedMms,
885 multiseedLen,
886 msIval,
887 mhits);
888
889 assert_eq(COST_MODEL_CONSTANT, bonusMatchType);
890 assert_eq(4, bonusMatch);
891 assert_eq(COST_MODEL_CONSTANT, penMmcType);
892 assert_eq(44, penMmc);
893 assert_eq(COST_MODEL_CONSTANT, penNType);
894 assert_eq(4.0f, penN);
895 assert_eq(7, costMin.getConst());
896 assert_eq(DEFAULT_MIN_LINEAR_LOCAL, costMin.getCoeff());
897 assert_eq(8, costFloor.getConst());
898 assert_eq(DEFAULT_FLOOR_LINEAR_LOCAL, costFloor.getCoeff());
899 assert_eq(DEFAULT_N_CEIL_CONST, nCeil.getConst());
900 assert_eq(DEFAULT_N_CEIL_LINEAR, nCeil.getCoeff());
901 assert_eq(DEFAULT_N_CAT_PAIR, nCatPair);
902
903 assert_eq(2.0f, penRdExConst);
904 assert_eq(DEFAULT_READ_GAP_LINEAR, penRdExLinear);
905 assert_eq(24.0f, penRfExConst);
906 assert_eq(12.0f, penRfExLinear);
907 assert_eq(DEFAULT_SEEDMMS, multiseedMms);
908 assert_eq(DEFAULT_SEEDLEN, multiseedLen);
909 assert_eq(DEFAULT_IVAL, msIval.getType());
910 assert_eq(DEFAULT_IVAL_A, msIval.getCoeff());
911 assert_eq(DEFAULT_IVAL_B, msIval.getConst());
912
913 cout << "PASSED" << endl;
914 }
915 }
916 #endif /*def ALIGNER_SEED_POLICY_MAIN*/
917