1 /* 2 * aligner_1mm.h 3 */ 4 5 #ifndef ALIGNER_1MM_H_ 6 #define ALIGNER_1MM_H_ 7 8 #include <utility> 9 10 #include "aligner.h" 11 #include "ds.h" 12 #include "hit.h" 13 #include "range_source.h" 14 #include "row_chaser.h" 15 #include "range_chaser.h" 16 #include "ref_aligner.h" 17 #include "sstring.h" 18 19 /** 20 * Concrete factory class for constructing unpaired exact aligners. 21 */ 22 class Unpaired1mmAlignerV1Factory : public AlignerFactory { 23 typedef RangeSourceDriver<EbwtRangeSource> TRangeSrcDr; 24 typedef CostAwareRangeSourceDriver<EbwtRangeSource> TCostAwareRangeSrcDr; 25 typedef EList<TRangeSrcDr*> TRangeSrcDrPtrVec; 26 public: Unpaired1mmAlignerV1Factory(Ebwt & ebwtFw,Ebwt * ebwtBw,bool doFw,bool doRc,HitSink & sink,const HitSinkPerThreadFactory & sinkPtFactory,RangeCache * cacheFw,RangeCache * cacheBw,uint32_t cacheLimit,ChunkPool * pool,BitPairReference * refs,EList<BTRefString> & os,bool maqPenalty,bool qualOrder,bool strandFix,bool rangeMode,bool verbose,bool quiet,uint32_t seed)27 Unpaired1mmAlignerV1Factory( 28 Ebwt& ebwtFw, 29 Ebwt* ebwtBw, 30 bool doFw, 31 bool doRc, 32 HitSink& sink, 33 const HitSinkPerThreadFactory& sinkPtFactory, 34 RangeCache *cacheFw, 35 RangeCache *cacheBw, 36 uint32_t cacheLimit, 37 ChunkPool *pool, 38 BitPairReference *refs, 39 EList<BTRefString >& os, 40 bool maqPenalty, 41 bool qualOrder, 42 bool strandFix, 43 bool rangeMode, 44 bool verbose, 45 bool quiet, 46 uint32_t seed) : 47 ebwtFw_(ebwtFw), 48 ebwtBw_(ebwtBw), 49 doFw_(doFw), 50 doRc_(doRc), 51 sink_(sink), 52 sinkPtFactory_(sinkPtFactory), 53 cacheFw_(cacheFw), 54 cacheBw_(cacheBw), 55 cacheLimit_(cacheLimit), 56 pool_(pool), 57 os_(os), refs_(refs), 58 maqPenalty_(maqPenalty), 59 qualOrder_(qualOrder), 60 strandFix_(strandFix), 61 rangeMode_(rangeMode), 62 verbose_(verbose), 63 quiet_(quiet) 64 { 65 assert(ebwtFw.isInMemory()); 66 assert(ebwtBw != NULL); 67 assert(ebwtBw->isInMemory()); 68 } 69 70 /** 71 * Create a new UnpairedExactAlignerV1s. 72 */ create()73 virtual Aligner* create() const { 74 75 HitSinkPerThread* sinkPt = sinkPtFactory_.create(); 76 EbwtSearchParams* params = 77 new EbwtSearchParams(*sinkPt, os_); 78 79 const int halfAndHalf = 0; 80 const bool seeded = false; 81 82 EbwtRangeSource *rFw_Bw = new EbwtRangeSource( 83 ebwtBw_, true, OFF_MASK, true, verbose_, quiet_, halfAndHalf, seeded, maqPenalty_, qualOrder_); 84 EbwtRangeSource *rFw_Fw = new EbwtRangeSource( 85 &ebwtFw_, true, OFF_MASK, false, verbose_, quiet_, halfAndHalf, seeded, maqPenalty_, qualOrder_); 86 87 EbwtRangeSourceDriver * drFw_Bw = new EbwtRangeSourceDriver( 88 *params, rFw_Bw, true, false, maqPenalty_, qualOrder_, sink_, sinkPt, 89 0, // seedLen (0 = whole read is seed) 90 false, // nudgeLeft (true for Fw index, false for Bw) 91 PIN_TO_HI_HALF_EDGE, // right half is unrevisitable 92 PIN_TO_LEN, // allow 1 mismatch in rest of read 93 PIN_TO_LEN, // " 94 PIN_TO_LEN, // " 95 os_, verbose_, quiet_, true, pool_, NULL); 96 // 97 EbwtRangeSourceDriver * drFw_Fw = new EbwtRangeSourceDriver( 98 *params, rFw_Fw, true, false, maqPenalty_, qualOrder_, sink_, sinkPt, 99 0, // seedLen (0 = whole read is seed) 100 true, // nudgeLeft (true for Fw index, false for Bw) 101 PIN_TO_HI_HALF_EDGE, // right half is unrevisitable 102 PIN_TO_LEN, // allow 1 mismatch in rest of read 103 PIN_TO_LEN, // " 104 PIN_TO_LEN, // " 105 os_, verbose_, quiet_, true, pool_, NULL); 106 TRangeSrcDrPtrVec *drVec = new TRangeSrcDrPtrVec(); 107 if(doFw_) { 108 drVec->push_back(drFw_Bw); 109 drVec->push_back(drFw_Fw); 110 } 111 112 EbwtRangeSource *rRc_Fw = new EbwtRangeSource( 113 &ebwtFw_, false, OFF_MASK, true, verbose_, quiet_, halfAndHalf, seeded, maqPenalty_, qualOrder_); 114 EbwtRangeSource *rRc_Bw = new EbwtRangeSource( 115 ebwtBw_, false, OFF_MASK, false, verbose_, quiet_, halfAndHalf, seeded, maqPenalty_, qualOrder_); 116 117 EbwtRangeSourceDriver * drRc_Fw = new EbwtRangeSourceDriver( 118 *params, rRc_Fw, false, false, maqPenalty_, qualOrder_, sink_, sinkPt, 119 0, // seedLen (0 = whole read is seed) 120 true, // nudgeLeft (true for Fw index, false for Bw) 121 PIN_TO_HI_HALF_EDGE, // right half is unrevisitable 122 PIN_TO_LEN, // allow 1 mismatch in rest of read 123 PIN_TO_LEN, // " 124 PIN_TO_LEN, // " 125 os_, verbose_, quiet_, true, pool_, NULL); 126 // 127 EbwtRangeSourceDriver * drRc_Bw = new EbwtRangeSourceDriver( 128 *params, rRc_Bw, false, false, maqPenalty_, qualOrder_, sink_, sinkPt, 129 0, // seedLen (0 = whole read is seed) 130 false, // nudgeLeft (true for Fw index, false for Bw) 131 PIN_TO_HI_HALF_EDGE, // right half is unrevisitable 132 PIN_TO_LEN, // allow 1 mismatch in rest of read 133 PIN_TO_LEN, // " 134 PIN_TO_LEN, // " 135 os_, verbose_, quiet_, true, pool_, NULL); 136 if(doRc_) { 137 drVec->push_back(drRc_Fw); 138 drVec->push_back(drRc_Bw); 139 } 140 TCostAwareRangeSrcDr* dr = new TCostAwareRangeSrcDr(strandFix_, drVec, verbose_, quiet_, false); 141 delete drVec; 142 143 // Set up a RangeChaser 144 RangeChaser *rchase = 145 new RangeChaser(cacheLimit_, cacheFw_, cacheBw_); 146 147 // Set up the aligner 148 return new UnpairedAlignerV2<EbwtRangeSource>( 149 params, dr, rchase, 150 sink_, sinkPtFactory_, sinkPt, os_, refs_, 151 rangeMode_, verbose_, quiet_, INT_MAX, pool_, NULL, NULL); 152 } 153 154 private: 155 Ebwt& ebwtFw_; 156 Ebwt* ebwtBw_; 157 bool doFw_; 158 bool doRc_; 159 HitSink& sink_; 160 const HitSinkPerThreadFactory& sinkPtFactory_; 161 RangeCache *cacheFw_; 162 RangeCache *cacheBw_; 163 const uint32_t cacheLimit_; 164 ChunkPool *pool_; 165 EList<BTRefString >& os_; 166 BitPairReference *refs_; 167 const bool maqPenalty_; 168 const bool qualOrder_; 169 bool strandFix_; 170 bool rangeMode_; 171 bool verbose_; 172 bool quiet_; 173 }; 174 175 /** 176 * Concrete factory class for constructing unpaired exact aligners. 177 */ 178 class Paired1mmAlignerV1Factory : public AlignerFactory { 179 typedef RangeSourceDriver<EbwtRangeSource> TRangeSrcDr; 180 typedef CostAwareRangeSourceDriver<EbwtRangeSource> TCostAwareRangeSrcDr; 181 typedef EList<TRangeSrcDr*> TRangeSrcDrPtrVec; 182 public: Paired1mmAlignerV1Factory(Ebwt & ebwtFw,Ebwt * ebwtBw,bool doFw,bool doRc,bool v1,HitSink & sink,const HitSinkPerThreadFactory & sinkPtFactory,bool mate1fw,bool mate2fw,uint32_t peInner,uint32_t peOuter,bool dontReconcile,uint32_t symCeil,uint32_t mixedThresh,uint32_t mixedAttemptLim,RangeCache * cacheFw,RangeCache * cacheBw,uint32_t cacheLimit,ChunkPool * pool,BitPairReference * refs,EList<BTRefString> & os,bool reportSe,bool maqPenalty,bool qualOrder,bool strandFix,bool rangeMode,bool verbose,bool quiet,uint32_t seed)183 Paired1mmAlignerV1Factory( 184 Ebwt& ebwtFw, 185 Ebwt* ebwtBw, 186 bool doFw, 187 bool doRc, 188 bool v1, 189 HitSink& sink, 190 const HitSinkPerThreadFactory& sinkPtFactory, 191 bool mate1fw, 192 bool mate2fw, 193 uint32_t peInner, 194 uint32_t peOuter, 195 bool dontReconcile, 196 uint32_t symCeil, 197 uint32_t mixedThresh, 198 uint32_t mixedAttemptLim, 199 RangeCache *cacheFw, 200 RangeCache *cacheBw, 201 uint32_t cacheLimit, 202 ChunkPool *pool, 203 BitPairReference* refs, 204 EList<BTRefString >& os, 205 bool reportSe, 206 bool maqPenalty, 207 bool qualOrder, 208 bool strandFix, 209 bool rangeMode, 210 bool verbose, 211 bool quiet, 212 uint32_t seed) : 213 ebwtFw_(ebwtFw), 214 ebwtBw_(ebwtBw), 215 doFw_(doFw), 216 doRc_(doRc), 217 v1_(v1), 218 sink_(sink), 219 sinkPtFactory_(sinkPtFactory), 220 mate1fw_(mate1fw), 221 mate2fw_(mate2fw), 222 peInner_(peInner), 223 peOuter_(peOuter), 224 dontReconcile_(dontReconcile), 225 symCeil_(symCeil), 226 mixedThresh_(mixedThresh), 227 mixedAttemptLim_(mixedAttemptLim), 228 cacheFw_(cacheFw), 229 cacheBw_(cacheBw), 230 cacheLimit_(cacheLimit), 231 pool_(pool), 232 refs_(refs), os_(os), 233 reportSe_(reportSe), 234 maqPenalty_(maqPenalty), 235 qualOrder_(qualOrder), 236 strandFix_(strandFix), 237 rangeMode_(rangeMode), 238 verbose_(verbose), 239 quiet_(quiet) 240 { 241 assert(ebwtBw != NULL); 242 assert(ebwtFw.isInMemory()); 243 assert(ebwtBw->isInMemory()); 244 } 245 246 /** 247 * Create a new UnpairedExactAlignerV1s. 248 */ create()249 virtual Aligner* create() const { 250 HitSinkPerThread* sinkPt = sinkPtFactory_.createMult(2); 251 HitSinkPerThread* sinkPtSe1 = NULL, * sinkPtSe2 = NULL; 252 EbwtSearchParams* params = 253 new EbwtSearchParams(*sinkPt, os_); 254 EbwtSearchParams* paramsSe1 = NULL, * paramsSe2 = NULL; 255 if(reportSe_) { 256 sinkPtSe1 = sinkPtFactory_.create(); 257 sinkPtSe2 = sinkPtFactory_.create(); 258 paramsSe1 = 259 new EbwtSearchParams(*sinkPtSe1, os_); 260 paramsSe2 = 261 new EbwtSearchParams(*sinkPtSe2, os_); 262 } 263 264 const int halfAndHalf = 0; 265 const bool seeded = false; 266 267 bool do1Fw = true; 268 bool do1Rc = true; 269 bool do2Fw = true; 270 bool do2Rc = true; 271 if(!doFw_) { 272 if(mate1fw_) do1Fw = false; 273 else do1Rc = false; 274 if(mate2fw_) do2Fw = false; 275 else do2Rc = false; 276 } 277 if(!doRc_) { 278 if(mate1fw_) do1Rc = false; 279 else do1Fw = false; 280 if(mate2fw_) do2Rc = false; 281 else do2Fw = false; 282 } 283 284 TRangeSrcDrPtrVec *dr1FwVec; 285 dr1FwVec = new TRangeSrcDrPtrVec(); 286 if(do1Fw) { 287 EbwtRangeSource *r1Fw_Bw = new EbwtRangeSource( 288 ebwtBw_, true, OFF_MASK, true, verbose_, quiet_, halfAndHalf, seeded, maqPenalty_, qualOrder_); 289 EbwtRangeSource *r1Fw_Fw = new EbwtRangeSource( 290 &ebwtFw_, true, OFF_MASK, false, verbose_, quiet_, halfAndHalf, seeded, maqPenalty_, qualOrder_); 291 292 EbwtRangeSourceDriver * dr1Fw_Bw = new EbwtRangeSourceDriver( 293 *params, r1Fw_Bw, true, false, maqPenalty_, qualOrder_, sink_, sinkPt, 294 0, // seedLen (0 = whole read is seed) 295 true, // nudgeLeft (true for Fw index, false for Bw) 296 PIN_TO_HI_HALF_EDGE, // right half is unrevisitable 297 PIN_TO_LEN, // allow 1 mismatch in rest of read 298 PIN_TO_LEN, // " 299 PIN_TO_LEN, // " 300 os_, verbose_, quiet_, true, pool_, NULL); 301 EbwtRangeSourceDriver * dr1Fw_Fw = new EbwtRangeSourceDriver( 302 *params, r1Fw_Fw, true, false, maqPenalty_, qualOrder_, sink_, sinkPt, 303 0, // seedLen 304 false, // nudgeLeft (true for Fw index, false for Bw) 305 PIN_TO_HI_HALF_EDGE, // right-hand half alignment is unrevisitable 306 PIN_TO_LEN, // " 307 PIN_TO_LEN, // " 308 PIN_TO_LEN, // " 309 os_, verbose_, quiet_, true, pool_, NULL); 310 311 dr1FwVec->push_back(dr1Fw_Bw); 312 dr1FwVec->push_back(dr1Fw_Fw); 313 } 314 315 TRangeSrcDrPtrVec *dr1RcVec; 316 if(v1_) { 317 dr1RcVec = new TRangeSrcDrPtrVec(); 318 } else { 319 dr1RcVec = dr1FwVec; 320 } 321 if(do1Rc) { 322 EbwtRangeSource *r1Rc_Fw = new EbwtRangeSource( 323 &ebwtFw_, false, OFF_MASK, true, verbose_, quiet_, halfAndHalf, seeded, maqPenalty_, qualOrder_); 324 EbwtRangeSource *r1Rc_Bw = new EbwtRangeSource( 325 ebwtBw_, false, OFF_MASK, false, verbose_, quiet_, halfAndHalf, seeded, maqPenalty_, qualOrder_); 326 327 EbwtRangeSourceDriver * dr1Rc_Fw = new EbwtRangeSourceDriver( 328 *params, r1Rc_Fw, false, false, maqPenalty_, qualOrder_, sink_, sinkPt, 329 0, // seedLen 330 true, // nudgeLeft (true for Fw index, false for Bw) 331 PIN_TO_HI_HALF_EDGE, // right-hand half alignment is unrevisitable 332 PIN_TO_LEN, // " 333 PIN_TO_LEN, // " 334 PIN_TO_LEN, // " 335 os_, verbose_, quiet_, true, pool_, NULL); 336 EbwtRangeSourceDriver * dr1Rc_Bw = new EbwtRangeSourceDriver( 337 *params, r1Rc_Bw, false, false, maqPenalty_, qualOrder_, sink_, sinkPt, 338 0, // seedLen (0 = whole read is seed) 339 false, // nudgeLeft (true for Fw index, false for Bw) 340 PIN_TO_HI_HALF_EDGE, // right half is unrevisitable 341 PIN_TO_LEN, // allow 1 mismatch in rest of read 342 PIN_TO_LEN, // " 343 PIN_TO_LEN, // " 344 os_, verbose_, quiet_, true, pool_, NULL); 345 dr1RcVec->push_back(dr1Rc_Fw); 346 dr1RcVec->push_back(dr1Rc_Bw); 347 } 348 349 TRangeSrcDrPtrVec *dr2FwVec; 350 if(v1_) { 351 dr2FwVec = new TRangeSrcDrPtrVec(); 352 } else { 353 dr2FwVec = dr1FwVec; 354 } 355 if(do2Fw) { 356 EbwtRangeSource *r2Fw_Bw = new EbwtRangeSource( 357 ebwtBw_, true, OFF_MASK, true, verbose_, quiet_, halfAndHalf, seeded, maqPenalty_, qualOrder_); 358 EbwtRangeSource *r2Fw_Fw = new EbwtRangeSource( 359 &ebwtFw_, true, OFF_MASK, false, verbose_, quiet_, halfAndHalf, seeded, maqPenalty_, qualOrder_); 360 361 EbwtRangeSourceDriver * dr2Fw_Bw = new EbwtRangeSourceDriver( 362 *params, r2Fw_Bw, true, false, maqPenalty_, qualOrder_, sink_, sinkPt, 363 0, // seedLen (0 = whole read is seed) 364 true, // nudgeLeft (true for Fw index, false for Bw) 365 PIN_TO_HI_HALF_EDGE, // right half is unrevisitable 366 PIN_TO_LEN, // allow 1 mismatch in rest of read 367 PIN_TO_LEN, // " 368 PIN_TO_LEN, // " 369 os_, verbose_, quiet_, false, pool_, NULL); 370 EbwtRangeSourceDriver * dr2Fw_Fw = new EbwtRangeSourceDriver( 371 *params, r2Fw_Fw, true, false, maqPenalty_, qualOrder_, sink_, sinkPt, 372 0, // seedLen 373 false, // nudgeLeft (true for Fw index, false for Bw) 374 PIN_TO_HI_HALF_EDGE, // right-hand half alignment is unrevisitable 375 PIN_TO_LEN, // " 376 PIN_TO_LEN, // " 377 PIN_TO_LEN, // " 378 os_, verbose_, quiet_, false, pool_, NULL); 379 dr2FwVec->push_back(dr2Fw_Bw); 380 dr2FwVec->push_back(dr2Fw_Fw); 381 } 382 383 TRangeSrcDrPtrVec *dr2RcVec; 384 if(v1_) { 385 dr2RcVec = new TRangeSrcDrPtrVec(); 386 } else { 387 dr2RcVec = dr1FwVec; 388 } 389 if(do2Rc) { 390 EbwtRangeSource *r2Rc_Fw = new EbwtRangeSource( 391 &ebwtFw_, false, OFF_MASK, true, verbose_, quiet_, halfAndHalf, seeded, maqPenalty_, qualOrder_); 392 EbwtRangeSource *r2Rc_Bw = new EbwtRangeSource( 393 ebwtBw_, false, OFF_MASK, false, verbose_, quiet_, halfAndHalf, seeded, maqPenalty_, qualOrder_); 394 395 EbwtRangeSourceDriver * dr2Rc_Fw = new EbwtRangeSourceDriver( 396 *params, r2Rc_Fw, false, false, maqPenalty_, qualOrder_, sink_, sinkPt, 397 0, // seedLen 398 true, // nudgeLeft (true for Fw index, false for Bw) 399 PIN_TO_HI_HALF_EDGE, // right-hand half alignment is unrevisitable 400 PIN_TO_LEN, // " 401 PIN_TO_LEN, // " 402 PIN_TO_LEN, // " 403 os_, verbose_, quiet_, false, pool_, NULL); 404 EbwtRangeSourceDriver * dr2Rc_Bw = new EbwtRangeSourceDriver( 405 *params, r2Rc_Bw, false, false, maqPenalty_, qualOrder_, sink_, sinkPt, 406 0, // seedLen (0 = whole read is seed) 407 false, // nudgeLeft (true for Fw index, false for Bw) 408 PIN_TO_HI_HALF_EDGE, // right half is unrevisitable 409 PIN_TO_LEN, // allow 1 mismatch in rest of read 410 PIN_TO_LEN, // " 411 PIN_TO_LEN, // " 412 os_, verbose_, quiet_, false, pool_, NULL); 413 dr2RcVec->push_back(dr2Rc_Fw); 414 dr2RcVec->push_back(dr2Rc_Bw); 415 } 416 417 RefAligner* refAligner = 418 new OneMMRefAligner(verbose_, quiet_); 419 420 // Set up a RangeChaser 421 RangeChaser *rchase = 422 new RangeChaser(cacheLimit_, cacheFw_, cacheBw_); 423 424 if(v1_) { 425 PairedBWAlignerV1<EbwtRangeSource>* al = new PairedBWAlignerV1<EbwtRangeSource>( 426 params, 427 new TCostAwareRangeSrcDr(strandFix_, dr1FwVec, verbose_, quiet_, false), 428 new TCostAwareRangeSrcDr(strandFix_, dr1RcVec, verbose_, quiet_, false), 429 new TCostAwareRangeSrcDr(strandFix_, dr2FwVec, verbose_, quiet_, false), 430 new TCostAwareRangeSrcDr(strandFix_, dr2RcVec, verbose_, quiet_, false), 431 refAligner, rchase, 432 sink_, sinkPtFactory_, sinkPt, mate1fw_, mate2fw_, 433 peInner_, peOuter_, dontReconcile_, symCeil_, mixedThresh_, 434 mixedAttemptLim_, refs_, rangeMode_, verbose_, 435 quiet_, INT_MAX, pool_, NULL); 436 delete dr1FwVec; 437 delete dr1RcVec; 438 delete dr2FwVec; 439 delete dr2RcVec; 440 return al; 441 } else { 442 PairedBWAlignerV2<EbwtRangeSource>* al = new PairedBWAlignerV2<EbwtRangeSource>( 443 params, paramsSe1, paramsSe2, 444 new TCostAwareRangeSrcDr(strandFix_, dr1FwVec, verbose_, quiet_, true), 445 refAligner, rchase, 446 sink_, sinkPtFactory_, 447 sinkPt, sinkPtSe1, sinkPtSe2, 448 mate1fw_, mate2fw_, 449 peInner_, peOuter_, 450 mixedAttemptLim_, refs_, rangeMode_, 451 verbose_, quiet_, INT_MAX, pool_, NULL); 452 delete dr1FwVec; 453 return al; 454 } 455 } 456 457 private: 458 Ebwt& ebwtFw_; 459 Ebwt* ebwtBw_; 460 bool doFw_; 461 bool doRc_; 462 bool v1_; 463 HitSink& sink_; 464 const HitSinkPerThreadFactory& sinkPtFactory_; 465 const bool mate1fw_; 466 const bool mate2fw_; 467 const uint32_t peInner_; 468 const uint32_t peOuter_; 469 const bool dontReconcile_; 470 const uint32_t symCeil_; 471 const uint32_t mixedThresh_; 472 const uint32_t mixedAttemptLim_; 473 RangeCache *cacheFw_; 474 RangeCache *cacheBw_; 475 const uint32_t cacheLimit_; 476 ChunkPool *pool_; 477 BitPairReference* refs_; 478 EList<BTRefString >& os_; 479 const bool reportSe_; 480 const bool maqPenalty_; 481 const bool qualOrder_; 482 const bool strandFix_; 483 const bool rangeMode_; 484 const bool verbose_; 485 const bool quiet_; 486 }; 487 488 #endif /* ALIGNER_1MM_H_ */ 489