1 package assemble; 2 3 import java.util.ArrayList; 4 import java.util.Arrays; 5 import java.util.BitSet; 6 import java.util.concurrent.atomic.AtomicInteger; 7 8 import dna.AminoAcid; 9 import jgi.BBMerge; 10 import shared.KillSwitch; 11 import shared.Shared; 12 import shared.Timer; 13 import shared.Tools; 14 import stream.ConcurrentReadInputStream; 15 import stream.Read; 16 import structures.ByteBuilder; 17 import structures.IntList; 18 import structures.ListNum; 19 import structures.LongList; 20 import ukmer.AbstractKmerTableU; 21 import ukmer.HashArrayU1D; 22 import ukmer.HashForestU; 23 import ukmer.Kmer; 24 import ukmer.KmerNodeU; 25 import ukmer.KmerTableSetU; 26 27 28 /** 29 * Long-kmer assembler based on KmerCountExact. 30 * @author Brian Bushnell 31 * @date May 15, 2015 32 * 33 */ 34 public class Tadpole2 extends Tadpole { 35 36 /** 37 * Code entrance from the command line. 38 * @param args Command line arguments 39 */ main(String[] args)40 public static void main(String[] args){ 41 Timer t=new Timer(), t2=new Timer(); 42 t.start(); 43 t2.start(); 44 45 //Create a new CountKmersExact instance 46 Tadpole2 wog=new Tadpole2(args, true); 47 t2.stop(); 48 outstream.println("Initialization Time: \t"+t2); 49 50 ///And run it 51 wog.process(t); 52 } 53 54 /** 55 * Constructor. 56 * @param args Command line arguments 57 */ Tadpole2(String[] args, boolean setDefaults)58 public Tadpole2(String[] args, boolean setDefaults){ 59 super(args, setDefaults); 60 61 final int extraBytesPerKmer; 62 { 63 int x=0; 64 if(useOwnership){x+=4;} 65 if(processingMode==correctMode || processingMode==discardMode){} 66 else if(processingMode==contigMode || processingMode==extendMode){x+=1;} 67 extraBytesPerKmer=x; 68 } 69 70 tables=new KmerTableSetU(args, extraBytesPerKmer); 71 assert(kbig==tables.kbig) : kbig+", "+tables.kbig; 72 // kbig=tables.kbig; 73 ksmall=tables.k; 74 // k2=tables.k2; 75 // ways=tables.ways; 76 } 77 78 79 /*--------------------------------------------------------------*/ 80 /*---------------- Outer Methods ----------------*/ 81 /*--------------------------------------------------------------*/ 82 83 84 /*--------------------------------------------------------------*/ 85 /*---------------- Inner Methods ----------------*/ 86 /*--------------------------------------------------------------*/ 87 88 @Override initializeOwnership()89 void initializeOwnership(){ 90 tables.initializeOwnership(); 91 } 92 93 @Override shave(boolean shave, boolean rinse)94 long shave(boolean shave, boolean rinse){ 95 long sum=0; 96 97 for(int i=0; i<maxShaveDepth; i++){ 98 int a=1, b=maxShaveDepth, c=i+1; 99 // if(i>3){Shaver2.verbose2=true;} 100 outstream.println("\nShave("+a+", "+b+", "+c+")"); 101 final Shaver shaver=Shaver.makeShaver(tables, THREADS, a, b, c, minCountExtend, branchMult2, Tools.max(minContigLen, shaveDiscardLen), shaveExploreDist, shave, rinse); 102 long removed=shaver.shave(a, b); 103 104 sum+=removed; 105 if(removed<100 || i>2){break;} 106 } 107 108 outstream.println(); 109 return sum; 110 } 111 112 @Override loadKmers(Timer t)113 public long loadKmers(Timer t){ 114 tables.process(t); 115 return tables.kmersLoaded; 116 } 117 118 /*--------------------------------------------------------------*/ 119 /*---------------- Recall Methods ----------------*/ 120 /*--------------------------------------------------------------*/ 121 getCount(Kmer kmer)122 public final int getCount(Kmer kmer){return tables.getCount(kmer);} claim(Kmer kmer, int id)123 final boolean claim(Kmer kmer, int id){return tables.claim(kmer, id);} doubleClaim(ByteBuilder bb, int id , Kmer kmer)124 final boolean doubleClaim(ByteBuilder bb, int id/*, long rid*/, Kmer kmer){return tables.doubleClaim(bb, id/*, rid*/, kmer);} claim(ByteBuilder bb, int id, boolean earlyExit, Kmer kmer)125 final boolean claim(ByteBuilder bb, int id, /*long rid, */boolean earlyExit, Kmer kmer){return tables.claim(bb, id/*, rid*/, earlyExit, kmer);} claim(byte[] array, int len, int id, boolean earlyExit, Kmer kmer)126 final boolean claim(byte[] array, int len, int id, /*long rid, */boolean earlyExit, Kmer kmer){return tables.claim(array, len, id/*, rid*/, earlyExit, kmer);} findOwner(Kmer kmer)127 final int findOwner(Kmer kmer){return tables.findOwner(kmer);} findOwner(ByteBuilder bb, int id, Kmer kmer)128 final int findOwner(ByteBuilder bb, int id, Kmer kmer){return tables.findOwner(bb, id, kmer);} findOwner(byte[] array, int len, int id, Kmer kmer)129 final int findOwner(byte[] array, int len, int id, Kmer kmer){return tables.findOwner(array, len, id, kmer);} release(Kmer kmer, int id)130 final void release(Kmer kmer, int id){tables.release(kmer, id);} release(ByteBuilder bb, int id, Kmer kmer)131 final void release(ByteBuilder bb, int id, Kmer kmer){tables.release(bb, id, kmer);} release(byte[] array, int len, int id, Kmer kmer)132 final void release(byte[] array, int len, int id, Kmer kmer){tables.release(array, len, id, kmer);} fillRightCounts(Kmer kmer, int[] counts)133 final int fillRightCounts(Kmer kmer, int[] counts){return tables.fillRightCounts(kmer, counts);} fillLeftCounts(Kmer kmer, int[] counts)134 final int fillLeftCounts(Kmer kmer, int[] counts){return tables.fillLeftCounts(kmer, counts);} toText(Kmer kmer)135 final static StringBuilder toText(Kmer kmer){return AbstractKmerTableU.toText(kmer);} toText(long[] key, int k)136 final static StringBuilder toText(long[] key, int k){return AbstractKmerTableU.toText(key, k);} 137 138 /*--------------------------------------------------------------*/ 139 /*---------------- Inner Classes ----------------*/ 140 /*--------------------------------------------------------------*/ 141 142 /*--------------------------------------------------------------*/ 143 /*---------------- BuildThread ----------------*/ 144 /*--------------------------------------------------------------*/ 145 146 @Override makeBuildThread(int id, int mode, ConcurrentReadInputStream[] crisa)147 BuildThread makeBuildThread(int id, int mode, ConcurrentReadInputStream[] crisa){ 148 return new BuildThread(id, mode, crisa); 149 } 150 151 /** 152 * Builds contigs. 153 */ 154 private class BuildThread extends AbstractBuildThread{ 155 BuildThread(int id_, int mode_, ConcurrentReadInputStream[] crisa_)156 public BuildThread(int id_, int mode_, ConcurrentReadInputStream[] crisa_){ 157 super(id_, mode_, crisa_); 158 } 159 160 @Override run()161 public void run(){ 162 if(crisa==null || crisa.length==0){ 163 //Build from kmers 164 165 if(id==0){outstream.print("Seeding with min count = ");} 166 String comma=""; 167 for(int i=contigPasses-1; i>0; i--){ 168 minCountSeedCurrent=(int)Tools.min(Integer.MAX_VALUE, Tools.max(minCountSeed+i, (long)Math.floor((minCountSeed)*Math.pow(contigPassMult, i)*0.92-0.25) )); 169 if(id==0){ 170 outstream.print(comma+minCountSeedCurrent); 171 comma=", "; 172 } 173 while(processNextTable(nextTable[i])){} 174 while(processNextVictims(nextVictims[i])){} 175 } 176 //Final pass 177 minCountSeedCurrent=minCountSeed; 178 if(id==0){outstream.println(comma+minCountSeedCurrent);} 179 while(processNextTable(nextTable[0])){} 180 while(processNextVictims(nextVictims[0])){} 181 }else{ 182 //Extend reads 183 for(ConcurrentReadInputStream cris : crisa){ 184 synchronized(crisa){ 185 if(!cris.started()){ 186 cris.start(); 187 } 188 } 189 run(cris); 190 } 191 } 192 } 193 processNextTable(AtomicInteger aint)194 private boolean processNextTable(AtomicInteger aint){ 195 final int tnum=aint.getAndAdd(1); 196 if(tnum>=tables.ways){return false;} 197 final HashArrayU1D table=tables.getTable(tnum); 198 final int max=table.arrayLength(); 199 if(verbose && id==0){outstream.println("Processing table "+tnum+", size "+table.size()+", length "+max);} 200 for(int cell=0; cell<max; cell++){ 201 if(verbose && id==0){outstream.println("Processing cell "+cell);} 202 int x=processCell(table, cell, myKmer); 203 } 204 return true; 205 } 206 processNextVictims(AtomicInteger aint)207 private boolean processNextVictims(AtomicInteger aint){ 208 final int tnum=aint.getAndAdd(1); 209 if(tnum>=tables.ways){return false;} 210 final HashArrayU1D table=tables.getTable(tnum); 211 final HashForestU forest=table.victims(); 212 if(verbose && id==0){outstream.println("Processing forest "+tnum+", size "+forest.size());} 213 final int max=forest.arrayLength(); 214 for(int cell=0; cell<max; cell++){ 215 KmerNodeU kn=forest.getNode(cell); 216 int x=traverseKmerNodeU(kn); 217 } 218 return true; 219 } 220 processCell(HashArrayU1D table, int cell, Kmer kmer)221 private int processCell(HashArrayU1D table, int cell, Kmer kmer){ 222 int count=table.readCellValue(cell); 223 if(count<minCountSeedCurrent){ 224 if(verbose){outstream.println("For cell "+cell+", count="+count);} 225 return 0; 226 } 227 228 kmer=table.fillKmer(cell, kmer); 229 // assert(kmer.verify(false)); 230 // assert(kmer.verify(true)); 231 232 if(verbose){outstream.println("id="+id+" processing cell "+cell+"; \tkmer="+kmer);} 233 if(useOwnership){ 234 int owner=table.getCellOwner(cell); 235 if(verbose){outstream.println("Owner is initially "+owner);} 236 if(owner>-1){return 0;} 237 owner=table.setOwner(kmer, id, cell); 238 if(verbose){outstream.println("Owner is now "+owner);} 239 if(owner!=id){return 0;} 240 } 241 return processKmer(kmer); 242 } 243 traverseKmerNodeU(KmerNodeU kn)244 private int traverseKmerNodeU(KmerNodeU kn){ 245 int sum=0; 246 if(kn!=null){ 247 sum+=processKmerNodeU(kn); 248 if(kn.left()!=null){ 249 sum+=traverseKmerNodeU(kn.left()); 250 } 251 if(kn.right()!=null){ 252 sum+=traverseKmerNodeU(kn.right()); 253 } 254 } 255 return sum; 256 } 257 processKmerNodeU(KmerNodeU kn)258 private int processKmerNodeU(KmerNodeU kn){ 259 final long[] key=kn.pivot(); 260 final int count=kn.getValue(key); 261 if(count<minCountSeedCurrent){return 0;} 262 263 if(verbose){outstream.println("id="+id+" processing KmerNodeU; \tkmer="+Arrays.toString(key)+"\t"+toText(key, ksmall));} 264 if(useOwnership){ 265 int owner=kn.getOwner(key); 266 if(verbose){outstream.println("Owner is initially "+owner);} 267 if(owner>-1){return 0;} 268 owner=kn.setOwner(key, id); 269 if(verbose){outstream.println("Owner is now "+owner);} 270 if(owner!=id){return 0;} 271 } 272 273 myKmer.setFrom(key); 274 return processKmer(myKmer); 275 } 276 processKmer(Kmer kmer)277 private int processKmer(Kmer kmer){ 278 Contig contig=makeContig(builderT, kmer, true); 279 if(contig!=null){ 280 float coverage=tables.calcCoverage(contig, kmer); 281 if(coverage<minCoverage || coverage>maxCoverage){return 0;} 282 if(verbose){outstream.println("Added "+contig.length());} 283 contig.id=(int)contigNum.incrementAndGet(); 284 contigs.add(contig); 285 return contig.length(); 286 }else{ 287 if(verbose){outstream.println("Created null contig.");} 288 } 289 return 0; 290 } 291 run(ConcurrentReadInputStream cris)292 private void run(ConcurrentReadInputStream cris){ 293 294 ListNum<Read> ln=cris.nextList(); 295 ArrayList<Read> reads=(ln!=null ? ln.list : null); 296 297 //While there are more reads lists... 298 while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning 299 300 //For each read (or pair) in the list... 301 for(int i=0; i<reads.size(); i++){ 302 final Read r1=reads.get(i); 303 final Read r2=r1.mate; 304 305 processReadPair(r1, r2); 306 } 307 308 //Fetch a new read list 309 cris.returnList(ln); 310 ln=cris.nextList(); 311 reads=(ln!=null ? ln.list : null); 312 } 313 cris.returnList(ln); 314 } 315 316 //TODO: This appears to do read extension but is very confusing. processReadPair(Read r1, Read r2)317 private void processReadPair(Read r1, Read r2){ 318 if(verbose){outstream.println("Considering read "+r1.id+" "+new String(r1.bases));} 319 320 readsInT++; 321 basesInT+=r1.length(); 322 if(r2!=null){ 323 readsInT++; 324 basesInT+=r2.length(); 325 } 326 327 if(mode==insertMode){ 328 int x=BBMerge.findOverlapStrict(r1, r2, false); 329 if(x<1){ 330 x=findInsertSize(r1, r2, rightCounts, myKmer, myKmer2); 331 } 332 insertSizes.increment(Tools.max(x, 0)); 333 return; 334 } 335 336 if(ecco && r1!=null && r2!=null && !r1.discarded() && !r2.discarded()){BBMerge.findOverlapStrict(r1, r2, true);} 337 if(r1!=null){ 338 if(r1.discarded()){ 339 lowqBasesT+=r1.length(); 340 lowqReadsT++; 341 }else{ 342 byte[] bases=makeContig(r1.bases, builderT, r1.numericID, myKmer); 343 if(bases!=null){ 344 if(verbose){outstream.println("Added "+bases.length);} 345 final long num=contigNum.incrementAndGet(); 346 Contig temp=new Contig(bases, "contig_"+num+"_length_"+bases.length, (int)num); 347 contigs.add(temp); 348 } 349 } 350 } 351 if(r2!=null){ 352 if(r2.discarded()){ 353 lowqBasesT+=r2.length(); 354 lowqReadsT++; 355 }else{ 356 byte[] bases=makeContig(r2.bases, builderT, r2.numericID, myKmer); 357 if(bases!=null){ 358 if(verbose){outstream.println("Added "+bases.length);} 359 final long num=contigNum.incrementAndGet(); 360 Contig temp=new Contig(bases, "contig_"+num+"_length_"+bases.length, (int)num); 361 contigs.add(temp); 362 } 363 } 364 } 365 } 366 367 /** From kmers */ makeContig(final ByteBuilder bb, Kmer kmer, boolean alreadyClaimed)368 private Contig makeContig(final ByteBuilder bb, Kmer kmer, boolean alreadyClaimed){ 369 bb.setLength(0); 370 bb.appendKmer(kmer); 371 if(verbose){outstream.println("Filled bb: "+bb);} 372 373 final int initialLength=bb.length(); 374 assert(initialLength==kbig); 375 if(initialLength<kbig){return null;} 376 377 boolean success=(alreadyClaimed || !useOwnership ? true : claim(kmer, id)); 378 if(verbose){outstream.println("Thread "+id+" checking owner after setting: "+findOwner(bb, id, kmer));} 379 if(!success){ 380 assert(bb.length()==kbig); 381 // release(bb, id); //no need to release 382 return null; 383 } 384 if(verbose /*|| true*/){outstream.println("Thread "+id+" building contig; initial length "+bb.length());} 385 if(verbose){outstream.println("Extending to right.");} 386 final int rightStatus, leftStatus; 387 float leftRatio=0, rightRatio=0; 388 { 389 final int status=extendToRight(bb, leftCounts, rightCounts, id, kmer); 390 391 if(status==DEAD_END){ 392 //do nothing 393 }else if(status==LOOP){//TODO 394 //special case - handle specially, for a loop with no obvious junction, e.g. long tandem repeat. 395 //Perhaps, the last kmer should be reclassified as a junction and removed. 396 }else if(status==BAD_SEED){ 397 assert(bb.length()==kbig); 398 release(kmer, id); 399 return null; 400 }else{ 401 if(bb.length()==kbig){ 402 if(status==BAD_OWNER){ 403 release(kmer, id); 404 return null; 405 }else if(isBranchCode(status)){ 406 release(kmer, id); 407 return null; 408 }else{ 409 throw new RuntimeException("Bad return value: "+status); 410 } 411 }else{ 412 if(status==BAD_OWNER){ 413 release(bb, id, kmer); 414 return null; 415 }else if(status==F_BRANCH || status==D_BRANCH){ 416 rightRatio=calcRatio(rightCounts); 417 }else if(status==B_BRANCH){ 418 rightRatio=calcRatio(leftCounts); 419 }else{ 420 throw new RuntimeException("Bad return value: "+status); 421 } 422 } 423 } 424 rightStatus=status; 425 } 426 427 // success=extendToRight(bb, leftCounts, rightCounts, id, kmer); 428 // if(!success){ 429 // release(bb, id, kmer); 430 // return null; 431 // } 432 bb.reverseComplementInPlace(); 433 if(verbose /*|| true*/){outstream.println("Extending rcomp to right; current length "+bb.length());} 434 435 { 436 final int status=extendToRight(bb, leftCounts, rightCounts, id, kmer); 437 438 if(status==DEAD_END){ 439 //do nothing 440 }else if(status==LOOP){//TODO 441 //special case - handle specially, for a loop with no obvious junction, e.g. long tandem repeat. 442 //Perhaps, the last kmer should be reclassified as a junction and removed. 443 }else if(status==BAD_SEED){ 444 assert(false) : bb;//This should never happen. 445 assert(bb.length()==kbig); 446 release(kmer, id); 447 return null; 448 }else{ 449 if(status==BAD_OWNER){ 450 release(bb, id, kmer); 451 return null; 452 }else if(status==F_BRANCH || status==D_BRANCH){ 453 leftRatio=calcRatio(rightCounts); 454 }else if(status==B_BRANCH){ 455 leftRatio=calcRatio(leftCounts); 456 }else{ 457 throw new RuntimeException("Bad return value: "+status); 458 } 459 } 460 leftStatus=status; 461 } 462 // success=extendToRight(bb, leftCounts, rightCounts, id, kmer); 463 // if(!success){ 464 // release(bb, id, kmer); 465 // return null; 466 // } 467 if(verbose /*|| true*/){outstream.println("Final length for thread "+id+": "+bb.length());} 468 // if(useOwnership && THREADS==1){assert(claim(bases, bases.length, id, rid));} 469 success=(useOwnership ? doubleClaim(bb, id, kmer) : true); 470 if(verbose /*|| true*/){outstream.println("Success for thread "+id+": "+success);} 471 472 if(trimEnds>0){bb.trimByAmount(trimEnds, trimEnds);} 473 else if(trimCircular && leftStatus==LOOP && rightStatus==LOOP){bb.trimByAmount(0, kbig-1);} 474 if(bb.length()>=initialLength+minExtension && (bb.length()>=minContigLen || popBubbles)){ 475 if(success){ 476 bb.reverseComplementInPlace(); 477 byte[] bases=bb.toBytes(); 478 Contig c=new Contig(bases); 479 c.leftCode=leftStatus; 480 c.rightCode=rightStatus; 481 c.rightRatio=rightRatio; 482 c.leftRatio=leftRatio; 483 if(!c.canonical()){c.rcomp();} 484 return c; 485 }else{ 486 // assert(false) : bb.length()+", "+id; 487 release(bb, id, kmer); 488 return null; 489 } 490 } 491 if(verbose /*|| true*/){outstream.println("Contig was too short for "+id+": "+bb.length());} 492 return null; 493 } 494 495 /** From a seed */ makeContig(final byte[] bases, final ByteBuilder bb, long rid, final Kmer kmer)496 private byte[] makeContig(final byte[] bases, final ByteBuilder bb, long rid, final Kmer kmer){ 497 if(bases==null || bases.length<kbig){return null;} 498 // if(verbose /*|| true*/){outstream.println("Thread "+id+" checking owner: "+findOwner(bases, bases.length, id));} 499 int owner=useOwnership ? findOwner(bases, bases.length, id, kmer) : -1; 500 if(owner>=id){return null;} 501 boolean success=(useOwnership ? claim(bases, bases.length, id, true, kmer) : true); 502 if(verbose /*|| true*/){outstream.println("Thread "+id+" checking owner after setting: "+findOwner(bases, bases.length, id, kmer));} 503 if(!success){ 504 release(bases, bases.length, id, kmer); 505 return null; 506 } 507 if(verbose /*|| true*/){outstream.println("Thread "+id+" building contig; initial length "+bases.length);} 508 bb.setLength(0); 509 bb.append(bases); 510 if(verbose){outstream.println("Extending to right.");} 511 { 512 final int status=extendToRight(bb, leftCounts, rightCounts, id, kmer); 513 514 if(status==DEAD_END){ 515 //do nothing 516 }else if(status==LOOP){//TODO 517 //special case - handle specially, for a loop with no obvious junction, e.g. long tandem repeat. 518 //Perhaps, the last kmer should be reclassified as a junction and removed. 519 }else if(status==BAD_SEED){ 520 //do nothing 521 }else{ 522 if(status==BAD_OWNER){ 523 release(bb.array, bb.length(), id, kmer); 524 return null; 525 }else if(isBranchCode(status)){ 526 //do nothing 527 }else{ 528 throw new RuntimeException("Bad return value: "+status); 529 } 530 } 531 } 532 // success=extendToRight(bb, leftCounts, rightCounts, id, kmer); 533 // if(!success){ 534 // release(bb.array, bb.length(), id, kmer); 535 // return null; 536 // } 537 bb.reverseComplementInPlace(); 538 if(verbose /*|| true*/){outstream.println("Extending rcomp to right; current length "+bb.length());} 539 { 540 final int status=extendToRight(bb, leftCounts, rightCounts, id, kmer); 541 542 if(status==DEAD_END){ 543 //do nothing 544 }else if(status==LOOP){//TODO 545 //special case - handle specially, for a loop with no obvious junction, e.g. long tandem repeat. 546 //Perhaps, the last kmer should be reclassified as a junction and removed. 547 }else if(status==BAD_SEED){ 548 //do nothing 549 }else{ 550 if(status==BAD_OWNER){ 551 release(bb.array, bb.length(), id, kmer); 552 return null; 553 }else if(isBranchCode(status)){ 554 //do nothing 555 }else{ 556 throw new RuntimeException("Bad return value: "+status); 557 } 558 } 559 } 560 // success=extendToRight(bb, leftCounts, rightCounts, id, kmer); 561 // if(!success){ 562 // release(bb.array, bb.length(), id, kmer); 563 // return null; 564 // } 565 if(verbose /*|| true*/){outstream.println("Final length for thread "+id+": "+bb.length());} 566 // if(useOwnership && THREADS==1){assert(claim(bases, bases.length, id, rid));} 567 success=(useOwnership ? doubleClaim(bb, id, kmer) : true); 568 if(verbose /*|| true*/){outstream.println("Success for thread "+id+": "+success);} 569 if(bb.length()>=bases.length+minExtension && (bb.length()>=minContigLen || popBubbles)){ 570 if(success){ 571 bb.reverseComplementInPlace(); 572 return bb.toBytes(); 573 }else{ 574 // assert(false) : bb.length()+", "+id; 575 release(bb.array, bb.length(), id, kmer); 576 return null; 577 } 578 } 579 if(verbose /*|| true*/){outstream.println("Contig was too short for "+id+": "+bb.length());} 580 return null; 581 } 582 583 /*--------------------------------------------------------------*/ 584 585 private final Kmer myKmer=new Kmer(kbig); 586 private final Kmer myKmer2=new Kmer(kbig); 587 588 } 589 590 591 /*--------------------------------------------------------------*/ 592 /*---------------- Contig Processing ----------------*/ 593 /*--------------------------------------------------------------*/ 594 595 @Override makeProcessContigThread(ArrayList<Contig> contigs, AtomicInteger next)596 ProcessContigThread makeProcessContigThread(ArrayList<Contig> contigs, AtomicInteger next){ 597 return new ProcessContigThread(contigs, next); 598 } 599 600 @Override initializeContigs(ArrayList<Contig> contigs)601 public void initializeContigs(ArrayList<Contig> contigs){ 602 tables.clearOwnership(); 603 tables.initializeOwnership(); 604 final Kmer kmer=new Kmer(kbig); 605 { 606 int cnum=0; 607 for(Contig c : contigs){ 608 c.id=cnum; 609 // if(c.leftBranch()){ 610 if(true){ 611 c.leftKmer(kmer); 612 tables.claim(kmer, cnum); 613 } 614 // if(c.rightBranch()){ 615 if(true){ 616 c.rightKmer(kmer); 617 tables.claim(kmer, cnum); 618 } 619 cnum++; 620 } 621 } 622 } 623 624 class ProcessContigThread extends AbstractProcessContigThread { 625 ProcessContigThread(ArrayList<Contig> contigs_, AtomicInteger next_)626 ProcessContigThread(ArrayList<Contig> contigs_, AtomicInteger next_){ 627 super(contigs_, next_); 628 kmerA=new Kmer(kbig); 629 kmerB=new Kmer(kbig); 630 kmerC=new Kmer(kbig); 631 lastExitCondition=BAD_SEED; 632 } 633 634 @Override processContigLeft(Contig c, int[] leftCounts, int[] rightCounts, int[] extraCounts, ByteBuilder bb)635 public void processContigLeft(Contig c, int[] leftCounts, int[] rightCounts, int[] extraCounts, ByteBuilder bb){ 636 // System.err.println("processContigLeft: "+c.id+", "+c.leftCode+", "+c.leftEdges); 637 if(c.leftCode==DEAD_END){return;} 638 639 final Kmer kmer0=c.leftKmer(kmerA); 640 final Kmer kmer=kmerB; 641 642 assert(tables.getCount(kmer0)>0); 643 assert(tables.findOwner(kmer0)==c.id) : tables.findOwner(kmer0)+", "+c.id; 644 645 int leftMaxPos=fillLeftCounts(kmer0, leftCounts); 646 int leftMax=leftCounts[leftMaxPos]; 647 int leftSecondPos=Tools.secondHighestPosition(leftCounts); 648 int leftSecond=leftCounts[leftSecondPos]; 649 650 for(int x=0; x<leftCounts.length; x++){ 651 bb.clear(); 652 final int count=leftCounts[x]; 653 int target=-1; 654 if(count>0 && isJunction(leftMax, count)){ 655 kmer.setFrom(kmer0); 656 kmer.addLeftNumeric(x); 657 assert(tables.getCount(kmer)==count) : count+", "+tables.getCount(kmer); 658 bb.append(AminoAcid.numberToBase[x]); 659 target=exploreRight(kmer, extraCounts, rightCounts, bb); 660 if(verbose){ 661 outstream.println(c.id+"L_F: x="+x+", cnt="+count+", dest="+target 662 +", "+codeStrings[lastExitCondition]+", len="+lastLength+", orient="+lastOrientation); 663 } 664 } 665 if(target>=0){ 666 Edge se=new Edge(c.id, target, lastLength, lastOrientation, count, bb.toBytes()); 667 // System.err.println("Adding "+se+"; x="+x+"; bb="+bb); 668 c.addLeftEdge(se); 669 edgesMadeT++; 670 } 671 } 672 } 673 674 @Override processContigRight(Contig c, int[] leftCounts, int[] rightCounts, int[] extraCounts, ByteBuilder bb)675 public void processContigRight(Contig c, int[] leftCounts, int[] rightCounts, int[] extraCounts, ByteBuilder bb){ 676 // System.err.println("processContigRight: "+c.id+", "+c.rightCode); 677 if(c.rightCode==DEAD_END){return;} 678 679 final Kmer kmer0=c.rightKmer(kmerA); 680 final Kmer kmer=kmerB; 681 682 int rightMaxPos=fillRightCounts(kmer0, rightCounts); 683 int rightMax=rightCounts[rightMaxPos]; 684 int rightSecondPos=Tools.secondHighestPosition(rightCounts); 685 int rightSecond=rightCounts[rightSecondPos]; 686 687 for(int x=0; x<rightCounts.length; x++){ 688 bb.clear(); 689 final int count=rightCounts[x]; 690 int target=-1; 691 if(count>0 && isJunction(rightMax, count)){ 692 kmer.setFrom(kmer0); 693 kmer.addRightNumeric(x); 694 assert(tables.getCount(kmer)==count) : count+", "+tables.getCount(kmer); 695 bb.append(AminoAcid.numberToBase[x]); 696 target=exploreRight(kmer, leftCounts, extraCounts, bb); 697 if(verbose){ 698 outstream.println(c.id+"R_F: x="+x+", cnt="+count+", dest="+target+", "+codeStrings[lastExitCondition]+", len="+lastLength+", orient="+lastOrientation); 699 } 700 } 701 if(target>=0){ 702 lastOrientation|=1; 703 Edge se=new Edge(c.id, target, lastLength, lastOrientation, count, bb.toBytes()); 704 c.addRightEdge(se); 705 edgesMadeT++; 706 } 707 } 708 } 709 exploreRight(Kmer kmer, int[] leftCounts, int[] rightCounts, ByteBuilder bb)710 private int exploreRight(Kmer kmer, int[] leftCounts, int[] rightCounts, ByteBuilder bb){ 711 final Kmer temp=kmerC; 712 int length=1; 713 int owner=-1; 714 lastTarget=-1; 715 for(; length<500; length++){ 716 owner=tables.findOwner(kmer); 717 if(owner>=0){break;} 718 719 final int leftMaxPos=fillLeftCounts(kmer, leftCounts); 720 final int leftMax=leftCounts[leftMaxPos]; 721 final int leftSecondPos=Tools.secondHighestPosition(leftCounts); 722 final int leftSecond=leftCounts[leftSecondPos]; 723 if(isJunction(leftMax, leftSecond)){ 724 lastExitCondition=B_BRANCH; 725 lastLength=length; 726 return -1; 727 } 728 729 final int rightMaxPos=fillRightCounts(kmer, rightCounts); 730 final int rightMax=rightCounts[rightMaxPos]; 731 final int rightSecondPos=Tools.secondHighestPosition(rightCounts); 732 final int rightSecond=rightCounts[rightSecondPos]; 733 734 // outstream.println("* "+Arrays.toString(leftCounts)+", "+Arrays.toString(rightCounts)+", "+rightMaxPos); 735 736 if(rightMax<minCountExtend){ 737 // assert(false) : Arrays.toString(rightCounts); 738 lastExitCondition=DEAD_END; 739 lastLength=length; 740 return -1; 741 }else if(isJunction(rightMax, rightSecond)){ 742 lastExitCondition=F_BRANCH; 743 lastLength=length; 744 return -1; 745 } 746 bb.append(AminoAcid.numberToBase[rightMaxPos]); 747 long x=rightMaxPos; 748 kmer.addRightNumeric(x); 749 } 750 lastLength=length; 751 lastTarget=owner; 752 if(owner>=0){ 753 lastExitCondition=SUCCESS; 754 Contig dest=contigs.get(owner); 755 dest.leftKmer(temp); 756 757 // if(temp.equals(kmer)){ 758 // lastOrientation=temp.sameOrientation(kmer) ? 0 : 1; 759 // }else{ 760 // dest.rightKmer(temp); 761 // if(temp.equals(kmer)){ 762 // lastOrientation=temp.sameOrientation(kmer) ? 2 : 3; 763 // } 764 // } 765 766 if(temp.equals(kmer)){ 767 lastOrientation=0; 768 }else{ 769 dest.rightKmer(temp); 770 if(temp.equals(kmer)){ 771 lastOrientation=2; 772 }else{ 773 assert(false); 774 } 775 } 776 }else{ 777 lastExitCondition=TOO_LONG; 778 } 779 return owner; 780 } 781 782 final Kmer kmerA, kmerB, kmerC; 783 784 } 785 786 787 /*--------------------------------------------------------------*/ 788 /*---------------- Extension Methods ----------------*/ 789 /*--------------------------------------------------------------*/ 790 791 findInsertSize(Read r1, Read r2, int[] rightCounts, Kmer kmer1, Kmer kmer2)792 public int findInsertSize(Read r1, Read r2, int[] rightCounts, Kmer kmer1, Kmer kmer2){ 793 kmer1=tables.rightmostKmer(r1.bases, r1.length(), kmer1); 794 kmer2=tables.rightmostKmer(r2.bases, r2.length(), kmer2); 795 if(kmer1==null || kmer2==null){return -1;} 796 final int x=measureInsert(kmer1, kmer2, 24000, rightCounts); 797 if(x<0){return -1;} 798 return r1.length()+r2.length()+x-kbig;//TODO: May be off by 1. 799 } 800 801 /* (non-Javadoc) 802 * @see assemble.Tadpole#extendRead(stream.Read, stream.ByteBuilder, int[], int[], int) 803 */ 804 @Override extendRead(Read r, ByteBuilder bb, int[] leftCounts, int[] rightCounts, int distance)805 public int extendRead(Read r, ByteBuilder bb, int[] leftCounts, int[] rightCounts, int distance) { 806 return extendRead(r, bb, leftCounts, rightCounts, distance, getLocalKmer()); 807 } 808 809 @Override extendRead(Read r, ByteBuilder bb, int[] leftCounts, int[] rightCounts, int distance, final Kmer kmer)810 public int extendRead(Read r, ByteBuilder bb, int[] leftCounts, int[] rightCounts, int distance, final Kmer kmer){ 811 final int initialLen=r.length(); 812 if(initialLen<kbig){return 0;} 813 bb.setLength(0); 814 bb.append(r.bases); 815 Kmer temp=tables.rightmostKmer(bb, kmer); 816 if(temp==null){return 0;} 817 final int extension=extendToRight2_inner(bb, leftCounts, rightCounts, distance, true, kmer); 818 if(extension>0){ 819 r.bases=bb.toBytes(); 820 if(r.quality!=null){ 821 final byte q=Shared.FAKE_QUAL; 822 r.quality=KillSwitch.copyOf(r.quality, r.bases.length); 823 for(int i=initialLen; i<r.quality.length; i++){ 824 r.quality[i]=q; 825 } 826 } 827 } 828 assert(extension==r.length()-initialLen); 829 return extension; 830 } 831 832 /** Returns distance between the two kmers, or -1 */ measureInsert(final Kmer kmer1, final Kmer kmer2, final int maxlen, final int[] rightCounts)833 public int measureInsert(final Kmer kmer1, final Kmer kmer2, final int maxlen, final int[] rightCounts){ 834 int len=0; 835 836 { 837 int count=tables.getCount(kmer2); 838 if(count<minCountSeed){return -1;} 839 } 840 841 int count=tables.getCount(kmer1); 842 if(count<minCountSeed){return -1;} 843 if(count<minCountSeed){ 844 if(verbose){outstream.println("Returning because count was too low: "+count);} 845 return -1; 846 } 847 848 int rightMaxPos=fillRightCounts(kmer1, rightCounts); 849 int rightMax=rightCounts[rightMaxPos]; 850 // int rightSecondPos=Tools.secondHighestPosition(rightCounts); 851 // int rightSecond=rightCounts[rightSecondPos]; 852 853 if(rightMax<minCountExtend){return -1;} 854 // if(isJunction(rightMax, rightSecond)){return -1;} 855 856 while(!kmer1.equals(kmer2) && len<maxlen){ 857 858 //Generate the new kmer 859 // final byte b=AminoAcid.numberToBase[rightMaxPos]; 860 final long x=rightMaxPos; 861 kmer1.addRightNumeric(x); 862 863 assert(tables.getCount(kmer1)==rightMax); 864 count=rightMax; 865 866 assert(count>=minCountExtend) : count; 867 868 rightMaxPos=fillRightCounts(kmer1, rightCounts); 869 rightMax=rightCounts[rightMaxPos]; 870 // rightSecondPos=Tools.secondHighestPosition(rightCounts); 871 // rightSecond=rightCounts[rightSecondPos]; 872 873 if(verbose){ 874 outstream.println("kmer: "+kmer1); 875 outstream.println("Counts: "+count+", "+Arrays.toString(rightCounts)); 876 outstream.println("rightMaxPos="+rightMaxPos); 877 outstream.println("rightMax="+rightMax); 878 // outstream.println("rightSecondPos="+rightSecondPos); 879 // outstream.println("rightSecond="+rightSecond); 880 } 881 882 if(rightMax<minCountExtend){ 883 if(verbose){outstream.println("Breaking because highest right was too low:"+rightMax);} 884 break; 885 } 886 887 // if(isJunction(rightMax, rightSecond)){return -1;} 888 889 len++; 890 } 891 return len>=maxlen ? -1 : len; 892 } 893 894 895 896 /** 897 * Extend these bases into a contig. 898 * Stops at both left and right junctions. 899 * Claims ownership. 900 */ extendToRight(final ByteBuilder bb, final int[] leftCounts, final int[] rightCounts, final int id, Kmer kmer)901 public int extendToRight(final ByteBuilder bb, final int[] leftCounts, final int[] rightCounts, final int id, Kmer kmer){ 902 if(bb.length()<kbig){return BAD_SEED;} 903 kmer.clear(); 904 905 kmer=tables.rightmostKmer(bb, kmer); 906 if(kmer==null || kmer.len<kbig){return BAD_SEED;} 907 assert(kmer.len==kbig); 908 909 /* Now the trailing kmer has been initialized. */ 910 911 if(verbose){ 912 outstream.println("extendToRight kmer="+kmer+", bb="+bb); 913 } 914 915 HashArrayU1D table=tables.getTable(kmer); 916 int count=table.getValue(kmer); 917 if(count<minCountSeed){ 918 if(verbose){outstream.println("Returning because count was too low: "+count);} 919 return BAD_SEED; 920 } 921 922 int owner=(useOwnership ? table.getOwner(kmer) : id); 923 if(verbose){outstream.println("Owner: "+owner);} 924 if(owner>id){return BAD_OWNER;} 925 926 int leftMaxPos=0; 927 int leftMax=minCountExtend; 928 int leftSecondPos=1; 929 int leftSecond=0; 930 931 if(leftCounts!=null){ 932 leftMaxPos=fillLeftCounts(kmer, leftCounts); 933 leftMax=leftCounts[leftMaxPos]; 934 leftSecondPos=Tools.secondHighestPosition(leftCounts); 935 leftSecond=leftCounts[leftSecondPos]; 936 } 937 938 int rightMaxPos=fillRightCounts(kmer, rightCounts); 939 int rightMax=rightCounts[rightMaxPos]; 940 int rightSecondPos=Tools.secondHighestPosition(rightCounts); 941 int rightSecond=rightCounts[rightSecondPos]; 942 943 if(verbose){ 944 outstream.println("kmer: "+toText(kmer)); 945 outstream.println("Counts: "+count+", "+(leftCounts==null ? "null" : Arrays.toString(leftCounts))+", "+Arrays.toString(rightCounts)); 946 outstream.println("leftMaxPos="+leftMaxPos); 947 outstream.println("leftMax="+leftMax); 948 outstream.println("leftSecondPos="+leftSecondPos); 949 outstream.println("leftSecond="+leftSecond); 950 outstream.println("rightMaxPos="+rightMaxPos); 951 outstream.println("rightMax="+rightMax); 952 outstream.println("rightSecondPos="+rightSecondPos); 953 outstream.println("rightSecond="+rightSecond); 954 } 955 956 if(rightMax<minCountExtend){return DEAD_END;} 957 if(isJunction(rightMax, rightSecond)){//Returning here is fine because nothing can be added 958 if(verbose){outstream.println("B: Breaking because isJunction("+rightMax+", "+rightSecond+", "+leftMax+", "+leftSecond+")");} 959 return isJunction(leftMax, leftSecond) ? D_BRANCH : F_BRANCH; 960 } 961 if(isJunction(leftMax, leftSecond)){//Returning here is necessary, but this should mean the the length is exactly K 962 assert(bb.length()==kbig) : bb.length()+", "+kbig+", "+leftMax+", "+leftSecond; 963 if(verbose){outstream.println("B: Breaking because isJunction("+rightMax+", "+rightSecond+", "+leftMax+", "+leftSecond+")");} 964 return B_BRANCH; 965 } 966 967 if(useOwnership){ 968 owner=table.setOwner(kmer, id); 969 if(verbose){outstream.println("A. Owner is now "+id+" for kmer "+kmer);} 970 if(owner!=id){ 971 if(verbose){outstream.println("Returning early because owner was "+owner+" for thread "+id+".");} 972 return BAD_OWNER; 973 } 974 } 975 976 final int maxLen=Tools.min((extendRight<0 ? maxContigLen : bb.length()+extendRight), maxContigLen); 977 978 while(owner==id && bb.length()<maxLen){ 979 980 //Generate the new kmer 981 final byte b=AminoAcid.numberToBase[rightMaxPos]; 982 983 //Now consider the next kmer 984 final long evicted=kmer.addRightNumeric(rightMaxPos); 985 986 table=tables.getTable(kmer); 987 988 assert(table.getValue(kmer)==rightMax || rightMax==0); 989 count=rightMax; 990 991 assert(count>=minCountExtend) : count; 992 993 if(leftCounts!=null){ 994 leftMaxPos=fillLeftCounts(kmer, leftCounts); 995 leftMax=leftCounts[leftMaxPos]; 996 leftSecondPos=Tools.secondHighestPosition(leftCounts); 997 leftSecond=leftCounts[leftSecondPos]; 998 } 999 1000 rightMaxPos=fillRightCounts(kmer, rightCounts); 1001 rightMax=rightCounts[rightMaxPos]; 1002 rightSecondPos=Tools.secondHighestPosition(rightCounts); 1003 rightSecond=rightCounts[rightSecondPos]; 1004 1005 if(verbose){ 1006 outstream.println("kmer: "+toText(kmer)); 1007 outstream.println("Counts: "+count+", "+(leftCounts==null ? "null" : Arrays.toString(leftCounts))+", "+Arrays.toString(rightCounts)); 1008 outstream.println("leftMaxPos="+leftMaxPos); 1009 outstream.println("leftMax="+leftMax); 1010 outstream.println("leftSecondPos="+leftSecondPos); 1011 outstream.println("leftSecond="+leftSecond); 1012 outstream.println("rightMaxPos="+rightMaxPos); 1013 outstream.println("rightMax="+rightMax); 1014 outstream.println("rightSecondPos="+rightSecondPos); 1015 outstream.println("rightSecond="+rightSecond); 1016 } 1017 1018 final boolean fbranch=isJunction(rightMax, rightSecond); 1019 final boolean bbranch=isJunction(leftMax, leftSecond); 1020 final boolean hbranch=(leftCounts!=null && leftMaxPos!=evicted && branchMult1>0); 1021 if(bbranch){ 1022 if(verbose){outstream.println("B: Breaking - isJunction("+rightMax+", "+rightSecond+", "+leftMax+", "+leftSecond+"); " 1023 + "("+fbranch+", "+bbranch+", "+hbranch+")");} 1024 return fbranch ? D_BRANCH : B_BRANCH; 1025 }else if(hbranch){ 1026 if(verbose){outstream.println("B: Breaking - isJunction("+rightMax+", "+rightSecond+", " 1027 + ""+leftMax+", "+leftSecond+"); ("+fbranch+", "+bbranch+", "+hbranch+")");} 1028 if(verbose){outstream.println("Hidden branch: leftMaxPos!=evicted ("+leftMaxPos+"!="+evicted+")" + 1029 "\nleftMaxPos="+leftMaxPos+", leftMax="+leftMax+", leftSecondPos="+leftSecondPos+", leftSecond="+leftSecond);} 1030 return fbranch ? D_BRANCH : B_BRANCH; 1031 } 1032 1033 bb.append(b); 1034 if(verbose){outstream.println("Added base "+(char)b);} 1035 1036 if(useOwnership){ 1037 owner=table.getOwner(kmer); 1038 if(verbose){outstream.println("Owner is initially "+id+" for key "+kmer);} 1039 if(owner==id){//loop detection 1040 if(verbose /*|| true*/){ 1041 // outstream.println(new String(bb.array, bb.length()-31, 31)); 1042 outstream.println(bb); 1043 outstream.println(toText(kmer)); 1044 outstream.println("Breaking because owner was "+owner+" for thread "+id+"."); 1045 } 1046 return LOOP; 1047 } 1048 owner=table.setOwner(kmer, id); 1049 if(verbose){outstream.println("B. Owner is now "+id+" for kmer "+kmer);} 1050 } 1051 1052 if(fbranch){ 1053 if(verbose){outstream.println("B: Breaking - isJunction("+rightMax+", "+rightSecond+", "+leftMax+", "+leftSecond+"); " 1054 + "("+fbranch+", "+bbranch+", "+hbranch+")");} 1055 return F_BRANCH; 1056 }else if(rightMax<minCountExtend){ 1057 if(verbose){outstream.println("B: Breaking because highest right was too low:"+rightMax);} 1058 return DEAD_END; 1059 } 1060 } 1061 assert(owner!=id); 1062 if(verbose /*|| true*/){ 1063 outstream.println("Current contig: "+bb+"\nReturning because owner was "+owner+" for thread "+id+"."); 1064 } 1065 return BAD_OWNER; 1066 } 1067 1068 @Override extendToRight2(final ByteBuilder bb, final int[] leftCounts, final int[] rightCounts, final int distance, boolean includeJunctionBase)1069 public int extendToRight2(final ByteBuilder bb, final int[] leftCounts, final int[] rightCounts, final int distance, boolean includeJunctionBase){ 1070 initializeThreadLocals(); 1071 return extendToRight2(bb, leftCounts, rightCounts, distance, includeJunctionBase, getLocalKmer()); 1072 } 1073 1074 @Override extendToRight2(final ByteBuilder bb, final int[] leftCounts, final int[] rightCounts, final int distance, boolean includeJunctionBase, Kmer kmer)1075 public int extendToRight2(final ByteBuilder bb, final int[] leftCounts, final int[] rightCounts, final int distance, boolean includeJunctionBase, Kmer kmer){ 1076 if(verbose || verbose2){outstream.println("Entering extendToRight2 (no kmers).");} 1077 final int initialLength=bb.length(); 1078 if(initialLength<kbig){return 0;} 1079 kmer.clear(); 1080 1081 kmer=tables.rightmostKmer(bb, kmer); 1082 if(kmer==null || kmer.len<kbig){return 0;} 1083 assert(kmer.len==kbig); 1084 1085 return extendToRight2_inner(bb, leftCounts, rightCounts, distance, includeJunctionBase, kmer); 1086 } 1087 1088 /** 1089 * Extend these bases to the right by at most 'distance'. 1090 * Stops at right junctions only. 1091 * Does not claim ownership. 1092 */ extendToRight2_inner(final ByteBuilder bb, final int[] leftCounts, final int[] rightCounts, final int distance, boolean includeJunctionBase, Kmer kmer)1093 private int extendToRight2_inner(final ByteBuilder bb, final int[] leftCounts, final int[] rightCounts, final int distance, boolean includeJunctionBase, Kmer kmer){ 1094 if(verbose || verbose2){outstream.println("Entering extendToRight2_inner (with kmers).");} 1095 final int initialLength=bb.length(); 1096 assert(kmer.len==kbig) : kmer.len+", "+kbig+", "+bb.length(); 1097 1098 HashArrayU1D table=tables.getTable(kmer); 1099 int count=table.getValue(kmer); 1100 if(count<minCountSeed){ 1101 if(verbose || verbose2){outstream.println("Returning because count was too low: "+count+"<"+minCountSeed);} 1102 return 0; 1103 } 1104 1105 int leftMaxPos=0; 1106 int leftMax=minCountExtend; 1107 int leftSecondPos=1; 1108 int leftSecond=0; 1109 1110 if(leftCounts!=null){ 1111 leftMaxPos=fillLeftCounts(kmer, leftCounts); 1112 leftMax=leftCounts[leftMaxPos]; 1113 leftSecondPos=Tools.secondHighestPosition(leftCounts); 1114 leftSecond=leftCounts[leftSecondPos]; 1115 } 1116 1117 int rightMaxPos=fillRightCounts(kmer, rightCounts); 1118 int rightMax=rightCounts[rightMaxPos]; 1119 int rightSecondPos=Tools.secondHighestPosition(rightCounts); 1120 int rightSecond=rightCounts[rightSecondPos]; 1121 1122 if(verbose){ 1123 outstream.println("kmer: "+toText(kmer)); 1124 outstream.println("Counts: "+count+", "+Arrays.toString(rightCounts)); 1125 outstream.println("rightMaxPos="+rightMaxPos); 1126 outstream.println("rightMax="+rightMax); 1127 outstream.println("rightSecondPos="+rightSecondPos); 1128 outstream.println("rightSecond="+rightSecond); 1129 } 1130 1131 if(rightMax<minCountExtend){ 1132 if(verbose || verbose2){outstream.println("Returning because rightMax was too low: "+rightMax+"<"+minCountExtend+"\n"+count+", "+Arrays.toString(rightCounts));} 1133 return 0; 1134 } 1135 if(isJunction(rightMax, rightSecond, leftMax, leftSecond)){ 1136 if(verbose || verbose2){outstream.println("Returning because isJunction: "+rightMax+", "+rightSecond+"; "+leftMax+", "+leftSecond);} 1137 return 0; 1138 } 1139 1140 final int maxLen=Tools.min(bb.length()+distance, maxContigLen); 1141 1142 while(bb.length()<maxLen){ 1143 1144 //Generate the new kmer 1145 final byte b=AminoAcid.numberToBase[rightMaxPos]; 1146 1147 //Now consider the next kmer 1148 final long evicted=kmer.addRightNumeric(rightMaxPos); 1149 1150 table=tables.getTable(kmer); 1151 1152 assert(table.getValue(kmer)==rightMax || rightMax==0); 1153 count=rightMax; 1154 1155 assert(count>=minCountExtend) : count; 1156 1157 if(leftCounts!=null){ 1158 leftMaxPos=fillLeftCounts(kmer, leftCounts); 1159 leftMax=leftCounts[leftMaxPos]; 1160 leftSecondPos=Tools.secondHighestPosition(leftCounts); 1161 leftSecond=leftCounts[leftSecondPos]; 1162 } 1163 1164 rightMaxPos=fillRightCounts(kmer, rightCounts); 1165 rightMax=rightCounts[rightMaxPos]; 1166 rightSecondPos=Tools.secondHighestPosition(rightCounts); 1167 rightSecond=rightCounts[rightSecondPos]; 1168 1169 if(verbose){ 1170 outstream.println("kmer: "+toText(kmer)); 1171 outstream.println("Counts: "+count+", "+Arrays.toString(rightCounts)); 1172 outstream.println("rightMaxPos="+rightMaxPos); 1173 outstream.println("rightMax="+rightMax); 1174 outstream.println("rightSecondPos="+rightSecondPos); 1175 outstream.println("rightSecond="+rightSecond); 1176 } 1177 1178 if(isJunction(rightMax, rightSecond, leftMax, leftSecond)){ 1179 if(includeJunctionBase && kmer.key()==kmer.array2()){//TODO: Does not work on palindromes. 1180 bb.append(b); 1181 if(verbose){outstream.println("Added base "+(char)b);} 1182 } 1183 break; 1184 } 1185 1186 if(leftCounts!=null && leftMaxPos!=evicted){ 1187 if(verbose){outstream.println("B: Breaking because of hidden branch: leftMaxPos!=evicted ("+leftMaxPos+"!="+evicted+")" + 1188 "\nleftMaxPos="+leftMaxPos+", leftMax="+leftMax+", leftSecondPos="+leftSecondPos+", leftSecond="+leftSecond);} 1189 if(includeJunctionBase && kmer.key()==kmer.array2()){//TODO: Does not work on palindromes. 1190 bb.append(b); 1191 if(verbose){outstream.println("Added base "+(char)b);} 1192 } 1193 break; 1194 } 1195 1196 bb.append(b); 1197 if(verbose){outstream.println("Added base "+(char)b);} 1198 1199 if(rightMax<minCountExtend){ 1200 if(verbose || verbose2){outstream.println("C: Breaking because highest right was too low: "+rightMax+"<"+minCountExtend);} 1201 break; 1202 } 1203 } 1204 if(verbose || verbose2){outstream.println("Extended by "+(bb.length()-initialLength));} 1205 return bb.length()-initialLength; 1206 } 1207 1208 1209 /*--------------------------------------------------------------*/ 1210 /*---------------- Junk Detection ----------------*/ 1211 /*--------------------------------------------------------------*/ 1212 1213 @Override isJunk(Read r)1214 public boolean isJunk(Read r){ 1215 boolean junk=isJunk(r, localRightCounts.get(), getLocalKmer()); 1216 return junk; 1217 } 1218 1219 @Override isJunk(Read r, final int[] counts, Kmer kmer)1220 public boolean isJunk(Read r, final int[] counts, Kmer kmer){ 1221 final int blen=r.length(); 1222 if(blen<kbig){return true;} 1223 final byte[] bases=r.bases; 1224 kmer.clearFast(); 1225 assert(kmer.len==0); 1226 1227 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts, to get the leftmost kmer */ 1228 for(int i=0; i<kbig; i++){ 1229 kmer.addRight(bases[i]); 1230 } 1231 1232 if(kmer.len>=kbig){ 1233 int maxPos=fillLeftCounts(kmer, counts); 1234 if(counts[maxPos]>0){return false;} 1235 } 1236 1237 final boolean paired=(r.mateLength()>=kbig); 1238 int maxDepth=0; 1239 { 1240 for(int i=kbig; i<blen; i++){ 1241 kmer.addRight(bases[i]); 1242 if(kmer.len>=kbig){ 1243 int depth=getCount(kmer); 1244 if(depth>maxDepth){ 1245 maxDepth=depth; 1246 if(maxDepth>1 && (!paired || maxDepth>2)){return false;} 1247 } 1248 } 1249 } 1250 } 1251 1252 if(kmer.len>=kbig && !paired){ 1253 int maxPos=fillRightCounts(kmer, counts); 1254 if(counts[maxPos]>0){return false;} 1255 } 1256 return true; 1257 } 1258 1259 @Override hasKmersAtOrBelow(Read r, int tooLow, final float fraction)1260 public boolean hasKmersAtOrBelow(Read r, int tooLow, final float fraction){ 1261 return hasKmersAtOrBelow(r, tooLow, fraction, getLocalKmer()); 1262 } 1263 1264 @Override hasKmersAtOrBelow(Read r, final int tooLow, final float fraction, Kmer kmer)1265 public boolean hasKmersAtOrBelow(Read r, final int tooLow, final float fraction, Kmer kmer){ 1266 final int blen=r.length(); 1267 if(blen<kbig){return true;} 1268 final byte[] bases=r.bases; 1269 kmer.clearFast(); 1270 assert(kmer.len==0) : kmer.len+", "+kmer; 1271 1272 // outstream.println("\n"+new String(r.bases)+":"); 1273 1274 final int limit=Tools.max(1, Math.round((bases.length-kbig+1)*fraction)); 1275 int valid=0, invalid=0; 1276 { 1277 for(int i=0; i<blen; i++){ 1278 kmer.addRight(bases[i]); 1279 if(kmer.len>=kbig){ 1280 int depth=getCount(kmer); 1281 // outstream.println("depth="+depth+", kmer="+kmer); 1282 if(depth>tooLow){valid++;} 1283 else{ 1284 invalid++; 1285 if(invalid>=limit){return true;} 1286 } 1287 } 1288 } 1289 } 1290 1291 //Compensate for nocalls changing the expected kmer count 1292 final int limit2=Tools.max(1, Math.round((valid+invalid)*fraction)); 1293 return valid<1 || invalid>=limit2; 1294 } 1295 1296 1297 /*--------------------------------------------------------------*/ 1298 /*---------------- Error Correction ----------------*/ 1299 /*--------------------------------------------------------------*/ 1300 1301 @Override errorCorrect(Read r)1302 public int errorCorrect(Read r){ 1303 initializeThreadLocals(); 1304 int corrected=errorCorrect(r, localLeftCounts.get(), localRightCounts.get(), localIntList.get(), localIntList2.get(), 1305 localByteBuilder.get(), localByteBuilder2.get(), localTracker.get(), localBitSet.get(), getLocalKmer(), getLocalKmer2()); 1306 return corrected; 1307 } 1308 1309 @Override errorCorrect(Read r, final int[] leftCounts, final int[] rightCounts, LongList kmers, IntList counts, IntList counts2, final ByteBuilder bb, final ByteBuilder bb2, final ErrorTracker tracker, final BitSet bs, Kmer kmer, Kmer kmer2)1310 public int errorCorrect(Read r, final int[] leftCounts, final int[] rightCounts, LongList kmers, IntList counts, IntList counts2, 1311 final ByteBuilder bb, final ByteBuilder bb2, final ErrorTracker tracker, final BitSet bs, Kmer kmer, Kmer kmer2){ 1312 return errorCorrect(r, leftCounts, rightCounts, counts, counts2, bb, bb2, tracker, bs, kmer, kmer2); 1313 } 1314 hasErrorsFast(byte[] bases, Kmer kmer)1315 boolean hasErrorsFast(byte[] bases, Kmer kmer){ 1316 if(bases.length<=kbig){return false;} 1317 int prev=-99; 1318 1319 kmer.clearFast(); 1320 final int incr=Tools.mid(1, kbig/2, 9), mcc=minCountCorrect(); 1321 for(int i=0, next=kbig-1; i<bases.length; i++){ 1322 kmer.addRight(bases[i]); 1323 if(i+1>=kbig){ 1324 if(kmer.len()<kbig){ 1325 assert(new String(bases).indexOf('N')>=0); 1326 return true; 1327 } 1328 if(i==next){ 1329 assert(kmer.len()>=kbig); 1330 int count=getCount(kmer); 1331 final int min=Tools.min(count, prev), max=Tools.max(count, prev); 1332 if(prev>-99 && (count<mcc || (i>0 && (isError(max+1, min-1))))){ 1333 return true; 1334 } 1335 prev=count; 1336 next=Tools.min(bases.length-1, next+incr); 1337 } 1338 }else{ 1339 assert(kmer.len()<kbig); 1340 } 1341 } 1342 return false; 1343 } 1344 1345 public int errorCorrect(Read r, final int[] leftCounts, final int[] rightCounts, IntList counts, IntList counts2, 1346 final ByteBuilder bb, final ByteBuilder bb2, final ErrorTracker tracker, final BitSet bs, final Kmer kmer, final Kmer regenKmer){ 1347 1348 // verbose=r.numericID==0; 1349 1350 final byte[] bases=r.bases; 1351 final byte[] quals=r.quality; 1352 tracker.clear(); 1353 if(!r.containsUndefined() && !hasErrorsFast(bases, kmer)){return 0;} 1354 1355 int valid=tables.fillCounts(bases, counts, kmer); 1356 if(valid<2){return 0;} 1357 int possibleErrors=countErrors(counts, quals); 1358 if(possibleErrors<0){return 0;} 1359 final float expectedErrors=r.expectedErrors(true, r.length()); 1360 final Rollback roll=ECC_ROLLBACK ? new Rollback(r, counts) : null; 1361 1362 int correctedPincer=0; 1363 int correctedTail=0; 1364 int correctedBrute=0; 1365 int correctedReassemble=0; 1366 1367 if(ECC_PINCER){ 1368 correctedPincer+=errorCorrectPincer(bases, quals, leftCounts, rightCounts, counts, bb, tracker, errorExtensionPincer, kmer); 1369 } 1370 1371 if(ECC_TAIL || ECC_ALL){ 1372 int start=(ECC_ALL ? 0 : counts.size-kbig-1); 1373 // if(ECC_PINCER && tracker!=null && tracker.detected>correctedPincer){start=start-kbig;} 1374 correctedTail+=errorCorrectTail(bases, quals, leftCounts, rightCounts, counts, bb, tracker, start, errorExtensionTail, kmer); 1375 r.reverseComplement(); 1376 1377 counts.reverse(); 1378 correctedTail+=errorCorrectTail(bases, quals, leftCounts, rightCounts, counts, bb, tracker, start, errorExtensionTail, kmer); 1379 r.reverseComplement(); 1380 counts.reverse(); 1381 } 1382 1383 if(ECC_REASSEMBLE){ 1384 if((correctedPincer<1 && correctedTail<1) || countErrors(counts, quals)>0){ 1385 correctedReassemble=reassemble(bases, quals, rightCounts, counts, counts2, tracker, errorExtensionReassemble, bb, bb2, kmer, regenKmer, bs); 1386 } 1387 } 1388 1389 assert(correctedPincer+correctedTail+correctedReassemble+correctedBrute==tracker.corrected()) 1390 : correctedPincer+", "+correctedTail+", "+correctedReassemble+", "+correctedBrute+", "+tracker; 1391 1392 if(ECC_ROLLBACK && (tracker.corrected()>0 || tracker.rollback)){ 1393 1394 if(!tracker.rollback && quals!=null && tracker.corrected()>3){ 1395 float mult=Tools.max(1, 0.5f*(0.5f+0.01f*r.length()));//1 for a 150bp read. 1396 if(countErrors(counts, quals)>0 && tracker.corrected()>mult+expectedErrors){tracker.rollback=true;} 1397 else if(tracker.corrected()>2.5f*mult+expectedErrors){tracker.rollback=true;} 1398 } 1399 1400 // boolean printed=false; 1401 IntList counts0=roll.counts0; 1402 for(int i=0; !tracker.rollback && i<counts.size; i++){ 1403 int a=Tools.max(0, counts0.get(i)); 1404 int b=Tools.max(0, counts.get(i)); 1405 // assert(b+1>=a) : "Z: RID="+r.numericID+"; "+a+"->"+b+"\n"+counts0+"\n"+counts; 1406 if(b<a-1 && !isSimilar(a, b)){ 1407 // assert(false) : "Y: RID="+r.numericID+"; "+a+"->"+b+"\n"+counts0+"\n"+counts; 1408 if(verbose){outstream.println("Y: RID="+r.numericID+"; "+a+"->"+b+"\n"+counts0+"\n"+counts);} 1409 tracker.rollback=true; 1410 } 1411 // else if(b<a-1 && !printed){ 1412 // assert(false); 1413 // if(verbose){outstream.println("X: RID="+r.numericID+"; "+a+"->"+b+"\n"+counts0+"\n"+counts);} 1414 // printed=true; 1415 // } 1416 } 1417 1418 if(tracker.rollback){ 1419 roll.rollback(r, counts); 1420 tracker.clearCorrected(); 1421 return 0; 1422 } 1423 } 1424 1425 if(MARK_BAD_BASES>0 && (!MARK_ERROR_READS_ONLY || countErrors(counts, quals)>0 || 1426 r.expectedErrors(false, r.length())>3)){ 1427 int marked=markBadBases(bases, quals, counts, bs, MARK_BAD_BASES, MARK_DELTA_ONLY, MARK_QUALITY); 1428 tracker.marked=marked; 1429 } 1430 1431 return tracker.corrected(); 1432 } 1433 errorCorrectPincer(final byte[] bases, final byte[] quals, final int[] leftBuffer, final int[] rightBuffer, final IntList counts, final ByteBuilder bb, final ErrorTracker tracker, final int errorExtension, final Kmer kmer)1434 public int errorCorrectPincer(final byte[] bases, final byte[] quals, final int[] leftBuffer, final int[] rightBuffer, 1435 final IntList counts, final ByteBuilder bb, final ErrorTracker tracker, final int errorExtension, final Kmer kmer){ 1436 1437 int detected=0; 1438 int corrected=0; 1439 1440 //a is the index of the left kmer 1441 //b is a+1 1442 //c is d-1 1443 //d is the index of the right kmer 1444 //the base between the kmers is at a+k 1445 for(int a=0, d=kbig+1; d<counts.size; a++, d++){ 1446 final int aCount=counts.get(a); 1447 final int bCount=counts.get(a+1); 1448 final int cCount=counts.get(d-1); 1449 final int dCount=counts.get(d); 1450 final byte qb=(quals==null ? 20 : quals[a+kbig]); 1451 if(isError(aCount, bCount, qb) && isError(dCount, cCount, qb) && isSimilar(aCount, dCount)){ 1452 if(verbose){ 1453 outstream.println("Found error: "+aCount+", "+bCount+", "+cCount+", "+dCount); 1454 } 1455 //Looks like a 1bp substitution; attempt to correct. 1456 detected++; 1457 int ret=correctSingleBasePincer(a, d, bases, quals, leftBuffer, rightBuffer, counts, bb, errorExtension, kmer); 1458 corrected+=ret; 1459 if(verbose){ 1460 outstream.println("Corrected error."); 1461 } 1462 }else{ 1463 if(verbose){ 1464 outstream.println("Not an error: "+aCount+", "+bCount+", "+cCount+", "+dCount+ 1465 "; "+isError(aCount, bCount, qb)+", "+isError(dCount, cCount, qb)+", "+isSimilar(aCount, dCount)); 1466 } 1467 } 1468 } 1469 1470 // if(detected==0 && counts.get(0)>2 && counts.get(counts.size-1)>2){ 1471 // assert(!verbose); 1472 // verbose=true; 1473 // outstream.println("\n"+counts); 1474 // errorCorrectPincer(bases, quals, leftBuffer, rightBuffer, kmers, counts, bb, tracker); 1475 // assert(false); 1476 // } 1477 1478 { 1479 tracker.detectedPincer+=detected; 1480 tracker.correctedPincer+=corrected; 1481 } 1482 1483 return corrected; 1484 } 1485 errorCorrectTail(final byte[] bases, final byte[] quals, final int[] leftBuffer, final int[] rightBuffer, final IntList counts, final ByteBuilder bb, final ErrorTracker tracker, final int startPos, final int errorExtension, final Kmer kmer)1486 public int errorCorrectTail(final byte[] bases, final byte[] quals, final int[] leftBuffer, final int[] rightBuffer, 1487 final IntList counts, final ByteBuilder bb, final ErrorTracker tracker, final int startPos, final int errorExtension, final Kmer kmer){ 1488 if(bases.length<kbig+2+errorExtension+deadZone){return 0;} 1489 int detected=0; 1490 int corrected=0; 1491 1492 //a is the index of the left kmer 1493 //b is a+1 1494 //the base between the kmers is at a+k 1495 for(int a=Tools.max(startPos, errorExtension), lim=counts.size-deadZone-1; a<lim; a++){//errorExtension-1 1496 final int aCount=counts.get(a); 1497 final int bCount=counts.get(a+1); 1498 final byte qb=(quals==null ? 20 : quals[a+kbig]); 1499 if(isError(aCount, bCount, qb) && isSimilar(aCount, a-errorExtension, a-1, counts) && isError(aCount, a+2, a+kbig, counts)){ 1500 if(verbose){ 1501 outstream.println("Found error: "+aCount+", "+bCount); 1502 } 1503 //Assume like a 1bp substitution; attempt to correct. 1504 detected++; 1505 int ret=correctSingleBaseRight(a, bases, quals, leftBuffer, rightBuffer, counts, bb, errorExtension, kmer); 1506 corrected+=ret; 1507 if(verbose){ 1508 outstream.println("Corrected error."); 1509 } 1510 }else{ 1511 if(verbose){ 1512 outstream.println("Not an error: "+aCount+", "+bCount+ 1513 "; "+isError(aCount, bCount, qb)+", "+isSimilar(aCount, a-errorExtension, a-1, counts)+", "+isError(aCount, a+2, a+kbig, counts)); 1514 } 1515 } 1516 } 1517 1518 // if(detected==0 && counts.get(0)>2 && counts.get(counts.size-1)>2){ 1519 // assert(!verbose); 1520 // verbose=true; 1521 // outstream.println("\n"+counts); 1522 // errorCorrectPincer(bases, quals, leftBuffer, rightBuffer, kmers, counts, bb, tracker); 1523 // assert(false); 1524 // } 1525 1526 { 1527 tracker.detectedTail+=detected; 1528 tracker.correctedTail+=corrected; 1529 } 1530 1531 return corrected; 1532 } 1533 1534 @Override reassemble_inner(final ByteBuilder bb, final byte[] quals, final int[] rightCounts, final IntList counts, final int errorExtension, final Kmer kmer, final Kmer regenKmer)1535 public int reassemble_inner(final ByteBuilder bb, final byte[] quals, final int[] rightCounts, final IntList counts, 1536 final int errorExtension, final Kmer kmer, final Kmer regenKmer){ 1537 final int length=bb.length(); 1538 if(length<kbig+1+deadZone){return 0;} 1539 final byte[] bases=bb.array; 1540 int detected=0; 1541 int corrected=0; 1542 1543 //Initialize the kmer 1544 kmer.clear(); 1545 1546 //a is the index of the first base of the left kmer 1547 //b=a+1 is the index of the next base 1548 //ca=a-kbig is the index of the next base 1549 //cb=a-kbig is the index of the next base 1550 //the base between the kmers is at a+k 1551 for(int a=0, lim=length-deadZone-1; a<lim; a++){ 1552 //Update the kmer 1553 kmer.addRight(bases[a]); 1554 1555 if(verbose){ 1556 outstream.println("kmer.len(): "+kmer.len()+" vs "+kbig+"; a="+a); 1557 } 1558 1559 if(kmer.len()>=kbig){ 1560 1561 final int b=a+1; 1562 final int ca=a-kbig+1; 1563 final int cb=ca+1; 1564 1565 final int aCount=counts.get(ca); 1566 final int bCount=counts.get(cb); 1567 final byte qb=(quals==null ? 20 : quals[b]); 1568 1569 if(verbose){ 1570 outstream.println("ca="+ca+", cb="+cb+"; aCount="+aCount+", bCount="+bCount); 1571 outstream.println(isError(aCount, bCount, qb)+", "+isSimilar(aCount, ca-errorExtension, ca-1, counts)+ 1572 ", "+isError(aCount, ca+2, ca+kbig, counts)); 1573 } 1574 1575 // if(isError(aCount, bCount) && isSimilar(aCount, ca-errorExtension, ca-1, counts) && isError(aCount, ca+2, ca+kbig, counts)){ 1576 if(isSubstitution(ca, errorExtension, qb, counts)){ 1577 if(verbose){ 1578 outstream.println("***Found error: "+aCount+", "+bCount); 1579 } 1580 //Assume like a 1bp substitution; attempt to correct. 1581 1582 int rightMaxPos=fillRightCounts(kmer, rightCounts); 1583 int rightMax=rightCounts[rightMaxPos]; 1584 int rightSecondPos=Tools.secondHighestPosition(rightCounts); 1585 int rightSecond=rightCounts[rightSecondPos]; 1586 1587 byte base=bases[b]; 1588 byte num=AminoAcid.baseToNumber[base]; 1589 1590 if(rightMax>=minCountExtend){ 1591 detected++; 1592 if(num==rightMax){ 1593 detected--; 1594 // bases2[b]=base; 1595 }else if((isError(rightMax, rightSecond, qb) || !isJunction(rightMax, rightSecond)) && isSimilar(aCount, rightMax)){ 1596 bases[b]=AminoAcid.numberToBase[rightMaxPos]; 1597 corrected++; 1598 tables.regenerateCounts(bases, counts, ca, regenKmer); 1599 if(verbose){outstream.println("Corrected error: "+num+"->"+rightMaxPos+". New counts:\n"+counts);} 1600 } 1601 1602 // else if(rightSecond>=minCountExtend && isJunction(rightMax, rightSecond) && isSimilar(aCount, rightSecond) 1603 // && !isSimilar(aCount, rightMax)){//This branch may not be very safe. 1604 // bases2[b]=AminoAcid.numberToBase[rightSecondPos]; 1605 // corrected++; 1606 // if(verbose){outstream.println("Corrected error.");} 1607 // } 1608 } 1609 1610 }else{ 1611 if(verbose){ 1612 outstream.println("Not an error: "+aCount+", "+bCount+ 1613 "; "+isError(aCount, bCount, qb)+", "+isSimilar(aCount, a-errorExtension, a-1, counts)+", "+isError(aCount, a+2, a+kbig, counts)); 1614 } 1615 } 1616 } 1617 } 1618 1619 return corrected; 1620 } 1621 correctSingleBasePincer(final int a, final int d, final byte[] bases, final byte[] quals, final int[] leftBuffer, final int[] rightBuffer, final IntList counts, final ByteBuilder bb, final int errorExtension, final Kmer kmer0)1622 private int correctSingleBasePincer(final int a, final int d, final byte[] bases, final byte[] quals, final int[] leftBuffer, final int[] rightBuffer, 1623 final IntList counts, final ByteBuilder bb, final int errorExtension, final Kmer kmer0){ 1624 final byte leftReplacement, rightReplacement; 1625 final int loc=a+kbig; 1626 { 1627 bb.clear(); 1628 Kmer kmer=getKmer(bases, a, kmer0); 1629 if(kmer==null){return 0;} 1630 int extension=extendToRight2_inner(bb, null, rightBuffer, errorExtension, true, kmer); 1631 if(extension<errorExtension){return 0;} 1632 for(int i=1; i<extension; i++){ 1633 if(bb.get(i)!=bases[loc+i]){return 0;} 1634 } 1635 leftReplacement=bb.get(0); 1636 } 1637 { 1638 bb.clear(); 1639 Kmer kmer=getKmer(bases, d, kmer0); 1640 if(kmer==null){return 0;} 1641 kmer.rcomp(); 1642 int extension=extendToRight2_inner(bb, null, rightBuffer, errorExtension, true, kmer); 1643 if(extension<errorExtension){return 0;} 1644 bb.reverseComplementInPlace(); 1645 for(int i=0; i<extension-1; i++){ 1646 if(bb.get(i)!=bases[loc+i+1-extension]){return 0;} 1647 } 1648 rightReplacement=bb.get(extension-1); 1649 } 1650 if(leftReplacement!=rightReplacement){return 0;} 1651 if(bases[loc]==leftReplacement){return 0;} 1652 if(!isSimilar(bases, a, leftReplacement, counts, kmer0)){return 0;} 1653 1654 bases[loc]=leftReplacement; 1655 assert(d==a+kbig+1); 1656 tables.regenerateCounts(bases, counts, a, kmer0); 1657 return 1; 1658 } 1659 correctSingleBaseRight(final int a, final byte[] bases, final byte[] quals, final int[] leftBuffer, final int[] rightBuffer, final IntList counts, final ByteBuilder bb, final int errorExtension0, final Kmer kmer0)1660 private int correctSingleBaseRight(final int a, final byte[] bases, final byte[] quals, final int[] leftBuffer, final int[] rightBuffer, 1661 final IntList counts, final ByteBuilder bb, final int errorExtension0, final Kmer kmer0){ 1662 final byte leftReplacement; 1663 final int loc=a+kbig; 1664 final int errorExtension=Tools.min(errorExtension0, bases.length-loc); 1665 { 1666 bb.clear(); 1667 Kmer kmer=getKmer(bases, a, kmer0); 1668 if(kmer==null){return 0;} 1669 int extension=extendToRight2_inner(bb, null, rightBuffer, errorExtension, true, kmer); 1670 if(extension<errorExtension){return 0;} 1671 for(int i=1; i<extension; i++){ 1672 if(bb.get(i)!=bases[loc+i]){ 1673 return 0; 1674 } 1675 } 1676 leftReplacement=bb.get(0); 1677 } 1678 1679 if(bases[loc]==leftReplacement){return 0;} 1680 if(!isSimilar(bases, a, leftReplacement, counts, kmer0)){return 0;} 1681 1682 bases[loc]=leftReplacement; 1683 tables.regenerateCounts(bases, counts, a, kmer0); 1684 return 1; 1685 } 1686 isSimilar(byte[] bases, int a, byte newBase, IntList counts, final Kmer kmer0)1687 private final boolean isSimilar(byte[] bases, int a, byte newBase, IntList counts, final Kmer kmer0){ 1688 Kmer kmer=getKmer(bases, a, kmer0); 1689 if(kmer==null){ 1690 assert(false); //Should never happen 1691 return false; 1692 } 1693 kmer.addRight(newBase); 1694 int count=getCount(kmer); 1695 int aCount=counts.get(a); 1696 boolean similar=isSimilar(aCount, count); 1697 return similar; 1698 } 1699 1700 /*--------------------------------------------------------------*/ 1701 /*---------------- Inherited Abstract Methods ----------------*/ 1702 /*--------------------------------------------------------------*/ 1703 1704 @Override makeKhist()1705 final void makeKhist(){ 1706 tables.makeKhist(outHist, histColumns, histMax, histHeader, histZeros, true, smoothHist, gcHist, false, 0.01, 1, 1); 1707 } 1708 @Override dumpKmersAsText()1709 final void dumpKmersAsText(){ 1710 tables.dumpKmersAsBytes_MT(outKmers, minToDump, maxToDump, true, null); 1711 } 1712 1713 /*--------------------------------------------------------------*/ 1714 /*---------------- Fields ----------------*/ 1715 /*--------------------------------------------------------------*/ 1716 1717 @Override tables()1718 public final KmerTableSetU tables(){return tables;} 1719 public final KmerTableSetU tables; 1720 1721 /** Normal kmer length */ 1722 final int ksmall; 1723 1724 } 1725