1 package assemble; 2 3 import java.util.Arrays; 4 import java.util.concurrent.atomic.AtomicIntegerArray; 5 6 import dna.AminoAcid; 7 import kmer.AbstractKmerTableSet; 8 import shared.Tools; 9 import structures.ByteBuilder; 10 import ukmer.AbstractKmerTableU; 11 import ukmer.HashArrayU1D; 12 import ukmer.HashForestU; 13 import ukmer.Kmer; 14 import ukmer.KmerNodeU; 15 import ukmer.KmerTableSetU; 16 17 /** 18 * Designed for removal of dead ends (aka hairs). 19 * @author Brian Bushnell 20 * @date Jun 26, 2015 21 * 22 */ 23 public class Shaver2 extends Shaver { 24 25 26 /*--------------------------------------------------------------*/ 27 /*---------------- Constructor ----------------*/ 28 /*--------------------------------------------------------------*/ 29 30 Shaver2(KmerTableSetU tables_, int threads_)31 public Shaver2(KmerTableSetU tables_, int threads_){ 32 this(tables_, threads_, 1, 1, 1, 1, 3, 100, 100, true, true); 33 } 34 Shaver2(KmerTableSetU tables_, int threads_, int minCount_, int maxCount_, int minSeed_, int minCountExtend_, float branchMult2_, int maxLengthToDiscard_, int maxDistanceToExplore_, boolean removeHair_, boolean removeBubbles_)35 public Shaver2(KmerTableSetU tables_, int threads_, 36 int minCount_, int maxCount_, int minSeed_, int minCountExtend_, float branchMult2_, int maxLengthToDiscard_, int maxDistanceToExplore_, 37 boolean removeHair_, boolean removeBubbles_){ 38 super(tables_, threads_, minCount_, maxCount_, minSeed_, minCountExtend_, branchMult2_, maxLengthToDiscard_, maxDistanceToExplore_, removeHair_, removeBubbles_); 39 tables=tables_; 40 41 } 42 43 /*--------------------------------------------------------------*/ 44 /*---------------- Outer Methods ----------------*/ 45 /*--------------------------------------------------------------*/ 46 47 @Override makeExploreThread(int id_)48 final AbstractExploreThread makeExploreThread(int id_){return new ExploreThread(id_);} 49 @Override makeShaveThread(int id_)50 final AbstractShaveThread makeShaveThread(int id_){return new ShaveThread(id_);} 51 52 53 /*--------------------------------------------------------------*/ 54 /*---------------- Dead-End Removal ----------------*/ 55 /*--------------------------------------------------------------*/ 56 57 // private boolean valid(ByteBuilder bb, boolean doAssertion){ 58 // Kmer kmer=new Kmer(kbig); 59 // if(bb.length()<kbig){return false;} 60 // kmer.clear(); 61 // for(int i=0; i<bb.length; i++){ 62 // byte b=bb.array[i]; 63 // kmer.addRight(b); 64 // if(kmer.len()>=kbig){ 65 // int count=getCount(kmer); 66 // if(count<1){ 67 // assert(!doAssertion || false) : "count="+count+", minCount="+minCount+", maxCount="+maxCount+", kbig="+kbig+", kmer.kbig="+kmer.kbig+"\n" 68 // +"kmer="+kmer.toString()+"\nbb= "+bb.toString()+"\n"; 69 // kmer.clear(); 70 // return false; 71 // } 72 // } 73 // } 74 // kmer.clear(); 75 // return true; 76 // } 77 exploreAndMark(Kmer kmer, ByteBuilder bb, int[] leftCounts, int[] rightCounts, int minCount, int maxCount, int maxLengthToDiscard, int maxDistanceToExplore, boolean prune, long[][] countMatrixT, long[][] removeMatrixT)78 public boolean exploreAndMark(Kmer kmer, ByteBuilder bb, int[] leftCounts, int[] rightCounts, int minCount, int maxCount, 79 int maxLengthToDiscard, int maxDistanceToExplore, boolean prune, 80 long[][] countMatrixT, long[][] removeMatrixT){ 81 bb.clear(); 82 assert(kmer.len>=kmer.kbig); 83 if(findOwner(kmer)>STATUS_UNEXPLORED){return false;} 84 85 assert(countWithinLimits(kmer)) : "count="+getCount(kmer)+", minCount="+minCount+", maxCount="+maxCount+"\n"+kmer.toString(); 86 87 bb.appendKmer(kmer); 88 // assert(kmer.toString().equals(bb.toString())) : "\n"+kmer+"\n"+bb+"\n";//123 89 //assert(valid(bb, true)); 90 // assert(tables.getCount(kmer)==1) : tables.getCount(kmer);//count>0 && count<=maxCount 91 final int rightCode=explore(kmer, bb, leftCounts, rightCounts, minCount, maxCount, maxDistanceToExplore); 92 //assert(valid(bb, true)); 93 94 bb.reverseComplementInPlace(); 95 //assert(valid(bb, true)); 96 kmer=tables.rightmostKmer(bb, kmer); 97 assert(getCount(kmer)>0) : "count="+getCount(kmer)+", minCount="+minCount+", maxCount="+maxCount+", rightCode="+rightCode+"\n"+kmer.toString();//123 98 // assert(tables.getCount(kmer)==1) : tables.getCount(kmer);//count>0 && count<=maxCount 99 final int leftCode=explore(kmer, bb, leftCounts, rightCounts, minCount, maxCount, maxDistanceToExplore); 100 //assert(valid(bb, true)); 101 102 kmer=tables.rightmostKmer(bb, kmer);//123 103 assert(getCount(kmer)>0) : "count="+getCount(kmer)+", minCount="+minCount+", maxCount="+maxCount+", leftCode="+leftCode+", rightCode="+rightCode+"\n"+kmer.toString();//123 104 105 final int min=Tools.min(rightCode, leftCode); 106 final int max=Tools.max(rightCode, leftCode); 107 108 countMatrixT[min][max]++; 109 110 if(rightCode==TOO_LONG || rightCode==TOO_DEEP || rightCode==LOOP || rightCode==F_BRANCH){ 111 claim(bb, STATUS_EXPLORED, false, kmer); 112 return false; 113 } 114 115 if(leftCode==TOO_LONG || leftCode==TOO_DEEP || leftCode==LOOP || leftCode==F_BRANCH){ 116 claim(bb, STATUS_EXPLORED, false, kmer); 117 return false; 118 } 119 120 if(bb.length()-kbig>maxLengthToDiscard){ 121 claim(bb, STATUS_EXPLORED, false, kmer); 122 return false; 123 } 124 125 if(removeHair && min==DEAD_END){ 126 if(max==DEAD_END || max==B_BRANCH){ 127 removeMatrixT[min][max]++; 128 boolean success=claim(bb, STATUS_REMOVE, false, kmer); 129 if(verbose || verbose2){System.err.println("Claiming ("+rightCode+","+leftCode+") length "+bb.length()+": "+bb);} 130 assert(success); 131 return true; 132 } 133 } 134 135 if(removeBubbles){ 136 if(rightCode==B_BRANCH && leftCode==B_BRANCH){ 137 removeMatrixT[min][max]++; 138 boolean success=claim(bb, STATUS_REMOVE, false, kmer); 139 if(verbose || verbose2){System.err.println("Claiming ("+rightCode+","+leftCode+") length "+bb.length()+": "+bb);} 140 assert(success); 141 return true; 142 } 143 } 144 145 claim(bb, STATUS_EXPLORED, false, kmer); 146 return false; 147 } 148 149 /** Explores a single unbranching path in the forward (right) direction. 150 * @param kmer 151 * @param bb 152 * @param leftCounts 153 * @param rightCounts 154 * @param minCount 155 * @param maxCount 156 * @param maxLength0 157 * @return A termination code such as DEAD_END 158 */ explore(Kmer kmer, ByteBuilder bb, int[] leftCounts, int[] rightCounts, int minCount, int maxCount, int maxLength0)159 public int explore(Kmer kmer, ByteBuilder bb, int[] leftCounts, int[] rightCounts, int minCount, int maxCount, int maxLength0){ 160 if(verbose){outstream.println("Entering explore with bb.length()="+bb.length());} 161 if(bb.length()==0){bb.appendKmer(kmer);} 162 assert(bb.length()>=kmer.kbig && kmer.len>=kmer.kbig) : bb.length()+", "+kmer.len+", "+kmer.kbig; 163 //assert(valid(bb, true)); 164 165 final int initialLength=bb.length(); 166 final int maxLength=maxLength0+kbig; 167 168 final long firstKey=kmer.xor(); 169 HashArrayU1D table=tables.getTable(kmer); 170 int count=table.getValue(kmer); 171 // kmer.verify(false); 172 // kmer.verify(true); 173 assert(count>=minCount && count<=maxCount) : count+", "+Kmer.MASK_CORE+", "+kmer.verify(false)+", "+kmer.verify(true); 174 175 int nextRightMaxPos=fillRightCounts(kmer, rightCounts); 176 int nextRightMax=rightCounts[nextRightMaxPos]; 177 if(nextRightMax<minCount){ 178 if(verbose){outstream.println("Returning DEAD_END: rightMax="+nextRightMax);} 179 return DEAD_END; 180 } 181 182 while(bb.length()<=maxLength){ 183 184 final int rightMaxPos=nextRightMaxPos; 185 final int rightMax=rightCounts[rightMaxPos]; 186 final int rightSecondPos=Tools.secondHighestPosition(rightCounts); 187 final int rightSecond=rightCounts[rightSecondPos]; 188 189 if(verbose){ 190 outstream.println("kmer: "+toText(kmer)); 191 outstream.println("Right counts: "+count+", "+Arrays.toString(rightCounts)); 192 outstream.println("rightMaxPos="+rightMaxPos); 193 outstream.println("rightMax="+rightMax); 194 outstream.println("rightSecondPos="+rightSecondPos); 195 outstream.println("rightSecond="+rightSecond); 196 } 197 assert(count>0); 198 // assert(getCount(kmer)==count); //123 199 200 final int prevCount=count; 201 202 //Generate the new base 203 final byte b=AminoAcid.numberToBase[rightMaxPos]; 204 final long x=rightMaxPos; 205 long evicted=kmer.addRightNumeric(x); 206 207 // assert(getCount(kmer)==rightMax); //123 208 209 //Now consider the next kmer 210 if(kmer.xor()==firstKey){ 211 if(verbose){outstream.println("Returning LOOP");} 212 return LOOP; 213 } 214 table=tables.getTable(kmer); 215 216 assert(table.getValue(kmer)==rightMax || rightMax==0); 217 count=rightMax; 218 assert(count>0); 219 220 221 {//Fill right and look for dead end 222 nextRightMaxPos=fillRightCounts(kmer, rightCounts); 223 nextRightMax=rightCounts[nextRightMaxPos]; 224 if(nextRightMax<minCount){ 225 if(verbose){outstream.println("Returning DEAD_END: rightMax="+rightMax);} 226 return DEAD_END; 227 } 228 } 229 230 231 {//Look left 232 final int leftMaxPos=fillLeftCounts(kmer, leftCounts); 233 final int leftMax=leftCounts[leftMaxPos]; 234 final int leftSecondPos=Tools.secondHighestPosition(leftCounts); 235 final int leftSecond=leftCounts[leftSecondPos]; 236 237 // assert(leftMax==1 || leftMax==0) : prevCount+" -> "+Arrays.toString(leftCounts)+", "+count+", "+Arrays.toString(rightCounts); 238 239 if(verbose){ 240 outstream.println("Left counts: "+count+", "+Arrays.toString(leftCounts)); 241 outstream.println("leftMaxPos="+leftMaxPos); 242 outstream.println("leftMax="+leftMax); 243 outstream.println("leftSecondPos="+leftSecondPos); 244 outstream.println("leftSecond="+leftSecond); 245 } 246 247 if(leftSecond>=minCount || leftMax>prevCount){//Backward branch 248 // assert(leftSecond==1) : prevCount+" -> "+Arrays.toString(leftCounts)+", "+count+", "+Arrays.toString(rightCounts); 249 if(leftMax>prevCount){ 250 if(verbose){outstream.println("Returning B_BRANCH_LOWER: " + 251 "count="+count+", prevCount="+prevCount+", leftMax="+leftMax+", leftSecond="+leftSecond);} 252 return B_BRANCH; 253 }else{ 254 assert(leftMax==prevCount); 255 if(leftMax>=2*leftSecond){//This constant is adjustable 256 // assert(false) : prevCount+" -> "+Arrays.toString(leftCounts)+", "+count+", "+Arrays.toString(rightCounts); 257 //keep going 258 }else{ 259 if(verbose){outstream.println("Returning B_BRANCH_SIMILAR: " + 260 "count="+count+", prevCount="+prevCount+", leftMax="+leftMax+", leftSecond="+leftSecond);} 261 return B_BRANCH; 262 } 263 } 264 } 265 266 } 267 268 //Look right 269 if(rightSecond>=minCount){ 270 if(verbose){outstream.println("Returning F_BRANCH: rightSecond="+rightSecond);} 271 return F_BRANCH; 272 } 273 274 if(count>maxCount){ 275 if(verbose){outstream.println("Returning TOO_DEEP: rightMax="+rightMax);} 276 return TOO_DEEP; 277 } 278 279 bb.append(b); 280 // assert(valid(bb, false)); //123 281 // if(!valid(bb, false)){ 282 // System.err.println("kbig="+kbig+", "+kmer.kbig+"\nb="+ 283 // (char)b+"\n"+Arrays.toString(rightCounts)+"\ncount="+count+", "+getCount(kmer)+"\nrightMax="+rightMax+"\nrightMax="+rightMax); 284 // System.err.println("\nkmer="+kmer+"\nbb= "+bb); 285 // kmer.addLeftNumeric(evicted); 286 // System.err.println("prev="+kmer+"\nprevCount="+prevCount+", "+getCount(kmer)); 287 // valid(bb, true); 288 // } 289 if(verbose){outstream.println("Added base "+(char)b);} 290 } 291 292 assert(bb.length()>maxLength); 293 if(verbose){outstream.println("Returning TOO_LONG: length="+bb.length());} 294 return TOO_LONG; 295 296 } 297 298 /*--------------------------------------------------------------*/ 299 /*---------------- Inner Classes ----------------*/ 300 /*--------------------------------------------------------------*/ 301 302 /*--------------------------------------------------------------*/ 303 /*---------------- ExploreThread ----------------*/ 304 /*--------------------------------------------------------------*/ 305 306 /** 307 * Searches for dead ends. 308 */ 309 class ExploreThread extends AbstractExploreThread{ 310 311 /** 312 * Constructor 313 */ ExploreThread(int id_)314 public ExploreThread(int id_){ 315 super(id_, kbig); 316 } 317 318 @Override processNextTable(final Kmer kmer, Kmer temp)319 boolean processNextTable(final Kmer kmer, Kmer temp){ 320 final int tnum=nextTable.getAndAdd(1); 321 if(tnum>=tables.ways){return false;} 322 final HashArrayU1D table=tables.getTable(tnum); 323 final int[] counts=table.values(); 324 final int max=counts.length; 325 // final int max=table.arrayLength(); 326 if(startFromHighCounts){ 327 // for(int cell=0; cell<max; cell++){ 328 // int x=processCell_high(table, cell, kmer, temp); 329 // deadEndsFoundT+=x; 330 // } 331 for(int cell=0; cell<max; cell++){//Not noticeably faster 332 final int count=counts[cell]; 333 if(count>maxCount){ 334 int x=processCell_high(table, cell, kmer, temp, count); 335 deadEndsFoundT+=x; 336 } 337 } 338 }else{ 339 for(int cell=0; cell<max; cell++){ 340 int x=processCell_low(table, cell, kmer, temp); 341 deadEndsFoundT+=x; 342 } 343 } 344 return true; 345 } 346 347 @Override processNextVictims(final Kmer kmer, Kmer temp)348 boolean processNextVictims(final Kmer kmer, Kmer temp){ 349 final int tnum=nextVictims.getAndAdd(1); 350 if(tnum>=tables.ways){return false;} 351 final HashArrayU1D table=tables.getTable(tnum); 352 final HashForestU forest=table.victims(); 353 final int max=forest.arrayLength(); 354 if(startFromHighCounts){ 355 for(int cell=0; cell<max; cell++){ 356 KmerNodeU kn=forest.getNode(cell); 357 int x=traverseKmerNodeU_high(kn, kmer, temp); 358 deadEndsFoundT+=x; 359 } 360 }else{ 361 for(int cell=0; cell<max; cell++){ 362 KmerNodeU kn=forest.getNode(cell); 363 int x=traverseKmerNodeU_low(kn, kmer, temp); 364 deadEndsFoundT+=x; 365 } 366 } 367 return true; 368 } 369 traverseKmerNodeU_high(KmerNodeU kn, Kmer kmer, Kmer temp)370 private int traverseKmerNodeU_high(KmerNodeU kn, Kmer kmer, Kmer temp){ 371 int sum=0; 372 if(kn!=null){ 373 sum+=processKmerNodeU_high(kn, kmer, temp); 374 if(kn.left()!=null){ 375 sum+=traverseKmerNodeU_high(kn.left(), kmer, temp); 376 } 377 if(kn.right()!=null){ 378 sum+=traverseKmerNodeU_high(kn.right(), kmer, temp); 379 } 380 } 381 return sum; 382 } 383 traverseKmerNodeU_low(KmerNodeU kn, Kmer kmer, Kmer temp)384 private int traverseKmerNodeU_low(KmerNodeU kn, Kmer kmer, Kmer temp){ 385 int sum=0; 386 if(kn!=null){ 387 sum+=processKmerNodeU_low(kn, kmer, temp); 388 if(kn.left()!=null){ 389 sum+=traverseKmerNodeU_low(kn.left(), kmer, temp); 390 } 391 if(kn.right()!=null){ 392 sum+=traverseKmerNodeU_low(kn.right(), kmer, temp); 393 } 394 } 395 return sum; 396 } 397 398 /*--------------------------------------------------------------*/ 399 400 //old processCell_low(HashArrayU1D table, int cell, Kmer kmer0, Kmer temp)401 private int processCell_low(HashArrayU1D table, int cell, Kmer kmer0, Kmer temp){ 402 int count=table.readCellValue(cell); 403 if(count<minSeed || count>maxCount){return 0;} 404 int owner=table.getCellOwner(cell); 405 if(owner>STATUS_UNEXPLORED){return 0;} 406 Kmer kmer=table.fillKmer(cell, kmer0); 407 if(kmer==null){return 0;} 408 if(verbose){outstream.println("id="+id+" processing cell "+cell+"; \tkmer="+kmer);} 409 410 return processKmer_low(kmer, temp); 411 } 412 413 //old processKmerNodeU_low(KmerNodeU kn, Kmer kmer, Kmer temp)414 private int processKmerNodeU_low(KmerNodeU kn, Kmer kmer, Kmer temp){ 415 kmer.setFrom(kn.pivot()); 416 final int count=kn.getValue(kmer); 417 if(count<minSeed || count>maxCount){return 0;} 418 int owner=kn.getOwner(kmer); 419 if(owner>STATUS_UNEXPLORED){return 0;} 420 421 return processKmer_low(kmer, temp); 422 } 423 424 //old processKmer_low(Kmer original, Kmer temp)425 private int processKmer_low(Kmer original, Kmer temp){ 426 kmersTestedT++; 427 boolean b=exploreAndMark(original, builderT, leftCounts, rightCounts, minCount, maxCount, maxLengthToDiscard, maxDistanceToExplore, true 428 , countMatrixT, removeMatrixT 429 ); 430 return b ? 1 : 0; 431 } 432 433 434 435 /*--------------------------------------------------------------*/ 436 437 //new processCell_high(HashArrayU1D table, int cell, Kmer kmer0, Kmer temp, int count)438 private int processCell_high(HashArrayU1D table, int cell, Kmer kmer0, Kmer temp, int count){ 439 440 // private int processCell_high(HashArrayU1D table, int cell, Kmer kmer0, Kmer temp){ 441 // int count=table.readCellValue(cell); 442 443 if(count<=maxCount){return 0;} 444 // if(shaveFast && maxCount==1 && count>=6){return 0;} 445 // int owner=table.getCellOwner(cell); 446 // if(owner>STATUS_UNEXPLORED){return 0;} 447 Kmer kmer=table.fillKmer(cell, kmer0); 448 if(kmer==null){return 0;} 449 if(verbose){outstream.println("id="+id+" processing cell "+cell+"; \tkmer="+kmer);} 450 // assert(false) : shaveFast; 451 452 return processKmer_high(kmer, temp, count); 453 } 454 455 //new processKmerNodeU_high(KmerNodeU kn, Kmer kmer, Kmer temp)456 private int processKmerNodeU_high(KmerNodeU kn, Kmer kmer, Kmer temp){ 457 kmer.setFrom(kn.pivot()); 458 final int count=kn.getValue(kmer); 459 if(count<=maxCount){return 0;} 460 // if(shaveFast && maxCount==1 && count>=6){return 0;} 461 // int owner=kn.getOwner(kmer); 462 // if(owner>STATUS_UNEXPLORED){return 0;} 463 464 return processKmer_high(kmer, temp, count); 465 } 466 467 //new processKmer_high(final Kmer original, Kmer kmer, final int count0)468 private int processKmer_high(final Kmer original, Kmer kmer, final int count0){ 469 int sum=0; 470 471 // assert(false) : "This loop is broken and removes kmers with counts outside the range, including non-existing kmers."; 472 // for(long i=0; i<4; i++){ 473 // kmer.setFrom(original); 474 // long old=kmer.addRightNumeric(i); 475 // HashArrayU1D table=(HashArrayU1D) tables.getTable(kmer); 476 // int count=table.getCount(kmer); 477 // if(count>0 && count<=maxCount && tables.getOwner(kmer)<=STATUS_UNEXPLORED){ 478 // kmersTestedT++; 479 // boolean b=exploreAndMark(kmer, builderT, leftCounts, rightCounts, 480 // minCount, maxCount, maxLengthToDiscard, maxDistanceToExplore, true, 481 // countMatrixT, removeMatrixT); 482 // if(b){sum++;} 483 // } 484 // } 485 486 assert(original.len()==kbig); 487 488 //This loop appears to work correctly. 489 sum+=processKmer_high_leftLoop(original, kmer, count0); 490 491 original.rcomp(); 492 assert(original.len()==kbig); 493 sum+=processKmer_high_leftLoop(original, kmer, count0); 494 return sum; 495 } 496 processKmer_high_leftLoop(final Kmer original, Kmer kmer, final int count0)497 private int processKmer_high_leftLoop(final Kmer original, Kmer kmer, final int count0){ 498 if(shaveVFast){//Optional accelerator; only examines kmers that actually branch 499 int inf=Integer.MAX_VALUE; 500 int maxHighBranch=-1, minHighBranch=inf, highBranches=0; 501 int maxLowBranch=-1, minLowBranch=inf, lowBranches=0; 502 for(long i=0; i<4; i++){ 503 kmer.setFrom(original); 504 long old=kmer.addLeftNumeric(i); 505 assert(kmer.len()>=kbig) : kmer.len()+", "+kbig; 506 HashArrayU1D table=(HashArrayU1D) tables.getTable(kmer); 507 int count=table.getValue(kmer); 508 if(count>0){ 509 if(count<=maxCount){ 510 minLowBranch=Tools.min(count, minLowBranch); 511 maxLowBranch=Tools.max(count, maxLowBranch); 512 lowBranches++; 513 }else{ 514 minHighBranch=Tools.min(count, minHighBranch); 515 maxHighBranch=Tools.max(count, maxHighBranch); 516 highBranches++; 517 } 518 } 519 } 520 521 if(maxLowBranch<0){return 0;} //This is fine 522 // if(maxHighBranch<0){return 0;} //Speed increase but quality decrease 523 524 if(highBranches+lowBranches<2){return 0;} //Speed increase (25% for shave) but contiguity decrease (~1%). 525 // if(maxLowBranch>0 && maxLowBranch*16<minHighBranch){return 0;} //Speed increase but quality decrease 526 } 527 528 int sum=0; 529 for(long i=0; i<4; i++){ 530 kmer.setFrom(original); 531 long old=kmer.addLeftNumeric(i); 532 assert(kmer.len()>=kbig) : kmer.len()+", "+kbig; 533 HashArrayU1D table=(HashArrayU1D) tables.getTable(kmer); 534 int count=table.getValue(kmer); 535 if(count>0 && count<=maxCount && table.getOwner(kmer)<=STATUS_UNEXPLORED){ 536 kmersTestedT++; 537 boolean b=exploreAndMark(kmer, builderT, leftCounts, rightCounts, 538 minCount, maxCount, maxLengthToDiscard, maxDistanceToExplore, true, 539 countMatrixT, removeMatrixT); 540 if(b){sum++;} 541 } 542 } 543 return sum; 544 } 545 } 546 547 /*--------------------------------------------------------------*/ 548 /*---------------- ShaveThread ----------------*/ 549 /*--------------------------------------------------------------*/ 550 551 /** 552 * Removes dead-end kmers. 553 */ 554 private class ShaveThread extends AbstractShaveThread{ 555 556 /** 557 * Constructor 558 */ ShaveThread(int id_)559 public ShaveThread(int id_){ 560 super(id_); 561 } 562 563 @Override processNextTable()564 boolean processNextTable(){ 565 final int tnum=nextTable.getAndAdd(1); 566 if(tnum>=tables.ways){return false;} 567 // long x=0; 568 final HashArrayU1D table=tables.getTable(tnum); 569 final AtomicIntegerArray owners=table.owners(); 570 final int[] values=table.values(); 571 final int max=table.arrayLength(); 572 for(int cell=0; cell<max; cell++){ 573 if(owners.get(cell)==STATUS_REMOVE){ 574 // x++; 575 values[cell]=0; 576 } 577 } 578 for(KmerNodeU kn : table.victims().array()){ 579 if(kn!=null){traverseKmerNodeU(kn);} 580 } 581 582 table.clearOwnership(); 583 kmersRemovedT+=table.regenerate(0); 584 // outstream.println(x); 585 return true; 586 } 587 traverseKmerNodeU(KmerNodeU kn)588 private void traverseKmerNodeU(KmerNodeU kn){ 589 if(kn==null){return;} 590 if(kn.owner()==STATUS_REMOVE){kn.set(0);} 591 traverseKmerNodeU(kn.left()); 592 traverseKmerNodeU(kn.right()); 593 } 594 595 } 596 597 598 /*--------------------------------------------------------------*/ 599 /*---------------- Recall Methods ----------------*/ 600 /*--------------------------------------------------------------*/ 601 claim(final ByteBuilder bb, final int id, final boolean exitEarly, Kmer kmer)602 public boolean claim(final ByteBuilder bb, final int id, final boolean exitEarly, Kmer kmer){ 603 return claim(bb.array, bb.length(), id, exitEarly, kmer); 604 } 605 claim(final byte[] bases, final int blen, final int id, boolean exitEarly, final Kmer kmer)606 public boolean claim(final byte[] bases, final int blen, final int id, boolean exitEarly, final Kmer kmer){ 607 if(blen<kbig){return false;} 608 if(verbose){outstream.println("Thread "+id+" claim start.");} 609 int len=0; 610 kmer.clear(); 611 boolean success=true; 612 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */ 613 for(int i=0; i<blen && success; i++){ 614 final byte b=bases[i]; 615 final long x=AminoAcid.baseToNumber[b]; 616 kmer.addRight(b); 617 618 if(x<0){len=0;} 619 else{len++;} 620 assert(len==kmer.len); 621 622 if(len>=kbig){ 623 assert(countWithinLimits(kmer)) : "count="+getCount(kmer)+", minCount="+minCount+", maxCount="+maxCount+"\n" 624 + "len="+len+", i="+i+", blen="+blen+"\n" 625 + ""+kmer.toString(); 626 success=claim(kmer, id/*, rid, i*/); 627 success=(success || !exitEarly); 628 } 629 } 630 return success; 631 } 632 countWithinLimits(Kmer kmer)633 final boolean countWithinLimits(Kmer kmer){ 634 int count=getCount(kmer); 635 return count>=minCount && count<=maxCount; 636 } 637 getCount(Kmer kmer)638 int getCount(Kmer kmer){return tables.getCount(kmer);} claim(Kmer kmer, int id)639 boolean claim(Kmer kmer, int id){return tables.claim(kmer, id);} doubleClaim(ByteBuilder bb, int id , Kmer kmer)640 boolean doubleClaim(ByteBuilder bb, int id/*, long rid*/, Kmer kmer){return tables.doubleClaim(bb, id, kmer/*, rid*/);} 641 // boolean claim(ByteBuilder bb, int id, /*long rid, */boolean earlyExit, Kmer kmer){return tables.claim(bb, id/*, rid*/, earlyExit, kmer);} 642 // boolean claim(byte[] array, int len, int id, /*long rid, */boolean earlyExit, Kmer kmer){return tables.claim(array, len, id/*, rid*/, earlyExit, kmer);} findOwner(Kmer kmer)643 int findOwner(Kmer kmer){return tables.findOwner(kmer);} findOwner(ByteBuilder bb, int id, Kmer kmer)644 int findOwner(ByteBuilder bb, int id, Kmer kmer){return tables.findOwner(bb, id, kmer);} findOwner(byte[] array, int len, int id, Kmer kmer)645 int findOwner(byte[] array, int len, int id, Kmer kmer){return tables.findOwner(array, len, id, kmer);} release(ByteBuilder bb, int id, Kmer kmer)646 void release(ByteBuilder bb, int id, Kmer kmer){tables.release(bb, id, kmer);} release(byte[] array, int len, int id, Kmer kmer)647 void release(byte[] array, int len, int id, Kmer kmer){tables.release(array, len, id, kmer);} fillRightCounts(Kmer kmer, int[] counts)648 int fillRightCounts(Kmer kmer, int[] counts){return tables.fillRightCounts(kmer, counts);} fillLeftCounts(Kmer kmer, int[] counts)649 int fillLeftCounts(Kmer kmer, int[] counts){return tables.fillLeftCounts(kmer, counts);} toText(Kmer kmer)650 static StringBuilder toText(Kmer kmer){return AbstractKmerTableU.toText(kmer);} 651 652 /*--------------------------------------------------------------*/ 653 /*---------------- Fields ----------------*/ 654 /*--------------------------------------------------------------*/ 655 656 /*--------------------------------------------------------------*/ 657 /*---------------- Final Fields ----------------*/ 658 /*--------------------------------------------------------------*/ 659 660 @Override tables()661 AbstractKmerTableSet tables(){return tables;} 662 663 final KmerTableSetU tables; 664 665 } 666