1 package ukmer; 2 3 import java.io.File; 4 import java.util.ArrayList; 5 import java.util.BitSet; 6 import java.util.concurrent.atomic.AtomicLong; 7 8 import assemble.Contig; 9 import bloom.KmerCountAbstract; 10 import dna.AminoAcid; 11 import fileIO.ByteStreamWriter; 12 import fileIO.FileFormat; 13 import fileIO.ReadWrite; 14 import jgi.BBMerge; 15 import kmer.AbstractKmerTableSet; 16 import kmer.DumpThread; 17 import kmer.ScheduleMaker; 18 import shared.Parse; 19 import shared.Parser; 20 import shared.PreParser; 21 import shared.Primes; 22 import shared.ReadStats; 23 import shared.Shared; 24 import shared.Timer; 25 import shared.Tools; 26 import shared.TrimRead; 27 import stream.ConcurrentReadInputStream; 28 import stream.FastaReadInputStream; 29 import stream.Read; 30 import structures.ByteBuilder; 31 import structures.IntList; 32 import structures.ListNum; 33 34 35 /** 36 * Loads and holds kmers for Tadpole2/KmerCountExact 37 * @author Brian Bushnell 38 * @date Jun 22, 2015 39 * 40 */ 41 public class KmerTableSetU extends AbstractKmerTableSet { 42 43 /** 44 * Code entrance from the command line. 45 * @param args Command line arguments 46 */ main(String[] args)47 public static void main(String[] args){ 48 49 Timer t=new Timer(), t2=new Timer(); 50 t.start(); 51 t2.start(); 52 53 //Create a new CountKmersExact instance 54 KmerTableSetU set=new KmerTableSetU(args); 55 t2.stop(); 56 outstream.println("Initialization Time: \t"+t2); 57 58 ///And run it 59 set.process(t); 60 61 //Close the print stream if it was redirected 62 Shared.closeStream(outstream); 63 } 64 65 /** 66 * Constructor. 67 * @param args Command line arguments 68 */ KmerTableSetU(String[] args)69 private KmerTableSetU(String[] args){ 70 this(args, 0);//+5 if using ownership and building contigs 71 } 72 73 74 /** 75 * Constructor. 76 * @param args Command line arguments 77 */ KmerTableSetU(String[] args, int extraBytesPerKmer_)78 public KmerTableSetU(String[] args, int extraBytesPerKmer_){ 79 80 {//Preparse block for help, config files, and outstream 81 PreParser pp=new PreParser(args, getClass(), false); 82 args=pp.args; 83 outstream=pp.outstream; 84 }//TODO - no easy way to close outstream 85 86 /* Initialize local variables with defaults */ 87 Parser parser=new Parser(); 88 boolean prealloc_=false; 89 int kbig_=62; 90 int ways_=-1; 91 int filterMax_=2; 92 boolean ecco_=false, merge_=false; 93 boolean rcomp_=true; 94 double minProb_=defaultMinprob; 95 96 /* Parse arguments */ 97 for(int i=0; i<args.length; i++){ 98 99 final String arg=args[i]; 100 String[] split=arg.split("="); 101 String a=split[0].toLowerCase(); 102 String b=split.length>1 ? split[1] : null; 103 104 if(Parser.parseCommonStatic(arg, a, b)){ 105 //do nothing 106 }else if(Parser.parseZip(arg, a, b)){ 107 //do nothing 108 }else if(Parser.parseQuality(arg, a, b)){ 109 //do nothing 110 }else if(Parser.parseFasta(arg, a, b)){ 111 //do nothing 112 }else if(parser.parseInterleaved(arg, a, b)){ 113 //do nothing 114 }else if(parser.parseTrim(arg, a, b)){ 115 //do nothing 116 }else if(a.equals("in") || a.equals("in1")){ 117 in1.clear(); 118 if(b!=null){ 119 String[] s=b.split(","); 120 for(String ss : s){ 121 in1.add(ss); 122 } 123 } 124 }else if(a.equals("in2")){ 125 in2.clear(); 126 if(b!=null){ 127 String[] s=b.split(","); 128 for(String ss : s){ 129 in2.add(ss); 130 } 131 } 132 }else if(a.equals("extra")){ 133 extra.clear(); 134 if(b!=null){ 135 String[] s=b.split(","); 136 for(String ss : s){ 137 extra.add(ss); 138 } 139 } 140 }else if(a.equals("append") || a.equals("app")){ 141 append=ReadStats.append=Parse.parseBoolean(b); 142 }else if(a.equals("overwrite") || a.equals("ow")){ 143 overwrite=Parse.parseBoolean(b); 144 }else if(a.equals("initialsize")){ 145 initialSize=Parse.parseIntKMG(b); 146 }else if(a.equals("showstats") || a.equals("stats")){ 147 showStats=Parse.parseBoolean(b); 148 }else if(a.equals("ways")){ 149 ways_=Parse.parseIntKMG(b); 150 }else if(a.equals("buflen") || a.equals("bufflen") || a.equals("bufferlength")){ 151 buflen=Parse.parseIntKMG(b); 152 }else if(a.equals("k")){ 153 assert(b!=null) : "\nk needs an integer value such as k=50. Default is 62.\n"; 154 kbig_=Parse.parseIntKMG(b); 155 }else if(a.equals("threads") || a.equals("t")){ 156 THREADS=(b==null || b.equalsIgnoreCase("auto") ? Shared.threads() : Integer.parseInt(b)); 157 }else if(a.equals("showspeed") || a.equals("ss")){ 158 showSpeed=Parse.parseBoolean(b); 159 }else if(a.equals("ecco")){ 160 ecco_=Parse.parseBoolean(b); 161 }else if(a.equals("merge")){ 162 merge_=Parse.parseBoolean(b); 163 }else if(a.equals("verbose")){ 164 // assert(false) : "Verbose flag is currently static final; must be recompiled to change."; 165 verbose=Parse.parseBoolean(b); 166 }else if(a.equals("verbose2")){ 167 // assert(false) : "Verbose flag is currently static final; must be recompiled to change."; 168 verbose2=Parse.parseBoolean(b); 169 }else if(a.equals("minprob")){ 170 minProb_=Double.parseDouble(b); 171 }else if(a.equals("minprobprefilter") || a.equals("mpp")){ 172 minProbPrefilter=Parse.parseBoolean(b); 173 }else if(a.equals("minprobmain") || a.equals("mpm")){ 174 minProbMain=Parse.parseBoolean(b); 175 }else if(a.equals("reads") || a.startsWith("maxreads")){ 176 maxReads=Parse.parseKMG(b); 177 }else if(a.equals("prealloc") || a.equals("preallocate")){ 178 if(b==null || b.length()<1 || Character.isLetter(b.charAt(0))){ 179 prealloc_=Parse.parseBoolean(b); 180 }else{ 181 preallocFraction=Tools.max(0, Double.parseDouble(b)); 182 prealloc_=(preallocFraction>0); 183 } 184 }else if(a.equals("prefilter")){ 185 if(b==null || b.length()<1 || !Tools.isDigit(b.charAt(0))){ 186 prefilter=Parse.parseBoolean(b); 187 }else{ 188 filterMax_=Parse.parseIntKMG(b); 189 prefilter=filterMax_>0; 190 } 191 }else if(a.equals("prefiltersize") || a.equals("prefilterfraction") || a.equals("pff")){ 192 prefilterFraction=Tools.max(0, Double.parseDouble(b)); 193 assert(prefilterFraction<=1) : "prefiltersize must be 0-1, a fraction of total memory."; 194 prefilter=prefilterFraction>0; 195 }else if(a.equals("prehashes") || a.equals("hashes")){ 196 prehashes=Parse.parseIntKMG(b); 197 }else if(a.equals("prefilterpasses") || a.equals("prepasses")){ 198 assert(b!=null) : "Bad parameter: "+arg; 199 if(b.equalsIgnoreCase("auto")){ 200 prepasses=-1; 201 }else{ 202 prepasses=Parse.parseIntKMG(b); 203 } 204 }else if(a.equals("onepass")){ 205 onePass=Parse.parseBoolean(b); 206 }else if(a.equals("passes")){ 207 int passes=Parse.parseIntKMG(b); 208 onePass=(passes<2); 209 }else if(a.equals("rcomp")){ 210 rcomp_=Parse.parseBoolean(b); 211 } 212 213 else if(a.equalsIgnoreCase("filterMemoryOverride") || a.equalsIgnoreCase("filterMemory") || 214 a.equalsIgnoreCase("prefilterMemory") || a.equalsIgnoreCase("filtermem")){ 215 filterMemoryOverride=Parse.parseKMG(b); 216 } 217 218 else if(IGNORE_UNKNOWN_ARGS){ 219 //Do nothing 220 }else{ 221 throw new RuntimeException("Unknown parameter "+args[i]); 222 } 223 } 224 225 {//Process parser fields 226 Parser.processQuality(); 227 228 qtrimLeft=parser.qtrimLeft; 229 qtrimRight=parser.qtrimRight; 230 trimq=parser.trimq; 231 trimE=parser.trimE(); 232 233 minAvgQuality=parser.minAvgQuality; 234 minAvgQualityBases=parser.minAvgQualityBases; 235 } 236 237 if(prepasses==0 || !prefilter){ 238 prepasses=0; 239 prefilter=false; 240 } 241 242 { 243 long memory=Runtime.getRuntime().maxMemory(); 244 double xmsRatio=Shared.xmsRatio(); 245 // long tmemory=Runtime.getRuntime().totalMemory(); 246 usableMemory=(long)Tools.max(((memory-96000000)*(xmsRatio>0.97 ? 0.82 : 0.72)), memory*0.45); 247 if(prepasses==0 || !prefilter){ 248 filterMemory0=filterMemory1=0; 249 }else if(filterMemoryOverride>0){ 250 filterMemory0=filterMemory1=filterMemoryOverride; 251 }else{ 252 double low=Tools.min(prefilterFraction, 1-prefilterFraction); 253 double high=1-low; 254 if(prepasses<0 || (prepasses&1)==1){//odd passes 255 filterMemory0=(long)(usableMemory*low); 256 filterMemory1=(long)(usableMemory*high); 257 }else{//even passes 258 filterMemory0=(long)(usableMemory*high); 259 filterMemory1=(long)(usableMemory*low); 260 } 261 } 262 tableMemory=(long)(usableMemory*.95-filterMemory0); 263 } 264 265 mult=Kmer.getMult(kbig_); 266 k=Kmer.getK(kbig_); 267 kbig=k*mult; 268 kbig2=kbig-1; 269 assert(k<=31); 270 271 prealloc=prealloc_; 272 bytesPerKmer=4+8*mult+extraBytesPerKmer_; 273 assert(bytesPerKmer>=4+8*mult) : bytesPerKmer+", "+mult+", "+k+", "+kbig+", "+(4+8*mult)+", "+extraBytesPerKmer_; 274 if(ways_<1){ 275 long maxKmers=(2*tableMemory)/bytesPerKmer; 276 long minWays=Tools.min(10000, maxKmers/Integer.MAX_VALUE); 277 ways_=(int)Tools.max(31, (int)(THREADS*2.5), minWays); 278 ways_=(int)Primes.primeAtLeast(ways_); 279 assert(ways_>0); 280 // System.err.println("ways="+ways_); 281 } 282 // assert(false) : extraBytesPerKmer_+bytesPerKmer+", "+mult+", "+k+", "+kbig+", "+(4+8*mult)+", "+ways_; 283 284 /* Set final variables; post-process and validate argument combinations */ 285 286 onePass=onePass&prefilter; 287 ways=ways_; 288 filterMax=Tools.min(filterMax_, 0x7FFFFFFF); 289 ecco=ecco_; 290 merge=merge_; 291 minProb=(float)minProb_; 292 rcomp=rcomp_; 293 Kmer.rcomp=rcomp; 294 // System.err.println("rcomp="+rcomp); 295 estimatedKmerCapacity=(long)((tableMemory*1.0/bytesPerKmer)*((prealloc ? preallocFraction : 0.81))*(HashArrayU.maxLoadFactorFinal*0.97)); 296 KmerCountAbstract.minProb=(minProbPrefilter ? minProb : 0); 297 298 if(kbig!=kbig_){ 299 System.err.println("K was changed from "+kbig_+" to "+kbig); 300 } 301 302 if(k<1){throw new RuntimeException("\nk needs an integer value above 0, such as k=27. Default is 62.\n");} 303 304 if(initialSize<1){ 305 final long memOverWays=tableMemory/(bytesPerKmer*ways); 306 final double mem2=(prealloc ? preallocFraction : 1)*tableMemory; 307 initialSize=(prealloc || memOverWays<initialSizeDefault ? (int)Tools.min(2142000000, (long)(mem2/(bytesPerKmer*ways))) : initialSizeDefault); 308 if(initialSize!=initialSizeDefault){ 309 System.err.println("Initial size set to "+initialSize); 310 } 311 } 312 313 /* Adjust I/O settings and filenames */ 314 315 assert(FastaReadInputStream.settingsOK()); 316 317 if(in1.isEmpty()){throw new RuntimeException("Error - at least one input file is required.");} 318 319 for(int i=0; i<in1.size(); i++){ 320 String s=in1.get(i); 321 if(s!=null && s.contains("#") && !new File(s).exists()){ 322 int pound=s.lastIndexOf('#'); 323 String a=s.substring(0, pound); 324 String b=s.substring(pound+1); 325 in1.set(i, a+1+b); 326 in2.add(a+2+b); 327 } 328 } 329 330 if(!extra.isEmpty()){ 331 ArrayList<String> temp=(ArrayList<String>) extra.clone(); 332 extra.clear(); 333 for(String s : temp){ 334 if(s!=null && s.contains("#") && !new File(s).exists()){ 335 int pound=s.lastIndexOf('#'); 336 String a=s.substring(0, pound); 337 String b=s.substring(pound+1); 338 extra.add(a); 339 extra.add(b); 340 }else{ 341 extra.add(s); 342 } 343 } 344 } 345 346 { 347 boolean allowDuplicates=true; 348 if(!Tools.testInputFiles(allowDuplicates, true, in1, in2, extra)){ 349 throw new RuntimeException("\nCan't read some input files.\n"); 350 } 351 } 352 assert(THREADS>0); 353 354 if(DISPLAY_PROGRESS){ 355 outstream.println("Initial:"); 356 outstream.println("Ways="+ways+", initialSize="+initialSize+", prefilter="+(prefilter ? "t" : "f")+", prealloc="+(prealloc ? (""+preallocFraction) : "f")); 357 Shared.printMemory(); 358 outstream.println(); 359 } 360 } 361 362 /*--------------------------------------------------------------*/ 363 /*---------------- Outer Methods ----------------*/ 364 /*--------------------------------------------------------------*/ 365 366 @Override clear()367 public void clear(){ 368 tables=null; 369 } 370 371 /*--------------------------------------------------------------*/ 372 /*---------------- Inner Methods ----------------*/ 373 /*--------------------------------------------------------------*/ 374 375 @Override allocateTables()376 protected void allocateTables(){ 377 assert(tables==null); 378 tables=null; 379 final int tableType=AbstractKmerTableU.ARRAY1D; 380 381 ScheduleMaker scheduleMaker=new ScheduleMaker(ways, bytesPerKmer, prealloc, 382 (prealloc ? preallocFraction : 1.0), -1, (prefilter ? prepasses : 0), prefilterFraction, filterMemoryOverride); 383 int[] schedule=scheduleMaker.makeSchedule(); 384 tables=AbstractKmerTableU.preallocate(ways, tableType, schedule, k, kbig); 385 } 386 387 /** 388 * Load reads into tables, using multiple LoadThread. 389 */ 390 @Override loadKmers(String fname1, String fname2)391 public long loadKmers(String fname1, String fname2){ 392 393 /* Create read input stream */ 394 final ConcurrentReadInputStream cris; 395 { 396 FileFormat ff1=FileFormat.testInput(fname1, FileFormat.FASTQ, null, true, true); 397 FileFormat ff2=FileFormat.testInput(fname2, FileFormat.FASTQ, null, true, true); 398 cris=ConcurrentReadInputStream.getReadInputStream(maxReads, false, ff1, ff2); 399 cris.start(); //4567 400 } 401 402 /* Create ProcessThreads */ 403 ArrayList<LoadThread> alpt=new ArrayList<LoadThread>(THREADS); 404 for(int i=0; i<THREADS; i++){alpt.add(new LoadThread(cris));} 405 for(LoadThread pt : alpt){pt.start();} 406 407 long added=0; 408 409 /* Wait for threads to die, and gather statistics */ 410 for(LoadThread pt : alpt){ 411 while(pt.getState()!=Thread.State.TERMINATED){ 412 try { 413 pt.join(); 414 } catch (InterruptedException e) { 415 // TODO Auto-generated catch block 416 e.printStackTrace(); 417 } 418 } 419 added+=pt.added; 420 421 readsIn+=pt.readsInT; 422 basesIn+=pt.basesInT; 423 lowqReads+=pt.lowqReadsT; 424 lowqBases+=pt.lowqBasesT; 425 readsTrimmed+=pt.readsTrimmedT; 426 basesTrimmed+=pt.basesTrimmedT; 427 kmersIn+=pt.kmersInT; 428 } 429 430 /* Shut down I/O streams; capture error status */ 431 errorState|=ReadWrite.closeStreams(cris); 432 return added; 433 } 434 435 /*--------------------------------------------------------------*/ 436 /*---------------- Inner Classes ----------------*/ 437 /*--------------------------------------------------------------*/ 438 439 /** 440 * Loads kmers. 441 */ 442 private class LoadThread extends Thread{ 443 444 /** 445 * Constructor 446 * @param cris_ Read input stream 447 */ LoadThread(ConcurrentReadInputStream cris_)448 public LoadThread(ConcurrentReadInputStream cris_){ 449 cris=cris_; 450 table=new HashBufferU(tables, buflen, kbig, false); 451 kmer=new Kmer(k, mult); 452 } 453 454 @Override run()455 public void run(){ 456 ListNum<Read> ln=cris.nextList(); 457 ArrayList<Read> reads=(ln!=null ? ln.list : null); 458 459 //While there are more reads lists... 460 while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning 461 462 //For each read (or pair) in the list... 463 for(int i=0; i<reads.size(); i++){ 464 Read r1=reads.get(i); 465 Read r2=r1.mate; 466 467 if(!r1.validated()){r1.validate(true);} 468 if(r2!=null && !r2.validated()){r2.validate(true);} 469 470 if(verbose){System.err.println("Considering read "+r1.id+" "+new String(r1.bases));} 471 472 readsInT++; 473 basesInT+=r1.length(); 474 if(r2!=null){ 475 readsInT++; 476 basesInT+=r2.length(); 477 } 478 479 //Determine whether to discard the reads based on average quality 480 if(minAvgQuality>0){ 481 if(r1!=null && r1.quality!=null && r1.avgQuality(false, minAvgQualityBases)<minAvgQuality){r1.setDiscarded(true);} 482 if(r2!=null && r2.quality!=null && r2.avgQuality(false, minAvgQualityBases)<minAvgQuality){r2.setDiscarded(true);} 483 } 484 485 if(r1!=null){ 486 if(qtrimLeft || qtrimRight){ 487 int x=TrimRead.trimFast(r1, qtrimLeft, qtrimRight, trimq, trimE, 1); 488 basesTrimmedT+=x; 489 readsTrimmedT+=(x>0 ? 1 : 0); 490 } 491 if(r1.length()<kbig){r1.setDiscarded(true);} 492 } 493 if(r2!=null){ 494 if(qtrimLeft || qtrimRight){ 495 int x=TrimRead.trimFast(r2, qtrimLeft, qtrimRight, trimq, trimE, 1); 496 basesTrimmedT+=x; 497 readsTrimmedT+=(x>0 ? 1 : 0); 498 } 499 if(r2.length()<kbig){r2.setDiscarded(true);} 500 } 501 502 if((ecco || merge) && r1!=null && r2!=null && !r1.discarded() && !r2.discarded()){ 503 if(merge){ 504 final int insert=BBMerge.findOverlapStrict(r1, r2, false); 505 if(insert>0){ 506 r2.reverseComplement(); 507 r1=r1.joinRead(insert); 508 r2=null; 509 } 510 }else if(ecco){ 511 BBMerge.findOverlapStrict(r1, r2, true); 512 } 513 } 514 515 if(r1!=null){ 516 if(r1.discarded()){ 517 lowqBasesT+=r1.length(); 518 lowqReadsT++; 519 }else{ 520 long temp=addKmersToTable(r1, kmer); 521 added+=temp; 522 if(verbose){System.err.println("A: Added "+temp);} 523 } 524 } 525 if(r2!=null){ 526 if(r2.discarded()){ 527 lowqBasesT+=r2.length(); 528 lowqReadsT++; 529 }else{ 530 long temp=addKmersToTable(r2, kmer); 531 added+=temp; 532 if(verbose){System.err.println("B: Added "+temp);} 533 } 534 } 535 } 536 537 //Fetch a new read list 538 cris.returnList(ln); 539 ln=cris.nextList(); 540 reads=(ln!=null ? ln.list : null); 541 } 542 cris.returnList(ln); 543 long temp=table.flush(); 544 if(verbose){System.err.println("Flush: Added "+temp);} 545 added+=temp; 546 } 547 548 addKmersToTable(final Read r, Kmer kmer)549 private final int addKmersToTable(final Read r, Kmer kmer){ 550 if(onePass){return addKmersToTable_onePass(r, kmer);} 551 if(r==null || r.bases==null){return 0;} 552 final float minProb2=(minProbMain ? minProb : 0); 553 final byte[] bases=r.bases; 554 final byte[] quals=r.quality; 555 int created=0; 556 int len=0; 557 558 if(bases==null || bases.length<kbig){return -1;} 559 kmer.clear(); 560 561 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */ 562 float prob=1; 563 for(int i=0; i<bases.length; i++){ 564 final byte b=bases[i]; 565 final long x=AminoAcid.baseToNumber[b]; 566 // assert(x>=0) : ((char)b)+", "+x+", "+new String(bases); 567 568 //Update kmers 569 // kmer.addRight(b); 570 kmer.addRightNumeric(x); 571 572 if(minProb2>0 && quals!=null){//Update probability 573 prob=prob*PROB_CORRECT[quals[i]]; 574 if(len>kbig){ 575 byte oldq=quals[i-kbig]; 576 prob=prob*PROB_CORRECT_INVERSE[oldq]; 577 } 578 } 579 580 //Handle Ns 581 if(x<0){ 582 len=0; 583 prob=1; 584 }else{len++;} 585 586 assert(len==kmer.len); 587 588 // if(verbose){System.err.println("A: Scanning i="+i+", len="+len+", kmer="+kmer.toString()+"\t"+new String(bases, Tools.max(0, i-kbig2), Tools.min(i+1, kbig)));} 589 if(len>=kbig && prob>=minProb2){ 590 kmersInT++; 591 // System.err.println("kmer="+kmer+"; xor()="+kmer.xor()+"; filterMax2="+filterMax2+"; prefilter="+prefilter); 592 // System.err.println("prefilterArray.read(xor.key())="+prefilterArray.read(kmer.xor())+""); 593 // System.err.println("prefilterArray.read(kmer.key())="+prefilterArray.read(kmer.key())+""); 594 if(!prefilter || prefilterArray.read(kmer.xor())>filterMax2){ 595 int temp=table.incrementAndReturnNumCreated(kmer); 596 // System.err.println("kmer="+kmer+"; xor()="+kmer.xor()+"; temp="+temp+" "); 597 created+=temp; 598 if(verbose){System.err.println("C: Added "+temp);} 599 } 600 } 601 } 602 603 return created; 604 } 605 606 addKmersToTable_onePass(final Read r, Kmer kmer)607 private final int addKmersToTable_onePass(final Read r, Kmer kmer){ 608 assert(prefilter); 609 if(r==null || r.bases==null){return 0;} 610 final byte[] bases=r.bases; 611 final byte[] quals=r.quality; 612 int created=0; 613 int len=0; 614 615 if(bases==null || bases.length<kbig){return -1;} 616 kmer.clear(); 617 618 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */ 619 float prob=1; 620 for(int i=0; i<bases.length; i++){ 621 final byte b=bases[i]; 622 final long x=AminoAcid.baseToNumber[b]; 623 624 //Update kmers 625 kmer.addRight(b); 626 627 if(minProb>0 && quals!=null){//Update probability 628 prob=prob*PROB_CORRECT[quals[i]]; 629 if(len>kbig){ 630 byte oldq=quals[i-kbig]; 631 prob=prob*PROB_CORRECT_INVERSE[oldq]; 632 } 633 } 634 635 //Handle Ns 636 if(x<0){ 637 len=0; 638 prob=1; 639 }else{len++;} 640 641 assert(len==kmer.len); 642 643 if(verbose){System.err.println("B: Scanning i="+i+", len="+len+", kmer="+kmer+"\t"+new String(bases, Tools.max(0, i-kbig2), Tools.min(i+1, kbig)));} 644 if(len>=kbig && prob>=minProb){ 645 final long xor=kmer.xor(); 646 int count=prefilterArray.incrementAndReturnUnincremented(xor, 1); 647 if(count>=filterMax2){ 648 int temp=table.incrementAndReturnNumCreated(kmer); 649 created+=temp; 650 if(verbose){System.err.println("D: Added "+temp);} 651 } 652 } 653 } 654 return created; 655 } 656 657 /*--------------------------------------------------------------*/ 658 659 /** Input read stream */ 660 private final ConcurrentReadInputStream cris; 661 662 private final HashBufferU table; 663 664 public long added=0; 665 666 private long readsInT=0; 667 private long basesInT=0; 668 private long lowqReadsT=0; 669 private long lowqBasesT=0; 670 private long readsTrimmedT=0; 671 private long basesTrimmedT=0; 672 private long kmersInT=0; 673 private final Kmer kmer; 674 675 } 676 677 678 /*--------------------------------------------------------------*/ 679 /*---------------- Convenience ----------------*/ 680 /*--------------------------------------------------------------*/ 681 regenerateCounts(byte[] bases, IntList counts, final int ca, final Kmer kmer)682 public int regenerateCounts(byte[] bases, IntList counts, final int ca, final Kmer kmer){ 683 final int b=ca+kbig; //first base changed 684 final int lim=Tools.min(counts.size, ca+kbig+1); //count limit 685 // System.err.println("ca="+ca+", b="+b+", lim="+lim); 686 // System.err.println("Regen from count "+(ca+1)+"-"+lim); 687 int len=0; 688 int valid=0; 689 kmer.clear(); 690 // System.err.println("ca="+ca+", b="+b+", lim="+lim+", "+counts); 691 692 //Generate initial kmer 693 for(int i=b-kbig; i<b; i++){ 694 final byte base=bases[i]; 695 final long x=AminoAcid.baseToNumber[base]; 696 697 kmer.addRight(base); 698 699 if(x<0){ 700 len=0; 701 }else{len++;} 702 assert(len==kmer.len); 703 } 704 assert(len==kbig || Tools.indexOf(bases, (byte)'N')>=ca) : new String(bases)+"\n"+ca+", "+len; 705 706 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts. 707 * i is an index in the base array, j is an index in the count array. */ 708 for(int i=b, j=ca+1; j<lim; i++, j++){ 709 final byte base=bases[i]; 710 final long x=AminoAcid.baseToNumber[base]; 711 712 kmer.addRight(base); 713 714 if(x<0){len=0;} 715 else{len++;} 716 assert(len==kmer.len); 717 718 if(len>=kbig){ 719 valid++; 720 int count=getCount(kmer); 721 counts.set(j, count); 722 }else{ 723 counts.set(j, 0); 724 } 725 } 726 // System.err.println("ca="+ca+", b="+b+", lim="+lim+", "+counts); 727 return valid; 728 } 729 730 @Override regenerateCounts(byte[] bases, IntList counts, final Kmer kmer, BitSet changed)731 public int regenerateCounts(byte[] bases, IntList counts, final Kmer kmer, BitSet changed){ 732 assert(!changed.isEmpty()); 733 final int firstBase=changed.nextSetBit(0), lastBase=changed.length()-1; 734 final int ca=firstBase-kbig; 735 // final int b=changed.nextSetBit(0);ca+kbig; //first base changed 736 final int firstCount=Tools.max(firstBase-kbig+1, 0), lastCount=Tools.min(counts.size-1, lastBase); //count limit 737 // System.err.println("ca="+ca+", b="+b+", lim="+lim); 738 // System.err.println("Regen from count "+(ca+1)+"-"+lim); 739 int len=0; 740 int valid=0; 741 kmer.clear(); 742 // System.err.println("ca="+ca+", b="+b+", lim="+lim+", "+counts); 743 744 //Generate initial kmer 745 for(int i=Tools.max(0, firstBase-kbig+1); i<firstBase; i++){ 746 final byte base=bases[i]; 747 final long x=AminoAcid.baseToNumber[base]; 748 749 kmer.addRight(base); 750 751 if(x<0){ 752 len=0; 753 }else{len++;} 754 assert(len==kmer.len); 755 } 756 assert(len==kbig-1 || Tools.indexOf(bases, (byte)'N')>=0 || firstBase<kbig) : 757 new String(bases)+"\n"+ca+", "+len+", "+firstBase; 758 759 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts. 760 * i is an index in the base array, j is an index in the count array. */ 761 for(int i=firstBase, lim=Tools.min(lastBase+k-1, bases.length-1); i<=lim; i++){ 762 final byte base=bases[i]; 763 final long x=AminoAcid.baseToNumber[base]; 764 765 kmer.addRight(base); 766 767 if(x<0){len=0;} 768 else{len++;} 769 assert(len==kmer.len); 770 771 final int c=i-kbig+1; 772 if(i>=firstBase){ 773 if(len>=kbig){ 774 valid++; 775 int count=getCount(kmer); 776 counts.set(c, count); 777 }else if(c>=0){ 778 counts.set(c, 0); 779 } 780 } 781 } 782 // System.err.println("ca="+ca+", b="+b+", lim="+lim+", "+counts); 783 return valid; 784 } 785 786 @Override fillSpecificCounts(byte[] bases, IntList counts, BitSet positions, final Kmer kmer)787 public int fillSpecificCounts(byte[] bases, IntList counts, BitSet positions, final Kmer kmer){ 788 counts.clear(); 789 790 { 791 Kmer x=leftmostKmer(bases, bases.length, kmer); 792 assert((x!=null)==(kmer.len==kbig)); 793 } 794 int len=kmer.len; 795 int valid=0; 796 if(len>=kbig){ 797 valid++; 798 int count=getCount(kmer); 799 counts.add(count); 800 }else{ 801 counts.add(0); 802 } 803 assert(kmer.len==len); 804 assert(len<=kbig) : len+", "+kbig; 805 806 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */ 807 for(int i=kbig, j=0; i<bases.length; i++, j++){ 808 final byte base=bases[i]; 809 final long x=AminoAcid.baseToNumber[base]; 810 811 kmer.addRight(base); 812 813 if(x<0){len=0;} 814 else{len++;} 815 assert(len==kmer.len); 816 817 if(len>=kbig && (positions==null || positions.get(j))){ 818 valid++; 819 int count=getCount(kmer); 820 counts.add(Tools.max(count, 0)); 821 }else{ 822 counts.add(0); 823 } 824 } 825 return valid; 826 } 827 828 829 /*--------------------------------------------------------------*/ 830 /*---------------- Helper Methods ----------------*/ 831 /*--------------------------------------------------------------*/ 832 833 @Override regenerate(final int limit)834 public long regenerate(final int limit){ 835 long sum=0; 836 for(AbstractKmerTableU akt : tables){ 837 sum+=akt.regenerate(limit); 838 } 839 return sum; 840 } 841 getTable(Kmer kmer)842 public HashArrayU1D getTable(Kmer kmer){ 843 return (HashArrayU1D) tables[kmer.mod(ways)]; 844 } 845 846 @Override getTable(int tnum)847 public HashArrayU1D getTable(int tnum){ 848 return (HashArrayU1D) tables[tnum]; 849 } 850 851 @Override fillHistogram(int histMax)852 public long[] fillHistogram(int histMax) { 853 return HistogramMakerU.fillHistogram(tables, histMax); 854 } 855 856 @Override countGC(long[] gcCounts, int max)857 public void countGC(long[] gcCounts, int max) { 858 for(AbstractKmerTableU set : tables){ 859 set.countGC(gcCounts, max); 860 } 861 } 862 863 @Override initializeOwnership()864 public void initializeOwnership(){ 865 OwnershipThread.initialize(tables); 866 } 867 868 @Override clearOwnership()869 public void clearOwnership(){ 870 OwnershipThread.clear(tables); 871 } 872 rightmostKmer(final ByteBuilder bb, Kmer kmer)873 public Kmer rightmostKmer(final ByteBuilder bb, Kmer kmer){ 874 return rightmostKmer(bb.array, bb.length(), kmer); 875 } 876 rightmostKmer(final byte[] bases, final int blen, final Kmer kmer)877 public Kmer rightmostKmer(final byte[] bases, final int blen, final Kmer kmer){ 878 kmer.clear(); 879 if(blen<kbig){return null;} 880 int len=0; 881 882 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts, to get the rightmost kmer */ 883 { 884 for(int i=blen-kbig; i<blen; i++){ 885 final byte b=bases[i]; 886 final long x=AminoAcid.baseToNumber[b]; 887 kmer.addRight(b); 888 889 if(x<0){len=0;} 890 else{len++;} 891 assert(len==kmer.len); 892 893 //if(verbose){outstream.println("C: Scanning i="+i+", len="+len+", kmer="+kmer+"\t"+new String(bases, Tools.max(0, i-kbig2), Tools.min(i+1, kbig)));} 894 } 895 } 896 897 if(len<kbig){return null;} 898 else{assert(len==kbig);} 899 return kmer; 900 } 901 leftmostKmer(final ByteBuilder bb, final Kmer kmer)902 public Kmer leftmostKmer(final ByteBuilder bb, final Kmer kmer){ 903 return leftmostKmer(bb.array, bb.length(), kmer); 904 } 905 leftmostKmer(final byte[] bases, final int blen, final Kmer kmer)906 public Kmer leftmostKmer(final byte[] bases, final int blen, final Kmer kmer){ 907 kmer.clear(); 908 if(blen<kbig){return null;} 909 int len=0; 910 911 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts, to get the rightmost kmer */ 912 { 913 for(int i=0; i<kbig; i++){ 914 final byte b=bases[i]; 915 final long x=AminoAcid.baseToNumber[b]; 916 kmer.addRight(b); 917 918 if(x<0){len=0;} 919 else{len++;} 920 assert(len==kmer.len); 921 922 if(verbose){outstream.println("D: Scanning i="+i+", len="+len+", kmer="+kmer+"\t"+new String(bases, Tools.max(0, i-kbig2), Tools.min(i+1, kbig)));} 923 } 924 } 925 926 if(len<kbig){return null;} 927 else{assert(len==kbig);} 928 return kmer; 929 } 930 doubleClaim(final ByteBuilder bb, final int id, Kmer kmer)931 public boolean doubleClaim(final ByteBuilder bb, final int id, Kmer kmer){ 932 return doubleClaim(bb.array, bb.length(), id, kmer); 933 } 934 935 /** Ensures there can be only one owner. */ doubleClaim(final byte[] bases, final int blength, final int id, Kmer kmer)936 public boolean doubleClaim(final byte[] bases, final int blength, final int id, Kmer kmer){ 937 boolean success=claim(bases, blength, id, true, kmer); 938 if(verbose){outstream.println("success1="+success+", id="+id+", blength="+blength);} 939 if(!success){return false;} 940 success=claim(bases, blength, id+CLAIM_OFFSET, true, kmer); 941 if(verbose){outstream.println("success2="+success+", id="+id+", blength="+blength);} 942 return success; 943 } 944 claim(final ByteBuilder bb, final int id, final boolean exitEarly, Kmer kmer)945 public boolean claim(final ByteBuilder bb, final int id, final boolean exitEarly, Kmer kmer){ 946 return claim(bb.array, bb.length(), id, exitEarly, kmer); 947 } 948 calcCoverage(final byte[] bases, final int blen, final Kmer kmer)949 public float calcCoverage(final byte[] bases, final int blen, final Kmer kmer){ 950 if(blen<kbig){return 0;} 951 int len=0; 952 kmer.clear(); 953 long sum=0; 954 int kmers=0; 955 956 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts, to get the rightmost kmer */ 957 for(int i=0; i<blen; i++){ 958 final byte b=bases[i]; 959 final long x=AminoAcid.baseToNumber[b]; 960 kmer.addRight(b); 961 962 if(x<0){len=0;} 963 else{len++;} 964 assert(len==kmer.len); 965 966 if(len>=kbig){ 967 int count=getCount(kmer); 968 sum+=count; 969 kmers++; 970 } 971 } 972 return sum==0 ? 0 : sum/(float)kmers; 973 } 974 calcCoverage(final Contig contig, final Kmer kmer)975 public float calcCoverage(final Contig contig, final Kmer kmer){ 976 final byte[] bases=contig.bases; 977 if(bases.length<kbig){return 0;} 978 int len=0; 979 kmer.clear(); 980 long sum=0; 981 int max=0, min=Integer.MAX_VALUE; 982 int kmers=0; 983 984 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts, to get the rightmost kmer */ 985 for(int i=0; i<bases.length; i++){ 986 final byte b=bases[i]; 987 final long x=AminoAcid.baseToNumber[b]; 988 kmer.addRight(b); 989 990 if(x<0){len=0;} 991 else{len++;} 992 assert(len==kmer.len); 993 994 if(len>=kbig){ 995 int count=getCount(kmer); 996 sum+=count; 997 max=Tools.max(count, max); 998 min=Tools.min(count, min); 999 kmers++; 1000 } 1001 } 1002 contig.coverage=sum==0 ? 0 : sum/(float)kmers; 1003 contig.maxCov=max; 1004 contig.minCov=sum==0 ? 0 : min; 1005 return contig.coverage; 1006 } 1007 claim(final byte[] bases, final int blen, final int id, boolean exitEarly, final Kmer kmer)1008 public boolean claim(final byte[] bases, final int blen, final int id, boolean exitEarly, final Kmer kmer){ 1009 if(blen<kbig){return false;} 1010 if(verbose){outstream.println("Thread "+id+" claim start.");} 1011 int len=0; 1012 kmer.clear(); 1013 boolean success=true; 1014 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */ 1015 for(int i=0; i<blen && success; i++){ 1016 final byte b=bases[i]; 1017 final long x=AminoAcid.baseToNumber[b]; 1018 kmer.addRight(b); 1019 1020 if(x<0){len=0;} 1021 else{len++;} 1022 assert(len==kmer.len); 1023 1024 if(len>=kbig){ 1025 success=claim(kmer, id/*, rid, i*/); 1026 success=(success || !exitEarly); 1027 } 1028 } 1029 return success; 1030 } 1031 claim(Kmer kmer, final int id )1032 public boolean claim(Kmer kmer, final int id/*, final long rid, final int pos*/){ 1033 //TODO: rid and pos are just for debugging. 1034 final int way=kmer.mod(ways); 1035 final AbstractKmerTableU table=tables[way]; 1036 final int count=table.getValue(kmer); 1037 assert(count==-1 || count>0) : count; 1038 // if(verbose /*|| true*/){outstream.println("Count="+count+".");} 1039 if(count<0){return true;} 1040 assert(count>0) : count; 1041 final int owner=table.setOwner(kmer, id); 1042 if(verbose){outstream.println("owner="+owner+".");} 1043 // assert(owner==id) : id+", "+owner+", "+rid+", "+pos; 1044 return owner==id; 1045 } 1046 release(ByteBuilder bb, final int id, final Kmer kmer)1047 public void release(ByteBuilder bb, final int id, final Kmer kmer){ 1048 release(bb.array, bb.length(), id, kmer); 1049 } 1050 release(final byte[] bases, final int blen, final int id, final Kmer kmer)1051 public void release(final byte[] bases, final int blen, final int id, final Kmer kmer){ 1052 if(verbose /*|| true*/){outstream.println("*Thread "+id+" release start.");} 1053 int len=0; 1054 kmer.clear(); 1055 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */ 1056 for(int i=0; i<blen; i++){ 1057 final byte b=bases[i]; 1058 final long x=AminoAcid.baseToNumber[b]; 1059 kmer.addRight(b); 1060 1061 if(x<0){len=0;} 1062 else{len++;} 1063 assert(len==kmer.len); 1064 1065 if(len>=kbig){ 1066 release(kmer, id); 1067 } 1068 } 1069 } 1070 release(Kmer kmer, final int id)1071 public boolean release(Kmer kmer, final int id){ 1072 final int way=kmer.mod(ways); 1073 final AbstractKmerTableU table=tables[way]; 1074 final int count=table.getValue(kmer); 1075 // if(verbose /*|| true*/){outstream.println("Count="+count+".");} 1076 if(count<1){return true;} 1077 return table.clearOwner(kmer, id); 1078 } 1079 findOwner(ByteBuilder bb, final int id, final Kmer kmer)1080 public int findOwner(ByteBuilder bb, final int id, final Kmer kmer){ 1081 return findOwner(bb.array, bb.length(), id, kmer); 1082 } 1083 findOwner(final byte[] bases, final int blen, final int id, final Kmer kmer)1084 public int findOwner(final byte[] bases, final int blen, final int id, final Kmer kmer){ 1085 int len=0; 1086 kmer.clear(); 1087 int maxOwner=-1; 1088 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */ 1089 for(int i=0; i<blen; i++){ 1090 final byte b=bases[i]; 1091 final long x=AminoAcid.baseToNumber[b]; 1092 kmer.addRight(b); 1093 1094 if(x<0){len=0;} 1095 else{len++;} 1096 assert(len==kmer.len); 1097 //if(verbose){System.err.println("E: Scanning i="+i+", len="+len+", kmer="+kmer+"\t"+new String(bases, Tools.max(0, i-kbig2), Tools.min(i+1, kbig)));} 1098 if(len>=kbig){ 1099 int owner=findOwner(kmer); 1100 maxOwner=Tools.max(owner, maxOwner); 1101 if(maxOwner>id){break;} 1102 } 1103 } 1104 return maxOwner; 1105 } 1106 findOwner(final Kmer kmer)1107 public int findOwner(final Kmer kmer){ 1108 final int way=kmer.mod(ways); 1109 final AbstractKmerTableU table=tables[way]; 1110 final int count=table.getValue(kmer); 1111 if(count<0){return -1;} 1112 final int owner=table.getOwner(kmer); 1113 return owner; 1114 } 1115 getCount(Kmer kmer)1116 public int getCount(Kmer kmer){ 1117 int way=kmer.mod(ways); 1118 return tables[way].getValue(kmer); 1119 } 1120 getOwner(Kmer kmer)1121 public int getOwner(Kmer kmer){ 1122 int way=kmer.mod(ways); 1123 return tables[way].getOwner(kmer); 1124 } 1125 1126 /*--------------------------------------------------------------*/ 1127 /*---------------- Fill Counts ----------------*/ 1128 /*--------------------------------------------------------------*/ 1129 fillRightCounts(Kmer kmer, int[] counts)1130 public int fillRightCounts(Kmer kmer, int[] counts){ 1131 if(FAST_FILL && MASK_CORE && k>2){ 1132 return fillRightCounts_fast(kmer, counts); 1133 }else{ 1134 return fillRightCounts_safe(kmer, counts); 1135 } 1136 } 1137 fillLeftCounts(Kmer kmer, int[] counts)1138 public int fillLeftCounts(Kmer kmer, int[] counts){ 1139 if(FAST_FILL && MASK_CORE && k>2){ 1140 return fillLeftCounts_fast(kmer, counts); 1141 }else{ 1142 return fillLeftCounts_safe(kmer, counts); 1143 } 1144 } 1145 fillRightCounts_fast(Kmer kmer, int[] counts)1146 public int fillRightCounts_fast(Kmer kmer, int[] counts){ 1147 assert(MASK_CORE); 1148 final long old=kmer.addRightNumeric(0); 1149 if(kmer.corePalindrome()){ 1150 kmer.addLeftNumeric(old); 1151 return fillRightCounts_safe(kmer, counts); 1152 } 1153 final int way=kmerToWay(kmer); 1154 final HashArrayU table=(HashArrayU) tables[way]; 1155 final int cell=table.kmerToCell(kmer); 1156 int max=-1, maxPos=0; 1157 1158 {//Iteration 0 1159 final int count=Tools.max(0, table.getValue(kmer, cell)); 1160 assert(count==NOT_PRESENT || count>=0); 1161 counts[0]=count; 1162 max=count; 1163 maxPos=0; 1164 kmer.addLeftNumeric(old); 1165 } 1166 1167 for(int i=1; i<=3; i++){ 1168 kmer.addRightNumeric(i); 1169 if(verbose){outstream.println("kmer: "+kmer);} 1170 final int count=Tools.max(0, table.getValue(kmer, cell)); 1171 assert(count==NOT_PRESENT || count>=0); 1172 counts[i]=count; 1173 if(count>max){ 1174 max=count; 1175 maxPos=i; 1176 } 1177 kmer.addLeftNumeric(old); 1178 } 1179 return maxPos; 1180 } 1181 fillRightCounts_safe(Kmer kmer, int[] counts)1182 public int fillRightCounts_safe(Kmer kmer, int[] counts){ 1183 assert(kmer.len>=kbig); 1184 if(verbose){outstream.println("fillRightCounts: "+kmer);} 1185 int max=-1, maxPos=0; 1186 1187 // final Kmer kmer2=new Kmer(kmer);//123 TODO: Slow, for an assertion only; 1188 1189 for(int i=0; i<=3; i++){ 1190 final long old=kmer.addRightNumeric(i); 1191 if(verbose){outstream.println("kmer: "+kmer);} 1192 int way=kmer.mod(ways); 1193 int count=tables[way].getValue(kmer); 1194 assert(count==NOT_PRESENT || count>=0); 1195 count=Tools.max(count, 0); 1196 counts[i]=count; 1197 if(count>max){ 1198 max=count; 1199 maxPos=i; 1200 } 1201 kmer.addLeftNumeric(old); 1202 } 1203 return maxPos; 1204 } 1205 1206 // public int fillLeftCounts_fast(Kmer kmer, int[] counts){ 1207 // assert(MASK_CORE); 1208 // final long old=kmer.addLeftNumeric(0); 1209 // if(kmer.corePalindrome()){ 1210 // kmer.addRightNumeric(old); 1211 // return fillLeftCounts_safe(kmer, counts); 1212 // } 1213 // final int way=kmerToWay(kmer); 1214 // final HashArrayU table=(HashArrayU) tables[way]; 1215 // final int cell=table.kmerToCell(kmer); 1216 // int max=-1, maxPos=0; 1217 // 1218 // for(int i=0; i<=3; i++){ 1219 // kmer.addLeftNumeric(i); 1220 // if(verbose){outstream.println("kmer: "+kmer);} 1221 // final int count=Tools.max(0, table.getValue(kmer, cell)); 1222 // assert(count==NOT_PRESENT || count>=0); 1223 // counts[i]=count; 1224 // if(count>max){ 1225 // max=count; 1226 // maxPos=i; 1227 // } 1228 // kmer.addRightNumeric(old); 1229 // } 1230 // return maxPos; 1231 // } 1232 fillLeftCounts_fast(Kmer kmer, int[] counts)1233 public int fillLeftCounts_fast(Kmer kmer, int[] counts){ 1234 assert(MASK_CORE); 1235 final long old=kmer.addLeftNumeric(0); 1236 if(kmer.corePalindrome()){ 1237 kmer.addRightNumeric(old); 1238 return fillLeftCounts_safe(kmer, counts); 1239 } 1240 final int way=kmerToWay(kmer); 1241 final HashArrayU table=(HashArrayU) tables[way]; 1242 final int cell=table.kmerToCell(kmer); 1243 int max=-1, maxPos=0; 1244 1245 {//Iteration 0 1246 if(verbose){outstream.println("kmer: "+kmer);} 1247 final int count=Tools.max(0, table.getValue(kmer, cell)); 1248 assert(count==NOT_PRESENT || count>=0); 1249 counts[0]=count; 1250 max=count; 1251 maxPos=0; 1252 kmer.addRightNumeric(old); 1253 } 1254 1255 for(int i=1; i<=3; i++){ 1256 kmer.addLeftNumeric(i); 1257 if(verbose){outstream.println("kmer: "+kmer);} 1258 final int count=Tools.max(0, table.getValue(kmer, cell)); 1259 assert(count==NOT_PRESENT || count>=0); 1260 counts[i]=count; 1261 if(count>max){ 1262 max=count; 1263 maxPos=i; 1264 } 1265 kmer.addRightNumeric(old); 1266 } 1267 return maxPos; 1268 } 1269 1270 fillLeftCounts_safe(final Kmer kmer, int[] counts)1271 public int fillLeftCounts_safe(final Kmer kmer, int[] counts){ 1272 assert(kmer.len>=kbig); 1273 if(verbose){outstream.println("fillLeftCounts: "+kmer);} 1274 int max=-1, maxPos=0; 1275 1276 // final Kmer kmer2=new Kmer(kmer);//123 TODO: Slow, for an assertion only; 1277 // assert(false) : kmer+", "+kmer2; 1278 1279 for(int i=0; i<=3; i++){ 1280 if(verbose){ 1281 outstream.println("kmer: "+kmer+" (key==array1 ? "+(kmer.key()==kmer.array1())); 1282 // outstream.println("kmer2: "+kmer2); 1283 } 1284 final long old=kmer.addLeftNumeric(i); 1285 if(verbose){ 1286 outstream.println("after: "+kmer+" (key==array1 ? "+(kmer.key()==kmer.array1())); 1287 outstream.println("i="+i+", old="+old); 1288 } 1289 int way=kmer.mod(ways); 1290 int count=tables[way].getValue(kmer); 1291 assert(count==NOT_PRESENT || count>=0); 1292 count=Tools.max(count, 0); 1293 counts[i]=count; 1294 if(count>max){ 1295 max=count; 1296 maxPos=i; 1297 } 1298 kmer.addRightNumeric(old); 1299 if(verbose){outstream.println("restored: "+kmer);} 1300 // assert(kmer.equals(kmer2)) : kmer+", "+kmer2+", "+kmer.xor()+", "+kmer2.xor(); 1301 } 1302 return maxPos; 1303 } 1304 1305 /*--------------------------------------------------------------*/ 1306 /*---------------- Printing Methods ----------------*/ 1307 /*--------------------------------------------------------------*/ 1308 1309 @Override dumpKmersAsBytes(String fname, int mincount, int maxcount, boolean printTime, AtomicLong remaining)1310 public boolean dumpKmersAsBytes(String fname, int mincount, int maxcount, boolean printTime, AtomicLong remaining){ 1311 if(fname==null){return false;} 1312 Timer t=new Timer(); 1313 1314 ByteStreamWriter bsw=new ByteStreamWriter(fname, overwrite, append, true); 1315 bsw.start(); 1316 for(AbstractKmerTableU set : tables){ 1317 set.dumpKmersAsBytes(bsw, k, mincount, maxcount, remaining); 1318 } 1319 bsw.poisonAndWait(); 1320 1321 t.stop(); 1322 if(printTime){outstream.println("Kmer Dump Time: \t"+t);} 1323 return bsw.errorState; 1324 } 1325 1326 @Override dumpKmersAsBytes_MT(String fname, int mincount, int maxcount, boolean printTime, AtomicLong remaining)1327 public boolean dumpKmersAsBytes_MT(String fname, int mincount, int maxcount, boolean printTime, AtomicLong remaining){ 1328 1329 final int threads=Tools.min(Shared.threads(), tables.length); 1330 if(threads<3 || DumpThread.NUM_THREADS==1){return dumpKmersAsBytes(fname, mincount, maxcount, printTime, remaining);} 1331 1332 if(fname==null){return false;} 1333 Timer t=new Timer(); 1334 1335 ByteStreamWriter bsw=new ByteStreamWriter(fname, overwrite, append, true); 1336 bsw.start(); 1337 DumpThreadU.dump(k, mincount, maxcount, tables, bsw, remaining); 1338 bsw.poisonAndWait(); 1339 1340 t.stop(); 1341 if(printTime){outstream.println("Kmer Dump Time: \t"+t);} 1342 return bsw.errorState; 1343 } 1344 1345 /*--------------------------------------------------------------*/ 1346 /*---------------- Recall Methods ----------------*/ 1347 /*--------------------------------------------------------------*/ 1348 toText(long[] kmer)1349 private final StringBuilder toText(long[] kmer){return AbstractKmerTableU.toText(kmer, k);} 1350 1351 /*--------------------------------------------------------------*/ 1352 /*---------------- Final Primitives ----------------*/ 1353 /*--------------------------------------------------------------*/ 1354 1355 @Override kbig()1356 public int kbig(){return kbig;} 1357 @Override filterMemory(int pass)1358 public long filterMemory(int pass){return ((pass&1)==0) ? filterMemory0 : filterMemory1;} 1359 @Override ecco()1360 public boolean ecco(){return ecco;} 1361 @Override qtrimLeft()1362 public boolean qtrimLeft(){return qtrimLeft;} 1363 @Override qtrimRight()1364 public boolean qtrimRight(){return qtrimRight;} 1365 @Override minAvgQuality()1366 public float minAvgQuality(){return minAvgQuality;} 1367 @Override tableMemory()1368 public long tableMemory(){return tableMemory;} 1369 @Override estimatedKmerCapacity()1370 public long estimatedKmerCapacity(){return estimatedKmerCapacity;} 1371 @Override ways()1372 public int ways(){return ways;} 1373 @Override rcomp()1374 public boolean rcomp(){return rcomp;} 1375 kmerToWay(final Kmer kmer)1376 public final int kmerToWay(final Kmer kmer){ 1377 final int way=(int)(kmer.xor()%ways); 1378 return way; 1379 } 1380 1381 /** Hold kmers. A kmer X such that X%WAYS=Y will be stored in tables[Y] */ 1382 private AbstractKmerTableU[] tables; tables()1383 public AbstractKmerTableU[] tables(){return tables;} 1384 1385 public long filterMemoryOverride=0; 1386 1387 private final int bytesPerKmer; 1388 1389 private final long usableMemory; 1390 private final long filterMemory0; 1391 private final long filterMemory1; 1392 private final long tableMemory; 1393 private final long estimatedKmerCapacity; 1394 1395 /** Number of tables (and threads, during loading) */ 1396 private final boolean prealloc; 1397 1398 /** Number of tables (and threads, during loading) */ 1399 public final int ways; 1400 1401 /** Total kmer length */ 1402 public final int kbig; 1403 /** Normal kmer length */ 1404 public final int k; 1405 /** kbig-1; used in some expressions */ 1406 public final int kbig2; 1407 /** Number of little kmers in a big kmer */ 1408 public final int mult; 1409 1410 /** Look for reverse-complements as well as forward kmers. Default: true */ 1411 private final boolean rcomp; 1412 1413 /** Quality-trim the left side */ 1414 public final boolean qtrimLeft; 1415 /** Quality-trim the right side */ 1416 public final boolean qtrimRight; 1417 /** Trim bases at this quality or below. Default: 4 */ 1418 public final float trimq; 1419 /** Error rate for trimming (derived from trimq) */ 1420 private final float trimE; 1421 1422 /** Throw away reads below this average quality after trimming. Default: 0 */ 1423 public final float minAvgQuality; 1424 /** If positive, calculate average quality from the first X bases only. Default: 0 */ 1425 public final int minAvgQualityBases; 1426 1427 /** Ignore kmers with probability of correctness less than this */ 1428 public final float minProb; 1429 1430 /** Correct via overlap */ 1431 private final boolean ecco; 1432 1433 /** Attempt to merge via overlap prior to counting kmers */ 1434 private final boolean merge; 1435 1436 /*--------------------------------------------------------------*/ 1437 /*---------------- Walker ----------------*/ 1438 /*--------------------------------------------------------------*/ 1439 walk()1440 public WalkerU walk(){ 1441 return new WalkerKSTU(); 1442 } 1443 1444 public class WalkerKSTU extends WalkerU { 1445 WalkerKSTU()1446 WalkerKSTU(){ 1447 w=tables[0].walk(); 1448 } 1449 1450 /** 1451 * Fills this object with the next key and value. 1452 * @return True if successful. 1453 */ next()1454 public boolean next(){ 1455 if(w==null){return false;} 1456 if(w.next()){return true;} 1457 if(tnum<tables.length){tnum++;} 1458 w=(tnum<tables.length ? tables[tnum].walk() : null); 1459 return w==null ? false : w.next(); 1460 } 1461 kmer()1462 public Kmer kmer(){return w.kmer();} value()1463 public int value(){return w.value();} 1464 1465 private WalkerU w=null; 1466 1467 /** current table number */ 1468 private int tnum=0; 1469 } 1470 1471 } 1472