1 package ukmer;
2 
3 import java.io.File;
4 import java.util.ArrayList;
5 import java.util.BitSet;
6 import java.util.concurrent.atomic.AtomicLong;
7 
8 import assemble.Contig;
9 import bloom.KmerCountAbstract;
10 import dna.AminoAcid;
11 import fileIO.ByteStreamWriter;
12 import fileIO.FileFormat;
13 import fileIO.ReadWrite;
14 import jgi.BBMerge;
15 import kmer.AbstractKmerTableSet;
16 import kmer.DumpThread;
17 import kmer.ScheduleMaker;
18 import shared.Parse;
19 import shared.Parser;
20 import shared.PreParser;
21 import shared.Primes;
22 import shared.ReadStats;
23 import shared.Shared;
24 import shared.Timer;
25 import shared.Tools;
26 import shared.TrimRead;
27 import stream.ConcurrentReadInputStream;
28 import stream.FastaReadInputStream;
29 import stream.Read;
30 import structures.ByteBuilder;
31 import structures.IntList;
32 import structures.ListNum;
33 
34 
35 /**
36  * Loads and holds kmers for Tadpole2/KmerCountExact
37  * @author Brian Bushnell
38  * @date Jun 22, 2015
39  *
40  */
41 public class KmerTableSetU extends AbstractKmerTableSet {
42 
43 	/**
44 	 * Code entrance from the command line.
45 	 * @param args Command line arguments
46 	 */
main(String[] args)47 	public static void main(String[] args){
48 
49 		Timer t=new Timer(), t2=new Timer();
50 		t.start();
51 		t2.start();
52 
53 		//Create a new CountKmersExact instance
54 		KmerTableSetU set=new KmerTableSetU(args);
55 		t2.stop();
56 		outstream.println("Initialization Time:      \t"+t2);
57 
58 		///And run it
59 		set.process(t);
60 
61 		//Close the print stream if it was redirected
62 		Shared.closeStream(outstream);
63 	}
64 
65 	/**
66 	 * Constructor.
67 	 * @param args Command line arguments
68 	 */
KmerTableSetU(String[] args)69 	private KmerTableSetU(String[] args){
70 		this(args, 0);//+5 if using ownership and building contigs
71 	}
72 
73 
74 	/**
75 	 * Constructor.
76 	 * @param args Command line arguments
77 	 */
KmerTableSetU(String[] args, int extraBytesPerKmer_)78 	public KmerTableSetU(String[] args, int extraBytesPerKmer_){
79 
80 		{//Preparse block for help, config files, and outstream
81 			PreParser pp=new PreParser(args, getClass(), false);
82 			args=pp.args;
83 			outstream=pp.outstream;
84 		}//TODO - no easy way to close outstream
85 
86 		/* Initialize local variables with defaults */
87 		Parser parser=new Parser();
88 		boolean prealloc_=false;
89 		int kbig_=62;
90 		int ways_=-1;
91 		int filterMax_=2;
92 		boolean ecco_=false, merge_=false;
93 		boolean rcomp_=true;
94 		double minProb_=defaultMinprob;
95 
96 		/* Parse arguments */
97 		for(int i=0; i<args.length; i++){
98 
99 			final String arg=args[i];
100 			String[] split=arg.split("=");
101 			String a=split[0].toLowerCase();
102 			String b=split.length>1 ? split[1] : null;
103 
104 			if(Parser.parseCommonStatic(arg, a, b)){
105 				//do nothing
106 			}else if(Parser.parseZip(arg, a, b)){
107 				//do nothing
108 			}else if(Parser.parseQuality(arg, a, b)){
109 				//do nothing
110 			}else if(Parser.parseFasta(arg, a, b)){
111 				//do nothing
112 			}else if(parser.parseInterleaved(arg, a, b)){
113 				//do nothing
114 			}else if(parser.parseTrim(arg, a, b)){
115 				//do nothing
116 			}else if(a.equals("in") || a.equals("in1")){
117 				in1.clear();
118 				if(b!=null){
119 					String[] s=b.split(",");
120 					for(String ss : s){
121 						in1.add(ss);
122 					}
123 				}
124 			}else if(a.equals("in2")){
125 				in2.clear();
126 				if(b!=null){
127 					String[] s=b.split(",");
128 					for(String ss : s){
129 						in2.add(ss);
130 					}
131 				}
132 			}else if(a.equals("extra")){
133 				extra.clear();
134 				if(b!=null){
135 					String[] s=b.split(",");
136 					for(String ss : s){
137 						extra.add(ss);
138 					}
139 				}
140 			}else if(a.equals("append") || a.equals("app")){
141 				append=ReadStats.append=Parse.parseBoolean(b);
142 			}else if(a.equals("overwrite") || a.equals("ow")){
143 				overwrite=Parse.parseBoolean(b);
144 			}else if(a.equals("initialsize")){
145 				initialSize=Parse.parseIntKMG(b);
146 			}else if(a.equals("showstats") || a.equals("stats")){
147 				showStats=Parse.parseBoolean(b);
148 			}else if(a.equals("ways")){
149 				ways_=Parse.parseIntKMG(b);
150 			}else if(a.equals("buflen") || a.equals("bufflen") || a.equals("bufferlength")){
151 				buflen=Parse.parseIntKMG(b);
152 			}else if(a.equals("k")){
153 				assert(b!=null) : "\nk needs an integer value such as k=50.  Default is 62.\n";
154 				kbig_=Parse.parseIntKMG(b);
155 			}else if(a.equals("threads") || a.equals("t")){
156 				THREADS=(b==null || b.equalsIgnoreCase("auto") ? Shared.threads() : Integer.parseInt(b));
157 			}else if(a.equals("showspeed") || a.equals("ss")){
158 				showSpeed=Parse.parseBoolean(b);
159 			}else if(a.equals("ecco")){
160 				ecco_=Parse.parseBoolean(b);
161 			}else if(a.equals("merge")){
162 				merge_=Parse.parseBoolean(b);
163 			}else if(a.equals("verbose")){
164 //				assert(false) : "Verbose flag is currently static final; must be recompiled to change.";
165 				verbose=Parse.parseBoolean(b);
166 			}else if(a.equals("verbose2")){
167 //				assert(false) : "Verbose flag is currently static final; must be recompiled to change.";
168 				verbose2=Parse.parseBoolean(b);
169 			}else if(a.equals("minprob")){
170 				minProb_=Double.parseDouble(b);
171 			}else if(a.equals("minprobprefilter") || a.equals("mpp")){
172 				minProbPrefilter=Parse.parseBoolean(b);
173 			}else if(a.equals("minprobmain") || a.equals("mpm")){
174 				minProbMain=Parse.parseBoolean(b);
175 			}else if(a.equals("reads") || a.startsWith("maxreads")){
176 				maxReads=Parse.parseKMG(b);
177 			}else if(a.equals("prealloc") || a.equals("preallocate")){
178 				if(b==null || b.length()<1 || Character.isLetter(b.charAt(0))){
179 					prealloc_=Parse.parseBoolean(b);
180 				}else{
181 					preallocFraction=Tools.max(0, Double.parseDouble(b));
182 					prealloc_=(preallocFraction>0);
183 				}
184 			}else if(a.equals("prefilter")){
185 				if(b==null || b.length()<1 || !Tools.isDigit(b.charAt(0))){
186 					prefilter=Parse.parseBoolean(b);
187 				}else{
188 					filterMax_=Parse.parseIntKMG(b);
189 					prefilter=filterMax_>0;
190 				}
191 			}else if(a.equals("prefiltersize") || a.equals("prefilterfraction") || a.equals("pff")){
192 				prefilterFraction=Tools.max(0, Double.parseDouble(b));
193 				assert(prefilterFraction<=1) : "prefiltersize must be 0-1, a fraction of total memory.";
194 				prefilter=prefilterFraction>0;
195 			}else if(a.equals("prehashes") || a.equals("hashes")){
196 				prehashes=Parse.parseIntKMG(b);
197 			}else if(a.equals("prefilterpasses") || a.equals("prepasses")){
198 				assert(b!=null) : "Bad parameter: "+arg;
199 				if(b.equalsIgnoreCase("auto")){
200 					prepasses=-1;
201 				}else{
202 					prepasses=Parse.parseIntKMG(b);
203 				}
204 			}else if(a.equals("onepass")){
205 				onePass=Parse.parseBoolean(b);
206 			}else if(a.equals("passes")){
207 				int passes=Parse.parseIntKMG(b);
208 				onePass=(passes<2);
209 			}else if(a.equals("rcomp")){
210 				rcomp_=Parse.parseBoolean(b);
211 			}
212 
213 			else if(a.equalsIgnoreCase("filterMemoryOverride") || a.equalsIgnoreCase("filterMemory") ||
214 					a.equalsIgnoreCase("prefilterMemory") || a.equalsIgnoreCase("filtermem")){
215 				filterMemoryOverride=Parse.parseKMG(b);
216 			}
217 
218 			else if(IGNORE_UNKNOWN_ARGS){
219 				//Do nothing
220 			}else{
221 				throw new RuntimeException("Unknown parameter "+args[i]);
222 			}
223 		}
224 
225 		{//Process parser fields
226 			Parser.processQuality();
227 
228 			qtrimLeft=parser.qtrimLeft;
229 			qtrimRight=parser.qtrimRight;
230 			trimq=parser.trimq;
231 			trimE=parser.trimE();
232 
233 			minAvgQuality=parser.minAvgQuality;
234 			minAvgQualityBases=parser.minAvgQualityBases;
235 		}
236 
237 		if(prepasses==0 || !prefilter){
238 			prepasses=0;
239 			prefilter=false;
240 		}
241 
242 		{
243 			long memory=Runtime.getRuntime().maxMemory();
244 			double xmsRatio=Shared.xmsRatio();
245 //			long tmemory=Runtime.getRuntime().totalMemory();
246 			usableMemory=(long)Tools.max(((memory-96000000)*(xmsRatio>0.97 ? 0.82 : 0.72)), memory*0.45);
247 			if(prepasses==0 || !prefilter){
248 				filterMemory0=filterMemory1=0;
249 			}else if(filterMemoryOverride>0){
250 				filterMemory0=filterMemory1=filterMemoryOverride;
251 			}else{
252 				double low=Tools.min(prefilterFraction, 1-prefilterFraction);
253 				double high=1-low;
254 				if(prepasses<0 || (prepasses&1)==1){//odd passes
255 					filterMemory0=(long)(usableMemory*low);
256 					filterMemory1=(long)(usableMemory*high);
257 				}else{//even passes
258 					filterMemory0=(long)(usableMemory*high);
259 					filterMemory1=(long)(usableMemory*low);
260 				}
261 			}
262 			tableMemory=(long)(usableMemory*.95-filterMemory0);
263 		}
264 
265 		mult=Kmer.getMult(kbig_);
266 		k=Kmer.getK(kbig_);
267 		kbig=k*mult;
268 		kbig2=kbig-1;
269 		assert(k<=31);
270 
271 		prealloc=prealloc_;
272 		bytesPerKmer=4+8*mult+extraBytesPerKmer_;
273 		assert(bytesPerKmer>=4+8*mult) : bytesPerKmer+", "+mult+", "+k+", "+kbig+", "+(4+8*mult)+", "+extraBytesPerKmer_;
274 		if(ways_<1){
275 			long maxKmers=(2*tableMemory)/bytesPerKmer;
276 			long minWays=Tools.min(10000, maxKmers/Integer.MAX_VALUE);
277 			ways_=(int)Tools.max(31, (int)(THREADS*2.5), minWays);
278 			ways_=(int)Primes.primeAtLeast(ways_);
279 			assert(ways_>0);
280 //			System.err.println("ways="+ways_);
281 		}
282 //		assert(false) : extraBytesPerKmer_+bytesPerKmer+", "+mult+", "+k+", "+kbig+", "+(4+8*mult)+", "+ways_;
283 
284 		/* Set final variables; post-process and validate argument combinations */
285 
286 		onePass=onePass&prefilter;
287 		ways=ways_;
288 		filterMax=Tools.min(filterMax_, 0x7FFFFFFF);
289 		ecco=ecco_;
290 		merge=merge_;
291 		minProb=(float)minProb_;
292 		rcomp=rcomp_;
293 		Kmer.rcomp=rcomp;
294 //		System.err.println("rcomp="+rcomp);
295 		estimatedKmerCapacity=(long)((tableMemory*1.0/bytesPerKmer)*((prealloc ? preallocFraction : 0.81))*(HashArrayU.maxLoadFactorFinal*0.97));
296 		KmerCountAbstract.minProb=(minProbPrefilter ? minProb : 0);
297 
298 		if(kbig!=kbig_){
299 			System.err.println("K was changed from "+kbig_+" to "+kbig);
300 		}
301 
302 		if(k<1){throw new RuntimeException("\nk needs an integer value above 0, such as k=27.  Default is 62.\n");}
303 
304 		if(initialSize<1){
305 			final long memOverWays=tableMemory/(bytesPerKmer*ways);
306 			final double mem2=(prealloc ? preallocFraction : 1)*tableMemory;
307 			initialSize=(prealloc || memOverWays<initialSizeDefault ? (int)Tools.min(2142000000, (long)(mem2/(bytesPerKmer*ways))) : initialSizeDefault);
308 			if(initialSize!=initialSizeDefault){
309 				System.err.println("Initial size set to "+initialSize);
310 			}
311 		}
312 
313 		/* Adjust I/O settings and filenames */
314 
315 		assert(FastaReadInputStream.settingsOK());
316 
317 		if(in1.isEmpty()){throw new RuntimeException("Error - at least one input file is required.");}
318 
319 		for(int i=0; i<in1.size(); i++){
320 			String s=in1.get(i);
321 			if(s!=null && s.contains("#") && !new File(s).exists()){
322 				int pound=s.lastIndexOf('#');
323 				String a=s.substring(0, pound);
324 				String b=s.substring(pound+1);
325 				in1.set(i, a+1+b);
326 				in2.add(a+2+b);
327 			}
328 		}
329 
330 		if(!extra.isEmpty()){
331 			ArrayList<String> temp=(ArrayList<String>) extra.clone();
332 			extra.clear();
333 			for(String s : temp){
334 				if(s!=null && s.contains("#") && !new File(s).exists()){
335 					int pound=s.lastIndexOf('#');
336 					String a=s.substring(0, pound);
337 					String b=s.substring(pound+1);
338 					extra.add(a);
339 					extra.add(b);
340 				}else{
341 					extra.add(s);
342 				}
343 			}
344 		}
345 
346 		{
347 			boolean allowDuplicates=true;
348 			if(!Tools.testInputFiles(allowDuplicates, true, in1, in2, extra)){
349 				throw new RuntimeException("\nCan't read some input files.\n");
350 			}
351 		}
352 		assert(THREADS>0);
353 
354 		if(DISPLAY_PROGRESS){
355 			outstream.println("Initial:");
356 			outstream.println("Ways="+ways+", initialSize="+initialSize+", prefilter="+(prefilter ? "t" : "f")+", prealloc="+(prealloc ? (""+preallocFraction) : "f"));
357 			Shared.printMemory();
358 			outstream.println();
359 		}
360 	}
361 
362 	/*--------------------------------------------------------------*/
363 	/*----------------         Outer Methods        ----------------*/
364 	/*--------------------------------------------------------------*/
365 
366 	@Override
clear()367 	public void clear(){
368 		tables=null;
369 	}
370 
371 	/*--------------------------------------------------------------*/
372 	/*----------------         Inner Methods        ----------------*/
373 	/*--------------------------------------------------------------*/
374 
375 	@Override
allocateTables()376 	protected void allocateTables(){
377 		assert(tables==null);
378 		tables=null;
379 		final int tableType=AbstractKmerTableU.ARRAY1D;
380 
381 		ScheduleMaker scheduleMaker=new ScheduleMaker(ways, bytesPerKmer, prealloc,
382 				(prealloc ? preallocFraction : 1.0), -1, (prefilter ? prepasses : 0), prefilterFraction, filterMemoryOverride);
383 		int[] schedule=scheduleMaker.makeSchedule();
384 		tables=AbstractKmerTableU.preallocate(ways, tableType, schedule, k, kbig);
385 	}
386 
387 	/**
388 	 * Load reads into tables, using multiple LoadThread.
389 	 */
390 	@Override
loadKmers(String fname1, String fname2)391 	public long loadKmers(String fname1, String fname2){
392 
393 		/* Create read input stream */
394 		final ConcurrentReadInputStream cris;
395 		{
396 			FileFormat ff1=FileFormat.testInput(fname1, FileFormat.FASTQ, null, true, true);
397 			FileFormat ff2=FileFormat.testInput(fname2, FileFormat.FASTQ, null, true, true);
398 			cris=ConcurrentReadInputStream.getReadInputStream(maxReads, false, ff1, ff2);
399 			cris.start(); //4567
400 		}
401 
402 		/* Create ProcessThreads */
403 		ArrayList<LoadThread> alpt=new ArrayList<LoadThread>(THREADS);
404 		for(int i=0; i<THREADS; i++){alpt.add(new LoadThread(cris));}
405 		for(LoadThread pt : alpt){pt.start();}
406 
407 		long added=0;
408 
409 		/* Wait for threads to die, and gather statistics */
410 		for(LoadThread pt : alpt){
411 			while(pt.getState()!=Thread.State.TERMINATED){
412 				try {
413 					pt.join();
414 				} catch (InterruptedException e) {
415 					// TODO Auto-generated catch block
416 					e.printStackTrace();
417 				}
418 			}
419 			added+=pt.added;
420 
421 			readsIn+=pt.readsInT;
422 			basesIn+=pt.basesInT;
423 			lowqReads+=pt.lowqReadsT;
424 			lowqBases+=pt.lowqBasesT;
425 			readsTrimmed+=pt.readsTrimmedT;
426 			basesTrimmed+=pt.basesTrimmedT;
427 			kmersIn+=pt.kmersInT;
428 		}
429 
430 		/* Shut down I/O streams; capture error status */
431 		errorState|=ReadWrite.closeStreams(cris);
432 		return added;
433 	}
434 
435 	/*--------------------------------------------------------------*/
436 	/*----------------         Inner Classes        ----------------*/
437 	/*--------------------------------------------------------------*/
438 
439 	/**
440 	 * Loads kmers.
441 	 */
442 	private class LoadThread extends Thread{
443 
444 		/**
445 		 * Constructor
446 		 * @param cris_ Read input stream
447 		 */
LoadThread(ConcurrentReadInputStream cris_)448 		public LoadThread(ConcurrentReadInputStream cris_){
449 			cris=cris_;
450 			table=new HashBufferU(tables, buflen, kbig, false);
451 			kmer=new Kmer(k, mult);
452 		}
453 
454 		@Override
run()455 		public void run(){
456 			ListNum<Read> ln=cris.nextList();
457 			ArrayList<Read> reads=(ln!=null ? ln.list : null);
458 
459 			//While there are more reads lists...
460 			while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning
461 
462 				//For each read (or pair) in the list...
463 				for(int i=0; i<reads.size(); i++){
464 					Read r1=reads.get(i);
465 					Read r2=r1.mate;
466 
467 					if(!r1.validated()){r1.validate(true);}
468 					if(r2!=null && !r2.validated()){r2.validate(true);}
469 
470 					if(verbose){System.err.println("Considering read "+r1.id+" "+new String(r1.bases));}
471 
472 					readsInT++;
473 					basesInT+=r1.length();
474 					if(r2!=null){
475 						readsInT++;
476 						basesInT+=r2.length();
477 					}
478 
479 					//Determine whether to discard the reads based on average quality
480 					if(minAvgQuality>0){
481 						if(r1!=null && r1.quality!=null && r1.avgQuality(false, minAvgQualityBases)<minAvgQuality){r1.setDiscarded(true);}
482 						if(r2!=null && r2.quality!=null && r2.avgQuality(false, minAvgQualityBases)<minAvgQuality){r2.setDiscarded(true);}
483 					}
484 
485 					if(r1!=null){
486 						if(qtrimLeft || qtrimRight){
487 							int x=TrimRead.trimFast(r1, qtrimLeft, qtrimRight, trimq, trimE, 1);
488 							basesTrimmedT+=x;
489 							readsTrimmedT+=(x>0 ? 1 : 0);
490 						}
491 						if(r1.length()<kbig){r1.setDiscarded(true);}
492 					}
493 					if(r2!=null){
494 						if(qtrimLeft || qtrimRight){
495 							int x=TrimRead.trimFast(r2, qtrimLeft, qtrimRight, trimq, trimE, 1);
496 							basesTrimmedT+=x;
497 							readsTrimmedT+=(x>0 ? 1 : 0);
498 						}
499 						if(r2.length()<kbig){r2.setDiscarded(true);}
500 					}
501 
502 					if((ecco || merge) && r1!=null && r2!=null && !r1.discarded() && !r2.discarded()){
503 						if(merge){
504 							final int insert=BBMerge.findOverlapStrict(r1, r2, false);
505 							if(insert>0){
506 								r2.reverseComplement();
507 								r1=r1.joinRead(insert);
508 								r2=null;
509 							}
510 						}else if(ecco){
511 							BBMerge.findOverlapStrict(r1, r2, true);
512 						}
513 					}
514 
515 					if(r1!=null){
516 						if(r1.discarded()){
517 							lowqBasesT+=r1.length();
518 							lowqReadsT++;
519 						}else{
520 							long temp=addKmersToTable(r1, kmer);
521 							added+=temp;
522 							if(verbose){System.err.println("A: Added "+temp);}
523 						}
524 					}
525 					if(r2!=null){
526 						if(r2.discarded()){
527 							lowqBasesT+=r2.length();
528 							lowqReadsT++;
529 						}else{
530 							long temp=addKmersToTable(r2, kmer);
531 							added+=temp;
532 							if(verbose){System.err.println("B: Added "+temp);}
533 						}
534 					}
535 				}
536 
537 				//Fetch a new read list
538 				cris.returnList(ln);
539 				ln=cris.nextList();
540 				reads=(ln!=null ? ln.list : null);
541 			}
542 			cris.returnList(ln);
543 			long temp=table.flush();
544 			if(verbose){System.err.println("Flush: Added "+temp);}
545 			added+=temp;
546 		}
547 
548 
addKmersToTable(final Read r, Kmer kmer)549 		private final int addKmersToTable(final Read r, Kmer kmer){
550 			if(onePass){return addKmersToTable_onePass(r, kmer);}
551 			if(r==null || r.bases==null){return 0;}
552 			final float minProb2=(minProbMain ? minProb : 0);
553 			final byte[] bases=r.bases;
554 			final byte[] quals=r.quality;
555 			int created=0;
556 			int len=0;
557 
558 			if(bases==null || bases.length<kbig){return -1;}
559 			kmer.clear();
560 
561 			/* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */
562 			float prob=1;
563 			for(int i=0; i<bases.length; i++){
564 				final byte b=bases[i];
565 				final long x=AminoAcid.baseToNumber[b];
566 //				assert(x>=0) : ((char)b)+", "+x+", "+new String(bases);
567 
568 				//Update kmers
569 //				kmer.addRight(b);
570 				kmer.addRightNumeric(x);
571 
572 				if(minProb2>0 && quals!=null){//Update probability
573 					prob=prob*PROB_CORRECT[quals[i]];
574 					if(len>kbig){
575 						byte oldq=quals[i-kbig];
576 						prob=prob*PROB_CORRECT_INVERSE[oldq];
577 					}
578 				}
579 
580 				//Handle Ns
581 				if(x<0){
582 					len=0;
583 					prob=1;
584 				}else{len++;}
585 
586 				assert(len==kmer.len);
587 
588 //				if(verbose){System.err.println("A: Scanning i="+i+", len="+len+", kmer="+kmer.toString()+"\t"+new String(bases, Tools.max(0, i-kbig2), Tools.min(i+1, kbig)));}
589 				if(len>=kbig && prob>=minProb2){
590 					kmersInT++;
591 //					System.err.println("kmer="+kmer+"; xor()="+kmer.xor()+"; filterMax2="+filterMax2+"; prefilter="+prefilter);
592 //					System.err.println("prefilterArray.read(xor.key())="+prefilterArray.read(kmer.xor())+"");
593 //					System.err.println("prefilterArray.read(kmer.key())="+prefilterArray.read(kmer.key())+"");
594 					if(!prefilter || prefilterArray.read(kmer.xor())>filterMax2){
595 						int temp=table.incrementAndReturnNumCreated(kmer);
596 //						System.err.println("kmer="+kmer+"; xor()="+kmer.xor()+"; temp="+temp+" ");
597 						created+=temp;
598 						if(verbose){System.err.println("C: Added "+temp);}
599 					}
600 				}
601 			}
602 
603 			return created;
604 		}
605 
606 
addKmersToTable_onePass(final Read r, Kmer kmer)607 		private final int addKmersToTable_onePass(final Read r, Kmer kmer){
608 			assert(prefilter);
609 			if(r==null || r.bases==null){return 0;}
610 			final byte[] bases=r.bases;
611 			final byte[] quals=r.quality;
612 			int created=0;
613 			int len=0;
614 
615 			if(bases==null || bases.length<kbig){return -1;}
616 			kmer.clear();
617 
618 			/* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */
619 			float prob=1;
620 			for(int i=0; i<bases.length; i++){
621 				final byte b=bases[i];
622 				final long x=AminoAcid.baseToNumber[b];
623 
624 				//Update kmers
625 				kmer.addRight(b);
626 
627 				if(minProb>0 && quals!=null){//Update probability
628 					prob=prob*PROB_CORRECT[quals[i]];
629 					if(len>kbig){
630 						byte oldq=quals[i-kbig];
631 						prob=prob*PROB_CORRECT_INVERSE[oldq];
632 					}
633 				}
634 
635 				//Handle Ns
636 				if(x<0){
637 					len=0;
638 					prob=1;
639 				}else{len++;}
640 
641 				assert(len==kmer.len);
642 
643 				if(verbose){System.err.println("B: Scanning i="+i+", len="+len+", kmer="+kmer+"\t"+new String(bases, Tools.max(0, i-kbig2), Tools.min(i+1, kbig)));}
644 				if(len>=kbig && prob>=minProb){
645 					final long xor=kmer.xor();
646 					int count=prefilterArray.incrementAndReturnUnincremented(xor, 1);
647 					if(count>=filterMax2){
648 						int temp=table.incrementAndReturnNumCreated(kmer);
649 						created+=temp;
650 						if(verbose){System.err.println("D: Added "+temp);}
651 					}
652 				}
653 			}
654 			return created;
655 		}
656 
657 		/*--------------------------------------------------------------*/
658 
659 		/** Input read stream */
660 		private final ConcurrentReadInputStream cris;
661 
662 		private final HashBufferU table;
663 
664 		public long added=0;
665 
666 		private long readsInT=0;
667 		private long basesInT=0;
668 		private long lowqReadsT=0;
669 		private long lowqBasesT=0;
670 		private long readsTrimmedT=0;
671 		private long basesTrimmedT=0;
672 		private long kmersInT=0;
673 		private final Kmer kmer;
674 
675 	}
676 
677 
678 	/*--------------------------------------------------------------*/
679 	/*----------------          Convenience         ----------------*/
680 	/*--------------------------------------------------------------*/
681 
regenerateCounts(byte[] bases, IntList counts, final int ca, final Kmer kmer)682 	public int regenerateCounts(byte[] bases, IntList counts, final int ca, final Kmer kmer){
683 		final int b=ca+kbig; //first base changed
684 		final int lim=Tools.min(counts.size, ca+kbig+1); //count limit
685 //		System.err.println("ca="+ca+", b="+b+", lim="+lim);
686 //		System.err.println("Regen from count "+(ca+1)+"-"+lim);
687 		int len=0;
688 		int valid=0;
689 		kmer.clear();
690 //		System.err.println("ca="+ca+", b="+b+", lim="+lim+", "+counts);
691 
692 		//Generate initial kmer
693 		for(int i=b-kbig; i<b; i++){
694 			final byte base=bases[i];
695 			final long x=AminoAcid.baseToNumber[base];
696 
697 			kmer.addRight(base);
698 
699 			if(x<0){
700 				len=0;
701 			}else{len++;}
702 			assert(len==kmer.len);
703 		}
704 		assert(len==kbig || Tools.indexOf(bases, (byte)'N')>=ca) : new String(bases)+"\n"+ca+", "+len;
705 
706 		/* Loop through the bases, maintaining a forward and reverse kmer via bitshifts.
707 		 * i is an index in the base array, j is an index in the count array. */
708 		for(int i=b, j=ca+1; j<lim; i++, j++){
709 			final byte base=bases[i];
710 			final long x=AminoAcid.baseToNumber[base];
711 
712 			kmer.addRight(base);
713 
714 			if(x<0){len=0;}
715 			else{len++;}
716 			assert(len==kmer.len);
717 
718 			if(len>=kbig){
719 				valid++;
720 				int count=getCount(kmer);
721 				counts.set(j, count);
722 			}else{
723 				counts.set(j, 0);
724 			}
725 		}
726 //		System.err.println("ca="+ca+", b="+b+", lim="+lim+", "+counts);
727 		return valid;
728 	}
729 
730 	@Override
regenerateCounts(byte[] bases, IntList counts, final Kmer kmer, BitSet changed)731 	public int regenerateCounts(byte[] bases, IntList counts, final Kmer kmer, BitSet changed){
732 		assert(!changed.isEmpty());
733 		final int firstBase=changed.nextSetBit(0), lastBase=changed.length()-1;
734 		final int ca=firstBase-kbig;
735 //		final int b=changed.nextSetBit(0);ca+kbig; //first base changed
736 		final int firstCount=Tools.max(firstBase-kbig+1, 0), lastCount=Tools.min(counts.size-1, lastBase); //count limit
737 //		System.err.println("ca="+ca+", b="+b+", lim="+lim);
738 //		System.err.println("Regen from count "+(ca+1)+"-"+lim);
739 		int len=0;
740 		int valid=0;
741 		kmer.clear();
742 //		System.err.println("ca="+ca+", b="+b+", lim="+lim+", "+counts);
743 
744 		//Generate initial kmer
745 		for(int i=Tools.max(0, firstBase-kbig+1); i<firstBase; i++){
746 			final byte base=bases[i];
747 			final long x=AminoAcid.baseToNumber[base];
748 
749 			kmer.addRight(base);
750 
751 			if(x<0){
752 				len=0;
753 			}else{len++;}
754 			assert(len==kmer.len);
755 		}
756 		assert(len==kbig-1 || Tools.indexOf(bases, (byte)'N')>=0 || firstBase<kbig) :
757 			new String(bases)+"\n"+ca+", "+len+", "+firstBase;
758 
759 		/* Loop through the bases, maintaining a forward and reverse kmer via bitshifts.
760 		 * i is an index in the base array, j is an index in the count array. */
761 		for(int i=firstBase, lim=Tools.min(lastBase+k-1, bases.length-1); i<=lim; i++){
762 			final byte base=bases[i];
763 			final long x=AminoAcid.baseToNumber[base];
764 
765 			kmer.addRight(base);
766 
767 			if(x<0){len=0;}
768 			else{len++;}
769 			assert(len==kmer.len);
770 
771 			final int c=i-kbig+1;
772 			if(i>=firstBase){
773 				if(len>=kbig){
774 					valid++;
775 					int count=getCount(kmer);
776 					counts.set(c, count);
777 				}else if(c>=0){
778 					counts.set(c, 0);
779 				}
780 			}
781 		}
782 //		System.err.println("ca="+ca+", b="+b+", lim="+lim+", "+counts);
783 		return valid;
784 	}
785 
786 	@Override
fillSpecificCounts(byte[] bases, IntList counts, BitSet positions, final Kmer kmer)787 	public int fillSpecificCounts(byte[] bases, IntList counts, BitSet positions, final Kmer kmer){
788 		counts.clear();
789 
790 		{
791 			Kmer x=leftmostKmer(bases, bases.length, kmer);
792 			assert((x!=null)==(kmer.len==kbig));
793 		}
794 		int len=kmer.len;
795 		int valid=0;
796 		if(len>=kbig){
797 			valid++;
798 			int count=getCount(kmer);
799 			counts.add(count);
800 		}else{
801 			counts.add(0);
802 		}
803 		assert(kmer.len==len);
804 		assert(len<=kbig) : len+", "+kbig;
805 
806 		/* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */
807 		for(int i=kbig, j=0; i<bases.length; i++, j++){
808 			final byte base=bases[i];
809 			final long x=AminoAcid.baseToNumber[base];
810 
811 			kmer.addRight(base);
812 
813 			if(x<0){len=0;}
814 			else{len++;}
815 			assert(len==kmer.len);
816 
817 			if(len>=kbig && (positions==null || positions.get(j))){
818 				valid++;
819 				int count=getCount(kmer);
820 				counts.add(Tools.max(count, 0));
821 			}else{
822 				counts.add(0);
823 			}
824 		}
825 		return valid;
826 	}
827 
828 
829 	/*--------------------------------------------------------------*/
830 	/*----------------        Helper Methods        ----------------*/
831 	/*--------------------------------------------------------------*/
832 
833 	@Override
regenerate(final int limit)834 	public long regenerate(final int limit){
835 		long sum=0;
836 		for(AbstractKmerTableU akt : tables){
837 			sum+=akt.regenerate(limit);
838 		}
839 		return sum;
840 	}
841 
getTable(Kmer kmer)842 	public HashArrayU1D getTable(Kmer kmer){
843 		return (HashArrayU1D) tables[kmer.mod(ways)];
844 	}
845 
846 	@Override
getTable(int tnum)847 	public HashArrayU1D getTable(int tnum){
848 		return (HashArrayU1D) tables[tnum];
849 	}
850 
851 	@Override
fillHistogram(int histMax)852 	public long[] fillHistogram(int histMax) {
853 		return HistogramMakerU.fillHistogram(tables, histMax);
854 	}
855 
856 	@Override
countGC(long[] gcCounts, int max)857 	public void countGC(long[] gcCounts, int max) {
858 		for(AbstractKmerTableU set : tables){
859 			set.countGC(gcCounts, max);
860 		}
861 	}
862 
863 	@Override
initializeOwnership()864 	public void initializeOwnership(){
865 		OwnershipThread.initialize(tables);
866 	}
867 
868 	@Override
clearOwnership()869 	public void clearOwnership(){
870 		OwnershipThread.clear(tables);
871 	}
872 
rightmostKmer(final ByteBuilder bb, Kmer kmer)873 	public Kmer rightmostKmer(final ByteBuilder bb, Kmer kmer){
874 		return rightmostKmer(bb.array, bb.length(), kmer);
875 	}
876 
rightmostKmer(final byte[] bases, final int blen, final Kmer kmer)877 	public Kmer rightmostKmer(final byte[] bases, final int blen, final Kmer kmer){
878 		kmer.clear();
879 		if(blen<kbig){return null;}
880 		int len=0;
881 
882 		/* Loop through the bases, maintaining a forward and reverse kmer via bitshifts, to get the rightmost kmer */
883 		{
884 			for(int i=blen-kbig; i<blen; i++){
885 				final byte b=bases[i];
886 				final long x=AminoAcid.baseToNumber[b];
887 				kmer.addRight(b);
888 
889 				if(x<0){len=0;}
890 				else{len++;}
891 				assert(len==kmer.len);
892 
893 				//if(verbose){outstream.println("C: Scanning i="+i+", len="+len+", kmer="+kmer+"\t"+new String(bases, Tools.max(0, i-kbig2), Tools.min(i+1, kbig)));}
894 			}
895 		}
896 
897 		if(len<kbig){return null;}
898 		else{assert(len==kbig);}
899 		return kmer;
900 	}
901 
leftmostKmer(final ByteBuilder bb, final Kmer kmer)902 	public Kmer leftmostKmer(final ByteBuilder bb, final Kmer kmer){
903 		return leftmostKmer(bb.array, bb.length(), kmer);
904 	}
905 
leftmostKmer(final byte[] bases, final int blen, final Kmer kmer)906 	public Kmer leftmostKmer(final byte[] bases, final int blen, final Kmer kmer){
907 		kmer.clear();
908 		if(blen<kbig){return null;}
909 		int len=0;
910 
911 		/* Loop through the bases, maintaining a forward and reverse kmer via bitshifts, to get the rightmost kmer */
912 		{
913 			for(int i=0; i<kbig; i++){
914 				final byte b=bases[i];
915 				final long x=AminoAcid.baseToNumber[b];
916 				kmer.addRight(b);
917 
918 				if(x<0){len=0;}
919 				else{len++;}
920 				assert(len==kmer.len);
921 
922 				if(verbose){outstream.println("D: Scanning i="+i+", len="+len+", kmer="+kmer+"\t"+new String(bases, Tools.max(0, i-kbig2), Tools.min(i+1, kbig)));}
923 			}
924 		}
925 
926 		if(len<kbig){return null;}
927 		else{assert(len==kbig);}
928 		return kmer;
929 	}
930 
doubleClaim(final ByteBuilder bb, final int id, Kmer kmer)931 	public boolean doubleClaim(final ByteBuilder bb, final int id, Kmer kmer){
932 		return doubleClaim(bb.array, bb.length(), id, kmer);
933 	}
934 
935 	/** Ensures there can be only one owner. */
doubleClaim(final byte[] bases, final int blength, final int id, Kmer kmer)936 	public boolean doubleClaim(final byte[] bases, final int blength, final int id, Kmer kmer){
937 		boolean success=claim(bases, blength, id, true, kmer);
938 		if(verbose){outstream.println("success1="+success+", id="+id+", blength="+blength);}
939 		if(!success){return false;}
940 		success=claim(bases, blength, id+CLAIM_OFFSET, true, kmer);
941 		if(verbose){outstream.println("success2="+success+", id="+id+", blength="+blength);}
942 		return success;
943 	}
944 
claim(final ByteBuilder bb, final int id, final boolean exitEarly, Kmer kmer)945 	public boolean claim(final ByteBuilder bb, final int id, final boolean exitEarly, Kmer kmer){
946 		return claim(bb.array, bb.length(), id, exitEarly, kmer);
947 	}
948 
calcCoverage(final byte[] bases, final int blen, final Kmer kmer)949 	public float calcCoverage(final byte[] bases, final int blen, final Kmer kmer){
950 		if(blen<kbig){return 0;}
951 		int len=0;
952 		kmer.clear();
953 		long sum=0;
954 		int kmers=0;
955 
956 		/* Loop through the bases, maintaining a forward and reverse kmer via bitshifts, to get the rightmost kmer */
957 		for(int i=0; i<blen; i++){
958 			final byte b=bases[i];
959 			final long x=AminoAcid.baseToNumber[b];
960 			kmer.addRight(b);
961 
962 			if(x<0){len=0;}
963 			else{len++;}
964 			assert(len==kmer.len);
965 
966 			if(len>=kbig){
967 				int count=getCount(kmer);
968 				sum+=count;
969 				kmers++;
970 			}
971 		}
972 		return sum==0 ? 0 : sum/(float)kmers;
973 	}
974 
calcCoverage(final Contig contig, final Kmer kmer)975 	public float calcCoverage(final Contig contig, final Kmer kmer){
976 		final byte[] bases=contig.bases;
977 		if(bases.length<kbig){return 0;}
978 		int len=0;
979 		kmer.clear();
980 		long sum=0;
981 		int max=0, min=Integer.MAX_VALUE;
982 		int kmers=0;
983 
984 		/* Loop through the bases, maintaining a forward and reverse kmer via bitshifts, to get the rightmost kmer */
985 		for(int i=0; i<bases.length; i++){
986 			final byte b=bases[i];
987 			final long x=AminoAcid.baseToNumber[b];
988 			kmer.addRight(b);
989 
990 			if(x<0){len=0;}
991 			else{len++;}
992 			assert(len==kmer.len);
993 
994 			if(len>=kbig){
995 				int count=getCount(kmer);
996 				sum+=count;
997 				max=Tools.max(count, max);
998 				min=Tools.min(count, min);
999 				kmers++;
1000 			}
1001 		}
1002 		contig.coverage=sum==0 ? 0 : sum/(float)kmers;
1003 		contig.maxCov=max;
1004 		contig.minCov=sum==0 ? 0 : min;
1005 		return contig.coverage;
1006 	}
1007 
claim(final byte[] bases, final int blen, final int id, boolean exitEarly, final Kmer kmer)1008 	public boolean claim(final byte[] bases, final int blen, final int id, boolean exitEarly, final Kmer kmer){
1009 		if(blen<kbig){return false;}
1010 		if(verbose){outstream.println("Thread "+id+" claim start.");}
1011 		int len=0;
1012 		kmer.clear();
1013 		boolean success=true;
1014 		/* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */
1015 		for(int i=0; i<blen && success; i++){
1016 			final byte b=bases[i];
1017 			final long x=AminoAcid.baseToNumber[b];
1018 			kmer.addRight(b);
1019 
1020 			if(x<0){len=0;}
1021 			else{len++;}
1022 			assert(len==kmer.len);
1023 
1024 			if(len>=kbig){
1025 				success=claim(kmer, id/*, rid, i*/);
1026 				success=(success || !exitEarly);
1027 			}
1028 		}
1029 		return success;
1030 	}
1031 
claim(Kmer kmer, final int id )1032 	public boolean claim(Kmer kmer, final int id/*, final long rid, final int pos*/){
1033 		//TODO: rid and pos are just for debugging.
1034 		final int way=kmer.mod(ways);
1035 		final AbstractKmerTableU table=tables[way];
1036 		final int count=table.getValue(kmer);
1037 		assert(count==-1 || count>0) : count;
1038 //		if(verbose  /*|| true*/){outstream.println("Count="+count+".");}
1039 		if(count<0){return true;}
1040 		assert(count>0) : count;
1041 		final int owner=table.setOwner(kmer, id);
1042 		if(verbose){outstream.println("owner="+owner+".");}
1043 //		assert(owner==id) : id+", "+owner+", "+rid+", "+pos;
1044 		return owner==id;
1045 	}
1046 
release(ByteBuilder bb, final int id, final Kmer kmer)1047 	public void release(ByteBuilder bb, final int id, final Kmer kmer){
1048 		release(bb.array, bb.length(), id, kmer);
1049 	}
1050 
release(final byte[] bases, final int blen, final int id, final Kmer kmer)1051 	public void release(final byte[] bases, final int blen, final int id, final Kmer kmer){
1052 		if(verbose  /*|| true*/){outstream.println("*Thread "+id+" release start.");}
1053 		int len=0;
1054 		kmer.clear();
1055 		/* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */
1056 		for(int i=0; i<blen; i++){
1057 			final byte b=bases[i];
1058 			final long x=AminoAcid.baseToNumber[b];
1059 			kmer.addRight(b);
1060 
1061 			if(x<0){len=0;}
1062 			else{len++;}
1063 			assert(len==kmer.len);
1064 
1065 			if(len>=kbig){
1066 				release(kmer, id);
1067 			}
1068 		}
1069 	}
1070 
release(Kmer kmer, final int id)1071 	public boolean release(Kmer kmer, final int id){
1072 		final int way=kmer.mod(ways);
1073 		final AbstractKmerTableU table=tables[way];
1074 		final int count=table.getValue(kmer);
1075 //		if(verbose  /*|| true*/){outstream.println("Count="+count+".");}
1076 		if(count<1){return true;}
1077 		return table.clearOwner(kmer, id);
1078 	}
1079 
findOwner(ByteBuilder bb, final int id, final Kmer kmer)1080 	public int findOwner(ByteBuilder bb, final int id, final Kmer kmer){
1081 		return findOwner(bb.array, bb.length(), id, kmer);
1082 	}
1083 
findOwner(final byte[] bases, final int blen, final int id, final Kmer kmer)1084 	public int findOwner(final byte[] bases, final int blen, final int id, final Kmer kmer){
1085 		int len=0;
1086 		kmer.clear();
1087 		int maxOwner=-1;
1088 		/* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */
1089 		for(int i=0; i<blen; i++){
1090 			final byte b=bases[i];
1091 			final long x=AminoAcid.baseToNumber[b];
1092 			kmer.addRight(b);
1093 
1094 			if(x<0){len=0;}
1095 			else{len++;}
1096 			assert(len==kmer.len);
1097 			//if(verbose){System.err.println("E: Scanning i="+i+", len="+len+", kmer="+kmer+"\t"+new String(bases, Tools.max(0, i-kbig2), Tools.min(i+1, kbig)));}
1098 			if(len>=kbig){
1099 				int owner=findOwner(kmer);
1100 				maxOwner=Tools.max(owner, maxOwner);
1101 				if(maxOwner>id){break;}
1102 			}
1103 		}
1104 		return maxOwner;
1105 	}
1106 
findOwner(final Kmer kmer)1107 	public int findOwner(final Kmer kmer){
1108 		final int way=kmer.mod(ways);
1109 		final AbstractKmerTableU table=tables[way];
1110 		final int count=table.getValue(kmer);
1111 		if(count<0){return -1;}
1112 		final int owner=table.getOwner(kmer);
1113 		return owner;
1114 	}
1115 
getCount(Kmer kmer)1116 	public int getCount(Kmer kmer){
1117 		int way=kmer.mod(ways);
1118 		return tables[way].getValue(kmer);
1119 	}
1120 
getOwner(Kmer kmer)1121 	public int getOwner(Kmer kmer){
1122 		int way=kmer.mod(ways);
1123 		return tables[way].getOwner(kmer);
1124 	}
1125 
1126 	/*--------------------------------------------------------------*/
1127 	/*----------------          Fill Counts         ----------------*/
1128 	/*--------------------------------------------------------------*/
1129 
fillRightCounts(Kmer kmer, int[] counts)1130 	public int fillRightCounts(Kmer kmer, int[] counts){
1131 		if(FAST_FILL && MASK_CORE && k>2){
1132 			return fillRightCounts_fast(kmer, counts);
1133 		}else{
1134 			return fillRightCounts_safe(kmer, counts);
1135 		}
1136 	}
1137 
fillLeftCounts(Kmer kmer, int[] counts)1138 	public int fillLeftCounts(Kmer kmer, int[] counts){
1139 		if(FAST_FILL && MASK_CORE && k>2){
1140 			return fillLeftCounts_fast(kmer, counts);
1141 		}else{
1142 			return fillLeftCounts_safe(kmer, counts);
1143 		}
1144 	}
1145 
fillRightCounts_fast(Kmer kmer, int[] counts)1146 	public int fillRightCounts_fast(Kmer kmer, int[] counts){
1147 		assert(MASK_CORE);
1148 		final long old=kmer.addRightNumeric(0);
1149 		if(kmer.corePalindrome()){
1150 			kmer.addLeftNumeric(old);
1151 			return fillRightCounts_safe(kmer, counts);
1152 		}
1153 		final int way=kmerToWay(kmer);
1154 		final HashArrayU table=(HashArrayU) tables[way];
1155 		final int cell=table.kmerToCell(kmer);
1156 		int max=-1, maxPos=0;
1157 
1158 		{//Iteration 0
1159 			final int count=Tools.max(0, table.getValue(kmer, cell));
1160 			assert(count==NOT_PRESENT || count>=0);
1161 			counts[0]=count;
1162 			max=count;
1163 			maxPos=0;
1164 			kmer.addLeftNumeric(old);
1165 		}
1166 
1167 		for(int i=1; i<=3; i++){
1168 			kmer.addRightNumeric(i);
1169 			if(verbose){outstream.println("kmer:               "+kmer);}
1170 			final int count=Tools.max(0, table.getValue(kmer, cell));
1171 			assert(count==NOT_PRESENT || count>=0);
1172 			counts[i]=count;
1173 			if(count>max){
1174 				max=count;
1175 				maxPos=i;
1176 			}
1177 			kmer.addLeftNumeric(old);
1178 		}
1179 		return maxPos;
1180 	}
1181 
fillRightCounts_safe(Kmer kmer, int[] counts)1182 	public int fillRightCounts_safe(Kmer kmer, int[] counts){
1183 		assert(kmer.len>=kbig);
1184 		if(verbose){outstream.println("fillRightCounts:   "+kmer);}
1185 		int max=-1, maxPos=0;
1186 
1187 //		final Kmer kmer2=new Kmer(kmer);//123 TODO: Slow, for an assertion only;
1188 
1189 		for(int i=0; i<=3; i++){
1190 			final long old=kmer.addRightNumeric(i);
1191 			if(verbose){outstream.println("kmer:               "+kmer);}
1192 			int way=kmer.mod(ways);
1193 			int count=tables[way].getValue(kmer);
1194 			assert(count==NOT_PRESENT || count>=0);
1195 			count=Tools.max(count, 0);
1196 			counts[i]=count;
1197 			if(count>max){
1198 				max=count;
1199 				maxPos=i;
1200 			}
1201 			kmer.addLeftNumeric(old);
1202 		}
1203 		return maxPos;
1204 	}
1205 
1206 //	public int fillLeftCounts_fast(Kmer kmer, int[] counts){
1207 //		assert(MASK_CORE);
1208 //		final long old=kmer.addLeftNumeric(0);
1209 //		if(kmer.corePalindrome()){
1210 //			kmer.addRightNumeric(old);
1211 //			return fillLeftCounts_safe(kmer, counts);
1212 //		}
1213 //		final int way=kmerToWay(kmer);
1214 //		final HashArrayU table=(HashArrayU) tables[way];
1215 //		final int cell=table.kmerToCell(kmer);
1216 //		int max=-1, maxPos=0;
1217 //
1218 //		for(int i=0; i<=3; i++){
1219 //			kmer.addLeftNumeric(i);
1220 //			if(verbose){outstream.println("kmer:               "+kmer);}
1221 //			final int count=Tools.max(0, table.getValue(kmer, cell));
1222 //			assert(count==NOT_PRESENT || count>=0);
1223 //			counts[i]=count;
1224 //			if(count>max){
1225 //				max=count;
1226 //				maxPos=i;
1227 //			}
1228 //			kmer.addRightNumeric(old);
1229 //		}
1230 //		return maxPos;
1231 //	}
1232 
fillLeftCounts_fast(Kmer kmer, int[] counts)1233 	public int fillLeftCounts_fast(Kmer kmer, int[] counts){
1234 		assert(MASK_CORE);
1235 		final long old=kmer.addLeftNumeric(0);
1236 		if(kmer.corePalindrome()){
1237 			kmer.addRightNumeric(old);
1238 			return fillLeftCounts_safe(kmer, counts);
1239 		}
1240 		final int way=kmerToWay(kmer);
1241 		final HashArrayU table=(HashArrayU) tables[way];
1242 		final int cell=table.kmerToCell(kmer);
1243 		int max=-1, maxPos=0;
1244 
1245 		{//Iteration 0
1246 			if(verbose){outstream.println("kmer:               "+kmer);}
1247 			final int count=Tools.max(0, table.getValue(kmer, cell));
1248 			assert(count==NOT_PRESENT || count>=0);
1249 			counts[0]=count;
1250 			max=count;
1251 			maxPos=0;
1252 			kmer.addRightNumeric(old);
1253 		}
1254 
1255 		for(int i=1; i<=3; i++){
1256 			kmer.addLeftNumeric(i);
1257 			if(verbose){outstream.println("kmer:               "+kmer);}
1258 			final int count=Tools.max(0, table.getValue(kmer, cell));
1259 			assert(count==NOT_PRESENT || count>=0);
1260 			counts[i]=count;
1261 			if(count>max){
1262 				max=count;
1263 				maxPos=i;
1264 			}
1265 			kmer.addRightNumeric(old);
1266 		}
1267 		return maxPos;
1268 	}
1269 
1270 
fillLeftCounts_safe(final Kmer kmer, int[] counts)1271 	public int fillLeftCounts_safe(final Kmer kmer, int[] counts){
1272 		assert(kmer.len>=kbig);
1273 		if(verbose){outstream.println("fillLeftCounts:    "+kmer);}
1274 		int max=-1, maxPos=0;
1275 
1276 //		final Kmer kmer2=new Kmer(kmer);//123 TODO: Slow, for an assertion only;
1277 //		assert(false) : kmer+", "+kmer2;
1278 
1279 		for(int i=0; i<=3; i++){
1280 			if(verbose){
1281 				outstream.println("kmer:               "+kmer+" (key==array1 ? "+(kmer.key()==kmer.array1()));
1282 //				outstream.println("kmer2:              "+kmer2);
1283 			}
1284 			final long old=kmer.addLeftNumeric(i);
1285 			if(verbose){
1286 				outstream.println("after:              "+kmer+" (key==array1 ? "+(kmer.key()==kmer.array1()));
1287 				outstream.println("i="+i+", old="+old);
1288 			}
1289 			int way=kmer.mod(ways);
1290 			int count=tables[way].getValue(kmer);
1291 			assert(count==NOT_PRESENT || count>=0);
1292 			count=Tools.max(count, 0);
1293 			counts[i]=count;
1294 			if(count>max){
1295 				max=count;
1296 				maxPos=i;
1297 			}
1298 			kmer.addRightNumeric(old);
1299 			if(verbose){outstream.println("restored:           "+kmer);}
1300 //			assert(kmer.equals(kmer2)) : kmer+", "+kmer2+", "+kmer.xor()+", "+kmer2.xor();
1301 		}
1302 		return maxPos;
1303 	}
1304 
1305 	/*--------------------------------------------------------------*/
1306 	/*----------------       Printing Methods       ----------------*/
1307 	/*--------------------------------------------------------------*/
1308 
1309 	@Override
dumpKmersAsBytes(String fname, int mincount, int maxcount, boolean printTime, AtomicLong remaining)1310 	public boolean dumpKmersAsBytes(String fname, int mincount, int maxcount, boolean printTime, AtomicLong remaining){
1311 		if(fname==null){return false;}
1312 		Timer t=new Timer();
1313 
1314 		ByteStreamWriter bsw=new ByteStreamWriter(fname, overwrite, append, true);
1315 		bsw.start();
1316 		for(AbstractKmerTableU set : tables){
1317 			set.dumpKmersAsBytes(bsw, k, mincount, maxcount, remaining);
1318 		}
1319 		bsw.poisonAndWait();
1320 
1321 		t.stop();
1322 		if(printTime){outstream.println("Kmer Dump Time:             \t"+t);}
1323 		return bsw.errorState;
1324 	}
1325 
1326 	@Override
dumpKmersAsBytes_MT(String fname, int mincount, int maxcount, boolean printTime, AtomicLong remaining)1327 	public boolean dumpKmersAsBytes_MT(String fname, int mincount, int maxcount, boolean printTime, AtomicLong remaining){
1328 
1329 		final int threads=Tools.min(Shared.threads(), tables.length);
1330 		if(threads<3 || DumpThread.NUM_THREADS==1){return dumpKmersAsBytes(fname, mincount, maxcount, printTime, remaining);}
1331 
1332 		if(fname==null){return false;}
1333 		Timer t=new Timer();
1334 
1335 		ByteStreamWriter bsw=new ByteStreamWriter(fname, overwrite, append, true);
1336 		bsw.start();
1337 		DumpThreadU.dump(k, mincount, maxcount, tables, bsw, remaining);
1338 		bsw.poisonAndWait();
1339 
1340 		t.stop();
1341 		if(printTime){outstream.println("Kmer Dump Time:             \t"+t);}
1342 		return bsw.errorState;
1343 	}
1344 
1345 	/*--------------------------------------------------------------*/
1346 	/*----------------        Recall Methods        ----------------*/
1347 	/*--------------------------------------------------------------*/
1348 
toText(long[] kmer)1349 	private final StringBuilder toText(long[] kmer){return AbstractKmerTableU.toText(kmer, k);}
1350 
1351 	/*--------------------------------------------------------------*/
1352 	/*----------------       Final Primitives       ----------------*/
1353 	/*--------------------------------------------------------------*/
1354 
1355 	@Override
kbig()1356 	public int kbig(){return kbig;}
1357 	@Override
filterMemory(int pass)1358 	public long filterMemory(int pass){return ((pass&1)==0) ? filterMemory0 : filterMemory1;}
1359 	@Override
ecco()1360 	public boolean ecco(){return ecco;}
1361 	@Override
qtrimLeft()1362 	public boolean qtrimLeft(){return qtrimLeft;}
1363 	@Override
qtrimRight()1364 	public boolean qtrimRight(){return qtrimRight;}
1365 	@Override
minAvgQuality()1366 	public float minAvgQuality(){return minAvgQuality;}
1367 	@Override
tableMemory()1368 	public long tableMemory(){return tableMemory;}
1369 	@Override
estimatedKmerCapacity()1370 	public long estimatedKmerCapacity(){return estimatedKmerCapacity;}
1371 	@Override
ways()1372 	public int ways(){return ways;}
1373 	@Override
rcomp()1374 	public boolean rcomp(){return rcomp;}
1375 
kmerToWay(final Kmer kmer)1376 	public final int kmerToWay(final Kmer kmer){
1377 		final int way=(int)(kmer.xor()%ways);
1378 		return way;
1379 	}
1380 
1381 	/** Hold kmers.  A kmer X such that X%WAYS=Y will be stored in tables[Y] */
1382 	private AbstractKmerTableU[] tables;
tables()1383 	public AbstractKmerTableU[] tables(){return tables;}
1384 
1385 	public long filterMemoryOverride=0;
1386 
1387 	private final int bytesPerKmer;
1388 
1389 	private final long usableMemory;
1390 	private final long filterMemory0;
1391 	private final long filterMemory1;
1392 	private final long tableMemory;
1393 	private final long estimatedKmerCapacity;
1394 
1395 	/** Number of tables (and threads, during loading) */
1396 	private final boolean prealloc;
1397 
1398 	/** Number of tables (and threads, during loading) */
1399 	public final int ways;
1400 
1401 	/** Total kmer length */
1402 	public final int kbig;
1403 	/** Normal kmer length */
1404 	public final int k;
1405 	/** kbig-1; used in some expressions */
1406 	public final int kbig2;
1407 	/** Number of little kmers in a big kmer */
1408 	public final int mult;
1409 
1410 	/** Look for reverse-complements as well as forward kmers.  Default: true */
1411 	private final boolean rcomp;
1412 
1413 	/** Quality-trim the left side */
1414 	public final boolean qtrimLeft;
1415 	/** Quality-trim the right side */
1416 	public final boolean qtrimRight;
1417 	/** Trim bases at this quality or below.  Default: 4 */
1418 	public final float trimq;
1419 	/** Error rate for trimming (derived from trimq) */
1420 	private final float trimE;
1421 
1422 	/** Throw away reads below this average quality after trimming.  Default: 0 */
1423 	public final float minAvgQuality;
1424 	/** If positive, calculate average quality from the first X bases only.  Default: 0 */
1425 	public final int minAvgQualityBases;
1426 
1427 	/** Ignore kmers with probability of correctness less than this */
1428 	public final float minProb;
1429 
1430 	/** Correct via overlap */
1431 	private final boolean ecco;
1432 
1433 	/** Attempt to merge via overlap prior to counting kmers */
1434 	private final boolean merge;
1435 
1436 	/*--------------------------------------------------------------*/
1437 	/*----------------            Walker            ----------------*/
1438 	/*--------------------------------------------------------------*/
1439 
walk()1440 	public WalkerU walk(){
1441 		return new WalkerKSTU();
1442 	}
1443 
1444 	public class WalkerKSTU extends WalkerU {
1445 
WalkerKSTU()1446 		WalkerKSTU(){
1447 			w=tables[0].walk();
1448 		}
1449 
1450 		/**
1451 		 * Fills this object with the next key and value.
1452 		 * @return True if successful.
1453 		 */
next()1454 		public boolean next(){
1455 			if(w==null){return false;}
1456 			if(w.next()){return true;}
1457 			if(tnum<tables.length){tnum++;}
1458 			w=(tnum<tables.length ? tables[tnum].walk() : null);
1459 			return w==null ? false : w.next();
1460 		}
1461 
kmer()1462 		public Kmer kmer(){return w.kmer();}
value()1463 		public int value(){return w.value();}
1464 
1465 		private WalkerU w=null;
1466 
1467 		/** current table number */
1468 		private int tnum=0;
1469 	}
1470 
1471 }
1472