1 package kmer;
2 
3 import java.io.PrintStream;
4 import java.util.ArrayList;
5 import java.util.Arrays;
6 import java.util.concurrent.ArrayBlockingQueue;
7 
8 import dna.AminoAcid;
9 import fileIO.FileFormat;
10 import fileIO.ReadWrite;
11 import fileIO.TextStreamWriter;
12 import jgi.BBMerge;
13 import shared.PreParser;
14 import shared.Shared;
15 import shared.Timer;
16 import shared.Tools;
17 import stream.ConcurrentReadInputStream;
18 import stream.Read;
19 import structures.IntList;
20 import structures.ListNum;
21 
22 /**
23  * @author Brian Bushnell
24  * @date Mar 4, 2015
25  *
26  */
27 public class TableLoaderLockFree {
28 
29 	/*--------------------------------------------------------------*/
30 	/*----------------        Initialization        ----------------*/
31 	/*--------------------------------------------------------------*/
32 
33 	/**
34 	 * Code entrance from the command line.
35 	 * @param args Command line arguments
36 	 */
main(String[] args)37 	public static void main(String[] args){
38 
39 		{//Preparse block for help, config files, and outstream
40 			PreParser pp=new PreParser(args, null, false);
41 			args=pp.args;
42 			outstream=pp.outstream;
43 		}
44 
45 		Timer t=new Timer();
46 
47 		AbstractKmerTable[] tables=makeTables(AbstractKmerTable.ARRAY1D, 12, -1L, false, 1.0);
48 
49 		int k=31;
50 		int mink=0;
51 		int speed=0;
52 		int hdist=0;
53 		int edist=0;
54 		boolean rcomp=true;
55 		boolean maskMiddle=false;
56 
57 		//Create a new Loader instance
58 		TableLoaderLockFree loader=new TableLoaderLockFree(tables, k, mink, speed, hdist, edist, rcomp, maskMiddle);
59 		loader.setRefSkip(0);
60 		loader.hammingDistance2=0;
61 		loader.editDistance2=0;
62 		loader.storeMode(SET_IF_NOT_PRESENT);
63 
64 		///And run it
65 		String[] refs=args;
66 		String[] literals=null;
67 		boolean keepNames=false;
68 		boolean useRefNames=false;
69 		long kmers=loader.processData(refs, literals, keepNames, useRefNames, false);
70 		t.stop();
71 
72 		outstream.println("Time:     \t"+t);
73 		outstream.println("Return:   \t"+kmers);
74 		outstream.println("refKmers: \t"+loader.refKmers);
75 		outstream.println("refBases: \t"+loader.refBases);
76 		outstream.println("refReads: \t"+loader.refReads);
77 
78 		//Close the print stream if it was redirected
79 		Shared.closeStream(outstream);
80 	}
81 
TableLoaderLockFree(AbstractKmerTable[] tables_, int k_)82 	public TableLoaderLockFree(AbstractKmerTable[] tables_, int k_){
83 		this(tables_, k_, 0, 0, 0, 0, true, false);
84 	}
85 
TableLoaderLockFree(AbstractKmerTable[] tables_, int k_, int mink_, int speed_, int hdist_, int edist_, boolean rcomp_, boolean maskMiddle_)86 	public TableLoaderLockFree(AbstractKmerTable[] tables_, int k_, int mink_, int speed_, int hdist_, int edist_, boolean rcomp_, boolean maskMiddle_){
87 		tables=tables_;
88 		k=k_;
89 		k2=k-1;
90 		mink=mink_;
91 		rcomp=rcomp_;
92 		useShortKmers=(mink>0 && mink<k);
93 		speed=speed_;
94 		hammingDistance=hdist_;
95 		editDistance=edist_;
96 		middleMask=maskMiddle ? ~(3L<<(2*(k/2))) : -1L;
97 	}
98 
99 
100 	/*--------------------------------------------------------------*/
101 	/*----------------         Outer Methods        ----------------*/
102 	/*--------------------------------------------------------------*/
103 
104 
105 //	public static AbstractKmerTable[] makeTables(int tableType, int initialSize, long coreMask, boolean growable){
106 //		return AbstractKmerTable.preallocate(WAYS, tableType, initialSize, coreMask, growable);
107 //	}
108 
109 //	public static AbstractKmerTable[] makeTables(int tableType, int[] schedule, long coreMask){
110 //		return AbstractKmerTable.preallocate(WAYS, tableType, schedule, coreMask);
111 //	}
112 
makeTables(int tableType, int bytesPerKmer, long coreMask, boolean prealloc, double memRatio)113 	public static AbstractKmerTable[] makeTables(int tableType, int bytesPerKmer, long coreMask,
114 			boolean prealloc, double memRatio){
115 		ScheduleMaker scheduleMaker=new ScheduleMaker(WAYS, bytesPerKmer, prealloc, memRatio);
116 		int[] schedule=scheduleMaker.makeSchedule();
117 		return AbstractKmerTable.preallocate(WAYS, tableType, schedule, coreMask);
118 	}
119 
120 	ScheduleMaker scheduleMaker=new ScheduleMaker(WAYS, 12, false, 0.8);
121 	int[] schedule=scheduleMaker.makeSchedule();
122 
processData(String[] ref, String[] literal, boolean keepNames, boolean useRefNames, boolean ecc_)123 	public long processData(String[] ref, String[] literal, boolean keepNames, boolean useRefNames, boolean ecc_){
124 
125 		scaffoldNames=null;
126 		refNames=null;
127 		refScafCounts=null;
128 		scaffoldLengths=null;
129 		ecc=ecc_;
130 
131 		if(keepNames){
132 			scaffoldNames=new ArrayList<String>();
133 			refNames=new ArrayList<String>();
134 			scaffoldLengths=new IntList();
135 
136 			if(ref!=null){
137 				for(String s : ref){refNames.add(s);}
138 			}
139 			if(literal!=null){refNames.add("literal");}
140 			refScafCounts=new int[refNames.size()];
141 
142 			if(useRefNames){toRefNames();}
143 		}
144 
145 		return spawnLoadThreads(ref, literal);
146 	}
147 
setRefSkip(int x)148 	public void setRefSkip(int x){setRefSkip(x, x);}
149 
setRefSkip(int min, int max)150 	public void setRefSkip(int min, int max){
151 		max=Tools.max(min, max);
152 		if(min==max){
153 			minRefSkip=maxRefSkip=min;
154 		}else{
155 			minRefSkip=min;
156 			maxRefSkip=max;
157 		}
158 		variableRefSkip=(minRefSkip!=maxRefSkip);
159 	}
160 
storeMode(final int x)161 	public void storeMode(final int x){
162 		assert(x==SET_IF_NOT_PRESENT || x==SET_ALWAYS || x==INCREMENT);
163 		storeMode=x;
164 	}
165 
166 	/*--------------------------------------------------------------*/
167 	/*----------------         Inner Methods        ----------------*/
168 	/*--------------------------------------------------------------*/
169 
170 
171 	/**
172 	 * Fills tables with kmers from references, using multiple LoadThread.
173 	 * @return Number of kmers stored.
174 	 */
spawnLoadThreads(String[] ref, String[] literal)175 	private long spawnLoadThreads(String[] ref, String[] literal){
176 		Timer t=new Timer();
177 		if((ref==null || ref.length<1) && (literal==null || literal.length<1)){return 0;}
178 		long added=0;
179 
180 		/* Create load threads */
181 		LoadThread[] loaders=new LoadThread[WAYS];
182 		for(int i=0; i<loaders.length; i++){
183 			loaders[i]=new LoadThread(i);
184 			loaders[i].start();
185 		}
186 
187 		/* For each reference file... */
188 		int refNum=0;
189 		if(ref!=null){
190 			for(String refname : ref){
191 
192 				/* Start an input stream */
193 				FileFormat ff=FileFormat.testInput(refname, FileFormat.FASTA, null, false, true);
194 				ConcurrentReadInputStream cris=ConcurrentReadInputStream.getReadInputStream(-1L, false, ff, null, null, null, Shared.USE_MPI, true);
195 				cris.start(); //4567
196 				ListNum<Read> ln=cris.nextList();
197 				ArrayList<Read> reads=(ln!=null ? ln.list : null);
198 
199 				/* Iterate through read lists from the input stream */
200 				while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning
201 					{
202 						/* Assign a unique ID number to each scaffold */
203 						ArrayList<Read> reads2=new ArrayList<Read>(reads);
204 						if(scaffoldNames!=null){
205 							for(Read r1 : reads2){
206 								final Read r2=r1.mate;
207 								final Integer id=scaffoldNames.size();
208 								if(ecc && r1!=null && r2!=null){BBMerge.findOverlapStrict(r1, r2, true);}
209 								refScafCounts[refNum]++;
210 								scaffoldNames.add(r1.id==null ? id.toString() : r1.id);
211 								int len=r1.length();
212 								r1.obj=id;
213 								if(r2!=null){
214 									r2.obj=id;
215 									len+=r2.length();
216 								}
217 								scaffoldLengths.add(len);
218 							}
219 						}
220 
221 						if(REPLICATE_AMBIGUOUS){
222 							reads2=Tools.replicateAmbiguous(reads2, Tools.min(k, mink));
223 						}
224 
225 						/* Send a pointer to the read list to each LoadThread */
226 						for(LoadThread lt : loaders){
227 							boolean b=true;
228 							while(b){
229 								try {
230 									lt.queue.put(reads2);
231 									b=false;
232 								} catch (InterruptedException e) {
233 									//TODO:  This will hang due to still-running threads.
234 									throw new RuntimeException(e);
235 								}
236 							}
237 						}
238 					}
239 
240 					/* Dispose of the old list and fetch a new one */
241 					cris.returnList(ln);
242 					ln=cris.nextList();
243 					reads=(ln!=null ? ln.list : null);
244 				}
245 				/* Cleanup */
246 				cris.returnList(ln);
247 				errorState|=ReadWrite.closeStream(cris);
248 				refNum++;
249 			}
250 		}
251 
252 		/* If there are literal sequences to use as references */
253 		if(literal!=null){
254 			ArrayList<Read> list=new ArrayList<Read>(literal.length);
255 			if(verbose){outstream.println("Adding literals "+Arrays.toString(literal));}
256 
257 			/* Assign a unique ID number to each literal sequence */
258 			for(int i=0; i<literal.length; i++){
259 				if(scaffoldNames!=null){
260 					final Integer id=scaffoldNames.size();
261 					final Read r=new Read(literal[i].getBytes(), null, id);
262 					refScafCounts[refNum]++;
263 					scaffoldNames.add(id.toString());
264 					scaffoldLengths.add(r.length());
265 					r.obj=id;
266 				}else{
267 					final Read r=new Read(literal[i].getBytes(), null, i);
268 					list.add(r);
269 				}
270 			}
271 
272 			if(REPLICATE_AMBIGUOUS){
273 				list=Tools.replicateAmbiguous(list, Tools.min(k, mink));
274 			}
275 
276 			/* Send a pointer to the read list to each LoadThread */
277 			for(LoadThread lt : loaders){
278 				boolean b=true;
279 				while(b){
280 					try {
281 						lt.queue.put(list);
282 						b=false;
283 					} catch (InterruptedException e) {
284 						//TODO:  This will hang due to still-running threads.
285 						throw new RuntimeException(e);
286 					}
287 				}
288 			}
289 		}
290 
291 		/* Signal loaders to terminate */
292 		for(LoadThread lt : loaders){
293 			boolean b=true;
294 			while(b){
295 				try {
296 					lt.queue.put(POISON);
297 					b=false;
298 				} catch (InterruptedException e) {
299 					//TODO:  This will hang due to still-running threads.
300 					throw new RuntimeException(e);
301 				}
302 			}
303 		}
304 
305 		/* Wait for loaders to die, and gather statistics */
306 		for(LoadThread lt : loaders){
307 			while(lt.getState()!=Thread.State.TERMINATED){
308 				try {
309 					lt.join();
310 				} catch (InterruptedException e) {
311 					// TODO Auto-generated catch block
312 					e.printStackTrace();
313 				}
314 			}
315 			added+=lt.addedT;
316 			refKmers+=lt.refKmersT;
317 			refBases+=lt.refBasesT;
318 			refReads+=lt.refReadsT;
319 		}
320 		//Correct statistics for number of threads, since each thread processes all reference data
321 		refKmers/=WAYS;
322 		refBases/=WAYS;
323 		refReads/=WAYS;
324 
325 		t.stop();
326 		if(DISPLAY_PROGRESS){
327 			outstream.println("Added "+added+" kmers; time: \t"+t);
328 			Shared.printMemory();
329 			outstream.println();
330 		}
331 
332 		if(verbose){
333 			TextStreamWriter tsw=new TextStreamWriter("stdout", false, false, false, FileFormat.TEXT);
334 			tsw.start();
335 			for(AbstractKmerTable table : tables){
336 				table.dumpKmersAsText(tsw, k, 1, Integer.MAX_VALUE);
337 			}
338 			tsw.poisonAndWait();
339 		}
340 
341 		return added;
342 	}
343 
344 
345 
346 	/**
347 	 * Fills the scaffold names array with reference names.
348 	 */
toRefNames()349 	public void toRefNames(){
350 		final int numRefs=refNames.size();
351 		for(int r=0, s=1; r<numRefs; r++){
352 			final int scafs=refScafCounts[r];
353 			final int lim=s+scafs;
354 			final String name=ReadWrite.stripToCore(refNames.get(r));
355 //			outstream.println("r="+r+", s="+s+", scafs="+scafs+", lim="+lim+", name="+name);
356 			while(s<lim){
357 //				outstream.println(r+", "+s+". Setting "+scaffoldNames.get(s)+" -> "+name);
358 				scaffoldNames.set(s, name);
359 				s++;
360 			}
361 		}
362 	}
363 
364 	/*--------------------------------------------------------------*/
365 	/*----------------         Inner Classes        ----------------*/
366 	/*--------------------------------------------------------------*/
367 
368 	/**
369 	 * Loads kmers into a table.  Each thread handles all kmers X such that X%WAYS==tnum.
370 	 */
371 	private class LoadThread extends Thread{
372 
LoadThread(final int tnum_)373 		public LoadThread(final int tnum_){
374 			tnum=tnum_;
375 			map=tables[tnum];
376 		}
377 
378 		/**
379 		 * Get the next list of reads (or scaffolds) from the queue.
380 		 * @return List of reads
381 		 */
fetch()382 		private ArrayList<Read> fetch(){
383 			ArrayList<Read> list=null;
384 			while(list==null){
385 				try {
386 					list=queue.take();
387 				} catch (InterruptedException e) {
388 					// TODO Auto-generated catch block
389 					e.printStackTrace();
390 				}
391 			}
392 			return list;
393 		}
394 
395 		@Override
run()396 		public void run(){
397 			ArrayList<Read> reads=fetch();
398 			while(reads!=POISON){
399 				for(Read r1 : reads){
400 					assert(r1.pairnum()==0);
401 					final Read r2=r1.mate;
402 
403 					addedT+=addToMap(r1, minRefSkip);
404 					if(r2!=null){addedT+=addToMap(r2, minRefSkip);}
405 				}
406 				reads=fetch();
407 			}
408 
409 			if(map.canRebalance() && map.size()>2L*map.arrayLength()){
410 				map.rebalance();
411 			}
412 		}
413 
414 		/**
415 		 * @param r The current read to process
416 		 * @param skip Number of bases to skip between kmers
417 		 * @return Number of kmers stored
418 		 */
addToMap(Read r, int skip)419 		private long addToMap(Read r, int skip){
420 			if(variableRefSkip){
421 				int rblen=r.length();
422 				skip=(rblen>20000000 ? k : rblen>5000000 ? 11 : rblen>500000 ? 2 : 0);
423 				skip=Tools.mid(minRefSkip, maxRefSkip, skip);
424 			}
425 			final byte[] bases=r.bases;
426 			final int shift=2*k;
427 			final int shift2=shift-2;
428 			final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
429 			final long kmask=kMasks[k];
430 			long kmer=0;
431 			long rkmer=0;
432 			long added=0;
433 			int len=0;
434 
435 			if(bases!=null){
436 				refReadsT++;
437 				refBasesT+=bases.length;
438 			}
439 			if(bases==null || bases.length<k){return 0;}
440 
441 			final int id=(r.obj==null ? 1 : ((Integer)r.obj).intValue());
442 
443 			if(skip>1){ //Process while skipping some kmers
444 				for(int i=0; i<bases.length; i++){
445 					byte b=bases[i];
446 					long x=AminoAcid.baseToNumber[b];
447 					long x2=AminoAcid.baseToComplementNumber[b];
448 					kmer=((kmer<<2)|x)&mask;
449 					rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
450 					if(x<0){len=0; rkmer=0;}else{len++;}
451 					if(verbose){outstream.println("Scanning1 i="+i+", kmer="+kmer+", rkmer="+rkmer+", bases="+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
452 					if(len>=k){
453 						refKmersT++;
454 						if(len%skip==0){
455 							final long extraBase=(i>=bases.length-1 ? -1 : AminoAcid.baseToNumber[bases[i+1]]);
456 							added+=addToMap(kmer, rkmer, k, extraBase, id, kmask, hammingDistance, editDistance);
457 							if(useShortKmers){
458 								if(i==k2){added+=addToMapRightShift(kmer, rkmer, id);}
459 								if(i==bases.length-1){added+=addToMapLeftShift(kmer, rkmer, extraBase, id);}
460 							}
461 						}
462 					}
463 				}
464 			}else{ //Process all kmers
465 				for(int i=0; i<bases.length; i++){
466 					byte b=bases[i];
467 					long x=AminoAcid.baseToNumber[b];
468 					long x2=AminoAcid.baseToComplementNumber[b];
469 					kmer=((kmer<<2)|x)&mask;
470 					rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
471 					if(x<0){len=0; rkmer=0;}else{len++;}
472 					if(verbose){outstream.println("Scanning2 i="+i+", kmer="+kmer+", rkmer="+rkmer+", bases="+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
473 					if(len>=k){
474 						refKmersT++;
475 						final long extraBase=(i>=bases.length-1 ? -1 : AminoAcid.baseToNumber[bases[i+1]]);
476 						final long atm=addToMap(kmer, rkmer, k, extraBase, id, kmask, hammingDistance, editDistance);
477 						added+=atm;
478 //						assert(false) : atm+", "+map.contains(toValue(kmer, rkmer, kmask));
479 						if(useShortKmers){
480 							if(i==k2){added+=addToMapRightShift(kmer, rkmer, id);}
481 							if(i==bases.length-1){added+=addToMapLeftShift(kmer, rkmer, extraBase, id);}
482 						}
483 					}
484 				}
485 			}
486 			return added;
487 		}
488 
489 
490 		/**
491 		 * Adds short kmers on the left end of the read.
492 		 * @param kmer Forward kmer
493 		 * @param rkmer Reverse kmer
494 		 * @param extraBase Base added to end in case of deletions
495 		 * @param id Scaffold number
496 		 * @return Number of kmers stored
497 		 */
addToMapLeftShift(long kmer, long rkmer, final long extraBase, final int id)498 		private long addToMapLeftShift(long kmer, long rkmer, final long extraBase, final int id){
499 			if(verbose){outstream.println("addToMapLeftShift");}
500 			long added=0;
501 			for(int i=k-1; i>=mink; i--){
502 				kmer=kmer&rightMasks[i];
503 				rkmer=rkmer>>>2;
504 				long x=addToMap(kmer, rkmer, i, extraBase, id, kMasks[i], hammingDistance2, editDistance2);
505 				added+=x;
506 				if(verbose){
507 					if((toValue(kmer, rkmer, kMasks[i]))%WAYS==tnum){
508 						outstream.println("added="+x+"; i="+i+"; tnum="+tnum+"; Added left-shift kmer "+AminoAcid.kmerToString(kmer&~kMasks[i], i)+"; value="+(toValue(kmer, rkmer, kMasks[i]))+"; kmer="+kmer+"; rkmer="+rkmer+"; kmask="+kMasks[i]+"; rightMasks[i+1]="+rightMasks[i+1]);
509 						outstream.println("i="+i+"; tnum="+tnum+"; Looking for left-shift kmer "+AminoAcid.kmerToString(kmer&~kMasks[i], i));
510 						final long value=toValue(kmer, rkmer, kMasks[i]);
511 						if(map.contains(value)){outstream.println("Found "+value);}
512 					}
513 				}
514 			}
515 			return added;
516 		}
517 
518 
519 		/**
520 		 * Adds short kmers on the right end of the read.
521 		 * @param kmer Forward kmer
522 		 * @param rkmer Reverse kmer
523 		 * @param id Scaffold number
524 		 * @return Number of kmers stored
525 		 */
addToMapRightShift(long kmer, long rkmer, final int id)526 		private long addToMapRightShift(long kmer, long rkmer, final int id){
527 			if(verbose){outstream.println("addToMapRightShift");}
528 			long added=0;
529 			for(int i=k-1; i>=mink; i--){
530 				long extraBase=kmer&3L;
531 				kmer=kmer>>>2;
532 				rkmer=rkmer&rightMasks[i];
533 //				assert(Long.numberOfLeadingZeros(kmer)>=2*(32-i)) : Long.numberOfLeadingZeros(kmer)+", "+i+", "+kmer+", "+kMasks[i];
534 //				assert(Long.numberOfLeadingZeros(rkmer)>=2*(32-i)) : Long.numberOfLeadingZeros(rkmer)+", "+i+", "+rkmer+", "+kMasks[i];
535 				long x=addToMap(kmer, rkmer, i, extraBase, id, kMasks[i], hammingDistance2, editDistance2);
536 				added+=x;
537 				if(verbose){
538 					if((toValue(kmer, rkmer, kMasks[i]))%WAYS==tnum){
539 						outstream.println("added="+x+"; i="+i+"; tnum="+tnum+"; Added right-shift kmer "+AminoAcid.kmerToString(kmer&~kMasks[i], i)+"; value="+(toValue(kmer, rkmer, kMasks[i]))+"; kmer="+kmer+"; rkmer="+rkmer+"; kmask="+kMasks[i]+"; rightMasks[i+1]="+rightMasks[i+1]);
540 						outstream.println("i="+i+"; tnum="+tnum+"; Looking for right-shift kmer "+AminoAcid.kmerToString(kmer&~kMasks[i], i));
541 						final long value=toValue(kmer, rkmer, kMasks[i]);
542 						if(map.contains(value)){outstream.println("Found "+value);}
543 					}
544 				}
545 			}
546 			return added;
547 		}
548 
549 
550 		/**
551 		 * Adds this kmer to the table, including any mutations implied by editDistance or hammingDistance.
552 		 * @param kmer Forward kmer
553 		 * @param rkmer Reverse kmer
554 		 * @param len Kmer length
555 		 * @param extraBase Base added to end in case of deletions
556 		 * @param id Scaffold number
557 		 * @param kmask0
558 		 * @return Number of kmers stored
559 		 */
addToMap(final long kmer, final long rkmer, final int len, final long extraBase, final int id, final long kmask0, final int hdist, final int edist)560 		private long addToMap(final long kmer, final long rkmer, final int len, final long extraBase, final int id, final long kmask0, final int hdist, final int edist){
561 
562 			assert(kmask0==kMasks[len]) : kmask0+", "+len+", "+kMasks[len]+", "+Long.numberOfTrailingZeros(kmask0)+", "+Long.numberOfTrailingZeros(kMasks[len]);
563 
564 			if(verbose){outstream.println("addToMap_A; len="+len+"; kMasks[len]="+kMasks[len]);}
565 			assert((kmer&kmask0)==0);
566 			final long added;
567 			if(hdist==0){
568 				final long key=toValue(kmer, rkmer, kmask0);
569 				if(failsSpeed(key)){return 0;}
570 				if(key%WAYS!=tnum){return 0;}
571 				if(verbose){outstream.println("addToMap_B: "+AminoAcid.kmerToString(kmer&~kMasks[len], len)+" = "+key);}
572 				if(storeMode==SET_IF_NOT_PRESENT){
573 					added=map.setIfNotPresent(key, id);
574 				}else if(storeMode==SET_ALWAYS){
575 					added=map.set(key, id);
576 				}else{
577 					assert(storeMode==INCREMENT);
578 					added=map.increment(key, 1);
579 				}
580 			}else if(edist>0){
581 //				long extraBase=(i>=bases.length-1 ? -1 : AminoAcid.baseToNumber[bases[i+1]]);
582 				added=mutate(kmer, rkmer, len, id, edist, extraBase);
583 			}else{
584 				added=mutate(kmer, rkmer, len, id, hdist, -1);
585 			}
586 			if(verbose){outstream.println("addToMap added "+added+" keys.");}
587 			return added;
588 		}
589 
590 		/**
591 		 * Mutate and store this kmer through 'dist' recursions.
592 		 * @param kmer Forward kmer
593 		 * @param rkmer Reverse kmer
594 		 * @param id Scaffold number
595 		 * @param dist Number of mutations
596 		 * @param extraBase Base added to end in case of deletions
597 		 * @return Number of kmers stored
598 		 */
mutate(final long kmer, final long rkmer, final int len, final int id, final int dist, final long extraBase)599 		private long mutate(final long kmer, final long rkmer, final int len, final int id, final int dist, final long extraBase){
600 			long added=0;
601 
602 			final long key=toValue(kmer, rkmer, kMasks[len]);
603 
604 			if(verbose){outstream.println("mutate_A; len="+len+"; kmer="+kmer+"; rkmer="+rkmer+"; kMasks[len]="+kMasks[len]);}
605 			if(key%WAYS==tnum){
606 				if(verbose){outstream.println("mutate_B: "+AminoAcid.kmerToString(kmer&~kMasks[len], len)+" = "+key);}
607 				int x;
608 				if(storeMode==SET_IF_NOT_PRESENT){
609 					x=map.setIfNotPresent(key, id);
610 				}else if(storeMode==SET_ALWAYS){
611 					x=map.set(key, id);
612 				}else{
613 					assert(storeMode==INCREMENT);
614 					x=map.increment(key, 1);
615 					x=(x==1 ? 1 : 0);
616 				}
617 				if(verbose){outstream.println("mutate_B added "+x+" keys.");}
618 				added+=x;
619 				assert(map.contains(key));
620 			}
621 
622 			if(dist>0){
623 				final int dist2=dist-1;
624 
625 				//Sub
626 				for(int j=0; j<4; j++){
627 					for(int i=0; i<len; i++){
628 						final long temp=(kmer&clearMasks[i])|setMasks[j][i];
629 						if(temp!=kmer){
630 							long rtemp=AminoAcid.reverseComplementBinaryFast(temp, len);
631 							added+=mutate(temp, rtemp, len, id, dist2, extraBase);
632 						}
633 					}
634 				}
635 
636 				if(editDistance>0){
637 					//Del
638 					if(extraBase>=0 && extraBase<=3){
639 						for(int i=1; i<len; i++){
640 							final long temp=(kmer&leftMasks[i])|((kmer<<2)&rightMasks[i])|extraBase;
641 							if(temp!=kmer){
642 								long rtemp=AminoAcid.reverseComplementBinaryFast(temp, len);
643 								added+=mutate(temp, rtemp, len, id, dist2, -1);
644 							}
645 						}
646 					}
647 
648 					//Ins
649 					final long eb2=kmer&3;
650 					for(int i=1; i<len; i++){
651 						final long temp0=(kmer&leftMasks[i])|((kmer&rightMasks[i])>>2);
652 						for(int j=0; j<4; j++){
653 							final long temp=temp0|setMasks[j][i-1];
654 							if(temp!=kmer){
655 								long rtemp=AminoAcid.reverseComplementBinaryFast(temp, len);
656 								added+=mutate(temp, rtemp, len, id, dist2, eb2);
657 							}
658 						}
659 					}
660 				}
661 
662 			}
663 
664 			return added;
665 		}
666 
667 		/*--------------------------------------------------------------*/
668 
669 		/** Number of kmers stored by this thread */
670 		public long addedT=0;
671 		/** Number of items encountered by this thread */
672 		public long refKmersT=0, refReadsT=0, refBasesT=0;
673 		/** Thread number; used to determine which kmers to store */
674 		public final int tnum;
675 		/** Buffer of input read lists */
676 		public final ArrayBlockingQueue<ArrayList<Read>> queue=new ArrayBlockingQueue<ArrayList<Read>>(32);
677 
678 		/** Destination for storing kmers */
679 		private final AbstractKmerTable map;
680 
681 	}
682 
683 	/*--------------------------------------------------------------*/
684 	/*----------------        Static Methods        ----------------*/
685 	/*--------------------------------------------------------------*/
686 
687 	/**
688 	 * Transforms a kmer into a canonical value stored in the table.  Expected to be inlined.
689 	 * @param kmer Forward kmer
690 	 * @param rkmer Reverse kmer
691 	 * @param lengthMask Bitmask with single '1' set to left of kmer
692 	 * @return Canonical value
693 	 */
toValue(long kmer, long rkmer, long lengthMask)694 	private final long toValue(long kmer, long rkmer, long lengthMask){
695 		assert(lengthMask==0 || (kmer<lengthMask && rkmer<lengthMask)) : lengthMask+", "+kmer+", "+rkmer;
696 		long value=(rcomp ? Tools.max(kmer, rkmer) : kmer);
697 		return (value&middleMask)|lengthMask;
698 	}
699 
passesSpeed(long key)700 	final boolean passesSpeed(long key){
701 		return speed<1 || ((key&Long.MAX_VALUE)%17)>=speed;
702 	}
703 
failsSpeed(long key)704 	final boolean failsSpeed(long key){
705 		return speed>0 && ((key&Long.MAX_VALUE)%17)<speed;
706 	}
707 
708 	/*--------------------------------------------------------------*/
709 	/*----------------            Fields            ----------------*/
710 	/*--------------------------------------------------------------*/
711 
712 	/** Has this class encountered errors while processing? */
713 	public boolean errorState=false;
714 
715 	/** How to associate values with kmers */
716 	private int storeMode=SET_IF_NOT_PRESENT;
717 
718 	/** Hold kmers.  A kmer X such that X%WAYS=Y will be stored in keySets[Y] */
719 	public AbstractKmerTable[] tables;
720 	/** A scaffold's name is stored at scaffoldNames.get(id).
721 	 * scaffoldNames[0] is reserved, so the first id is 1. */
722 	public ArrayList<String> scaffoldNames;
723 	/** Names of reference files (refNames[0] is valid). */
724 	public ArrayList<String> refNames;
725 	/** Number of scaffolds per reference. */
726 	public int[] refScafCounts;
727 	/** scaffoldLengths[id] stores the length of that scaffold */
728 	public IntList scaffoldLengths=new IntList();
729 
730 	/** Make the middle base in a kmer a wildcard to improve sensitivity */
731 	public final boolean maskMiddle=false;
732 	/** Correct errors via read overlap */
733 	public boolean ecc=false;
734 
735 	/** Store reference kmers with up to this many substitutions */
736 	public final int hammingDistance;
737 	/** Store reference kmers with up to this many edits (including indels) */
738 	public final int editDistance;
739 	/** Store short reference kmers with up to this many substitutions */
740 	public int hammingDistance2=-1;
741 	/** Store short reference kmers with up to this many edits (including indels) */
742 	public int editDistance2=-1;
743 	/** Always skip at least this many consecutive kmers when hashing reference.
744 	 * 1 means every kmer is used, 2 means every other, etc. */
745 	private int minRefSkip=0;
746 	/** Never skip more than this many consecutive kmers when hashing reference. */
747 	private int maxRefSkip=0;
748 
749 	private boolean variableRefSkip=false;
750 
751 	/*--------------------------------------------------------------*/
752 	/*----------------          Statistics          ----------------*/
753 	/*--------------------------------------------------------------*/
754 
755 	long refReads=0;
756 	long refBases=0;
757 	long refKmers=0;
758 
759 	long storedKmers=0;
760 
761 	/*--------------------------------------------------------------*/
762 	/*----------------       Final Primitives       ----------------*/
763 	/*--------------------------------------------------------------*/
764 
765 	/** Look for reverse-complements as well as forward kmers.  Default: true */
766 	private final boolean rcomp;
767 	/** AND bitmask with 0's at the middle base */
768 	private final long middleMask;
769 
770 	/** Normal kmer length */
771 	private final int k;
772 	/** k-1; used in some expressions */
773 	private final int k2;
774 	/** Shortest kmer to use for trimming */
775 	private final int mink;
776 	/** Attempt to match kmers shorter than normal k on read ends when doing kTrimming. */
777 	private final boolean useShortKmers;
778 
779 	/** Fraction of kmers to skip, 0 to 16 out of 17 */
780 	private final int speed;
781 
782 	/*--------------------------------------------------------------*/
783 	/*----------------         Static Fields        ----------------*/
784 	/*--------------------------------------------------------------*/
785 
786 	/** Default initial size of data structures */
787 	private static final int initialSizeDefault=128000;
788 
789 	/** Number of tables (and threads, during loading) */
790 	private static final int WAYS=7; //123
791 	/** Verbose messages */
792 	public static final boolean verbose=false; //123
793 
794 	/** Print messages to this stream */
795 	private static PrintStream outstream=System.err;
796 	/** Display progress messages such as memory usage */
797 	public static boolean DISPLAY_PROGRESS=true;
798 	/** Indicates end of input stream */
799 	private static final ArrayList<Read> POISON=new ArrayList<Read>(0);
800 	/** Make unambiguous copies of ref sequences with ambiguous bases */
801 	public static boolean REPLICATE_AMBIGUOUS=false;
802 
803 	/** x&clearMasks[i] will clear base i */
804 	private static final long[] clearMasks;
805 	/** x|setMasks[i][j] will set base i to j */
806 	private static final long[][] setMasks;
807 	/** x&leftMasks[i] will clear all bases to the right of i (exclusive) */
808 	private static final long[] leftMasks;
809 	/** x&rightMasks[i] will clear all bases to the left of i (inclusive) */
810 	private static final long[] rightMasks;
811 	/** x|kMasks[i] will set the bit to the left of the leftmost base */
812 	private static final long[] kMasks;
813 
814 	public static final int SET_IF_NOT_PRESENT=1, SET_ALWAYS=2, INCREMENT=3;
815 
816 	/*--------------------------------------------------------------*/
817 	/*----------------      Static Initializers     ----------------*/
818 	/*--------------------------------------------------------------*/
819 
820 	static{
821 		clearMasks=new long[32];
822 		leftMasks=new long[32];
823 		rightMasks=new long[32];
824 		kMasks=new long[32];
825 		setMasks=new long[4][32];
826 		for(int i=0; i<32; i++){
827 			clearMasks[i]=~(3L<<(2*i));
828 		}
829 		for(int i=0; i<32; i++){
830 			leftMasks[i]=((-1L)<<(2*i));
831 		}
832 		for(int i=0; i<32; i++){
833 			rightMasks[i]=~((-1L)<<(2*i));
834 		}
835 		for(int i=0; i<32; i++){
836 			kMasks[i]=((1L)<<(2*i));
837 		}
838 		for(int i=0; i<32; i++){
839 			for(long j=0; j<4; j++){
840 				setMasks[(int)j][i]=(j<<(2*i));
841 			}
842 		}
843 	}
844 
845 }
846