1 package assemble;
2 
3 import java.util.ArrayList;
4 import java.util.Arrays;
5 import java.util.BitSet;
6 import java.util.concurrent.atomic.AtomicInteger;
7 
8 import dna.AminoAcid;
9 import jgi.BBMerge;
10 import kmer.AbstractKmerTable;
11 import kmer.HashArray1D;
12 import kmer.HashForest;
13 import kmer.KmerNode;
14 import kmer.KmerTableSet;
15 import shared.KillSwitch;
16 import shared.Shared;
17 import shared.Timer;
18 import shared.Tools;
19 import stream.ConcurrentReadInputStream;
20 import stream.Read;
21 import structures.ByteBuilder;
22 import structures.IntList;
23 import structures.ListNum;
24 import structures.LongList;
25 import ukmer.Kmer;
26 
27 
28 /**
29  * Short-kmer assembler based on KmerCountExact.
30  * @author Brian Bushnell
31  * @date May 15, 2015
32  *
33  */
34 public class Tadpole1 extends Tadpole {
35 
36 	/**
37 	 * Code entrance from the command line.
38 	 * @param args Command line arguments
39 	 */
main(String[] args)40 	public static void main(String[] args){
41 		Timer t=new Timer(), t2=new Timer();
42 		t.start();
43 		t2.start();
44 
45 		//Create a new CountKmersExact instance
46 		Tadpole1 wog=new Tadpole1(args, true);
47 		t2.stop();
48 		outstream.println("Initialization Time:      \t"+t2);
49 
50 		///And run it
51 		wog.process(t);
52 	}
53 
54 	/**
55 	 * Constructor.
56 	 * @param args Command line arguments
57 	 */
Tadpole1(String[] args, boolean setDefaults)58 	public Tadpole1(String[] args, boolean setDefaults){
59 		super(args, setDefaults);
60 
61 		final int bytesPerKmer;
62 		{
63 			int mult=12;
64 			if(useOwnership){mult+=4;}
65 			if(processingMode==correctMode || processingMode==discardMode){}
66 			else if(processingMode==contigMode || processingMode==extendMode){mult+=1;}
67 			bytesPerKmer=mult;
68 		}
69 
70 		tables=new KmerTableSet(args, bytesPerKmer);
71 		k=tables.k;
72 		k2=tables.k2;
73 
74 		shift=2*k;
75 		shift2=shift-2;
76 		mask=(shift>63 ? -1L : ~((-1L)<<shift));
77 	}
78 
79 
80 	/*--------------------------------------------------------------*/
81 	/*----------------         Outer Methods        ----------------*/
82 	/*--------------------------------------------------------------*/
83 
84 
85 	/*--------------------------------------------------------------*/
86 	/*----------------         Inner Methods        ----------------*/
87 	/*--------------------------------------------------------------*/
88 
89 	@Override
initializeOwnership()90 	void initializeOwnership(){
91 		tables.initializeOwnership();
92 	}
93 
94 	@Override
shave(boolean shave, boolean rinse)95 	long shave(boolean shave, boolean rinse){
96 		long sum=0;
97 
98 		for(int i=0; i<maxShaveDepth; i++){
99 			int a=1, b=maxShaveDepth, c=i+1;
100 			//				if(i>3){Shaver.verbose2=true;}
101 			outstream.println("\nShave("+a+", "+b+", "+c+")");
102 			final Shaver shaver=Shaver.makeShaver(tables, THREADS, a, b, c, minCountExtend, branchMult2, Tools.max(minContigLen, shaveDiscardLen), shaveExploreDist, shave, rinse);
103 			long removed=shaver.shave(a, b);
104 			sum+=removed;
105 			if(removed<100 || i>2){break;}
106 		}
107 
108 		outstream.println();
109 		return sum;
110 	}
111 
112 	@Override
loadKmers(Timer t)113 	public long loadKmers(Timer t){
114 		tables.process(t);
115 		return tables.kmersLoaded;
116 	}
117 
118 
119 	/*--------------------------------------------------------------*/
120 	/*----------------        Recall Methods        ----------------*/
121 	/*--------------------------------------------------------------*/
122 
rcomp(long kmer)123 	final long rcomp(long kmer){return AminoAcid.reverseComplementBinaryFast(kmer, k);}
toValue(long kmer, long rkmer)124 	final long toValue(long kmer, long rkmer){return tables.toValue(kmer, rkmer);}
getCount(long kmer, long rkmer)125 	public final int getCount(long kmer, long rkmer){return tables.getCount(kmer, rkmer);}
claim(long kmer, int id)126 	final boolean claim(long kmer, int id){return claim(kmer, rcomp(kmer), id);}
claim(long kmer, long rkmer, int id)127 	final boolean claim(long kmer, long rkmer, int id){return tables.claim(kmer, rkmer, id);}
doubleClaim(ByteBuilder bb, int id )128 	final boolean doubleClaim(ByteBuilder bb, int id/*, long rid*/){return tables.doubleClaim(bb, id/*, rid*/);}
claim(ByteBuilder bb, int id, boolean earlyExit)129 	final boolean claim(ByteBuilder bb, int id, /*long rid, */boolean earlyExit){return tables.claim(bb, id/*, rid*/, earlyExit);}
claim(byte[] array, int len, int id, boolean earlyExit)130 	final boolean claim(byte[] array, int len, int id, /*long rid, */boolean earlyExit){return tables.claim(array, len, id/*, rid*/, earlyExit);}
findOwner(long kmer)131 	final int findOwner(long kmer){return tables.findOwner(kmer);}
findOwner(ByteBuilder bb, int id)132 	final int findOwner(ByteBuilder bb, int id){return tables.findOwner(bb, id);}
findOwner(byte[] array, int len, int id)133 	final int findOwner(byte[] array, int len, int id){return tables.findOwner(array, len, id);}
release(long key, int id)134 	final void release(long key, int id){tables.release(key, id);}
release(ByteBuilder bb, int id)135 	final void release(ByteBuilder bb, int id){tables.release(bb, id);}
release(byte[] array, int len, int id)136 	final void release(byte[] array, int len, int id){tables.release(array, len, id);}
fillRightCounts(long kmer, long rkmer, int[] counts)137 	final int fillRightCounts(long kmer, long rkmer, int[] counts){return tables.fillRightCounts(kmer, rkmer, counts, mask, shift2);}
fillLeftCounts(long kmer, long rkmer, int[] counts)138 	final int fillLeftCounts(long kmer, long rkmer, int[] counts){return tables.fillLeftCounts(kmer, rkmer, counts, mask, shift2);}
toText(long kmer)139 	final StringBuilder toText(long kmer){return AbstractKmerTable.toText(kmer, k);}
140 
141 	/*--------------------------------------------------------------*/
142 	/*----------------         Inner Classes        ----------------*/
143 	/*--------------------------------------------------------------*/
144 
145 	/*--------------------------------------------------------------*/
146 	/*----------------          BuildThread         ----------------*/
147 	/*--------------------------------------------------------------*/
148 
149 	@Override
makeBuildThread(int id, int mode, ConcurrentReadInputStream[] crisa)150 	BuildThread makeBuildThread(int id, int mode, ConcurrentReadInputStream[] crisa){
151 		return new BuildThread(id, mode, crisa);
152 	}
153 
154 	/**
155 	 * Builds contigs.
156 	 */
157 	private class BuildThread extends AbstractBuildThread{
158 
BuildThread(int id_, int mode_, ConcurrentReadInputStream[] crisa_)159 		public BuildThread(int id_, int mode_, ConcurrentReadInputStream[] crisa_){
160 			super(id_, mode_, crisa_);
161 		}
162 
163 		@Override
run()164 		public void run(){
165 			if(crisa==null || crisa.length==0){
166 				//Build from kmers
167 
168 				if(id==0){outstream.print("Seeding with min count = ");}
169 				String comma="";
170 				for(int i=contigPasses-1; i>0; i--){
171 					minCountSeedCurrent=(int)Tools.min(Integer.MAX_VALUE, Tools.max(minCountSeed+i, (long)Math.floor((minCountSeed)*Math.pow(contigPassMult, i)*0.92-0.25) ));
172 					if(id==0){
173 						outstream.print(comma+minCountSeedCurrent);
174 						comma=", ";
175 					}
176 					while(processNextTable(nextTable[i])){}
177 					while(processNextVictims(nextVictims[i])){}
178 				}
179 				//Final pass
180 				minCountSeedCurrent=minCountSeed;
181 				if(id==0){outstream.println(comma+minCountSeedCurrent);}
182 				while(processNextTable(nextTable[0])){}
183 				while(processNextVictims(nextVictims[0])){}
184 			}else{
185 				//Extend reads
186 				for(ConcurrentReadInputStream cris : crisa){
187 					synchronized(crisa){
188 						if(!cris.started()){
189 							cris.start();
190 						}
191 					}
192 					run(cris);
193 				}
194 			}
195 		}
196 
processNextTable(AtomicInteger aint)197 		private boolean processNextTable(AtomicInteger aint){
198 			final int tnum=aint.getAndAdd(1);
199 			if(tnum>=tables.ways){return false;}
200 			final HashArray1D table=tables.getTable(tnum);
201 			if(verbose2 && id==0){outstream.println("Processing table "+tnum+", size "+table.size());}
202 			final int max=table.arrayLength();
203 			for(int cell=0; cell<max; cell++){
204 				int x=processCell(table, cell);
205 			}
206 			return true;
207 		}
208 
processNextVictims(AtomicInteger aint)209 		private boolean processNextVictims(AtomicInteger aint){
210 			final int tnum=aint.getAndAdd(1);
211 			if(tnum>=tables.ways){return false;}
212 			final HashArray1D table=tables.getTable(tnum);
213 			final HashForest forest=table.victims();
214 			if(verbose2 && id==0){outstream.println("Processing forest "+tnum+", size "+forest.size());}
215 			final int max=forest.arrayLength();
216 			for(int cell=0; cell<max; cell++){
217 				KmerNode kn=forest.getNode(cell);
218 				int x=traverseKmerNode(kn);
219 			}
220 			return true;
221 		}
222 
processCell(HashArray1D table, int cell)223 		private int processCell(HashArray1D table, int cell){
224 			int count=table.readCellValue(cell);
225 			if(count<minCountSeedCurrent){return 0;}
226 
227 			long key=table.getKmer(cell);
228 
229 			if(verbose){outstream.println("id="+id+" processing cell "+cell+"; \tkmer="+key+"\t"+toText(key));}
230 			if(useOwnership){
231 				int owner=table.getCellOwner(cell);
232 				if(verbose){outstream.println("Owner is initially "+owner);}
233 				if(owner>-1){return 0;}
234 				owner=table.setOwner(key, id, cell);
235 				if(verbose){outstream.println("Owner is now "+owner);}
236 				if(owner!=id){return 0;}
237 			}
238 			return processKmer(key);
239 		}
240 
traverseKmerNode(KmerNode kn)241 		private int traverseKmerNode(KmerNode kn){
242 			int sum=0;
243 			if(kn!=null){
244 				sum+=processKmerNode(kn);
245 				if(kn.left()!=null){
246 					sum+=traverseKmerNode(kn.left());
247 				}
248 				if(kn.right()!=null){
249 					sum+=traverseKmerNode(kn.right());
250 				}
251 			}
252 			return sum;
253 		}
254 
processKmerNode(KmerNode kn)255 		private int processKmerNode(KmerNode kn){
256 			final long key=kn.pivot();
257 			final int count=kn.getValue(key);
258 			if(count<minCountSeedCurrent){return 0;}
259 
260 			if(verbose){outstream.println("id="+id+" processing KmerNode; \tkmer="+key+"\t"+toText(key));}
261 			if(useOwnership){
262 				int owner=kn.getOwner(key);
263 				if(verbose){outstream.println("Owner is initially "+owner);}
264 				if(owner>-1){return 0;}
265 				owner=kn.setOwner(key, id);
266 				if(verbose){outstream.println("Owner is now "+owner);}
267 				if(owner!=id){return 0;}
268 			}
269 			return processKmer(key);
270 		}
271 
processKmer(long key)272 		private int processKmer(long key){
273 			Contig contig=makeContig(key, builderT, true);
274 			if(contig!=null){
275 				float coverage=tables.calcCoverage(contig);
276 				if(coverage<minCoverage || coverage>maxCoverage){return 0;}
277 				if(verbose){outstream.println("Added "+contig.length());}
278 				contig.id=(int)contigNum.incrementAndGet();
279 				contigs.add(contig);
280 				return contig.length();
281 			}else{
282 				if(verbose){outstream.println("Created null contig.");}
283 			}
284 			return 0;
285 		}
286 
run(ConcurrentReadInputStream cris)287 		private void run(ConcurrentReadInputStream cris){
288 
289 			ListNum<Read> ln=cris.nextList();
290 			ArrayList<Read> reads=(ln!=null ? ln.list : null);
291 
292 			//While there are more reads lists...
293 			while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning
294 
295 				//For each read (or pair) in the list...
296 				for(int i=0; i<reads.size(); i++){
297 					final Read r1=reads.get(i);
298 					final Read r2=r1.mate;
299 
300 					processReadPair(r1, r2);
301 				}
302 
303 				//Fetch a new read list
304 				cris.returnList(ln);
305 				ln=cris.nextList();
306 				reads=(ln!=null ? ln.list : null);
307 			}
308 			cris.returnList(ln);
309 		}
310 
processReadPair(Read r1, Read r2)311 		private void processReadPair(Read r1, Read r2){
312 			if(verbose){outstream.println("Considering read "+r1.id+" "+new String(r1.bases));}
313 
314 			readsInT++;
315 			basesInT+=r1.length();
316 			if(r2!=null){
317 				readsInT++;
318 				basesInT+=r2.length();
319 			}
320 
321 			if(mode==insertMode){
322 				int x=BBMerge.findOverlapStrict(r1, r2, false);
323 				if(x<1){
324 					x=findInsertSize(r1, r2, rightCounts);
325 				}
326 				insertSizes.increment(Tools.max(x, 0));
327 				return;
328 			}
329 
330 			if(ecco && r1!=null && r2!=null && !r1.discarded() && !r2.discarded()){BBMerge.findOverlapStrict(r1, r2, true);}
331 
332 			if(r1!=null){
333 				if(r1.discarded()){
334 					lowqBasesT+=r1.length();
335 					lowqReadsT++;
336 				}else{
337 					byte[] bases=makeContig(r1.bases, builderT, r1.numericID);
338 					if(bases!=null){
339 						if(verbose){outstream.println("Added "+bases.length);}
340 						final long num=contigNum.incrementAndGet();
341 						Contig temp=new Contig(bases, "contig_"+num+"_length_"+bases.length, (int)num);
342 						contigs.add(temp);
343 					}
344 				}
345 			}
346 			if(r2!=null){
347 				if(r2.discarded()){
348 					lowqBasesT+=r2.length();
349 					lowqReadsT++;
350 				}else{
351 					byte[] bases=makeContig(r2.bases, builderT, r2.numericID);
352 					if(bases!=null){
353 						if(verbose){outstream.println("Added "+bases.length);}
354 						final long num=contigNum.incrementAndGet();
355 						Contig temp=new Contig(bases, "contig_"+num+"_length_"+bases.length, (int)num);
356 						contigs.add(temp);
357 					}
358 				}
359 			}
360 		}
361 
362 		/** From kmers */
makeContig(final long key, final ByteBuilder bb, boolean alreadyClaimed)363 		private Contig makeContig(final long key, final ByteBuilder bb, boolean alreadyClaimed){
364 			builderT.setLength(0);
365 			builderT.appendKmer(key, k);
366 			if(verbose){outstream.println("Filled builder: "+builderT);}
367 
368 			final int initialLength=bb.length();
369 			assert(initialLength==k);
370 			if(initialLength<k){return null;}
371 //			outstream.print("A");
372 
373 			boolean success=(alreadyClaimed || !useOwnership ? true : claim(key, id));
374 			if(verbose){outstream.println("Thread "+id+" checking owner after setting: "+findOwner(bb, id));}
375 			if(!success){
376 				assert(bb.length()==k);
377 //				release(bb, id); //no need to release
378 				return null;
379 			}
380 //			outstream.print("B");
381 			if(verbose  /*|| true*/){outstream.println("Thread "+id+" building contig; initial length "+bb.length());}
382 			if(verbose){outstream.println("Extending to right.");}
383 			final int rightStatus, leftStatus;
384 			float leftRatio=0, rightRatio=0;
385 			{
386 				final int status=extendToRight(bb, leftCounts, rightCounts, id);
387 
388 				if(status==DEAD_END){
389 					//do nothing
390 				}else if(status==LOOP){//TODO
391 					//special case - handle specially, for a loop with no obvious junction, e.g. long tandem repeat.
392 					//Perhaps, the last kmer should be reclassified as a junction and removed.
393 				}else if(status==BAD_SEED){
394 					assert(bb.length()==k);
395 					release(key, id);
396 //					outstream.print("B1");
397 					return null;
398 				}else{
399 					if(bb.length()==k){
400 						if(status==BAD_OWNER){
401 							if(IGNORE_BAD_OWNER){
402 								//do nothing
403 							}else{
404 								release(key, id);
405 //								outstream.print("B2");
406 								return null;
407 							}
408 						}else if(isBranchCode(status)){
409 							release(key, id);
410 //							outstream.print("B3");
411 							return null;
412 						}else{
413 							throw new RuntimeException("Bad return value: "+status);
414 						}
415 					}else{
416 						if(status==BAD_OWNER){
417 							if(IGNORE_BAD_OWNER){
418 								bb.length--;
419 							}else{
420 								release(bb, id);
421 //								outstream.print("B4");
422 								return null;
423 							}
424 						}else if(status==F_BRANCH || status==D_BRANCH){
425 							rightRatio=calcRatio(rightCounts);
426 						}else if(status==B_BRANCH){
427 							rightRatio=calcRatio(leftCounts);
428 						}else{
429 							throw new RuntimeException("Bad return value: "+status);
430 						}
431 					}
432 				}
433 				rightStatus=status;
434 			}
435 //			outstream.print("C");
436 			bb.reverseComplementInPlace();
437 			if(verbose  /*|| true*/){outstream.println("Extending rcomp to right; current length "+bb.length());}
438 			{
439 				final int status=extendToRight(bb, leftCounts, rightCounts, id);
440 
441 				if(status==DEAD_END){
442 					//do nothing
443 				}else if(status==LOOP){//TODO
444 					//special case - handle specially, for a loop with no obvious junction, e.g. long tandem repeat.
445 					//Perhaps, the last kmer should be reclassified as a junction and removed.
446 				}else if(status==BAD_SEED){
447 					assert(false) : bb;//This should never happen.
448 					assert(bb.length()==k);
449 					release(key, id);
450 					return null;
451 				}else{
452 					if(status==BAD_OWNER){
453 						if(IGNORE_BAD_OWNER){
454 							if(bb.length()>k){bb.length--;}
455 						}else{
456 							release(bb, id);
457 //							outstream.print("C1");
458 							return null;
459 						}
460 					}else if(status==F_BRANCH || status==D_BRANCH){
461 						leftRatio=calcRatio(rightCounts);
462 					}else if(status==B_BRANCH){
463 						leftRatio=calcRatio(leftCounts);
464 					}else{
465 						throw new RuntimeException("Bad return value: "+status);
466 					}
467 				}
468 				leftStatus=status;
469 			}
470 //			outstream.print("D");
471 
472 			if(verbose  /*|| true*/){outstream.println("A: Final length for thread "+id+": "+bb.length());}
473 
474 			//				if(useOwnership && THREADS==1){assert(claim(bases, bases.length, id, rid));}
475 			success=(useOwnership ? doubleClaim(bb, id) : true);
476 			if(verbose  /*|| true*/){outstream.println("Success for thread "+id+": "+success);}
477 
478 			if(trimEnds>0){bb.trimByAmount(trimEnds, trimEnds);}
479 			else if(trimCircular && leftStatus==LOOP && rightStatus==LOOP){bb.trimByAmount(0, k-1);}
480 			if(joinContigs || (bb.length()>=initialLength+minExtension && (bb.length()>=minContigLen || popBubbles))){
481 				if(success){
482 					bb.reverseComplementInPlace();
483 					byte[] bases=bb.toBytes();
484 					Contig c=new Contig(bases);
485 					c.leftCode=leftStatus;
486 					c.rightCode=rightStatus;
487 					c.rightRatio=rightRatio;
488 					c.leftRatio=leftRatio;
489 					if(!c.canonical()){c.rcomp();}
490 					return c;
491 				}else{
492 //					outstream.print("E");
493 					//					assert(false) : bb.length()+", "+id;
494 					release(bb, id);
495 					return null;
496 				}
497 			}
498 			if(verbose  /*|| true*/){outstream.println("A: Contig was too short for "+id+": "+bb.length());}
499 //			assert(false) : bb.length()+", "+initialLength+", "+minExtension+", "+minContigLen;
500 //			outstream.print("F");
501 			return null;
502 		}
503 
504 		/** From a seed */
makeContig(final byte[] bases, final ByteBuilder bb, long rid)505 		private byte[] makeContig(final byte[] bases, final ByteBuilder bb, long rid){
506 			if(bases==null || bases.length<k){return null;}
507 //			if(verbose  /*|| true*/){outstream.println("Thread "+id+" checking owner: "+findOwner(bases, bases.length, id));}
508 			int owner=useOwnership ? findOwner(bases, bases.length, id) : -1;
509 			if(owner>=id){return null;}
510 			boolean success=(useOwnership ? claim(bases, bases.length, id, true) : true);
511 			if(verbose  /*|| true*/){outstream.println("Thread "+id+" checking owner after setting: "+findOwner(bases, bases.length, id));}
512 			if(!success){
513 				release(bases, bases.length, id);
514 				return null;
515 			}
516 			if(verbose  /*|| true*/){outstream.println("Thread "+id+" building contig; initial length "+bases.length);}
517 			bb.setLength(0);
518 			bb.append(bases);
519 			if(verbose){outstream.println("Extending to right.");}
520 			{
521 				final int status=extendToRight(bb, leftCounts, rightCounts, id);
522 
523 				if(status==DEAD_END){
524 					//do nothing
525 				}else if(status==LOOP){//TODO
526 					//special case - handle specially, for a loop with no obvious junction, e.g. long tandem repeat.
527 					//Perhaps, the last kmer should be reclassified as a junction and removed.
528 				}else if(status==BAD_SEED){
529 					//do nothing
530 				}else{
531 					if(status==BAD_OWNER){
532 						release(bb, id);
533 						return null;
534 					}else if(isBranchCode(status)){
535 						//do nothing
536 					}else{
537 						throw new RuntimeException("Bad return value: "+status);
538 					}
539 				}
540 			}
541 			bb.reverseComplementInPlace();
542 			if(verbose  /*|| true*/){outstream.println("Extending rcomp to right; current length "+bb.length());}
543 			{
544 				final int status=extendToRight(bb, leftCounts, rightCounts, id);
545 
546 				if(status==DEAD_END){
547 					//do nothing
548 				}else if(status==LOOP){//TODO
549 					//special case - handle specially, for a loop with no obvious junction, e.g. long tandem repeat.
550 					//Perhaps, the last kmer should be reclassified as a junction and removed.
551 				}else if(status==BAD_SEED){
552 					//do nothing
553 				}else{
554 					if(status==BAD_OWNER){
555 						release(bb, id);
556 						return null;
557 					}else if(isBranchCode(status)){
558 						//do nothing
559 					}else{
560 						throw new RuntimeException("Bad return value: "+status);
561 					}
562 				}
563 			}
564 			if(verbose  /*|| true*/){outstream.println("B: Final length for thread "+id+": "+bb.length());}
565 
566 			//				if(useOwnership && THREADS==1){assert(claim(bases, bases.length, id, rid));}
567 			success=(useOwnership ? doubleClaim(bb, id) : true);
568 			if(verbose  /*|| true*/){outstream.println("Success for thread "+id+": "+success);}
569 			if(bb.length()>=bases.length+minExtension && (bb.length()>=minContigLen || popBubbles)){
570 				if(success){
571 					bb.reverseComplementInPlace();
572 					return bb.toBytes();
573 				}else{
574 					//					assert(false) : bb.length()+", "+id;
575 					release(bb.array, bb.length(), id);
576 					return null;
577 				}
578 			}
579 
580 			if(verbose  /*|| true*/){outstream.println("B: Contig was too short for "+id+": "+bb.length());}
581 			return null;
582 		}
583 
584 	}
585 
586 
587 	/*--------------------------------------------------------------*/
588 	/*----------------       Contig Processing      ----------------*/
589 	/*--------------------------------------------------------------*/
590 
591 	@Override
makeProcessContigThread(ArrayList<Contig> contigs, AtomicInteger next)592 	ProcessContigThread makeProcessContigThread(ArrayList<Contig> contigs, AtomicInteger next){
593 		return new ProcessContigThread(contigs, next);
594 	}
595 
596 	@Override
initializeContigs(ArrayList<Contig> contigs)597 	public void initializeContigs(ArrayList<Contig> contigs){
598 		tables.clearOwnership();
599 		tables.initializeOwnership();
600 		{
601 			int cnum=0;
602 			for(Contig c : contigs){
603 				c.id=cnum;
604 //				if(c.leftBranch()){
605 				if(true){
606 					long kmer=c.leftKmer(k);
607 					tables.claim(kmer, rcomp(kmer), cnum);
608 				}
609 //				if(c.rightBranch()){
610 				if(true){
611 					long kmer=c.rightKmer(k);
612 					tables.claim(kmer, rcomp(kmer), cnum);
613 				}
614 				cnum++;
615 			}
616 		}
617 	}
618 
619 	class ProcessContigThread extends AbstractProcessContigThread {
620 
ProcessContigThread(ArrayList<Contig> contigs_, AtomicInteger next_)621 		ProcessContigThread(ArrayList<Contig> contigs_, AtomicInteger next_){
622 			super(contigs_, next_);
623 			lastExitCondition=BAD_SEED;
624 		}
625 
626 		@Override
processContigLeft(Contig c, int[] leftCounts, int[] rightCounts, int[] extraCounts, ByteBuilder bb)627 		public void processContigLeft(Contig c, int[] leftCounts, int[] rightCounts, int[] extraCounts, ByteBuilder bb){
628 			if(c.leftCode==DEAD_END){return;}
629 
630 			final long kmer0=c.leftKmer(k);
631 			final long rkmer0=rcomp(kmer0);
632 
633 			assert(tables.getCount(kmer0, rkmer0)>0);
634 			assert(tables.findOwner(kmer0)==c.id) : tables.findOwner(kmer0)+", "+c.id;
635 //			System.err.println(tables.findOwner(kmer0));
636 
637 			int leftMaxPos=fillLeftCounts(kmer0, rkmer0, leftCounts);
638 			int leftMax=leftCounts[leftMaxPos];
639 			int leftSecondPos=Tools.secondHighestPosition(leftCounts);
640 			int leftSecond=leftCounts[leftSecondPos];
641 
642 			for(int x=0; x<leftCounts.length; x++){
643 				bb.clear();
644 				final int count=leftCounts[x];
645 				int target=-1;
646 				if(count>0 && isJunction(leftMax, count)){
647 					long x2=3-x;
648 					long rkmer=((rkmer0<<2)|(long)x2)&mask;
649 					long kmer=(kmer0>>>2)|(((long)x)<<shift2);
650 					assert(kmer==rcomp(rkmer));
651 					assert(tables.getCount(kmer, rkmer)==count) : count+", "+tables.getCount(kmer, rkmer);
652 					bb.append(AminoAcid.numberToBase[x]);
653 					target=exploreRight(rkmer, kmer, extraCounts, rightCounts, bb);
654 					if(verbose){
655 						outstream.println(c.id+"L_F: x="+x+", cnt="+count+", dest="+target
656 								+", "+codeStrings[lastExitCondition]+", len="+lastLength+", orient="+lastOrientation);
657 					}
658 				}
659 				if(target>=0){
660 					Edge se=new Edge(c.id, target, lastLength, lastOrientation, count, bb.toBytes());
661 					c.addLeftEdge(se);
662 					edgesMadeT++;
663 				}
664 			}
665 		}
666 
667 		@Override
processContigRight(Contig c, int[] leftCounts, int[] rightCounts, int[] extraCounts, ByteBuilder bb)668 		public void processContigRight(Contig c, int[] leftCounts, int[] rightCounts, int[] extraCounts, ByteBuilder bb){
669 			if(c.rightCode==DEAD_END){return;}
670 
671 			final long kmer0=c.rightKmer(k);
672 			final long rkmer0=rcomp(kmer0);
673 
674 			int rightMaxPos=fillRightCounts(kmer0, rkmer0, rightCounts);
675 			int rightMax=rightCounts[rightMaxPos];
676 			int rightSecondPos=Tools.secondHighestPosition(rightCounts);
677 			int rightSecond=rightCounts[rightSecondPos];
678 
679 			for(int x=0; x<rightCounts.length; x++){
680 				bb.clear();
681 				final int count=rightCounts[x];
682 				int target=-1;
683 				if(count>0 && isJunction(rightMax, count)){
684 					long x2=3-x;
685 					long kmer=((kmer0<<2)|(long)x)&mask;
686 					long rkmer=(rkmer0>>>2)|(((long)x2)<<shift2);
687 					assert(kmer==rcomp(rkmer));
688 					assert(tables.getCount(kmer, rkmer)==count) : count+", "+tables.getCount(kmer, rkmer);
689 					bb.append(AminoAcid.numberToBase[x]);
690 					target=exploreRight(kmer, rkmer, leftCounts, extraCounts, bb);
691 					if(verbose){
692 						outstream.println(c.id+"R_F: x="+x+", cnt="+count+", dest="+target+", "+codeStrings[lastExitCondition]+", len="+lastLength+", orient="+lastOrientation);
693 					}
694 				}
695 				if(target>=0){
696 					lastOrientation|=1;
697 					Edge se=new Edge(c.id, target, lastLength, lastOrientation, count, bb.toBytes());
698 					c.addRightEdge(se);
699 					edgesMadeT++;
700 				}
701 			}
702 		}
703 
exploreRight(long kmer, long rkmer, int[] leftCounts, int[] rightCounts, ByteBuilder bb)704 		private int exploreRight(long kmer, long rkmer, int[] leftCounts, int[] rightCounts, ByteBuilder bb){
705 			int length=1;
706 			int owner=-1;
707 			lastTarget=-1;
708 			for(; length<500; length++){
709 				owner=tables.findOwner(kmer, rkmer);
710 				if(owner>=0){break;}
711 
712 				final int leftMaxPos=fillLeftCounts(kmer, rkmer, leftCounts);
713 				final int leftMax=leftCounts[leftMaxPos];
714 				final int leftSecondPos=Tools.secondHighestPosition(leftCounts);
715 				final int leftSecond=leftCounts[leftSecondPos];
716 				if(isJunction(leftMax, leftSecond)){
717 					lastExitCondition=B_BRANCH;
718 					lastLength=length;
719 					return -1;
720 				}
721 
722 				final int rightMaxPos=fillRightCounts(kmer, rkmer, rightCounts);
723 				final int rightMax=rightCounts[rightMaxPos];
724 				final int rightSecondPos=Tools.secondHighestPosition(rightCounts);
725 				final int rightSecond=rightCounts[rightSecondPos];
726 
727 //				outstream.println("* "+Arrays.toString(leftCounts)+", "+Arrays.toString(rightCounts)+", "+rightMaxPos);
728 
729 				if(rightMax<minCountExtend){
730 //					assert(false) : Arrays.toString(rightCounts);
731 					lastExitCondition=DEAD_END;
732 					lastLength=length;
733 					return -1;
734 				}else if(isJunction(rightMax, rightSecond)){
735 					lastExitCondition=F_BRANCH;
736 					lastLength=length;
737 					return -1;
738 				}
739 				bb.append(AminoAcid.numberToBase[rightMaxPos]);
740 				long x=rightMaxPos;
741 				long x2=3-x;
742 				kmer=((kmer<<2)|x)&mask;
743 				rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
744 			}
745 			lastLength=length;
746 			lastTarget=owner;
747 			if(owner>=0){
748 				lastExitCondition=SUCCESS;
749 				Contig dest=contigs.get(owner);
750 				long left=dest.leftKmer(k);
751 //				if(left==kmer){
752 //					lastOrientation=0;
753 //				}else if(left==rkmer){
754 //					lastOrientation=1;
755 //				}else{
756 //					long right=dest.rightKmer(k);
757 //					if(right==kmer){
758 //						lastOrientation=2;
759 //					}else if(right==rkmer){
760 //						lastOrientation=3;
761 //					}
762 //				}
763 				if(left==kmer || left==rkmer){
764 					lastOrientation=0;
765 				}else{
766 					long right=dest.rightKmer(k);
767 					if(right==kmer || right==rkmer){
768 						lastOrientation=2;
769 					}else{
770 						assert(false);
771 					}
772 				}
773 			}else{
774 				lastExitCondition=TOO_LONG;
775 			}
776 			return owner;
777 		}
778 
779 	}
780 
781 	/*--------------------------------------------------------------*/
782 	/*----------------       Extension Methods      ----------------*/
783 	/*--------------------------------------------------------------*/
784 
785 
findInsertSize(Read r1, Read r2, int[] rightCounts)786 	public int findInsertSize(Read r1, Read r2, int[] rightCounts){
787 		final long kmer1=tables.rightmostKmer(r1.bases, r1.length());
788 		final long kmer2=tables.rightmostKmer(r2.bases, r2.length());
789 		if(kmer1<0 || kmer2<0){return -1;}
790 		final long rkmer1=rcomp(kmer1);
791 		final long rkmer2=rcomp(kmer2);
792 		final int x=measureInsert(kmer1, rkmer1, kmer2, rkmer2, 24000, rightCounts);
793 		if(x<0){return -1;}
794 		return r1.length()+r2.length()+x-k;//TODO: May be off by 1.
795 	}
796 
797 	@Override
extendRead(Read r, ByteBuilder bb, int[] leftCounts, int[] rightCounts, int distance, final Kmer kmer)798 	public int extendRead(Read r, ByteBuilder bb, int[] leftCounts, int[] rightCounts, int distance, final Kmer kmer){
799 		return extendRead(r, bb, leftCounts, rightCounts, distance);
800 	}
801 
802 	@Override
extendRead(Read r, ByteBuilder bb, int[] leftCounts, int[] rightCounts, int distance)803 	public int extendRead(Read r, ByteBuilder bb, int[] leftCounts, int[] rightCounts, int distance){
804 		final int initialLen=r.length();
805 		if(initialLen<k){return 0;}
806 		bb.setLength(0);
807 		bb.append(r.bases);
808 		final int extension=extendToRight2(bb, leftCounts, rightCounts, distance, true);
809 		if(extension>0){
810 			r.bases=bb.toBytes();
811 			if(r.quality!=null){
812 				final byte q=Shared.FAKE_QUAL;
813 				r.quality=KillSwitch.copyOf(r.quality, r.bases.length);
814 				for(int i=initialLen; i<r.quality.length; i++){
815 					r.quality[i]=q;
816 				}
817 			}
818 		}
819 		assert(extension==r.length()-initialLen);
820 		return extension;
821 	}
822 
823 	/** Returns distance between the two kmers, or -1 */
measureInsert(final long kmer1, final long rkmer1, final long kmer2, final long rkmer2, final int maxlen, final int[] rightCounts)824 	public int measureInsert(final long kmer1, final long rkmer1, final long kmer2, final long rkmer2, final int maxlen, final int[] rightCounts){
825 		final long key2=toValue(kmer2, rkmer2);
826 		long kmer=kmer1;
827 		long rkmer=rkmer1;
828 		int len=0;
829 
830 		{
831 			int count=tables.getCount(key2);
832 			if(count<minCountSeed){return -1;}
833 		}
834 
835 		long key=toValue(kmer, rkmer);
836 		int count=tables.getCount(key);
837 		if(count<minCountSeed){return -1;}
838 		if(count<minCountSeed){
839 			if(verbose){outstream.println("Returning because count was too low: "+count);}
840 			return -1;
841 		}
842 
843 		int rightMaxPos=fillRightCounts(kmer, rkmer, rightCounts);
844 		int rightMax=rightCounts[rightMaxPos];
845 //		int rightSecondPos=Tools.secondHighestPosition(rightCounts);
846 //		int rightSecond=rightCounts[rightSecondPos];
847 
848 		if(rightMax<minCountExtend){return -1;}
849 //		if(isJunction(rightMax, rightSecond)){return -1;}
850 
851 		while(key!=key2 && len<maxlen){
852 
853 			//Generate the new kmer
854 //			final byte b=AminoAcid.numberToBase[rightMaxPos];
855 			final long x=rightMaxPos;
856 			final long x2=AminoAcid.numberToComplement[(int)x];
857 
858 			//Now consider the next kmer
859 			kmer=((kmer<<2)|(long)x)&mask;
860 			rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
861 
862 			assert(tables.getCount(kmer, rkmer)==rightMax);
863 			count=rightMax;
864 
865 			assert(count>=minCountExtend) : count;
866 
867 			rightMaxPos=fillRightCounts(kmer, rkmer, rightCounts);
868 			rightMax=rightCounts[rightMaxPos];
869 //			rightSecondPos=Tools.secondHighestPosition(rightCounts);
870 //			rightSecond=rightCounts[rightSecondPos];
871 
872 			if(verbose){
873 				outstream.println("kmer: "+toText(kmer)+", "+toText(rkmer));
874 				outstream.println("Counts: "+count+", "+Arrays.toString(rightCounts));
875 				outstream.println("rightMaxPos="+rightMaxPos);
876 				outstream.println("rightMax="+rightMax);
877 //				outstream.println("rightSecondPos="+rightSecondPos);
878 //				outstream.println("rightSecond="+rightSecond);
879 			}
880 
881 			if(rightMax<minCountExtend){
882 				if(verbose){outstream.println("A: Breaking because highest right was too low:"+rightMax);}
883 				break;
884 			}
885 
886 //			if(isJunction(rightMax, rightSecond)){return -1;}
887 
888 			len++;
889 		}
890 		return len>=maxlen ? -1 : len;
891 	}
892 
893 
894 
895 	/**
896 	 * Extend these bases into a contig.
897 	 * Stops at both left and right junctions.
898 	 * Claims ownership.
899 	 */
extendToRight(final ByteBuilder bb, final int[] leftCounts, final int[] rightCounts, final int id)900 	public int extendToRight(final ByteBuilder bb, final int[] leftCounts, final int[] rightCounts, final int id){
901 		if(bb.length()<k){return BAD_SEED;}
902 		long kmer=0;
903 		long rkmer=0;
904 		int len=0;
905 
906 		/* Loop through the bases, maintaining a forward and reverse kmer via bitshifts, to get the rightmost kmer */
907 		{
908 			final int bblen=bb.length();
909 			final byte[] bases=bb.array;
910 			for(int i=bblen-k; i<bblen; i++){
911 				final byte b=bases[i];
912 				final long x=AminoAcid.baseToNumber[b];
913 				final long x2=AminoAcid.baseToComplementNumber[b];
914 				kmer=((kmer<<2)|x)&mask;
915 				rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
916 				if(x<0){
917 					len=0;
918 					kmer=rkmer=0;
919 				}else{len++;}
920 				if(verbose){outstream.println("A: Scanning i="+i+", len="+len+", kmer="+kmer+", rkmer="+rkmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
921 			}
922 		}
923 
924 		if(len<k){return BAD_SEED;}
925 		else{assert(len==k);}
926 
927 		/* Now the trailing kmer has been initialized. */
928 
929 		long key=toValue(kmer, rkmer);
930 		HashArray1D table=tables.getTableForKey(key);
931 		int count=table.getValue(key);
932 		if(count<minCountSeed){
933 			if(verbose){outstream.println("Returning because count was too low: "+count);}
934 			return BAD_SEED;
935 		}
936 
937 		int owner=(useOwnership ? table.getOwner(key) : id);
938 		if(verbose){outstream.println("Owner: "+owner);}
939 		if(owner>id){return BAD_OWNER;}
940 
941 		int leftMaxPos=0;
942 		int leftMax=minCountExtend;
943 		int leftSecondPos=1;
944 		int leftSecond=0;
945 
946 		if(leftCounts!=null){
947 			leftMaxPos=fillLeftCounts(kmer, rkmer, leftCounts);
948 			leftMax=leftCounts[leftMaxPos];
949 			leftSecondPos=Tools.secondHighestPosition(leftCounts);
950 			leftSecond=leftCounts[leftSecondPos];
951 		}
952 
953 		int rightMaxPos=fillRightCounts(kmer, rkmer, rightCounts);
954 		int rightMax=rightCounts[rightMaxPos];
955 		int rightSecondPos=Tools.secondHighestPosition(rightCounts);
956 		int rightSecond=rightCounts[rightSecondPos];
957 
958 		if(verbose){
959 			outstream.println("kmer: "+toText(kmer)+", "+toText(rkmer));
960 			outstream.println("Counts: "+count+", "+(leftCounts==null ? "null" : Arrays.toString(leftCounts))+", "+Arrays.toString(rightCounts));
961 			outstream.println("leftMaxPos="+leftMaxPos);
962 			outstream.println("leftMax="+leftMax);
963 			outstream.println("leftSecondPos="+leftSecondPos);
964 			outstream.println("leftSecond="+leftSecond);
965 			outstream.println("rightMaxPos="+rightMaxPos);
966 			outstream.println("rightMax="+rightMax);
967 			outstream.println("rightSecondPos="+rightSecondPos);
968 			outstream.println("rightSecond="+rightSecond);
969 		}
970 
971 		if(rightMax<minCountExtend){return DEAD_END;}
972 		if(isJunction(rightMax, rightSecond)){//Returning here is fine because nothing can be added
973 			if(verbose){outstream.println("B: Breaking because isJunction("+rightMax+", "+rightSecond+", "+leftMax+", "+leftSecond+")");}
974 			return isJunction(leftMax, leftSecond) ? D_BRANCH : F_BRANCH;
975 		}
976 		if(isJunction(leftMax, leftSecond)){//Returning here is necessary, but this should mean the the length is exactly K
977 			assert(bb.length()==k) : bb.length()+", "+k+", "+leftMax+", "+leftSecond;
978 			if(verbose){outstream.println("B: Breaking because isJunction("+rightMax+", "+rightSecond+", "+leftMax+", "+leftSecond+")");}
979 			return B_BRANCH;
980 		}
981 
982 		if(useOwnership){
983 			owner=table.setOwner(key, id);
984 			if(verbose){outstream.println("A. Owner is now "+id+" for key "+key);}
985 			if(owner!=id){
986 				if(verbose){outstream.println("Returning early because owner was "+owner+" for thread "+id+".");}
987 				return BAD_OWNER;
988 			}
989 		}
990 
991 		final int maxLen=Tools.min((extendRight<0 ? maxContigLen : bb.length()+extendRight), maxContigLen);
992 
993 		while(owner==id && bb.length()<maxLen){
994 
995 			//Generate the new kmer
996 			final byte b=AminoAcid.numberToBase[rightMaxPos];
997 			final long x=rightMaxPos;
998 			final long x2=AminoAcid.numberToComplement[(int)x];
999 
1000 			final long evicted=(kmer>>>shift2); //Binary value that falls off the end.
1001 
1002 			//Now consider the next kmer
1003 			kmer=((kmer<<2)|(long)x)&mask;
1004 			rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
1005 
1006 			key=toValue(kmer, rkmer);
1007 			table=tables.getTableForKey(key);
1008 
1009 			assert(table.getValue(key)==rightMax || rightMax==0) : key+", "+table.getValue(key)+", "+rightMax+", "+Arrays.toString(rightCounts);
1010 			count=rightMax;
1011 
1012 			assert(count>=minCountExtend) : count;
1013 
1014 			if(leftCounts!=null){
1015 				leftMaxPos=fillLeftCounts(kmer, rkmer, leftCounts);
1016 				leftMax=leftCounts[leftMaxPos];
1017 				leftSecondPos=Tools.secondHighestPosition(leftCounts);
1018 				leftSecond=leftCounts[leftSecondPos];
1019 			}
1020 
1021 			rightMaxPos=fillRightCounts(kmer, rkmer, rightCounts);
1022 			rightMax=rightCounts[rightMaxPos];
1023 			rightSecondPos=Tools.secondHighestPosition(rightCounts);
1024 			rightSecond=rightCounts[rightSecondPos];
1025 
1026 			if(verbose){
1027 				outstream.println("kmer: "+toText(kmer)+", "+toText(rkmer));
1028 				outstream.println("Counts: "+count+", "+(leftCounts==null ? "null" : Arrays.toString(leftCounts))+", "+Arrays.toString(rightCounts));
1029 				outstream.println("leftMaxPos="+leftMaxPos);
1030 				outstream.println("leftMax="+leftMax);
1031 				outstream.println("leftSecondPos="+leftSecondPos);
1032 				outstream.println("leftSecond="+leftSecond);
1033 				outstream.println("rightMaxPos="+rightMaxPos);
1034 				outstream.println("rightMax="+rightMax);
1035 				outstream.println("rightSecondPos="+rightSecondPos);
1036 				outstream.println("rightSecond="+rightSecond);
1037 			}
1038 
1039 			final boolean fbranch=isJunction(rightMax, rightSecond);
1040 			final boolean bbranch=isJunction(leftMax, leftSecond);
1041 			final boolean hbranch=(leftCounts!=null && leftMaxPos!=evicted && branchMult1>0);
1042 			if(bbranch){
1043 				if(verbose){outstream.println("B: Breaking - isJunction("+rightMax+", "+rightSecond+", "+leftMax+", "+leftSecond+"); "
1044 						+ "("+fbranch+", "+bbranch+", "+hbranch+")");}
1045 				return fbranch ? D_BRANCH : B_BRANCH;
1046 			}else if(hbranch){
1047 				if(verbose){outstream.println("B: Breaking - isJunction("+rightMax+", "+rightSecond+", "
1048 						+ ""+leftMax+", "+leftSecond+"); ("+fbranch+", "+bbranch+", "+hbranch+")");}
1049 				if(verbose){outstream.println("Hidden branch: leftMaxPos!=evicted ("+leftMaxPos+"!="+evicted+")" +
1050 						"\nleftMaxPos="+leftMaxPos+", leftMax="+leftMax+", leftSecondPos="+leftSecondPos+", leftSecond="+leftSecond);}
1051 				return fbranch ? D_BRANCH : B_BRANCH;
1052 			}
1053 
1054 			bb.append(b);
1055 			if(verbose){outstream.println("Added base "+(char)b);}
1056 
1057 			if(useOwnership){
1058 				owner=table.getOwner(key);
1059 				if(verbose){outstream.println("Owner is initially "+id+" for key "+key);}
1060 				if(owner==id){//loop detection
1061 					if(verbose  /*|| true*/){
1062 //						outstream.println(new String(bb.array, bb.length()-31, 31));
1063 						outstream.println(bb);
1064 						outstream.println(toText(kmer));
1065 						outstream.println(toText(rkmer));
1066 						outstream.println("Breaking because owner was "+owner+" for thread "+id+".");
1067 					}
1068 					return fbranch ? F_BRANCH : LOOP;
1069 				}
1070 				owner=table.setOwner(key, id);
1071 				if(verbose){outstream.println("B. Owner is now "+id+" for key "+key);}
1072 			}
1073 
1074 			if(fbranch){
1075 				if(verbose){outstream.println("B: Breaking - isJunction("+rightMax+", "+rightSecond+", "+leftMax+", "+leftSecond+"); "
1076 						+ "("+fbranch+", "+bbranch+", "+hbranch+")");}
1077 				return F_BRANCH;
1078 			}else if(rightMax<minCountExtend){
1079 				if(verbose){outstream.println("B: Breaking because highest right was too low:"+rightMax);}
1080 				return DEAD_END;
1081 			}
1082 		}
1083 		assert(owner!=id);
1084 		if(verbose  /*|| true*/){
1085 			outstream.println("Current contig: "+bb+"\nReturning because owner was "+owner+" for thread "+id+".");
1086 		}
1087 		return BAD_OWNER;
1088 	}
1089 
1090 	@Override
extendToRight2(final ByteBuilder bb, final int[] leftCounts, final int[] rightCounts, final int distance, boolean includeJunctionBase)1091 	public int extendToRight2(final ByteBuilder bb, final int[] leftCounts, final int[] rightCounts, final int distance, boolean includeJunctionBase){
1092 		if(verbose || verbose2){outstream.println("Entering extendToRight2 (no kmers).");}
1093 		final int initialLength=bb.length();
1094 		if(initialLength<k){return 0;}
1095 		long kmer=0;
1096 		long rkmer=0;
1097 
1098 		/* Loop through the bases, maintaining a forward and reverse kmer via bitshifts, to get the rightmost kmer */
1099 		{
1100 			int len=0;
1101 			final byte[] bases=bb.array;
1102 			for(int i=initialLength-k; i<initialLength; i++){
1103 				final byte b=bases[i];
1104 				final long x=AminoAcid.baseToNumber[b];
1105 				final long x2=AminoAcid.baseToComplementNumber[b];
1106 				kmer=((kmer<<2)|x)&mask;
1107 				rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
1108 				if(x<0){
1109 					len=0;
1110 					kmer=rkmer=0;
1111 				}else{len++;}
1112 				if(verbose){outstream.println("B: Scanning i="+i+", len="+len+", kmer="+kmer+", rkmer="+rkmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
1113 			}
1114 			if(len<k){
1115 				if(verbose || verbose2){outstream.println("Returning because len<k: "+len+"<"+k);}
1116 				return 0;
1117 			}
1118 			else{assert(len==k);}
1119 		}
1120 		return extendToRight2(bb, leftCounts, rightCounts, distance, includeJunctionBase, kmer, rkmer);
1121 	}
1122 
1123 	/**
1124 	 * Extend these bases to the right by at most 'distance'.
1125 	 * Stops at right junctions only.
1126 	 * Does not claim ownership.
1127 	 */
extendToRight2(final ByteBuilder bb, final int[] leftCounts, final int[] rightCounts, final int distance, boolean includeJunctionBase, long kmer, long rkmer)1128 	public int extendToRight2(final ByteBuilder bb, final int[] leftCounts, final int[] rightCounts, final int distance, boolean includeJunctionBase,
1129 			long kmer, long rkmer){
1130 		if(verbose || verbose2){outstream.println("Entering extendToRight2 (with kmers).");}
1131 		final int initialLength=bb.length();
1132 
1133 		/* Now the trailing kmer has been initialized. */
1134 
1135 		long key=toValue(kmer, rkmer);
1136 		HashArray1D table=tables.getTableForKey(key);
1137 		int count=table.getValue(key);
1138 		if(count<minCountSeed){
1139 			if(verbose || verbose2){outstream.println("Returning because count was too low: "+count+"<"+minCountSeed);}
1140 			return 0;
1141 		}
1142 
1143 		int leftMaxPos=0;
1144 		int leftMax=minCountExtend;
1145 		int leftSecondPos=1;
1146 		int leftSecond=0;
1147 
1148 		if(leftCounts!=null){
1149 			leftMaxPos=fillLeftCounts(kmer, rkmer, leftCounts);
1150 			leftMax=leftCounts[leftMaxPos];
1151 			leftSecondPos=Tools.secondHighestPosition(leftCounts);
1152 			leftSecond=leftCounts[leftSecondPos];
1153 		}
1154 
1155 		int rightMaxPos=fillRightCounts(kmer, rkmer, rightCounts);
1156 		int rightMax=rightCounts[rightMaxPos];
1157 		int rightSecondPos=Tools.secondHighestPosition(rightCounts);
1158 		int rightSecond=rightCounts[rightSecondPos];
1159 
1160 		if(verbose){
1161 			outstream.println("kmer: "+toText(kmer)+", "+toText(rkmer));
1162 			outstream.println("Counts: "+count+", "+Arrays.toString(rightCounts));
1163 			outstream.println("rightMaxPos="+rightMaxPos);
1164 			outstream.println("rightMax="+rightMax);
1165 			outstream.println("rightSecondPos="+rightSecondPos);
1166 			outstream.println("rightSecond="+rightSecond);
1167 		}
1168 
1169 		if(rightMax<minCountExtend){
1170 			if(verbose || verbose2){outstream.println("Returning because rightMax was too low: "+rightMax+"<"+minCountExtend+"\n"+count+", "+Arrays.toString(rightCounts));}
1171 			return 0;
1172 		}
1173 		if(isJunction(rightMax, rightSecond, leftMax, leftSecond)){
1174 			if(verbose || verbose2){outstream.println("Returning because isJunction: "+rightMax+", "+rightSecond+"; "+leftMax+", "+leftSecond);}
1175 			return 0;
1176 		}
1177 
1178 		final int maxLen=Tools.min(bb.length()+distance, maxContigLen);
1179 
1180 		while(bb.length()<maxLen){
1181 
1182 			//Generate the new kmer
1183 			final byte b=AminoAcid.numberToBase[rightMaxPos];
1184 			final long x=rightMaxPos;
1185 			final long x2=AminoAcid.numberToComplement[(int)x];
1186 
1187 			final long evicted=(kmer>>>shift2); //Binary value that falls off the end.
1188 
1189 			//Now consider the next kmer
1190 			kmer=((kmer<<2)|(long)x)&mask;
1191 			rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
1192 
1193 			key=toValue(kmer, rkmer);
1194 			table=tables.getTableForKey(key);
1195 
1196 			assert(table.getValue(key)==rightMax || rightMax==0);
1197 			count=rightMax;
1198 
1199 			assert(count>=minCountExtend) : count;
1200 
1201 			if(leftCounts!=null){
1202 				leftMaxPos=fillLeftCounts(kmer, rkmer, leftCounts);
1203 				leftMax=leftCounts[leftMaxPos];
1204 				leftSecondPos=Tools.secondHighestPosition(leftCounts);
1205 				leftSecond=leftCounts[leftSecondPos];
1206 			}
1207 
1208 			rightMaxPos=fillRightCounts(kmer, rkmer, rightCounts);
1209 			rightMax=rightCounts[rightMaxPos];
1210 			rightSecondPos=Tools.secondHighestPosition(rightCounts);
1211 			rightSecond=rightCounts[rightSecondPos];
1212 
1213 			if(verbose){
1214 				outstream.println("kmer: "+toText(kmer)+", "+toText(rkmer));
1215 				outstream.println("Counts: "+count+", "+Arrays.toString(rightCounts));
1216 				outstream.println("rightMaxPos="+rightMaxPos);
1217 				outstream.println("rightMax="+rightMax);
1218 				outstream.println("rightSecondPos="+rightSecondPos);
1219 				outstream.println("rightSecond="+rightSecond);
1220 			}
1221 
1222 			if(isJunction(rightMax, rightSecond, leftMax, leftSecond)){
1223 				if(verbose){outstream.println("B: Breaking because isJunction("+rightMax+", "+rightSecond+", "+leftMax+", "+leftSecond+")");}
1224 				if(includeJunctionBase && kmer>rkmer){
1225 					bb.append(b);
1226 					if(verbose){outstream.println("Added base "+(char)b);}
1227 				}
1228 				break;
1229 			}
1230 
1231 			if(leftCounts!=null && leftMaxPos!=evicted){
1232 				if(verbose){outstream.println("B: Breaking because of hidden branch: leftMaxPos!=evicted ("+leftMaxPos+"!="+evicted+")" +
1233 						"\nleftMaxPos="+leftMaxPos+", leftMax="+leftMax+", leftSecondPos="+leftSecondPos+", leftSecond="+leftSecond);}
1234 				if(includeJunctionBase && kmer>rkmer){
1235 					bb.append(b);
1236 					if(verbose){outstream.println("Added base "+(char)b);}
1237 				}
1238 				break;
1239 			}
1240 
1241 			bb.append(b);
1242 			if(verbose){outstream.println("Added base "+(char)b);}
1243 
1244 			if(rightMax<minCountExtend){
1245 				if(verbose || verbose2){outstream.println("C: Breaking because highest right was too low: "+rightMax+"<"+minCountExtend);}
1246 				break;
1247 			}
1248 		}
1249 		if(verbose || verbose2){outstream.println("Extended by "+(bb.length()-initialLength));}
1250 		return bb.length()-initialLength;
1251 	}
1252 
1253 
1254 	/*--------------------------------------------------------------*/
1255 	/*----------------        Junk Detection        ----------------*/
1256 	/*--------------------------------------------------------------*/
1257 
1258 	@Override
isJunk(Read r)1259 	public boolean isJunk(Read r){
1260 		boolean junk=isJunk(r, localRightCounts.get());
1261 		return junk;
1262 	}
1263 
1264 	@Override
isJunk(Read r, final int[] counts, Kmer kmer)1265 	public boolean isJunk(Read r, final int[] counts, Kmer kmer){
1266 		return isJunk(r, counts);
1267 	}
1268 
isJunk(Read r, final int[] counts)1269 	public boolean isJunk(Read r, final int[] counts){
1270 		final int blen=r.length();
1271 		if(blen<k){return true;}
1272 		final byte[] bases=r.bases;
1273 
1274 		long kmer=0;
1275 		long rkmer=0;
1276 		int len=0;
1277 
1278 		/* Loop through the bases, maintaining a forward and reverse kmer via bitshifts, to get the leftmost kmer */
1279 		{
1280 			for(int i=0; i<k; i++){
1281 				final byte b=bases[i];
1282 				final long x=AminoAcid.baseToNumber[b];
1283 				final long x2=AminoAcid.baseToComplementNumber[b];
1284 				kmer=((kmer<<2)|x)&mask;
1285 				rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
1286 				if(x<0){
1287 					len=0;
1288 					kmer=rkmer=0;
1289 				}else{len++;}
1290 				if(verbose){outstream.println("C: Scanning i="+i+", len="+len+", kmer="+kmer+", rkmer="+rkmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
1291 			}
1292 		}
1293 
1294 		if(len>=k){
1295 			int maxPos=fillLeftCounts(kmer, rkmer, counts);
1296 			if(counts[maxPos]>0){
1297 				//outstream.println("Not junk, left counts[maxPos]="+counts[maxPos]+"; right="+fillRightCounts(kmer, rkmer, counts, mask, shift2));
1298 				return false;
1299 			}
1300 		}
1301 
1302 		final boolean paired=(r.mateLength()>=k);
1303 		int maxDepth=0;
1304 		{
1305 			for(int i=k; i<blen; i++){
1306 				final byte b=bases[i];
1307 				final long x=AminoAcid.baseToNumber[b];
1308 				final long x2=AminoAcid.baseToComplementNumber[b];
1309 				kmer=((kmer<<2)|x)&mask;
1310 				rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
1311 				if(x<0){
1312 					len=0;
1313 					kmer=rkmer=0;
1314 				}else{
1315 					len++;
1316 					if(len>=k){
1317 						int depth=getCount(kmer, rkmer);
1318 						if(depth>maxDepth){
1319 							maxDepth=depth;
1320 							if(maxDepth>1 && (!paired || maxDepth>2)){
1321 								//outstream.println("Not junk, maxDepth="+maxDepth);
1322 								return false;
1323 							}
1324 						}
1325 					}
1326 				}
1327 				if(verbose){outstream.println("D: Scanning i="+i+", len="+len+", kmer="+kmer+", rkmer="+rkmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
1328 			}
1329 		}
1330 
1331 		if(len>=k && !paired){
1332 			int maxPos=fillRightCounts(kmer, rkmer, counts);
1333 			if(counts[maxPos]>0){
1334 				//outstream.println("Not junk, right counts[maxPos]="+counts[maxPos]);
1335 				return false;
1336 			}
1337 		}
1338 		return true;
1339 	}
1340 
1341 	@Override
hasKmersAtOrBelow(Read r, int tooLow, final float fraction, Kmer kmer)1342 	public boolean hasKmersAtOrBelow(Read r, int tooLow, final float fraction, Kmer kmer){
1343 		return hasKmersAtOrBelow(r, tooLow, fraction);
1344 	}
1345 
1346 	@Override
hasKmersAtOrBelow(Read r, final int tooLow, final float fraction)1347 	public boolean hasKmersAtOrBelow(Read r, final int tooLow, final float fraction){
1348 		final int blen=r.length();
1349 		if(blen<k){return true;}
1350 		final byte[] bases=r.bases;
1351 
1352 //		outstream.println("\n"+new String(r.bases)+":");
1353 
1354 		long kmer=0;
1355 		long rkmer=0;
1356 		int len=0;
1357 
1358 		final int limit=Tools.max(1, Math.round((bases.length-kbig+1)*fraction));
1359 		int valid=0, invalid=0;
1360 
1361 		/* Loop through the bases, maintaining a forward and reverse kmer via bitshifts, to get the leftmost kmer */
1362 		{
1363 			for(int i=0; i<blen; i++){
1364 				final byte b=bases[i];
1365 				final long x=AminoAcid.baseToNumber[b];
1366 				final long x2=AminoAcid.baseToComplementNumber[b];
1367 				kmer=((kmer<<2)|x)&mask;
1368 				rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
1369 				if(x<0){
1370 					len=0;
1371 					kmer=rkmer=0;
1372 				}else{
1373 					len++;
1374 					if(len>=k){
1375 						int depth=getCount(kmer, rkmer);
1376 //						outstream.println("depth="+depth+", kmer="+toText(kmer));
1377 						if(depth>tooLow){valid++;}
1378 						else{
1379 							invalid++;
1380 							if(invalid>=limit){return true;}
1381 						}
1382 					}
1383 				}
1384 				if(verbose){outstream.println("E: Scanning i="+i+", len="+len+", kmer="+kmer+", rkmer="+rkmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
1385 			}
1386 		}
1387 
1388 		//Compensate for nocalls changing the expected kmer count
1389 		final int limit2=Tools.max(1, Math.round((valid+invalid)*fraction));
1390 		return valid<1 || invalid>=limit2;
1391 	}
1392 
1393 
1394 	/*--------------------------------------------------------------*/
1395 	/*----------------       Error Correction       ----------------*/
1396 	/*--------------------------------------------------------------*/
1397 
1398 	@Override
errorCorrect(Read r)1399 	public int errorCorrect(Read r){
1400 		initializeThreadLocals();
1401 		int corrected=errorCorrect(r, localLeftCounts.get(), localRightCounts.get(), localLongList.get(),
1402 				localIntList.get(), localIntList2.get(), localByteBuilder.get(), localByteBuilder2.get(), localTracker.get(), localBitSet.get());
1403 		return corrected;
1404 	}
1405 
1406 	@Override
errorCorrect(Read r, final int[] leftCounts, final int[] rightCounts, LongList kmers, IntList counts, IntList counts2, final ByteBuilder bb, final ByteBuilder bb2, final ErrorTracker tracker, final BitSet bs, Kmer kmer, Kmer kmer2)1407 	public int errorCorrect(Read r, final int[] leftCounts, final int[] rightCounts, LongList kmers, IntList counts, IntList counts2,
1408 			final ByteBuilder bb, final ByteBuilder bb2, final ErrorTracker tracker, final BitSet bs, Kmer kmer, Kmer kmer2){
1409 		return errorCorrect(r, leftCounts, rightCounts, kmers, counts, counts2, bb, bb2, tracker, bs);
1410 	}
1411 
hasErrorsFast(LongList kmers)1412 	boolean hasErrorsFast(LongList kmers){
1413 		if(kmers.size<1){return false;}
1414 		int prev=-1;
1415 
1416 		final int incr=Tools.mid(1, k/2, 9), mcc=minCountCorrect();
1417 		for(int i=0; i<kmers.size; i+=incr){
1418 			long kmer=kmers.get(i);
1419 			if(kmer<0){
1420 				return true;
1421 			}
1422 			long rkmer=rcomp(kmer);
1423 			int count=getCount(kmer, rkmer);
1424 			final int min=Tools.min(count, prev), max=Tools.max(count, prev);
1425 			if(count<mcc || (i>0 && (isError(max+1, min-1)))){return true;}
1426 			prev=count;
1427 		}
1428 
1429 		long kmer=kmers.get(kmers.size()-1);
1430 		if(kmer<0){return true;}
1431 		long rkmer=rcomp(kmer);
1432 		int count=getCount(kmer, rkmer);
1433 		final int min=Tools.min(count, prev), max=Tools.max(count, prev);
1434 		return count<mcc || isError(max+1, min-1);
1435 	}
1436 
errorCorrect(Read r, final int[] leftCounts, final int[] rightCounts, LongList kmers, IntList counts, IntList counts2, final ByteBuilder bb, final ByteBuilder bb2, final ErrorTracker tracker, final BitSet bs)1437 	public int errorCorrect(Read r, final int[] leftCounts, final int[] rightCounts, LongList kmers, IntList counts, IntList counts2,
1438 			final ByteBuilder bb, final ByteBuilder bb2, final ErrorTracker tracker, final BitSet bs){
1439 
1440 		final byte[] bases=r.bases;
1441 		final byte[] quals=r.quality;
1442 		tracker.clear();
1443 		int valid=tables.fillKmers(bases, kmers);
1444 		if(valid<2){return 0;}
1445 		if(!r.containsUndefined() && !hasErrorsFast(kmers)){return 0;}
1446 
1447 		tables.fillCounts(kmers, counts);
1448 		final int possibleErrors=tracker.suspected=countErrors(counts, quals);
1449 		if(possibleErrors<0){return 0;}
1450 		final float expectedErrors=r.expectedErrors(true, r.length());
1451 		final Rollback roll=ECC_ROLLBACK ? new Rollback(r, counts) : null;
1452 
1453 		assert(counts.size>0);
1454 
1455 		int correctedPincer=0;
1456 		int correctedTail=0;
1457 		int correctedBrute=0;
1458 		int correctedReassemble=0;
1459 
1460 		if(ECC_PINCER){
1461 			correctedPincer+=errorCorrectPincer(bases, quals, leftCounts, rightCounts, kmers, counts, bb, tracker, errorExtensionPincer);
1462 		}
1463 
1464 		if(ECC_TAIL || ECC_ALL){
1465 			int start=(ECC_ALL ? 0 : counts.size-k-1);
1466 //			if(ECC_PINCER && tracker!=null && tracker.detected>correctedPincer){start=start-k;}
1467 			correctedTail+=errorCorrectTail(bases, quals, leftCounts, rightCounts, kmers, counts, bb, tracker, start, errorExtensionTail);
1468 			r.reverseComplement();
1469 			valid=tables.fillKmers(bases, kmers);
1470 			counts.reverse();
1471 			correctedTail+=errorCorrectTail(bases, quals, leftCounts, rightCounts, kmers, counts, bb, tracker, start, errorExtensionTail);
1472 			r.reverseComplement();
1473 			counts.reverse();
1474 		}
1475 
1476 		if(ECC_REASSEMBLE){
1477 			if(verbose){outstream.println("Correcting "+possibleErrors+" errors.  Counts:\n"+counts);}
1478 			if((correctedPincer<1 && correctedTail<1) || countErrors(counts, quals)>0){
1479 				correctedReassemble=reassemble(bases, quals, rightCounts, counts, counts2, tracker, errorExtensionReassemble, bb, bb2, null, null, bs);
1480 			}
1481 //			assert(tracker.detectedReassemble>0) : counts;
1482 			if(verbose){outstream.println("Corrected  "+correctedReassemble+" errors.  Counts:\n"+counts);}
1483 		}
1484 		assert(counts.size>0);
1485 
1486 		//123 For testing.
1487 		if(false && tracker.detected()>tracker.corrected()){
1488 			correctedBrute+=errorCorrectBruteForce(bases, quals, leftCounts, rightCounts, kmers, counts, bb, tracker, errorExtensionPincer);
1489 		}
1490 
1491 		assert(correctedPincer+correctedTail+correctedReassemble+correctedBrute==tracker.corrected())
1492 			: correctedPincer+", "+correctedTail+", "+correctedReassemble+", "+correctedBrute+", "+tracker;
1493 
1494 		if(ECC_ROLLBACK && (tracker.corrected()>0 || tracker.rollback)){
1495 
1496 			if(!tracker.rollback && quals!=null && tracker.corrected()>3){
1497 				float mult=Tools.max(1, 0.5f*(0.5f+0.01f*r.length()));//1 for a 150bp read.
1498 				if(countErrors(counts, quals)>0 && tracker.corrected()>mult+expectedErrors){tracker.rollback=true;}
1499 				else if(tracker.corrected()>2.5f*mult+expectedErrors){tracker.rollback=true;}
1500 			}
1501 
1502 //			boolean printed=false;
1503 			IntList counts0=roll.counts0;
1504 			for(int i=0; !tracker.rollback && i<counts.size; i++){
1505 				int a=Tools.max(0, counts0.get(i));
1506 				int b=Tools.max(0, counts.get(i));
1507 //				assert(b+1>=a) : "Z: RID="+r.numericID+"; "+a+"->"+b+"\n"+counts0+"\n"+counts;
1508 				if(b<a-1 && !isSimilar(a, b)){
1509 //					assert(false) : "Y: RID="+r.numericID+"; "+a+"->"+b+"\n"+counts0+"\n"+counts;
1510 					if(verbose){outstream.println("Y: RID="+r.numericID+"; "+a+"->"+b+"\n"+counts0+"\n"+counts);}
1511 					tracker.rollback=true;
1512 				}
1513 //				else if(b<a-1 && !printed){
1514 //					assert(false);
1515 //					if(verbose){outstream.println("X: RID="+r.numericID+"; "+a+"->"+b+"\n"+counts0+"\n"+counts);}
1516 //					printed=true;
1517 //				}
1518 			}
1519 
1520 			if(tracker.rollback){
1521 				roll.rollback(r, counts);
1522 				tracker.clearCorrected();
1523 				return 0;
1524 			}
1525 		}
1526 
1527 		if(MARK_BAD_BASES>0 && (!MARK_ERROR_READS_ONLY || countErrors(counts, quals)>0 ||
1528 				r.expectedErrors(false, r.length())>3)){
1529 			int marked=markBadBases(bases, quals, counts, bs, MARK_BAD_BASES, MARK_DELTA_ONLY, MARK_QUALITY);
1530 			tracker.marked=marked;
1531 		}
1532 
1533 		return tracker.corrected();
1534 	}
1535 
findBestMutant(final byte[] bases, final int a, final LongList kmers, final long[] bestMutant)1536 	private int findBestMutant(final byte[] bases, final int a, final LongList kmers, final long[] bestMutant){
1537 		Arrays.fill(bestMutant, -1);
1538 		final long kmer0=kmers.get(a);
1539 		if(kmer0<0){return -1;}
1540 		long mask=3;
1541 		int max=0, second=0;
1542 		for(int i=0; i<k; i++){
1543 			for(long num=0; num<4; num++){
1544 				final long kmer=(kmer0&~mask)|(num<<(2*i));
1545 				if(kmer!=kmer0){
1546 					final long rkmer=rcomp(kmer);
1547 					int count=getCount(kmer, rkmer);
1548 					if(count>max){
1549 						second=max;
1550 						max=count;
1551 						bestMutant[0]=kmer;
1552 						bestMutant[1]=rkmer;
1553 						bestMutant[2]=i;
1554 					}else if(count>second){
1555 						second=count;
1556 					}
1557 				}
1558 			}
1559 			mask<<=2;
1560 		}
1561 		bestMutant[3]=max;
1562 		bestMutant[4]=second;
1563 		return max>second ? max : -1;
1564 	}
1565 
fixBestMutant(final byte[] bases, final int a, final LongList kmers, final IntList counts, final int pos, final long kmer)1566 	private boolean fixBestMutant(final byte[] bases, final int a, final LongList kmers, final IntList counts, final int pos, final long kmer){
1567 		int shift=2*pos;
1568 		long mask=(3L)<<shift;
1569 		byte num=(byte)((kmer>>shift)&mask);
1570 		byte base=AminoAcid.numberToBase[num];
1571 		byte old=bases[a+pos];
1572 		bases[a+pos]=base;
1573 
1574 //		long[] copy=counts.toArray();
1575 
1576 //		tables.regenerateKmers(bases, kmers, counts, a);
1577 		tables.fillKmers(bases, kmers);
1578 		tables.fillCounts(kmers, counts);
1579 
1580 //		for(int i=0; i<copy.length; i++){
1581 //			if(counts.get(i)<copy[i]){
1582 //				bases[a+pos]=old;
1583 //				tables.fillKmers(bases, kmers);
1584 //				tables.fillCounts(kmers, counts);
1585 //				return false;
1586 //			}
1587 //		}
1588 
1589 		return true;
1590 	}
1591 
errorCorrectBruteForce(final byte[] bases, final byte[] quals, final int[] leftBuffer, final int[] rightBuffer, final LongList kmers, final IntList counts, final ByteBuilder bb, final ErrorTracker tracker, final int errorExtension)1592 	public int errorCorrectBruteForce(final byte[] bases, final byte[] quals, final int[] leftBuffer, final int[] rightBuffer,
1593 			final LongList kmers, final IntList counts, final ByteBuilder bb, final ErrorTracker tracker, final int errorExtension){
1594 
1595 		int detected=0;
1596 		int corrected=0;
1597 
1598 		final long[] bestMutant=new long[5];
1599 		final int[] countCopy=counts.toArray();
1600 		final byte[] basesCopy=bases.clone();
1601 
1602 		final int maxCount=Tools.max(counts.array, 0, counts.size-1);
1603 		if(maxCount<minCountCorrect()){
1604 			return 0;
1605 		}
1606 		int thresh=Tools.max(3, maxCount/4);
1607 
1608 		for(int a=0, d=k+1; a<counts.size; a++, d++){
1609 			final int aCount=counts.get(a);
1610 			if(aCount<thresh){
1611 				detected++;
1612 				boolean fixed=false;
1613 				int mCount=findBestMutant(bases, a, kmers, bestMutant);
1614 				if(mCount>thresh && mCount>=minCountCorrect() && (isSimilar(mCount, maxCount) || (mCount<maxCount && mCount>(1+aCount)*3))){
1615 					long kmer=bestMutant[0];
1616 					long rkmer=bestMutant[1];
1617 					int pos=(int)bestMutant[2];
1618 					assert(mCount==bestMutant[3]);
1619 					int secondBestCount=(int)bestMutant[4];
1620 					if(mCount>(1+secondBestCount*3) && pos>=errorExtension && pos<=(k-errorExtension)){
1621 						fixBestMutant(bases, a, kmers, counts, pos, kmer);
1622 						fixed=true;
1623 						corrected++;
1624 					}
1625 				}
1626 			}
1627 		}
1628 
1629 		for(int i=0; i<countCopy.length; i++){
1630 			if(counts.get(i)<countCopy[i]){
1631 				for(int j=0; j<bases.length; j++){bases[j]=basesCopy[j];}
1632 				counts.clear();
1633 				for(int count : countCopy){counts.add(count);}
1634 				tables.fillKmers(bases, kmers);
1635 				corrected=0;
1636 			}
1637 		}
1638 
1639 		{
1640 			tracker.detectedBrute+=detected;
1641 			tracker.correctedBrute+=corrected;
1642 		}
1643 
1644 		return corrected;
1645 	}
1646 
errorCorrectPincer(final byte[] bases, final byte[] quals, final int[] leftBuffer, final int[] rightBuffer, final LongList kmers, final IntList counts, final ByteBuilder bb, final ErrorTracker tracker, final int errorExtension)1647 	public int errorCorrectPincer(final byte[] bases, final byte[] quals, final int[] leftBuffer, final int[] rightBuffer,
1648 			final LongList kmers, final IntList counts, final ByteBuilder bb, final ErrorTracker tracker, final int errorExtension){
1649 
1650 		int detected=0;
1651 		int corrected=0;
1652 
1653 		//a is the index of the left kmer
1654 		//b is a+1 (right-extension of left kmer)
1655 		//c is d-1 (left-extension of right kmer)
1656 		//d is the index of the right kmer
1657 		//the base between the kmers is at a+k
1658 		for(int a=0, d=k+1; d<counts.size; a++, d++){
1659 			final int aCount=counts.get(a);
1660 			final int bCount=counts.get(a+1);
1661 			final int cCount=counts.get(d-1);
1662 			final int dCount=counts.get(d);
1663 			final byte qb=(quals==null ? 20 : quals[a+kbig]);
1664 			if(isError(aCount, bCount, qb) && isError(dCount, cCount, qb) && isSimilar(aCount, dCount)){
1665 				if(verbose){
1666 					outstream.println("Found error: "+aCount+", "+bCount+", "+cCount+", "+dCount);
1667 				}
1668 				//Looks like a 1bp substitution; attempt to correct.
1669 				detected++;
1670 				int ret=correctSingleBasePincer(a, d, bases, quals, leftBuffer, rightBuffer, kmers, counts, bb, errorExtension);
1671 				corrected+=ret;
1672 				if(verbose){
1673 					outstream.println("Corrected error.");
1674 				}
1675 			}else{
1676 				if(verbose){
1677 					outstream.println("Not an error: "+aCount+", "+bCount+", "+cCount+", "+dCount+
1678 							";  "+isError(aCount, bCount, qb)+", "+isError(dCount, cCount, qb)+", "+isSimilar(aCount, dCount));
1679 				}
1680 			}
1681 		}
1682 
1683 //		if(detected==0 && counts.get(0)>2 && counts.get(counts.size-1)>2){
1684 //			assert(!verbose);
1685 //			verbose=true;
1686 //			outstream.println("\n"+counts);
1687 //			errorCorrectPincer(bases, quals, leftBuffer, rightBuffer, kmers, counts, bb, tracker);
1688 //			assert(false);
1689 //		}
1690 
1691 		{
1692 			tracker.detectedPincer+=detected;
1693 			tracker.correctedPincer+=corrected;
1694 		}
1695 
1696 		return corrected;
1697 	}
1698 
errorCorrectTail(final byte[] bases, final byte[] quals, final int[] leftBuffer, final int[] rightBuffer, final LongList kmers, final IntList counts, final ByteBuilder bb, final ErrorTracker tracker, final int startPos, final int errorExtension)1699 	public int errorCorrectTail(final byte[] bases, final byte[] quals, final int[] leftBuffer, final int[] rightBuffer,
1700 			final LongList kmers, final IntList counts, final ByteBuilder bb, final ErrorTracker tracker, final int startPos, final int errorExtension){
1701 		if(bases.length<k+2+errorExtension+deadZone){return 0;}
1702 		int detected=0;
1703 		int corrected=0;
1704 
1705 		//a is the index of the left kmer
1706 		//b is a+1
1707 		//the base between the kmers is at a+k
1708 		for(int a=Tools.max(startPos, errorExtension), lim=counts.size-deadZone-1; a<lim; a++){//errorExtension-1
1709 			final int aCount=counts.get(a);
1710 			final int bCount=counts.get(a+1);
1711 			final byte qb=(quals==null ? 20 : quals[a+kbig]);
1712 			if(isError(aCount, bCount, qb) && isSimilar(aCount, a-errorExtension, a-1, counts) && isError(aCount, a+2, a+k, counts)){
1713 				if(verbose){
1714 					outstream.println("Found error: "+aCount+", "+bCount);
1715 				}
1716 				//Assume like a 1bp substitution; attempt to correct.
1717 				detected++;
1718 				int ret=correctSingleBaseRight(a, bases, quals, leftBuffer, rightBuffer, kmers, counts, bb, errorExtension);
1719 				corrected+=ret;
1720 				if(verbose){
1721 					outstream.println("Corrected error.");
1722 				}
1723 			}else{
1724 				if(verbose){
1725 					outstream.println("Not an error: "+aCount+", "+bCount+
1726 							";  "+isError(aCount, bCount, qb)+", "+isSimilar(aCount, a-errorExtension, a-1, counts)+", "+isError(aCount, a+2, a+k, counts));
1727 				}
1728 			}
1729 		}
1730 
1731 //		if(detected==0 && counts.get(0)>2 && counts.get(counts.size-1)>2){
1732 //			assert(!verbose);
1733 //			verbose=true;
1734 //			outstream.println("\n"+counts);
1735 //			errorCorrectPincer(bases, quals, leftBuffer, rightBuffer, kmers, counts, bb, tracker);
1736 //			assert(false);
1737 //		}
1738 
1739 		{
1740 			tracker.detectedTail+=detected;
1741 			tracker.correctedTail+=corrected;
1742 		}
1743 
1744 		return corrected;
1745 	}
1746 
1747 //	public int reassemble_inner(final ByteBuilder bases, final byte[] quals, final int[] rightCounts, final IntList counts,
1748 //			final int errorExtension, final Kmer kmer, final Kmer kmer2){
1749 //		throw new RuntimeException("TODO");
1750 //	}
1751 
1752 	@Override
reassemble_inner(final ByteBuilder bb, final byte[] quals, final int[] rightCounts, final IntList counts, final int errorExtension, final Kmer kmer, final Kmer regenKmer)1753 	public int reassemble_inner(final ByteBuilder bb, final byte[] quals, final int[] rightCounts, final IntList counts,
1754 			final int errorExtension, final Kmer kmer, final Kmer regenKmer){
1755 		return reassemble_inner(bb, quals, rightCounts, counts, errorExtension);
1756 	}
1757 
reassemble_inner(final ByteBuilder bb, final byte[] quals, final int[] rightCounts, final IntList counts, final int errorExtension)1758 	public int reassemble_inner(final ByteBuilder bb, final byte[] quals, final int[] rightCounts, final IntList counts,
1759 			final int errorExtension){
1760 		final int length=bb.length();
1761 		if(length<k+1+deadZone){return 0;}
1762 		final byte[] bases=bb.array;
1763 
1764 		long kmer=0, rkmer=0;
1765 
1766 		int detected=0;
1767 		int corrected=0;
1768 		int len=0;
1769 
1770 		//a is the index of the first base of the left kmer
1771 		//b=a+1 is the index of the next base
1772 		//ca=a-k is the index of the next base
1773 		//cb=a-k is the index of the next base
1774 		//the base between the kmers is at a+k
1775 		for(int a=0, lim=length-deadZone-1; a<lim; a++){
1776 
1777 			//Generate the new kmer
1778 			final byte aBase=bases[a];
1779 			final long x=AminoAcid.baseToNumber[aBase];
1780 			final long x2=AminoAcid.baseToComplementNumber[aBase];
1781 
1782 			if(x<0){
1783 				len=0;
1784 				kmer=rkmer=0;
1785 			}else{
1786 
1787 
1788 				//Now consider the next kmer
1789 				kmer=((kmer<<2)|(long)x)&mask;
1790 				rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
1791 				len++;
1792 
1793 				if(verbose){
1794 					outstream.println("len: "+len+" vs "+k+"; a="+a);
1795 				}
1796 
1797 				if(len>=k){
1798 
1799 					final int b=a+1;
1800 					final int ca=a-k+1;
1801 					final int cb=ca+1;
1802 
1803 					final int aCount=counts.get(ca);
1804 					final int bCount=counts.get(cb);
1805 					final byte qb=(quals==null ? 20 : quals[b]);
1806 
1807 					if(verbose){
1808 						outstream.println("ca="+ca+", cb="+cb+"; aCount="+aCount+", bCount="+bCount);
1809 						outstream.println(isError(aCount, bCount, qb)+", "+isSimilar(aCount, ca-errorExtension, ca-1, counts)+
1810 								", "+isError(aCount, ca+2, ca+k, counts));
1811 					}
1812 
1813 //					if(isError(aCount, bCount) && isSimilar(aCount, ca-errorExtension, ca-1, counts) && isError(aCount, ca+2, ca+k, counts)){
1814 					if(isSubstitution(ca, errorExtension, qb, counts)){
1815 						if(verbose){
1816 							outstream.println("***Found error: "+aCount+", "+bCount);
1817 						}
1818 						//Assume a 1bp substitution; attempt to correct.
1819 
1820 						int rightMaxPos=fillRightCounts(kmer, rkmer, rightCounts);
1821 						int rightMax=rightCounts[rightMaxPos];
1822 						int rightSecondPos=Tools.secondHighestPosition(rightCounts);
1823 						int rightSecond=rightCounts[rightSecondPos];
1824 
1825 						byte base=bases[b];
1826 						byte num=AminoAcid.baseToNumber[base];
1827 
1828 						if(rightMax>=minCountExtend){
1829 							detected++;
1830 							if(num==rightMax){
1831 								detected--;
1832 								//							bases2[b]=base;
1833 							}else if((isError(rightMax, rightSecond, qb) || !isJunction(rightMax, rightSecond)) && isSimilar(aCount, rightMax)){
1834 								bases[b]=AminoAcid.numberToBase[rightMaxPos];
1835 								corrected++;
1836 								tables.regenerateCounts(bases, counts, ca);
1837 								if(verbose){outstream.println("Corrected error: "+num+"->"+rightMaxPos+". New counts:\n"+counts);}
1838 							}
1839 
1840 							//						else if(rightSecond>=minCountExtend && isJunction(rightMax, rightSecond) && isSimilar(aCount, rightSecond)
1841 							//								&& !isSimilar(aCount, rightMax)){//This branch may not be very safe.
1842 							//							bases2[b]=AminoAcid.numberToBase[rightSecondPos];
1843 							//							corrected++;
1844 							//							if(verbose){outstream.println("Corrected error.");}
1845 							//						}
1846 						}
1847 
1848 					}else{
1849 						if(verbose){
1850 							outstream.println("Not an error: "+aCount+", "+bCount+
1851 									";  "+isError(aCount, bCount, qb)+", "+isSimilar(aCount, a-errorExtension, a-1, counts)+", "+isError(aCount, a+2, a+k, counts));
1852 						}
1853 					}
1854 				}
1855 			}
1856 		}
1857 
1858 		return corrected;
1859 	}
1860 
correctSingleBasePincer(final int a, final int d, final byte[] bases, final byte[] quals, final int[] leftBuffer, final int[] rightBuffer, final LongList kmers, final IntList counts, final ByteBuilder bb, final int errorExtension)1861 	private int correctSingleBasePincer(final int a, final int d, final byte[] bases, final byte[] quals, final int[] leftBuffer, final int[] rightBuffer,
1862 			final LongList kmers, final IntList counts, final ByteBuilder bb, final int errorExtension){
1863 		final byte leftReplacement, rightReplacement;
1864 		final int loc=a+k;
1865 		{
1866 			bb.clear();
1867 			final long kmer=kmers.get(a);
1868 			final long rkmer=rcomp(kmer);
1869 			int extension=extendToRight2(bb, null, rightBuffer, errorExtension, true, kmer, rkmer);
1870 			if(extension<errorExtension){return 0;}
1871 			for(int i=1; i<extension; i++){
1872 				if(bb.get(i)!=bases[loc+i]){
1873 					return 0;
1874 				}
1875 			}
1876 			leftReplacement=bb.get(0);
1877 		}
1878 		{
1879 			bb.clear();
1880 			final long rkmer=kmers.get(d);
1881 			final long kmer=rcomp(rkmer);
1882 			int extension=extendToRight2(bb, null, rightBuffer, errorExtension, true, kmer, rkmer);
1883 			if(extension<errorExtension){return 0;}
1884 			bb.reverseComplementInPlace();
1885 			for(int i=0; i<extension-1; i++){
1886 				if(bb.get(i)!=bases[loc+i+1-extension]){
1887 					return 0;
1888 				}
1889 			}
1890 			rightReplacement=bb.get(extension-1);
1891 		}
1892 		if(leftReplacement!=rightReplacement){return 0;}
1893 		if(bases[loc]==leftReplacement){return 0;}
1894 		if(!isSimilar(a, leftReplacement, kmers, counts)){return 0;}
1895 
1896 		bases[loc]=leftReplacement;
1897 		assert(d==a+k+1);
1898 		tables.regenerateKmers(bases, kmers, counts, a);
1899 		return 1;
1900 	}
1901 
correctSingleBaseRight(final int a, final byte[] bases, final byte[] quals, final int[] leftBuffer, final int[] rightBuffer, final LongList kmers, final IntList counts, final ByteBuilder bb, final int errorExtension0)1902 	private int correctSingleBaseRight(final int a, final byte[] bases, final byte[] quals, final int[] leftBuffer, final int[] rightBuffer,
1903 			final LongList kmers, final IntList counts, final ByteBuilder bb, final int errorExtension0){
1904 		final byte leftReplacement;
1905 		final int loc=a+k;
1906 		final int errorExtension=Tools.min(errorExtension0, bases.length-loc);
1907 		{
1908 			bb.clear();
1909 			final long kmer=kmers.get(a);
1910 			final long rkmer=rcomp(kmer);
1911 			int extension=extendToRight2(bb, null, rightBuffer, errorExtension, true, kmer, rkmer);
1912 			if(extension<errorExtension){return 0;}
1913 			for(int i=1; i<extension; i++){
1914 				if(bb.get(i)!=bases[loc+i]){
1915 					return 0;
1916 				}
1917 			}
1918 			leftReplacement=bb.get(0);
1919 		}
1920 
1921 		if(bases[loc]==leftReplacement){return 0;}
1922 		if(!isSimilar(a, leftReplacement, kmers, counts)){return 0;}
1923 
1924 		bases[loc]=leftReplacement;
1925 		tables.regenerateKmers(bases, kmers, counts, a);
1926 		return 1;
1927 	}
1928 
isSimilar(int a, byte newBase, LongList kmers, IntList counts)1929 	private boolean isSimilar(int a, byte newBase, LongList kmers, IntList counts){
1930 		long kmer=kmers.get(a);
1931 
1932 		final long x=AminoAcid.baseToNumber[newBase];
1933 		kmer=((kmer<<2)|x)&mask;
1934 		long rkmer=rcomp(kmer);
1935 		int count=getCount(kmer, rkmer);
1936 		int aCount=counts.get(a);
1937 		boolean similar=isSimilar(aCount, count);
1938 		return similar;
1939 	}
1940 
1941 	/*--------------------------------------------------------------*/
1942 	/*----------------  Inherited Abstract Methods  ----------------*/
1943 	/*--------------------------------------------------------------*/
1944 
1945 	@Override
makeKhist()1946 	final void makeKhist(){
1947 		tables.makeKhist(outHist, histColumns, histMax, histHeader, histZeros, true, smoothHist, gcHist, false, 0.01, 1, 1);
1948 	}
1949 	@Override
dumpKmersAsText()1950 	final void dumpKmersAsText(){
1951 		tables.dumpKmersAsBytes_MT(outKmers, minToDump, maxToDump, true, null);
1952 	}
1953 
1954 	/*--------------------------------------------------------------*/
1955 	/*----------------            Fields            ----------------*/
1956 	/*--------------------------------------------------------------*/
1957 
1958 	@Override
tables()1959 	public final KmerTableSet tables(){return tables;}
1960 	public final KmerTableSet tables;
1961 
1962 	/** Normal kmer length */
1963 	final int k;
1964 	/** k-1; used in some expressions */
1965 	final int k2;
1966 
1967 	final int shift;
1968 	final int shift2;
1969 	final long mask;
1970 
1971 }
1972