1 package assemble;
2 
3 import java.util.ArrayList;
4 import java.util.Arrays;
5 import java.util.BitSet;
6 import java.util.concurrent.atomic.AtomicInteger;
7 
8 import dna.AminoAcid;
9 import jgi.BBMerge;
10 import shared.KillSwitch;
11 import shared.Shared;
12 import shared.Timer;
13 import shared.Tools;
14 import stream.ConcurrentReadInputStream;
15 import stream.Read;
16 import structures.ByteBuilder;
17 import structures.IntList;
18 import structures.ListNum;
19 import structures.LongList;
20 import ukmer.AbstractKmerTableU;
21 import ukmer.HashArrayU1D;
22 import ukmer.HashForestU;
23 import ukmer.Kmer;
24 import ukmer.KmerNodeU;
25 import ukmer.KmerTableSetU;
26 
27 
28 /**
29  * Long-kmer assembler based on KmerCountExact.
30  * @author Brian Bushnell
31  * @date May 15, 2015
32  *
33  */
34 public class Tadpole2 extends Tadpole {
35 
36 	/**
37 	 * Code entrance from the command line.
38 	 * @param args Command line arguments
39 	 */
main(String[] args)40 	public static void main(String[] args){
41 		Timer t=new Timer(), t2=new Timer();
42 		t.start();
43 		t2.start();
44 
45 		//Create a new CountKmersExact instance
46 		Tadpole2 wog=new Tadpole2(args, true);
47 		t2.stop();
48 		outstream.println("Initialization Time:      \t"+t2);
49 
50 		///And run it
51 		wog.process(t);
52 	}
53 
54 	/**
55 	 * Constructor.
56 	 * @param args Command line arguments
57 	 */
Tadpole2(String[] args, boolean setDefaults)58 	public Tadpole2(String[] args, boolean setDefaults){
59 		super(args, setDefaults);
60 
61 		final int extraBytesPerKmer;
62 		{
63 			int x=0;
64 			if(useOwnership){x+=4;}
65 			if(processingMode==correctMode || processingMode==discardMode){}
66 			else if(processingMode==contigMode || processingMode==extendMode){x+=1;}
67 			extraBytesPerKmer=x;
68 		}
69 
70 		tables=new KmerTableSetU(args, extraBytesPerKmer);
71 		assert(kbig==tables.kbig) : kbig+", "+tables.kbig;
72 //		kbig=tables.kbig;
73 		ksmall=tables.k;
74 //		k2=tables.k2;
75 //		ways=tables.ways;
76 	}
77 
78 
79 	/*--------------------------------------------------------------*/
80 	/*----------------         Outer Methods        ----------------*/
81 	/*--------------------------------------------------------------*/
82 
83 
84 	/*--------------------------------------------------------------*/
85 	/*----------------         Inner Methods        ----------------*/
86 	/*--------------------------------------------------------------*/
87 
88 	@Override
initializeOwnership()89 	void initializeOwnership(){
90 		tables.initializeOwnership();
91 	}
92 
93 	@Override
shave(boolean shave, boolean rinse)94 	long shave(boolean shave, boolean rinse){
95 		long sum=0;
96 
97 		for(int i=0; i<maxShaveDepth; i++){
98 			int a=1, b=maxShaveDepth, c=i+1;
99 			//				if(i>3){Shaver2.verbose2=true;}
100 			outstream.println("\nShave("+a+", "+b+", "+c+")");
101 			final Shaver shaver=Shaver.makeShaver(tables, THREADS, a, b, c, minCountExtend, branchMult2, Tools.max(minContigLen, shaveDiscardLen), shaveExploreDist, shave, rinse);
102 			long removed=shaver.shave(a, b);
103 
104 			sum+=removed;
105 			if(removed<100 || i>2){break;}
106 		}
107 
108 		outstream.println();
109 		return sum;
110 	}
111 
112 	@Override
loadKmers(Timer t)113 	public long loadKmers(Timer t){
114 		tables.process(t);
115 		return tables.kmersLoaded;
116 	}
117 
118 	/*--------------------------------------------------------------*/
119 	/*----------------        Recall Methods        ----------------*/
120 	/*--------------------------------------------------------------*/
121 
getCount(Kmer kmer)122 	public final int getCount(Kmer kmer){return tables.getCount(kmer);}
claim(Kmer kmer, int id)123 	final boolean claim(Kmer kmer, int id){return tables.claim(kmer, id);}
doubleClaim(ByteBuilder bb, int id , Kmer kmer)124 	final boolean doubleClaim(ByteBuilder bb, int id/*, long rid*/, Kmer kmer){return tables.doubleClaim(bb, id/*, rid*/, kmer);}
claim(ByteBuilder bb, int id, boolean earlyExit, Kmer kmer)125 	final boolean claim(ByteBuilder bb, int id, /*long rid, */boolean earlyExit, Kmer kmer){return tables.claim(bb, id/*, rid*/, earlyExit, kmer);}
claim(byte[] array, int len, int id, boolean earlyExit, Kmer kmer)126 	final boolean claim(byte[] array, int len, int id, /*long rid, */boolean earlyExit, Kmer kmer){return tables.claim(array, len, id/*, rid*/, earlyExit, kmer);}
findOwner(Kmer kmer)127 	final int findOwner(Kmer kmer){return tables.findOwner(kmer);}
findOwner(ByteBuilder bb, int id, Kmer kmer)128 	final int findOwner(ByteBuilder bb, int id, Kmer kmer){return tables.findOwner(bb, id, kmer);}
findOwner(byte[] array, int len, int id, Kmer kmer)129 	final int findOwner(byte[] array, int len, int id, Kmer kmer){return tables.findOwner(array, len, id, kmer);}
release(Kmer kmer, int id)130 	final void release(Kmer kmer, int id){tables.release(kmer, id);}
release(ByteBuilder bb, int id, Kmer kmer)131 	final void release(ByteBuilder bb, int id, Kmer kmer){tables.release(bb, id, kmer);}
release(byte[] array, int len, int id, Kmer kmer)132 	final void release(byte[] array, int len, int id, Kmer kmer){tables.release(array, len, id, kmer);}
fillRightCounts(Kmer kmer, int[] counts)133 	final int fillRightCounts(Kmer kmer, int[] counts){return tables.fillRightCounts(kmer, counts);}
fillLeftCounts(Kmer kmer, int[] counts)134 	final int fillLeftCounts(Kmer kmer, int[] counts){return tables.fillLeftCounts(kmer, counts);}
toText(Kmer kmer)135 	final static StringBuilder toText(Kmer kmer){return AbstractKmerTableU.toText(kmer);}
toText(long[] key, int k)136 	final static StringBuilder toText(long[] key, int k){return AbstractKmerTableU.toText(key, k);}
137 
138 	/*--------------------------------------------------------------*/
139 	/*----------------         Inner Classes        ----------------*/
140 	/*--------------------------------------------------------------*/
141 
142 	/*--------------------------------------------------------------*/
143 	/*----------------          BuildThread         ----------------*/
144 	/*--------------------------------------------------------------*/
145 
146 	@Override
makeBuildThread(int id, int mode, ConcurrentReadInputStream[] crisa)147 	BuildThread makeBuildThread(int id, int mode, ConcurrentReadInputStream[] crisa){
148 		return new BuildThread(id, mode, crisa);
149 	}
150 
151 	/**
152 	 * Builds contigs.
153 	 */
154 	private class BuildThread extends AbstractBuildThread{
155 
BuildThread(int id_, int mode_, ConcurrentReadInputStream[] crisa_)156 		public BuildThread(int id_, int mode_, ConcurrentReadInputStream[] crisa_){
157 			super(id_, mode_, crisa_);
158 		}
159 
160 		@Override
run()161 		public void run(){
162 			if(crisa==null || crisa.length==0){
163 				//Build from kmers
164 
165 				if(id==0){outstream.print("Seeding with min count = ");}
166 				String comma="";
167 				for(int i=contigPasses-1; i>0; i--){
168 					minCountSeedCurrent=(int)Tools.min(Integer.MAX_VALUE, Tools.max(minCountSeed+i, (long)Math.floor((minCountSeed)*Math.pow(contigPassMult, i)*0.92-0.25) ));
169 					if(id==0){
170 						outstream.print(comma+minCountSeedCurrent);
171 						comma=", ";
172 					}
173 					while(processNextTable(nextTable[i])){}
174 					while(processNextVictims(nextVictims[i])){}
175 				}
176 				//Final pass
177 				minCountSeedCurrent=minCountSeed;
178 				if(id==0){outstream.println(comma+minCountSeedCurrent);}
179 				while(processNextTable(nextTable[0])){}
180 				while(processNextVictims(nextVictims[0])){}
181 			}else{
182 				//Extend reads
183 				for(ConcurrentReadInputStream cris : crisa){
184 					synchronized(crisa){
185 						if(!cris.started()){
186 							cris.start();
187 						}
188 					}
189 					run(cris);
190 				}
191 			}
192 		}
193 
processNextTable(AtomicInteger aint)194 		private boolean processNextTable(AtomicInteger aint){
195 			final int tnum=aint.getAndAdd(1);
196 			if(tnum>=tables.ways){return false;}
197 			final HashArrayU1D table=tables.getTable(tnum);
198 			final int max=table.arrayLength();
199 			if(verbose && id==0){outstream.println("Processing table "+tnum+", size "+table.size()+", length "+max);}
200 			for(int cell=0; cell<max; cell++){
201 				if(verbose && id==0){outstream.println("Processing cell "+cell);}
202 				int x=processCell(table, cell, myKmer);
203 			}
204 			return true;
205 		}
206 
processNextVictims(AtomicInteger aint)207 		private boolean processNextVictims(AtomicInteger aint){
208 			final int tnum=aint.getAndAdd(1);
209 			if(tnum>=tables.ways){return false;}
210 			final HashArrayU1D table=tables.getTable(tnum);
211 			final HashForestU forest=table.victims();
212 			if(verbose && id==0){outstream.println("Processing forest "+tnum+", size "+forest.size());}
213 			final int max=forest.arrayLength();
214 			for(int cell=0; cell<max; cell++){
215 				KmerNodeU kn=forest.getNode(cell);
216 				int x=traverseKmerNodeU(kn);
217 			}
218 			return true;
219 		}
220 
processCell(HashArrayU1D table, int cell, Kmer kmer)221 		private int processCell(HashArrayU1D table, int cell, Kmer kmer){
222 			int count=table.readCellValue(cell);
223 			if(count<minCountSeedCurrent){
224 				if(verbose){outstream.println("For cell "+cell+", count="+count);}
225 				return 0;
226 			}
227 
228 			kmer=table.fillKmer(cell, kmer);
229 //			assert(kmer.verify(false));
230 //			assert(kmer.verify(true));
231 
232 			if(verbose){outstream.println("id="+id+" processing cell "+cell+"; \tkmer="+kmer);}
233 			if(useOwnership){
234 				int owner=table.getCellOwner(cell);
235 				if(verbose){outstream.println("Owner is initially "+owner);}
236 				if(owner>-1){return 0;}
237 				owner=table.setOwner(kmer, id, cell);
238 				if(verbose){outstream.println("Owner is now "+owner);}
239 				if(owner!=id){return 0;}
240 			}
241 			return processKmer(kmer);
242 		}
243 
traverseKmerNodeU(KmerNodeU kn)244 		private int traverseKmerNodeU(KmerNodeU kn){
245 			int sum=0;
246 			if(kn!=null){
247 				sum+=processKmerNodeU(kn);
248 				if(kn.left()!=null){
249 					sum+=traverseKmerNodeU(kn.left());
250 				}
251 				if(kn.right()!=null){
252 					sum+=traverseKmerNodeU(kn.right());
253 				}
254 			}
255 			return sum;
256 		}
257 
processKmerNodeU(KmerNodeU kn)258 		private int processKmerNodeU(KmerNodeU kn){
259 			final long[] key=kn.pivot();
260 			final int count=kn.getValue(key);
261 			if(count<minCountSeedCurrent){return 0;}
262 
263 			if(verbose){outstream.println("id="+id+" processing KmerNodeU; \tkmer="+Arrays.toString(key)+"\t"+toText(key, ksmall));}
264 			if(useOwnership){
265 				int owner=kn.getOwner(key);
266 				if(verbose){outstream.println("Owner is initially "+owner);}
267 				if(owner>-1){return 0;}
268 				owner=kn.setOwner(key, id);
269 				if(verbose){outstream.println("Owner is now "+owner);}
270 				if(owner!=id){return 0;}
271 			}
272 
273 			myKmer.setFrom(key);
274 			return processKmer(myKmer);
275 		}
276 
processKmer(Kmer kmer)277 		private int processKmer(Kmer kmer){
278 			Contig contig=makeContig(builderT, kmer, true);
279 			if(contig!=null){
280 				float coverage=tables.calcCoverage(contig, kmer);
281 				if(coverage<minCoverage || coverage>maxCoverage){return 0;}
282 				if(verbose){outstream.println("Added "+contig.length());}
283 				contig.id=(int)contigNum.incrementAndGet();
284 				contigs.add(contig);
285 				return contig.length();
286 			}else{
287 				if(verbose){outstream.println("Created null contig.");}
288 			}
289 			return 0;
290 		}
291 
run(ConcurrentReadInputStream cris)292 		private void run(ConcurrentReadInputStream cris){
293 
294 			ListNum<Read> ln=cris.nextList();
295 			ArrayList<Read> reads=(ln!=null ? ln.list : null);
296 
297 			//While there are more reads lists...
298 			while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning
299 
300 				//For each read (or pair) in the list...
301 				for(int i=0; i<reads.size(); i++){
302 					final Read r1=reads.get(i);
303 					final Read r2=r1.mate;
304 
305 					processReadPair(r1, r2);
306 				}
307 
308 				//Fetch a new read list
309 				cris.returnList(ln);
310 				ln=cris.nextList();
311 				reads=(ln!=null ? ln.list : null);
312 			}
313 			cris.returnList(ln);
314 		}
315 
316 		//TODO: This appears to do read extension but is very confusing.
processReadPair(Read r1, Read r2)317 		private void processReadPair(Read r1, Read r2){
318 			if(verbose){outstream.println("Considering read "+r1.id+" "+new String(r1.bases));}
319 
320 			readsInT++;
321 			basesInT+=r1.length();
322 			if(r2!=null){
323 				readsInT++;
324 				basesInT+=r2.length();
325 			}
326 
327 			if(mode==insertMode){
328 				int x=BBMerge.findOverlapStrict(r1, r2, false);
329 				if(x<1){
330 					x=findInsertSize(r1, r2, rightCounts, myKmer, myKmer2);
331 				}
332 				insertSizes.increment(Tools.max(x, 0));
333 				return;
334 			}
335 
336 			if(ecco && r1!=null && r2!=null && !r1.discarded() && !r2.discarded()){BBMerge.findOverlapStrict(r1, r2, true);}
337 			if(r1!=null){
338 				if(r1.discarded()){
339 					lowqBasesT+=r1.length();
340 					lowqReadsT++;
341 				}else{
342 					byte[] bases=makeContig(r1.bases, builderT, r1.numericID, myKmer);
343 					if(bases!=null){
344 						if(verbose){outstream.println("Added "+bases.length);}
345 						final long num=contigNum.incrementAndGet();
346 						Contig temp=new Contig(bases, "contig_"+num+"_length_"+bases.length, (int)num);
347 						contigs.add(temp);
348 					}
349 				}
350 			}
351 			if(r2!=null){
352 				if(r2.discarded()){
353 					lowqBasesT+=r2.length();
354 					lowqReadsT++;
355 				}else{
356 					byte[] bases=makeContig(r2.bases, builderT, r2.numericID, myKmer);
357 					if(bases!=null){
358 						if(verbose){outstream.println("Added "+bases.length);}
359 						final long num=contigNum.incrementAndGet();
360 						Contig temp=new Contig(bases, "contig_"+num+"_length_"+bases.length, (int)num);
361 						contigs.add(temp);
362 					}
363 				}
364 			}
365 		}
366 
367 		/** From kmers */
makeContig(final ByteBuilder bb, Kmer kmer, boolean alreadyClaimed)368 		private Contig makeContig(final ByteBuilder bb, Kmer kmer, boolean alreadyClaimed){
369 			bb.setLength(0);
370 			bb.appendKmer(kmer);
371 			if(verbose){outstream.println("Filled bb: "+bb);}
372 
373 			final int initialLength=bb.length();
374 			assert(initialLength==kbig);
375 			if(initialLength<kbig){return null;}
376 
377 			boolean success=(alreadyClaimed || !useOwnership ? true : claim(kmer, id));
378 			if(verbose){outstream.println("Thread "+id+" checking owner after setting: "+findOwner(bb, id, kmer));}
379 			if(!success){
380 				assert(bb.length()==kbig);
381 //				release(bb, id); //no need to release
382 				return null;
383 			}
384 			if(verbose  /*|| true*/){outstream.println("Thread "+id+" building contig; initial length "+bb.length());}
385 			if(verbose){outstream.println("Extending to right.");}
386 			final int rightStatus, leftStatus;
387 			float leftRatio=0, rightRatio=0;
388 			{
389 				final int status=extendToRight(bb, leftCounts, rightCounts, id, kmer);
390 
391 				if(status==DEAD_END){
392 					//do nothing
393 				}else if(status==LOOP){//TODO
394 					//special case - handle specially, for a loop with no obvious junction, e.g. long tandem repeat.
395 					//Perhaps, the last kmer should be reclassified as a junction and removed.
396 				}else if(status==BAD_SEED){
397 					assert(bb.length()==kbig);
398 					release(kmer, id);
399 					return null;
400 				}else{
401 					if(bb.length()==kbig){
402 						if(status==BAD_OWNER){
403 							release(kmer, id);
404 							return null;
405 						}else if(isBranchCode(status)){
406 							release(kmer, id);
407 							return null;
408 						}else{
409 							throw new RuntimeException("Bad return value: "+status);
410 						}
411 					}else{
412 						if(status==BAD_OWNER){
413 							release(bb, id, kmer);
414 							return null;
415 						}else if(status==F_BRANCH || status==D_BRANCH){
416 							rightRatio=calcRatio(rightCounts);
417 						}else if(status==B_BRANCH){
418 							rightRatio=calcRatio(leftCounts);
419 						}else{
420 							throw new RuntimeException("Bad return value: "+status);
421 						}
422 					}
423 				}
424 				rightStatus=status;
425 			}
426 
427 //			success=extendToRight(bb, leftCounts, rightCounts, id, kmer);
428 //			if(!success){
429 //				release(bb, id, kmer);
430 //				return null;
431 //			}
432 			bb.reverseComplementInPlace();
433 			if(verbose  /*|| true*/){outstream.println("Extending rcomp to right; current length "+bb.length());}
434 
435 			{
436 				final int status=extendToRight(bb, leftCounts, rightCounts, id, kmer);
437 
438 				if(status==DEAD_END){
439 					//do nothing
440 				}else if(status==LOOP){//TODO
441 					//special case - handle specially, for a loop with no obvious junction, e.g. long tandem repeat.
442 					//Perhaps, the last kmer should be reclassified as a junction and removed.
443 				}else if(status==BAD_SEED){
444 					assert(false) : bb;//This should never happen.
445 					assert(bb.length()==kbig);
446 					release(kmer, id);
447 					return null;
448 				}else{
449 					if(status==BAD_OWNER){
450 						release(bb, id, kmer);
451 						return null;
452 					}else if(status==F_BRANCH || status==D_BRANCH){
453 						leftRatio=calcRatio(rightCounts);
454 					}else if(status==B_BRANCH){
455 						leftRatio=calcRatio(leftCounts);
456 					}else{
457 						throw new RuntimeException("Bad return value: "+status);
458 					}
459 				}
460 				leftStatus=status;
461 			}
462 //			success=extendToRight(bb, leftCounts, rightCounts, id, kmer);
463 //			if(!success){
464 //				release(bb, id, kmer);
465 //				return null;
466 //			}
467 			if(verbose  /*|| true*/){outstream.println("Final length for thread "+id+": "+bb.length());}
468 			//				if(useOwnership && THREADS==1){assert(claim(bases, bases.length, id, rid));}
469 			success=(useOwnership ? doubleClaim(bb, id, kmer) : true);
470 			if(verbose  /*|| true*/){outstream.println("Success for thread "+id+": "+success);}
471 
472 			if(trimEnds>0){bb.trimByAmount(trimEnds, trimEnds);}
473 			else if(trimCircular && leftStatus==LOOP && rightStatus==LOOP){bb.trimByAmount(0, kbig-1);}
474 			if(bb.length()>=initialLength+minExtension && (bb.length()>=minContigLen || popBubbles)){
475 				if(success){
476 					bb.reverseComplementInPlace();
477 					byte[] bases=bb.toBytes();
478 					Contig c=new Contig(bases);
479 					c.leftCode=leftStatus;
480 					c.rightCode=rightStatus;
481 					c.rightRatio=rightRatio;
482 					c.leftRatio=leftRatio;
483 					if(!c.canonical()){c.rcomp();}
484 					return c;
485 				}else{
486 					//					assert(false) : bb.length()+", "+id;
487 					release(bb, id, kmer);
488 					return null;
489 				}
490 			}
491 			if(verbose  /*|| true*/){outstream.println("Contig was too short for "+id+": "+bb.length());}
492 			return null;
493 		}
494 
495 		/** From a seed */
makeContig(final byte[] bases, final ByteBuilder bb, long rid, final Kmer kmer)496 		private byte[] makeContig(final byte[] bases, final ByteBuilder bb, long rid, final Kmer kmer){
497 			if(bases==null || bases.length<kbig){return null;}
498 //			if(verbose  /*|| true*/){outstream.println("Thread "+id+" checking owner: "+findOwner(bases, bases.length, id));}
499 			int owner=useOwnership ? findOwner(bases, bases.length, id, kmer) : -1;
500 			if(owner>=id){return null;}
501 			boolean success=(useOwnership ? claim(bases, bases.length, id, true, kmer) : true);
502 			if(verbose  /*|| true*/){outstream.println("Thread "+id+" checking owner after setting: "+findOwner(bases, bases.length, id, kmer));}
503 			if(!success){
504 				release(bases, bases.length, id, kmer);
505 				return null;
506 			}
507 			if(verbose  /*|| true*/){outstream.println("Thread "+id+" building contig; initial length "+bases.length);}
508 			bb.setLength(0);
509 			bb.append(bases);
510 			if(verbose){outstream.println("Extending to right.");}
511 			{
512 				final int status=extendToRight(bb, leftCounts, rightCounts, id, kmer);
513 
514 				if(status==DEAD_END){
515 					//do nothing
516 				}else if(status==LOOP){//TODO
517 					//special case - handle specially, for a loop with no obvious junction, e.g. long tandem repeat.
518 					//Perhaps, the last kmer should be reclassified as a junction and removed.
519 				}else if(status==BAD_SEED){
520 					//do nothing
521 				}else{
522 					if(status==BAD_OWNER){
523 						release(bb.array, bb.length(), id, kmer);
524 						return null;
525 					}else if(isBranchCode(status)){
526 						//do nothing
527 					}else{
528 						throw new RuntimeException("Bad return value: "+status);
529 					}
530 				}
531 			}
532 //			success=extendToRight(bb, leftCounts, rightCounts, id, kmer);
533 //			if(!success){
534 //				release(bb.array, bb.length(), id, kmer);
535 //				return null;
536 //			}
537 			bb.reverseComplementInPlace();
538 			if(verbose  /*|| true*/){outstream.println("Extending rcomp to right; current length "+bb.length());}
539 			{
540 				final int status=extendToRight(bb, leftCounts, rightCounts, id, kmer);
541 
542 				if(status==DEAD_END){
543 					//do nothing
544 				}else if(status==LOOP){//TODO
545 					//special case - handle specially, for a loop with no obvious junction, e.g. long tandem repeat.
546 					//Perhaps, the last kmer should be reclassified as a junction and removed.
547 				}else if(status==BAD_SEED){
548 					//do nothing
549 				}else{
550 					if(status==BAD_OWNER){
551 						release(bb.array, bb.length(), id, kmer);
552 						return null;
553 					}else if(isBranchCode(status)){
554 						//do nothing
555 					}else{
556 						throw new RuntimeException("Bad return value: "+status);
557 					}
558 				}
559 			}
560 //			success=extendToRight(bb, leftCounts, rightCounts, id, kmer);
561 //			if(!success){
562 //				release(bb.array, bb.length(), id, kmer);
563 //				return null;
564 //			}
565 			if(verbose  /*|| true*/){outstream.println("Final length for thread "+id+": "+bb.length());}
566 			//				if(useOwnership && THREADS==1){assert(claim(bases, bases.length, id, rid));}
567 			success=(useOwnership ? doubleClaim(bb, id, kmer) : true);
568 			if(verbose  /*|| true*/){outstream.println("Success for thread "+id+": "+success);}
569 			if(bb.length()>=bases.length+minExtension && (bb.length()>=minContigLen || popBubbles)){
570 				if(success){
571 					bb.reverseComplementInPlace();
572 					return bb.toBytes();
573 				}else{
574 					//					assert(false) : bb.length()+", "+id;
575 					release(bb.array, bb.length(), id, kmer);
576 					return null;
577 				}
578 			}
579 			if(verbose  /*|| true*/){outstream.println("Contig was too short for "+id+": "+bb.length());}
580 			return null;
581 		}
582 
583 		/*--------------------------------------------------------------*/
584 
585 		private final Kmer myKmer=new Kmer(kbig);
586 		private final Kmer myKmer2=new Kmer(kbig);
587 
588 	}
589 
590 
591 	/*--------------------------------------------------------------*/
592 	/*----------------       Contig Processing      ----------------*/
593 	/*--------------------------------------------------------------*/
594 
595 	@Override
makeProcessContigThread(ArrayList<Contig> contigs, AtomicInteger next)596 	ProcessContigThread makeProcessContigThread(ArrayList<Contig> contigs, AtomicInteger next){
597 		return new ProcessContigThread(contigs, next);
598 	}
599 
600 	@Override
initializeContigs(ArrayList<Contig> contigs)601 	public void initializeContigs(ArrayList<Contig> contigs){
602 		tables.clearOwnership();
603 		tables.initializeOwnership();
604 		final Kmer kmer=new Kmer(kbig);
605 		{
606 			int cnum=0;
607 			for(Contig c : contigs){
608 				c.id=cnum;
609 //				if(c.leftBranch()){
610 				if(true){
611 					c.leftKmer(kmer);
612 					tables.claim(kmer, cnum);
613 				}
614 //				if(c.rightBranch()){
615 				if(true){
616 					c.rightKmer(kmer);
617 					tables.claim(kmer, cnum);
618 				}
619 				cnum++;
620 			}
621 		}
622 	}
623 
624 	class ProcessContigThread extends AbstractProcessContigThread {
625 
ProcessContigThread(ArrayList<Contig> contigs_, AtomicInteger next_)626 		ProcessContigThread(ArrayList<Contig> contigs_, AtomicInteger next_){
627 			super(contigs_, next_);
628 			kmerA=new Kmer(kbig);
629 			kmerB=new Kmer(kbig);
630 			kmerC=new Kmer(kbig);
631 			lastExitCondition=BAD_SEED;
632 		}
633 
634 		@Override
processContigLeft(Contig c, int[] leftCounts, int[] rightCounts, int[] extraCounts, ByteBuilder bb)635 		public void processContigLeft(Contig c, int[] leftCounts, int[] rightCounts, int[] extraCounts, ByteBuilder bb){
636 //			System.err.println("processContigLeft: "+c.id+", "+c.leftCode+", "+c.leftEdges);
637 			if(c.leftCode==DEAD_END){return;}
638 
639 			final Kmer kmer0=c.leftKmer(kmerA);
640 			final Kmer kmer=kmerB;
641 
642 			assert(tables.getCount(kmer0)>0);
643 			assert(tables.findOwner(kmer0)==c.id) : tables.findOwner(kmer0)+", "+c.id;
644 
645 			int leftMaxPos=fillLeftCounts(kmer0, leftCounts);
646 			int leftMax=leftCounts[leftMaxPos];
647 			int leftSecondPos=Tools.secondHighestPosition(leftCounts);
648 			int leftSecond=leftCounts[leftSecondPos];
649 
650 			for(int x=0; x<leftCounts.length; x++){
651 				bb.clear();
652 				final int count=leftCounts[x];
653 				int target=-1;
654 				if(count>0 && isJunction(leftMax, count)){
655 					kmer.setFrom(kmer0);
656 					kmer.addLeftNumeric(x);
657 					assert(tables.getCount(kmer)==count) : count+", "+tables.getCount(kmer);
658 					bb.append(AminoAcid.numberToBase[x]);
659 					target=exploreRight(kmer, extraCounts, rightCounts, bb);
660 					if(verbose){
661 						outstream.println(c.id+"L_F: x="+x+", cnt="+count+", dest="+target
662 								+", "+codeStrings[lastExitCondition]+", len="+lastLength+", orient="+lastOrientation);
663 					}
664 				}
665 				if(target>=0){
666 					Edge se=new Edge(c.id, target, lastLength, lastOrientation, count, bb.toBytes());
667 //					System.err.println("Adding "+se+"; x="+x+"; bb="+bb);
668 					c.addLeftEdge(se);
669 					edgesMadeT++;
670 				}
671 			}
672 		}
673 
674 		@Override
processContigRight(Contig c, int[] leftCounts, int[] rightCounts, int[] extraCounts, ByteBuilder bb)675 		public void processContigRight(Contig c, int[] leftCounts, int[] rightCounts, int[] extraCounts, ByteBuilder bb){
676 //			System.err.println("processContigRight: "+c.id+", "+c.rightCode);
677 			if(c.rightCode==DEAD_END){return;}
678 
679 			final Kmer kmer0=c.rightKmer(kmerA);
680 			final Kmer kmer=kmerB;
681 
682 			int rightMaxPos=fillRightCounts(kmer0, rightCounts);
683 			int rightMax=rightCounts[rightMaxPos];
684 			int rightSecondPos=Tools.secondHighestPosition(rightCounts);
685 			int rightSecond=rightCounts[rightSecondPos];
686 
687 			for(int x=0; x<rightCounts.length; x++){
688 				bb.clear();
689 				final int count=rightCounts[x];
690 				int target=-1;
691 				if(count>0 && isJunction(rightMax, count)){
692 					kmer.setFrom(kmer0);
693 					kmer.addRightNumeric(x);
694 					assert(tables.getCount(kmer)==count) : count+", "+tables.getCount(kmer);
695 					bb.append(AminoAcid.numberToBase[x]);
696 					target=exploreRight(kmer, leftCounts, extraCounts, bb);
697 					if(verbose){
698 						outstream.println(c.id+"R_F: x="+x+", cnt="+count+", dest="+target+", "+codeStrings[lastExitCondition]+", len="+lastLength+", orient="+lastOrientation);
699 					}
700 				}
701 				if(target>=0){
702 					lastOrientation|=1;
703 					Edge se=new Edge(c.id, target, lastLength, lastOrientation, count, bb.toBytes());
704 					c.addRightEdge(se);
705 					edgesMadeT++;
706 				}
707 			}
708 		}
709 
exploreRight(Kmer kmer, int[] leftCounts, int[] rightCounts, ByteBuilder bb)710 		private int exploreRight(Kmer kmer, int[] leftCounts, int[] rightCounts, ByteBuilder bb){
711 			final Kmer temp=kmerC;
712 			int length=1;
713 			int owner=-1;
714 			lastTarget=-1;
715 			for(; length<500; length++){
716 				owner=tables.findOwner(kmer);
717 				if(owner>=0){break;}
718 
719 				final int leftMaxPos=fillLeftCounts(kmer, leftCounts);
720 				final int leftMax=leftCounts[leftMaxPos];
721 				final int leftSecondPos=Tools.secondHighestPosition(leftCounts);
722 				final int leftSecond=leftCounts[leftSecondPos];
723 				if(isJunction(leftMax, leftSecond)){
724 					lastExitCondition=B_BRANCH;
725 					lastLength=length;
726 					return -1;
727 				}
728 
729 				final int rightMaxPos=fillRightCounts(kmer, rightCounts);
730 				final int rightMax=rightCounts[rightMaxPos];
731 				final int rightSecondPos=Tools.secondHighestPosition(rightCounts);
732 				final int rightSecond=rightCounts[rightSecondPos];
733 
734 //				outstream.println("* "+Arrays.toString(leftCounts)+", "+Arrays.toString(rightCounts)+", "+rightMaxPos);
735 
736 				if(rightMax<minCountExtend){
737 //					assert(false) : Arrays.toString(rightCounts);
738 					lastExitCondition=DEAD_END;
739 					lastLength=length;
740 					return -1;
741 				}else if(isJunction(rightMax, rightSecond)){
742 					lastExitCondition=F_BRANCH;
743 					lastLength=length;
744 					return -1;
745 				}
746 				bb.append(AminoAcid.numberToBase[rightMaxPos]);
747 				long x=rightMaxPos;
748 				kmer.addRightNumeric(x);
749 			}
750 			lastLength=length;
751 			lastTarget=owner;
752 			if(owner>=0){
753 				lastExitCondition=SUCCESS;
754 				Contig dest=contigs.get(owner);
755 				dest.leftKmer(temp);
756 
757 //				if(temp.equals(kmer)){
758 //					lastOrientation=temp.sameOrientation(kmer) ? 0 : 1;
759 //				}else{
760 //					dest.rightKmer(temp);
761 //					if(temp.equals(kmer)){
762 //						lastOrientation=temp.sameOrientation(kmer) ? 2 : 3;
763 //					}
764 //				}
765 
766 				if(temp.equals(kmer)){
767 					lastOrientation=0;
768 				}else{
769 					dest.rightKmer(temp);
770 					if(temp.equals(kmer)){
771 						lastOrientation=2;
772 					}else{
773 						assert(false);
774 					}
775 				}
776 			}else{
777 				lastExitCondition=TOO_LONG;
778 			}
779 			return owner;
780 		}
781 
782 		final Kmer kmerA, kmerB, kmerC;
783 
784 	}
785 
786 
787 	/*--------------------------------------------------------------*/
788 	/*----------------       Extension Methods      ----------------*/
789 	/*--------------------------------------------------------------*/
790 
791 
findInsertSize(Read r1, Read r2, int[] rightCounts, Kmer kmer1, Kmer kmer2)792 	public int findInsertSize(Read r1, Read r2, int[] rightCounts, Kmer kmer1, Kmer kmer2){
793 		kmer1=tables.rightmostKmer(r1.bases, r1.length(), kmer1);
794 		kmer2=tables.rightmostKmer(r2.bases, r2.length(), kmer2);
795 		if(kmer1==null || kmer2==null){return -1;}
796 		final int x=measureInsert(kmer1, kmer2, 24000, rightCounts);
797 		if(x<0){return -1;}
798 		return r1.length()+r2.length()+x-kbig;//TODO: May be off by 1.
799 	}
800 
801 	/* (non-Javadoc)
802 	 * @see assemble.Tadpole#extendRead(stream.Read, stream.ByteBuilder, int[], int[], int)
803 	 */
804 	@Override
extendRead(Read r, ByteBuilder bb, int[] leftCounts, int[] rightCounts, int distance)805 	public int extendRead(Read r, ByteBuilder bb, int[] leftCounts, int[] rightCounts, int distance) {
806 		return extendRead(r, bb, leftCounts, rightCounts, distance, getLocalKmer());
807 	}
808 
809 	@Override
extendRead(Read r, ByteBuilder bb, int[] leftCounts, int[] rightCounts, int distance, final Kmer kmer)810 	public int extendRead(Read r, ByteBuilder bb, int[] leftCounts, int[] rightCounts, int distance, final Kmer kmer){
811 		final int initialLen=r.length();
812 		if(initialLen<kbig){return 0;}
813 		bb.setLength(0);
814 		bb.append(r.bases);
815 		Kmer temp=tables.rightmostKmer(bb, kmer);
816 		if(temp==null){return 0;}
817 		final int extension=extendToRight2_inner(bb, leftCounts, rightCounts, distance, true, kmer);
818 		if(extension>0){
819 			r.bases=bb.toBytes();
820 			if(r.quality!=null){
821 				final byte q=Shared.FAKE_QUAL;
822 				r.quality=KillSwitch.copyOf(r.quality, r.bases.length);
823 				for(int i=initialLen; i<r.quality.length; i++){
824 					r.quality[i]=q;
825 				}
826 			}
827 		}
828 		assert(extension==r.length()-initialLen);
829 		return extension;
830 	}
831 
832 	/** Returns distance between the two kmers, or -1 */
measureInsert(final Kmer kmer1, final Kmer kmer2, final int maxlen, final int[] rightCounts)833 	public int measureInsert(final Kmer kmer1, final Kmer kmer2, final int maxlen, final int[] rightCounts){
834 		int len=0;
835 
836 		{
837 			int count=tables.getCount(kmer2);
838 			if(count<minCountSeed){return -1;}
839 		}
840 
841 		int count=tables.getCount(kmer1);
842 		if(count<minCountSeed){return -1;}
843 		if(count<minCountSeed){
844 			if(verbose){outstream.println("Returning because count was too low: "+count);}
845 			return -1;
846 		}
847 
848 		int rightMaxPos=fillRightCounts(kmer1, rightCounts);
849 		int rightMax=rightCounts[rightMaxPos];
850 //		int rightSecondPos=Tools.secondHighestPosition(rightCounts);
851 //		int rightSecond=rightCounts[rightSecondPos];
852 
853 		if(rightMax<minCountExtend){return -1;}
854 //		if(isJunction(rightMax, rightSecond)){return -1;}
855 
856 		while(!kmer1.equals(kmer2) && len<maxlen){
857 
858 			//Generate the new kmer
859 //			final byte b=AminoAcid.numberToBase[rightMaxPos];
860 			final long x=rightMaxPos;
861 			kmer1.addRightNumeric(x);
862 
863 			assert(tables.getCount(kmer1)==rightMax);
864 			count=rightMax;
865 
866 			assert(count>=minCountExtend) : count;
867 
868 			rightMaxPos=fillRightCounts(kmer1, rightCounts);
869 			rightMax=rightCounts[rightMaxPos];
870 //			rightSecondPos=Tools.secondHighestPosition(rightCounts);
871 //			rightSecond=rightCounts[rightSecondPos];
872 
873 			if(verbose){
874 				outstream.println("kmer: "+kmer1);
875 				outstream.println("Counts: "+count+", "+Arrays.toString(rightCounts));
876 				outstream.println("rightMaxPos="+rightMaxPos);
877 				outstream.println("rightMax="+rightMax);
878 //				outstream.println("rightSecondPos="+rightSecondPos);
879 //				outstream.println("rightSecond="+rightSecond);
880 			}
881 
882 			if(rightMax<minCountExtend){
883 				if(verbose){outstream.println("Breaking because highest right was too low:"+rightMax);}
884 				break;
885 			}
886 
887 //			if(isJunction(rightMax, rightSecond)){return -1;}
888 
889 			len++;
890 		}
891 		return len>=maxlen ? -1 : len;
892 	}
893 
894 
895 
896 	/**
897 	 * Extend these bases into a contig.
898 	 * Stops at both left and right junctions.
899 	 * Claims ownership.
900 	 */
extendToRight(final ByteBuilder bb, final int[] leftCounts, final int[] rightCounts, final int id, Kmer kmer)901 	public int extendToRight(final ByteBuilder bb, final int[] leftCounts, final int[] rightCounts, final int id, Kmer kmer){
902 		if(bb.length()<kbig){return BAD_SEED;}
903 		kmer.clear();
904 
905 		kmer=tables.rightmostKmer(bb, kmer);
906 		if(kmer==null || kmer.len<kbig){return BAD_SEED;}
907 		assert(kmer.len==kbig);
908 
909 		/* Now the trailing kmer has been initialized. */
910 
911 		if(verbose){
912 			outstream.println("extendToRight kmer="+kmer+", bb="+bb);
913 		}
914 
915 		HashArrayU1D table=tables.getTable(kmer);
916 		int count=table.getValue(kmer);
917 		if(count<minCountSeed){
918 			if(verbose){outstream.println("Returning because count was too low: "+count);}
919 			return BAD_SEED;
920 		}
921 
922 		int owner=(useOwnership ? table.getOwner(kmer) : id);
923 		if(verbose){outstream.println("Owner: "+owner);}
924 		if(owner>id){return BAD_OWNER;}
925 
926 		int leftMaxPos=0;
927 		int leftMax=minCountExtend;
928 		int leftSecondPos=1;
929 		int leftSecond=0;
930 
931 		if(leftCounts!=null){
932 			leftMaxPos=fillLeftCounts(kmer, leftCounts);
933 			leftMax=leftCounts[leftMaxPos];
934 			leftSecondPos=Tools.secondHighestPosition(leftCounts);
935 			leftSecond=leftCounts[leftSecondPos];
936 		}
937 
938 		int rightMaxPos=fillRightCounts(kmer, rightCounts);
939 		int rightMax=rightCounts[rightMaxPos];
940 		int rightSecondPos=Tools.secondHighestPosition(rightCounts);
941 		int rightSecond=rightCounts[rightSecondPos];
942 
943 		if(verbose){
944 			outstream.println("kmer: "+toText(kmer));
945 			outstream.println("Counts: "+count+", "+(leftCounts==null ? "null" : Arrays.toString(leftCounts))+", "+Arrays.toString(rightCounts));
946 			outstream.println("leftMaxPos="+leftMaxPos);
947 			outstream.println("leftMax="+leftMax);
948 			outstream.println("leftSecondPos="+leftSecondPos);
949 			outstream.println("leftSecond="+leftSecond);
950 			outstream.println("rightMaxPos="+rightMaxPos);
951 			outstream.println("rightMax="+rightMax);
952 			outstream.println("rightSecondPos="+rightSecondPos);
953 			outstream.println("rightSecond="+rightSecond);
954 		}
955 
956 		if(rightMax<minCountExtend){return DEAD_END;}
957 		if(isJunction(rightMax, rightSecond)){//Returning here is fine because nothing can be added
958 			if(verbose){outstream.println("B: Breaking because isJunction("+rightMax+", "+rightSecond+", "+leftMax+", "+leftSecond+")");}
959 			return isJunction(leftMax, leftSecond) ? D_BRANCH : F_BRANCH;
960 		}
961 		if(isJunction(leftMax, leftSecond)){//Returning here is necessary, but this should mean the the length is exactly K
962 			assert(bb.length()==kbig) : bb.length()+", "+kbig+", "+leftMax+", "+leftSecond;
963 			if(verbose){outstream.println("B: Breaking because isJunction("+rightMax+", "+rightSecond+", "+leftMax+", "+leftSecond+")");}
964 			return B_BRANCH;
965 		}
966 
967 		if(useOwnership){
968 			owner=table.setOwner(kmer, id);
969 			if(verbose){outstream.println("A. Owner is now "+id+" for kmer "+kmer);}
970 			if(owner!=id){
971 				if(verbose){outstream.println("Returning early because owner was "+owner+" for thread "+id+".");}
972 				return BAD_OWNER;
973 			}
974 		}
975 
976 		final int maxLen=Tools.min((extendRight<0 ? maxContigLen : bb.length()+extendRight), maxContigLen);
977 
978 		while(owner==id && bb.length()<maxLen){
979 
980 			//Generate the new kmer
981 			final byte b=AminoAcid.numberToBase[rightMaxPos];
982 
983 			//Now consider the next kmer
984 			final long evicted=kmer.addRightNumeric(rightMaxPos);
985 
986 			table=tables.getTable(kmer);
987 
988 			assert(table.getValue(kmer)==rightMax || rightMax==0);
989 			count=rightMax;
990 
991 			assert(count>=minCountExtend) : count;
992 
993 			if(leftCounts!=null){
994 				leftMaxPos=fillLeftCounts(kmer, leftCounts);
995 				leftMax=leftCounts[leftMaxPos];
996 				leftSecondPos=Tools.secondHighestPosition(leftCounts);
997 				leftSecond=leftCounts[leftSecondPos];
998 			}
999 
1000 			rightMaxPos=fillRightCounts(kmer, rightCounts);
1001 			rightMax=rightCounts[rightMaxPos];
1002 			rightSecondPos=Tools.secondHighestPosition(rightCounts);
1003 			rightSecond=rightCounts[rightSecondPos];
1004 
1005 			if(verbose){
1006 				outstream.println("kmer: "+toText(kmer));
1007 				outstream.println("Counts: "+count+", "+(leftCounts==null ? "null" : Arrays.toString(leftCounts))+", "+Arrays.toString(rightCounts));
1008 				outstream.println("leftMaxPos="+leftMaxPos);
1009 				outstream.println("leftMax="+leftMax);
1010 				outstream.println("leftSecondPos="+leftSecondPos);
1011 				outstream.println("leftSecond="+leftSecond);
1012 				outstream.println("rightMaxPos="+rightMaxPos);
1013 				outstream.println("rightMax="+rightMax);
1014 				outstream.println("rightSecondPos="+rightSecondPos);
1015 				outstream.println("rightSecond="+rightSecond);
1016 			}
1017 
1018 			final boolean fbranch=isJunction(rightMax, rightSecond);
1019 			final boolean bbranch=isJunction(leftMax, leftSecond);
1020 			final boolean hbranch=(leftCounts!=null && leftMaxPos!=evicted && branchMult1>0);
1021 			if(bbranch){
1022 				if(verbose){outstream.println("B: Breaking - isJunction("+rightMax+", "+rightSecond+", "+leftMax+", "+leftSecond+"); "
1023 						+ "("+fbranch+", "+bbranch+", "+hbranch+")");}
1024 				return fbranch ? D_BRANCH : B_BRANCH;
1025 			}else if(hbranch){
1026 				if(verbose){outstream.println("B: Breaking - isJunction("+rightMax+", "+rightSecond+", "
1027 						+ ""+leftMax+", "+leftSecond+"); ("+fbranch+", "+bbranch+", "+hbranch+")");}
1028 				if(verbose){outstream.println("Hidden branch: leftMaxPos!=evicted ("+leftMaxPos+"!="+evicted+")" +
1029 						"\nleftMaxPos="+leftMaxPos+", leftMax="+leftMax+", leftSecondPos="+leftSecondPos+", leftSecond="+leftSecond);}
1030 				return fbranch ? D_BRANCH : B_BRANCH;
1031 			}
1032 
1033 			bb.append(b);
1034 			if(verbose){outstream.println("Added base "+(char)b);}
1035 
1036 			if(useOwnership){
1037 				owner=table.getOwner(kmer);
1038 				if(verbose){outstream.println("Owner is initially "+id+" for key "+kmer);}
1039 				if(owner==id){//loop detection
1040 					if(verbose  /*|| true*/){
1041 //						outstream.println(new String(bb.array, bb.length()-31, 31));
1042 						outstream.println(bb);
1043 						outstream.println(toText(kmer));
1044 						outstream.println("Breaking because owner was "+owner+" for thread "+id+".");
1045 					}
1046 					return LOOP;
1047 				}
1048 				owner=table.setOwner(kmer, id);
1049 				if(verbose){outstream.println("B. Owner is now "+id+" for kmer "+kmer);}
1050 			}
1051 
1052 			if(fbranch){
1053 				if(verbose){outstream.println("B: Breaking - isJunction("+rightMax+", "+rightSecond+", "+leftMax+", "+leftSecond+"); "
1054 						+ "("+fbranch+", "+bbranch+", "+hbranch+")");}
1055 				return F_BRANCH;
1056 			}else if(rightMax<minCountExtend){
1057 				if(verbose){outstream.println("B: Breaking because highest right was too low:"+rightMax);}
1058 				return DEAD_END;
1059 			}
1060 		}
1061 		assert(owner!=id);
1062 		if(verbose  /*|| true*/){
1063 			outstream.println("Current contig: "+bb+"\nReturning because owner was "+owner+" for thread "+id+".");
1064 		}
1065 		return BAD_OWNER;
1066 	}
1067 
1068 	@Override
extendToRight2(final ByteBuilder bb, final int[] leftCounts, final int[] rightCounts, final int distance, boolean includeJunctionBase)1069 	public int extendToRight2(final ByteBuilder bb, final int[] leftCounts, final int[] rightCounts, final int distance, boolean includeJunctionBase){
1070 		initializeThreadLocals();
1071 		return extendToRight2(bb, leftCounts, rightCounts, distance, includeJunctionBase, getLocalKmer());
1072 	}
1073 
1074 	@Override
extendToRight2(final ByteBuilder bb, final int[] leftCounts, final int[] rightCounts, final int distance, boolean includeJunctionBase, Kmer kmer)1075 	public int extendToRight2(final ByteBuilder bb, final int[] leftCounts, final int[] rightCounts, final int distance, boolean includeJunctionBase, Kmer kmer){
1076 		if(verbose || verbose2){outstream.println("Entering extendToRight2 (no kmers).");}
1077 		final int initialLength=bb.length();
1078 		if(initialLength<kbig){return 0;}
1079 		kmer.clear();
1080 
1081 		kmer=tables.rightmostKmer(bb, kmer);
1082 		if(kmer==null || kmer.len<kbig){return 0;}
1083 		assert(kmer.len==kbig);
1084 
1085 		return extendToRight2_inner(bb, leftCounts, rightCounts, distance, includeJunctionBase, kmer);
1086 	}
1087 
1088 	/**
1089 	 * Extend these bases to the right by at most 'distance'.
1090 	 * Stops at right junctions only.
1091 	 * Does not claim ownership.
1092 	 */
extendToRight2_inner(final ByteBuilder bb, final int[] leftCounts, final int[] rightCounts, final int distance, boolean includeJunctionBase, Kmer kmer)1093 	private int extendToRight2_inner(final ByteBuilder bb, final int[] leftCounts, final int[] rightCounts, final int distance, boolean includeJunctionBase, Kmer kmer){
1094 		if(verbose || verbose2){outstream.println("Entering extendToRight2_inner (with kmers).");}
1095 		final int initialLength=bb.length();
1096 		assert(kmer.len==kbig) : kmer.len+", "+kbig+", "+bb.length();
1097 
1098 		HashArrayU1D table=tables.getTable(kmer);
1099 		int count=table.getValue(kmer);
1100 		if(count<minCountSeed){
1101 			if(verbose || verbose2){outstream.println("Returning because count was too low: "+count+"<"+minCountSeed);}
1102 			return 0;
1103 		}
1104 
1105 		int leftMaxPos=0;
1106 		int leftMax=minCountExtend;
1107 		int leftSecondPos=1;
1108 		int leftSecond=0;
1109 
1110 		if(leftCounts!=null){
1111 			leftMaxPos=fillLeftCounts(kmer, leftCounts);
1112 			leftMax=leftCounts[leftMaxPos];
1113 			leftSecondPos=Tools.secondHighestPosition(leftCounts);
1114 			leftSecond=leftCounts[leftSecondPos];
1115 		}
1116 
1117 		int rightMaxPos=fillRightCounts(kmer, rightCounts);
1118 		int rightMax=rightCounts[rightMaxPos];
1119 		int rightSecondPos=Tools.secondHighestPosition(rightCounts);
1120 		int rightSecond=rightCounts[rightSecondPos];
1121 
1122 		if(verbose){
1123 			outstream.println("kmer: "+toText(kmer));
1124 			outstream.println("Counts: "+count+", "+Arrays.toString(rightCounts));
1125 			outstream.println("rightMaxPos="+rightMaxPos);
1126 			outstream.println("rightMax="+rightMax);
1127 			outstream.println("rightSecondPos="+rightSecondPos);
1128 			outstream.println("rightSecond="+rightSecond);
1129 		}
1130 
1131 		if(rightMax<minCountExtend){
1132 			if(verbose || verbose2){outstream.println("Returning because rightMax was too low: "+rightMax+"<"+minCountExtend+"\n"+count+", "+Arrays.toString(rightCounts));}
1133 			return 0;
1134 		}
1135 		if(isJunction(rightMax, rightSecond, leftMax, leftSecond)){
1136 			if(verbose || verbose2){outstream.println("Returning because isJunction: "+rightMax+", "+rightSecond+"; "+leftMax+", "+leftSecond);}
1137 			return 0;
1138 		}
1139 
1140 		final int maxLen=Tools.min(bb.length()+distance, maxContigLen);
1141 
1142 		while(bb.length()<maxLen){
1143 
1144 			//Generate the new kmer
1145 			final byte b=AminoAcid.numberToBase[rightMaxPos];
1146 
1147 			//Now consider the next kmer
1148 			final long evicted=kmer.addRightNumeric(rightMaxPos);
1149 
1150 			table=tables.getTable(kmer);
1151 
1152 			assert(table.getValue(kmer)==rightMax || rightMax==0);
1153 			count=rightMax;
1154 
1155 			assert(count>=minCountExtend) : count;
1156 
1157 			if(leftCounts!=null){
1158 				leftMaxPos=fillLeftCounts(kmer, leftCounts);
1159 				leftMax=leftCounts[leftMaxPos];
1160 				leftSecondPos=Tools.secondHighestPosition(leftCounts);
1161 				leftSecond=leftCounts[leftSecondPos];
1162 			}
1163 
1164 			rightMaxPos=fillRightCounts(kmer, rightCounts);
1165 			rightMax=rightCounts[rightMaxPos];
1166 			rightSecondPos=Tools.secondHighestPosition(rightCounts);
1167 			rightSecond=rightCounts[rightSecondPos];
1168 
1169 			if(verbose){
1170 				outstream.println("kmer: "+toText(kmer));
1171 				outstream.println("Counts: "+count+", "+Arrays.toString(rightCounts));
1172 				outstream.println("rightMaxPos="+rightMaxPos);
1173 				outstream.println("rightMax="+rightMax);
1174 				outstream.println("rightSecondPos="+rightSecondPos);
1175 				outstream.println("rightSecond="+rightSecond);
1176 			}
1177 
1178 			if(isJunction(rightMax, rightSecond, leftMax, leftSecond)){
1179 				if(includeJunctionBase && kmer.key()==kmer.array2()){//TODO: Does not work on palindromes.
1180 					bb.append(b);
1181 					if(verbose){outstream.println("Added base "+(char)b);}
1182 				}
1183 				break;
1184 			}
1185 
1186 			if(leftCounts!=null && leftMaxPos!=evicted){
1187 				if(verbose){outstream.println("B: Breaking because of hidden branch: leftMaxPos!=evicted ("+leftMaxPos+"!="+evicted+")" +
1188 						"\nleftMaxPos="+leftMaxPos+", leftMax="+leftMax+", leftSecondPos="+leftSecondPos+", leftSecond="+leftSecond);}
1189 				if(includeJunctionBase && kmer.key()==kmer.array2()){//TODO: Does not work on palindromes.
1190 					bb.append(b);
1191 					if(verbose){outstream.println("Added base "+(char)b);}
1192 				}
1193 				break;
1194 			}
1195 
1196 			bb.append(b);
1197 			if(verbose){outstream.println("Added base "+(char)b);}
1198 
1199 			if(rightMax<minCountExtend){
1200 				if(verbose || verbose2){outstream.println("C: Breaking because highest right was too low: "+rightMax+"<"+minCountExtend);}
1201 				break;
1202 			}
1203 		}
1204 		if(verbose || verbose2){outstream.println("Extended by "+(bb.length()-initialLength));}
1205 		return bb.length()-initialLength;
1206 	}
1207 
1208 
1209 	/*--------------------------------------------------------------*/
1210 	/*----------------        Junk Detection        ----------------*/
1211 	/*--------------------------------------------------------------*/
1212 
1213 	@Override
isJunk(Read r)1214 	public boolean isJunk(Read r){
1215 		boolean junk=isJunk(r, localRightCounts.get(), getLocalKmer());
1216 		return junk;
1217 	}
1218 
1219 	@Override
isJunk(Read r, final int[] counts, Kmer kmer)1220 	public boolean isJunk(Read r, final int[] counts, Kmer kmer){
1221 		final int blen=r.length();
1222 		if(blen<kbig){return true;}
1223 		final byte[] bases=r.bases;
1224 		kmer.clearFast();
1225 		assert(kmer.len==0);
1226 
1227 		/* Loop through the bases, maintaining a forward and reverse kmer via bitshifts, to get the leftmost kmer */
1228 		for(int i=0; i<kbig; i++){
1229 			kmer.addRight(bases[i]);
1230 		}
1231 
1232 		if(kmer.len>=kbig){
1233 			int maxPos=fillLeftCounts(kmer, counts);
1234 			if(counts[maxPos]>0){return false;}
1235 		}
1236 
1237 		final boolean paired=(r.mateLength()>=kbig);
1238 		int maxDepth=0;
1239 		{
1240 			for(int i=kbig; i<blen; i++){
1241 				kmer.addRight(bases[i]);
1242 				if(kmer.len>=kbig){
1243 					int depth=getCount(kmer);
1244 					if(depth>maxDepth){
1245 						maxDepth=depth;
1246 						if(maxDepth>1 && (!paired || maxDepth>2)){return false;}
1247 					}
1248 				}
1249 			}
1250 		}
1251 
1252 		if(kmer.len>=kbig && !paired){
1253 			int maxPos=fillRightCounts(kmer, counts);
1254 			if(counts[maxPos]>0){return false;}
1255 		}
1256 		return true;
1257 	}
1258 
1259 	@Override
hasKmersAtOrBelow(Read r, int tooLow, final float fraction)1260 	public boolean hasKmersAtOrBelow(Read r, int tooLow, final float fraction){
1261 		return hasKmersAtOrBelow(r, tooLow, fraction, getLocalKmer());
1262 	}
1263 
1264 	@Override
hasKmersAtOrBelow(Read r, final int tooLow, final float fraction, Kmer kmer)1265 	public boolean hasKmersAtOrBelow(Read r, final int tooLow, final float fraction, Kmer kmer){
1266 		final int blen=r.length();
1267 		if(blen<kbig){return true;}
1268 		final byte[] bases=r.bases;
1269 		kmer.clearFast();
1270 		assert(kmer.len==0) : kmer.len+", "+kmer;
1271 
1272 //		outstream.println("\n"+new String(r.bases)+":");
1273 
1274 		final int limit=Tools.max(1, Math.round((bases.length-kbig+1)*fraction));
1275 		int valid=0, invalid=0;
1276 		{
1277 			for(int i=0; i<blen; i++){
1278 				kmer.addRight(bases[i]);
1279 				if(kmer.len>=kbig){
1280 					int depth=getCount(kmer);
1281 //					outstream.println("depth="+depth+", kmer="+kmer);
1282 					if(depth>tooLow){valid++;}
1283 					else{
1284 						invalid++;
1285 						if(invalid>=limit){return true;}
1286 					}
1287 				}
1288 			}
1289 		}
1290 
1291 		//Compensate for nocalls changing the expected kmer count
1292 		final int limit2=Tools.max(1, Math.round((valid+invalid)*fraction));
1293 		return valid<1 || invalid>=limit2;
1294 	}
1295 
1296 
1297 	/*--------------------------------------------------------------*/
1298 	/*----------------       Error Correction       ----------------*/
1299 	/*--------------------------------------------------------------*/
1300 
1301 	@Override
errorCorrect(Read r)1302 	public int errorCorrect(Read r){
1303 		initializeThreadLocals();
1304 		int corrected=errorCorrect(r, localLeftCounts.get(), localRightCounts.get(), localIntList.get(), localIntList2.get(),
1305 				localByteBuilder.get(), localByteBuilder2.get(), localTracker.get(), localBitSet.get(), getLocalKmer(), getLocalKmer2());
1306 		return corrected;
1307 	}
1308 
1309 	@Override
errorCorrect(Read r, final int[] leftCounts, final int[] rightCounts, LongList kmers, IntList counts, IntList counts2, final ByteBuilder bb, final ByteBuilder bb2, final ErrorTracker tracker, final BitSet bs, Kmer kmer, Kmer kmer2)1310 	public int errorCorrect(Read r, final int[] leftCounts, final int[] rightCounts, LongList kmers, IntList counts, IntList counts2,
1311 			final ByteBuilder bb, final ByteBuilder bb2, final ErrorTracker tracker, final BitSet bs, Kmer kmer, Kmer kmer2){
1312 		return errorCorrect(r, leftCounts, rightCounts, counts, counts2, bb, bb2, tracker, bs, kmer, kmer2);
1313 	}
1314 
hasErrorsFast(byte[] bases, Kmer kmer)1315 	boolean hasErrorsFast(byte[] bases, Kmer kmer){
1316 		if(bases.length<=kbig){return false;}
1317 		int prev=-99;
1318 
1319 		kmer.clearFast();
1320 		final int incr=Tools.mid(1, kbig/2, 9), mcc=minCountCorrect();
1321 		for(int i=0, next=kbig-1; i<bases.length; i++){
1322 			kmer.addRight(bases[i]);
1323 			if(i+1>=kbig){
1324 				if(kmer.len()<kbig){
1325 					assert(new String(bases).indexOf('N')>=0);
1326 					return true;
1327 				}
1328 				if(i==next){
1329 					assert(kmer.len()>=kbig);
1330 					int count=getCount(kmer);
1331 					final int min=Tools.min(count, prev), max=Tools.max(count, prev);
1332 					if(prev>-99 && (count<mcc || (i>0 && (isError(max+1, min-1))))){
1333 						return true;
1334 					}
1335 					prev=count;
1336 					next=Tools.min(bases.length-1, next+incr);
1337 				}
1338 			}else{
1339 				assert(kmer.len()<kbig);
1340 			}
1341 		}
1342 		return false;
1343 	}
1344 
1345 	public int errorCorrect(Read r, final int[] leftCounts, final int[] rightCounts, IntList counts, IntList counts2,
1346 			final ByteBuilder bb, final ByteBuilder bb2, final ErrorTracker tracker, final BitSet bs, final Kmer kmer, final Kmer regenKmer){
1347 
1348 //		verbose=r.numericID==0;
1349 
1350 		final byte[] bases=r.bases;
1351 		final byte[] quals=r.quality;
1352 		tracker.clear();
1353 		if(!r.containsUndefined() && !hasErrorsFast(bases, kmer)){return 0;}
1354 
1355 		int valid=tables.fillCounts(bases, counts, kmer);
1356 		if(valid<2){return 0;}
1357 		int possibleErrors=countErrors(counts, quals);
1358 		if(possibleErrors<0){return 0;}
1359 		final float expectedErrors=r.expectedErrors(true, r.length());
1360 		final Rollback roll=ECC_ROLLBACK ? new Rollback(r, counts) : null;
1361 
1362 		int correctedPincer=0;
1363 		int correctedTail=0;
1364 		int correctedBrute=0;
1365 		int correctedReassemble=0;
1366 
1367 		if(ECC_PINCER){
1368 			correctedPincer+=errorCorrectPincer(bases, quals, leftCounts, rightCounts, counts, bb, tracker, errorExtensionPincer, kmer);
1369 		}
1370 
1371 		if(ECC_TAIL || ECC_ALL){
1372 			int start=(ECC_ALL ? 0 : counts.size-kbig-1);
1373 //			if(ECC_PINCER && tracker!=null && tracker.detected>correctedPincer){start=start-kbig;}
1374 			correctedTail+=errorCorrectTail(bases, quals, leftCounts, rightCounts, counts, bb, tracker, start, errorExtensionTail, kmer);
1375 			r.reverseComplement();
1376 
1377 			counts.reverse();
1378 			correctedTail+=errorCorrectTail(bases, quals, leftCounts, rightCounts, counts, bb, tracker, start, errorExtensionTail, kmer);
1379 			r.reverseComplement();
1380 			counts.reverse();
1381 		}
1382 
1383 		if(ECC_REASSEMBLE){
1384 			if((correctedPincer<1 && correctedTail<1) || countErrors(counts, quals)>0){
1385 				correctedReassemble=reassemble(bases, quals, rightCounts, counts, counts2, tracker, errorExtensionReassemble, bb, bb2, kmer, regenKmer, bs);
1386 			}
1387 		}
1388 
1389 		assert(correctedPincer+correctedTail+correctedReassemble+correctedBrute==tracker.corrected())
1390 			: correctedPincer+", "+correctedTail+", "+correctedReassemble+", "+correctedBrute+", "+tracker;
1391 
1392 		if(ECC_ROLLBACK && (tracker.corrected()>0 || tracker.rollback)){
1393 
1394 			if(!tracker.rollback && quals!=null && tracker.corrected()>3){
1395 				float mult=Tools.max(1, 0.5f*(0.5f+0.01f*r.length()));//1 for a 150bp read.
1396 				if(countErrors(counts, quals)>0 && tracker.corrected()>mult+expectedErrors){tracker.rollback=true;}
1397 				else if(tracker.corrected()>2.5f*mult+expectedErrors){tracker.rollback=true;}
1398 			}
1399 
1400 //			boolean printed=false;
1401 			IntList counts0=roll.counts0;
1402 			for(int i=0; !tracker.rollback && i<counts.size; i++){
1403 				int a=Tools.max(0, counts0.get(i));
1404 				int b=Tools.max(0, counts.get(i));
1405 //				assert(b+1>=a) : "Z: RID="+r.numericID+"; "+a+"->"+b+"\n"+counts0+"\n"+counts;
1406 				if(b<a-1 && !isSimilar(a, b)){
1407 //					assert(false) : "Y: RID="+r.numericID+"; "+a+"->"+b+"\n"+counts0+"\n"+counts;
1408 					if(verbose){outstream.println("Y: RID="+r.numericID+"; "+a+"->"+b+"\n"+counts0+"\n"+counts);}
1409 					tracker.rollback=true;
1410 				}
1411 //				else if(b<a-1 && !printed){
1412 //					assert(false);
1413 //					if(verbose){outstream.println("X: RID="+r.numericID+"; "+a+"->"+b+"\n"+counts0+"\n"+counts);}
1414 //					printed=true;
1415 //				}
1416 			}
1417 
1418 			if(tracker.rollback){
1419 				roll.rollback(r, counts);
1420 				tracker.clearCorrected();
1421 				return 0;
1422 			}
1423 		}
1424 
1425 		if(MARK_BAD_BASES>0 && (!MARK_ERROR_READS_ONLY || countErrors(counts, quals)>0 ||
1426 				r.expectedErrors(false, r.length())>3)){
1427 			int marked=markBadBases(bases, quals, counts, bs, MARK_BAD_BASES, MARK_DELTA_ONLY, MARK_QUALITY);
1428 			tracker.marked=marked;
1429 		}
1430 
1431 		return tracker.corrected();
1432 	}
1433 
errorCorrectPincer(final byte[] bases, final byte[] quals, final int[] leftBuffer, final int[] rightBuffer, final IntList counts, final ByteBuilder bb, final ErrorTracker tracker, final int errorExtension, final Kmer kmer)1434 	public int errorCorrectPincer(final byte[] bases, final byte[] quals, final int[] leftBuffer, final int[] rightBuffer,
1435 			final IntList counts, final ByteBuilder bb, final ErrorTracker tracker, final int errorExtension, final Kmer kmer){
1436 
1437 		int detected=0;
1438 		int corrected=0;
1439 
1440 		//a is the index of the left kmer
1441 		//b is a+1
1442 		//c is d-1
1443 		//d is the index of the right kmer
1444 		//the base between the kmers is at a+k
1445 		for(int a=0, d=kbig+1; d<counts.size; a++, d++){
1446 			final int aCount=counts.get(a);
1447 			final int bCount=counts.get(a+1);
1448 			final int cCount=counts.get(d-1);
1449 			final int dCount=counts.get(d);
1450 			final byte qb=(quals==null ? 20 : quals[a+kbig]);
1451 			if(isError(aCount, bCount, qb) && isError(dCount, cCount, qb) && isSimilar(aCount, dCount)){
1452 				if(verbose){
1453 					outstream.println("Found error: "+aCount+", "+bCount+", "+cCount+", "+dCount);
1454 				}
1455 				//Looks like a 1bp substitution; attempt to correct.
1456 				detected++;
1457 				int ret=correctSingleBasePincer(a, d, bases, quals, leftBuffer, rightBuffer, counts, bb, errorExtension, kmer);
1458 				corrected+=ret;
1459 				if(verbose){
1460 					outstream.println("Corrected error.");
1461 				}
1462 			}else{
1463 				if(verbose){
1464 					outstream.println("Not an error: "+aCount+", "+bCount+", "+cCount+", "+dCount+
1465 							";  "+isError(aCount, bCount, qb)+", "+isError(dCount, cCount, qb)+", "+isSimilar(aCount, dCount));
1466 				}
1467 			}
1468 		}
1469 
1470 //		if(detected==0 && counts.get(0)>2 && counts.get(counts.size-1)>2){
1471 //			assert(!verbose);
1472 //			verbose=true;
1473 //			outstream.println("\n"+counts);
1474 //			errorCorrectPincer(bases, quals, leftBuffer, rightBuffer, kmers, counts, bb, tracker);
1475 //			assert(false);
1476 //		}
1477 
1478 		{
1479 			tracker.detectedPincer+=detected;
1480 			tracker.correctedPincer+=corrected;
1481 		}
1482 
1483 		return corrected;
1484 	}
1485 
errorCorrectTail(final byte[] bases, final byte[] quals, final int[] leftBuffer, final int[] rightBuffer, final IntList counts, final ByteBuilder bb, final ErrorTracker tracker, final int startPos, final int errorExtension, final Kmer kmer)1486 	public int errorCorrectTail(final byte[] bases, final byte[] quals, final int[] leftBuffer, final int[] rightBuffer,
1487 			final IntList counts, final ByteBuilder bb, final ErrorTracker tracker, final int startPos, final int errorExtension, final Kmer kmer){
1488 		if(bases.length<kbig+2+errorExtension+deadZone){return 0;}
1489 		int detected=0;
1490 		int corrected=0;
1491 
1492 		//a is the index of the left kmer
1493 		//b is a+1
1494 		//the base between the kmers is at a+k
1495 		for(int a=Tools.max(startPos, errorExtension), lim=counts.size-deadZone-1; a<lim; a++){//errorExtension-1
1496 			final int aCount=counts.get(a);
1497 			final int bCount=counts.get(a+1);
1498 			final byte qb=(quals==null ? 20 : quals[a+kbig]);
1499 			if(isError(aCount, bCount, qb) && isSimilar(aCount, a-errorExtension, a-1, counts) && isError(aCount, a+2, a+kbig, counts)){
1500 				if(verbose){
1501 					outstream.println("Found error: "+aCount+", "+bCount);
1502 				}
1503 				//Assume like a 1bp substitution; attempt to correct.
1504 				detected++;
1505 				int ret=correctSingleBaseRight(a, bases, quals, leftBuffer, rightBuffer, counts, bb, errorExtension, kmer);
1506 				corrected+=ret;
1507 				if(verbose){
1508 					outstream.println("Corrected error.");
1509 				}
1510 			}else{
1511 				if(verbose){
1512 					outstream.println("Not an error: "+aCount+", "+bCount+
1513 							";  "+isError(aCount, bCount, qb)+", "+isSimilar(aCount, a-errorExtension, a-1, counts)+", "+isError(aCount, a+2, a+kbig, counts));
1514 				}
1515 			}
1516 		}
1517 
1518 //		if(detected==0 && counts.get(0)>2 && counts.get(counts.size-1)>2){
1519 //			assert(!verbose);
1520 //			verbose=true;
1521 //			outstream.println("\n"+counts);
1522 //			errorCorrectPincer(bases, quals, leftBuffer, rightBuffer, kmers, counts, bb, tracker);
1523 //			assert(false);
1524 //		}
1525 
1526 		{
1527 			tracker.detectedTail+=detected;
1528 			tracker.correctedTail+=corrected;
1529 		}
1530 
1531 		return corrected;
1532 	}
1533 
1534 	@Override
reassemble_inner(final ByteBuilder bb, final byte[] quals, final int[] rightCounts, final IntList counts, final int errorExtension, final Kmer kmer, final Kmer regenKmer)1535 	public int reassemble_inner(final ByteBuilder bb, final byte[] quals, final int[] rightCounts, final IntList counts,
1536 			final int errorExtension, final Kmer kmer, final Kmer regenKmer){
1537 		final int length=bb.length();
1538 		if(length<kbig+1+deadZone){return 0;}
1539 		final byte[] bases=bb.array;
1540 		int detected=0;
1541 		int corrected=0;
1542 
1543 		//Initialize the kmer
1544 		kmer.clear();
1545 
1546 		//a is the index of the first base of the left kmer
1547 		//b=a+1 is the index of the next base
1548 		//ca=a-kbig is the index of the next base
1549 		//cb=a-kbig is the index of the next base
1550 		//the base between the kmers is at a+k
1551 		for(int a=0, lim=length-deadZone-1; a<lim; a++){
1552 			//Update the kmer
1553 			kmer.addRight(bases[a]);
1554 
1555 			if(verbose){
1556 				outstream.println("kmer.len(): "+kmer.len()+" vs "+kbig+"; a="+a);
1557 			}
1558 
1559 			if(kmer.len()>=kbig){
1560 
1561 				final int b=a+1;
1562 				final int ca=a-kbig+1;
1563 				final int cb=ca+1;
1564 
1565 				final int aCount=counts.get(ca);
1566 				final int bCount=counts.get(cb);
1567 				final byte qb=(quals==null ? 20 : quals[b]);
1568 
1569 				if(verbose){
1570 					outstream.println("ca="+ca+", cb="+cb+"; aCount="+aCount+", bCount="+bCount);
1571 					outstream.println(isError(aCount, bCount, qb)+", "+isSimilar(aCount, ca-errorExtension, ca-1, counts)+
1572 							", "+isError(aCount, ca+2, ca+kbig, counts));
1573 				}
1574 
1575 //				if(isError(aCount, bCount) && isSimilar(aCount, ca-errorExtension, ca-1, counts) && isError(aCount, ca+2, ca+kbig, counts)){
1576 				if(isSubstitution(ca, errorExtension, qb, counts)){
1577 					if(verbose){
1578 						outstream.println("***Found error: "+aCount+", "+bCount);
1579 					}
1580 					//Assume like a 1bp substitution; attempt to correct.
1581 
1582 					int rightMaxPos=fillRightCounts(kmer, rightCounts);
1583 					int rightMax=rightCounts[rightMaxPos];
1584 					int rightSecondPos=Tools.secondHighestPosition(rightCounts);
1585 					int rightSecond=rightCounts[rightSecondPos];
1586 
1587 					byte base=bases[b];
1588 					byte num=AminoAcid.baseToNumber[base];
1589 
1590 					if(rightMax>=minCountExtend){
1591 						detected++;
1592 						if(num==rightMax){
1593 							detected--;
1594 //							bases2[b]=base;
1595 						}else if((isError(rightMax, rightSecond, qb) || !isJunction(rightMax, rightSecond)) && isSimilar(aCount, rightMax)){
1596 							bases[b]=AminoAcid.numberToBase[rightMaxPos];
1597 							corrected++;
1598 							tables.regenerateCounts(bases, counts, ca, regenKmer);
1599 							if(verbose){outstream.println("Corrected error: "+num+"->"+rightMaxPos+". New counts:\n"+counts);}
1600 						}
1601 
1602 //						else if(rightSecond>=minCountExtend && isJunction(rightMax, rightSecond) && isSimilar(aCount, rightSecond)
1603 //								&& !isSimilar(aCount, rightMax)){//This branch may not be very safe.
1604 //							bases2[b]=AminoAcid.numberToBase[rightSecondPos];
1605 //							corrected++;
1606 //							if(verbose){outstream.println("Corrected error.");}
1607 //						}
1608 					}
1609 
1610 				}else{
1611 					if(verbose){
1612 						outstream.println("Not an error: "+aCount+", "+bCount+
1613 								";  "+isError(aCount, bCount, qb)+", "+isSimilar(aCount, a-errorExtension, a-1, counts)+", "+isError(aCount, a+2, a+kbig, counts));
1614 					}
1615 				}
1616 			}
1617 		}
1618 
1619 		return corrected;
1620 	}
1621 
correctSingleBasePincer(final int a, final int d, final byte[] bases, final byte[] quals, final int[] leftBuffer, final int[] rightBuffer, final IntList counts, final ByteBuilder bb, final int errorExtension, final Kmer kmer0)1622 	private int correctSingleBasePincer(final int a, final int d, final byte[] bases, final byte[] quals, final int[] leftBuffer, final int[] rightBuffer,
1623 			final IntList counts, final ByteBuilder bb, final int errorExtension, final Kmer kmer0){
1624 		final byte leftReplacement, rightReplacement;
1625 		final int loc=a+kbig;
1626 		{
1627 			bb.clear();
1628 			Kmer kmer=getKmer(bases, a, kmer0);
1629 			if(kmer==null){return 0;}
1630 			int extension=extendToRight2_inner(bb, null, rightBuffer, errorExtension, true, kmer);
1631 			if(extension<errorExtension){return 0;}
1632 			for(int i=1; i<extension; i++){
1633 				if(bb.get(i)!=bases[loc+i]){return 0;}
1634 			}
1635 			leftReplacement=bb.get(0);
1636 		}
1637 		{
1638 			bb.clear();
1639 			Kmer kmer=getKmer(bases, d, kmer0);
1640 			if(kmer==null){return 0;}
1641 			kmer.rcomp();
1642 			int extension=extendToRight2_inner(bb, null, rightBuffer, errorExtension, true, kmer);
1643 			if(extension<errorExtension){return 0;}
1644 			bb.reverseComplementInPlace();
1645 			for(int i=0; i<extension-1; i++){
1646 				if(bb.get(i)!=bases[loc+i+1-extension]){return 0;}
1647 			}
1648 			rightReplacement=bb.get(extension-1);
1649 		}
1650 		if(leftReplacement!=rightReplacement){return 0;}
1651 		if(bases[loc]==leftReplacement){return 0;}
1652 		if(!isSimilar(bases, a, leftReplacement, counts, kmer0)){return 0;}
1653 
1654 		bases[loc]=leftReplacement;
1655 		assert(d==a+kbig+1);
1656 		tables.regenerateCounts(bases, counts, a, kmer0);
1657 		return 1;
1658 	}
1659 
correctSingleBaseRight(final int a, final byte[] bases, final byte[] quals, final int[] leftBuffer, final int[] rightBuffer, final IntList counts, final ByteBuilder bb, final int errorExtension0, final Kmer kmer0)1660 	private int correctSingleBaseRight(final int a, final byte[] bases, final byte[] quals, final int[] leftBuffer, final int[] rightBuffer,
1661 			final IntList counts, final ByteBuilder bb, final int errorExtension0, final Kmer kmer0){
1662 		final byte leftReplacement;
1663 		final int loc=a+kbig;
1664 		final int errorExtension=Tools.min(errorExtension0, bases.length-loc);
1665 		{
1666 			bb.clear();
1667 			Kmer kmer=getKmer(bases, a, kmer0);
1668 			if(kmer==null){return 0;}
1669 			int extension=extendToRight2_inner(bb, null, rightBuffer, errorExtension, true, kmer);
1670 			if(extension<errorExtension){return 0;}
1671 			for(int i=1; i<extension; i++){
1672 				if(bb.get(i)!=bases[loc+i]){
1673 					return 0;
1674 				}
1675 			}
1676 			leftReplacement=bb.get(0);
1677 		}
1678 
1679 		if(bases[loc]==leftReplacement){return 0;}
1680 		if(!isSimilar(bases, a, leftReplacement, counts, kmer0)){return 0;}
1681 
1682 		bases[loc]=leftReplacement;
1683 		tables.regenerateCounts(bases, counts, a, kmer0);
1684 		return 1;
1685 	}
1686 
isSimilar(byte[] bases, int a, byte newBase, IntList counts, final Kmer kmer0)1687 	private final boolean isSimilar(byte[] bases, int a, byte newBase, IntList counts, final Kmer kmer0){
1688 		Kmer kmer=getKmer(bases, a, kmer0);
1689 		if(kmer==null){
1690 			assert(false); //Should never happen
1691 			return false;
1692 		}
1693 		kmer.addRight(newBase);
1694 		int count=getCount(kmer);
1695 		int aCount=counts.get(a);
1696 		boolean similar=isSimilar(aCount, count);
1697 		return similar;
1698 	}
1699 
1700 	/*--------------------------------------------------------------*/
1701 	/*----------------  Inherited Abstract Methods  ----------------*/
1702 	/*--------------------------------------------------------------*/
1703 
1704 	@Override
makeKhist()1705 	final void makeKhist(){
1706 		tables.makeKhist(outHist, histColumns, histMax, histHeader, histZeros, true, smoothHist, gcHist, false, 0.01, 1, 1);
1707 	}
1708 	@Override
dumpKmersAsText()1709 	final void dumpKmersAsText(){
1710 		tables.dumpKmersAsBytes_MT(outKmers, minToDump, maxToDump, true, null);
1711 	}
1712 
1713 	/*--------------------------------------------------------------*/
1714 	/*----------------            Fields            ----------------*/
1715 	/*--------------------------------------------------------------*/
1716 
1717 	@Override
tables()1718 	public final KmerTableSetU tables(){return tables;}
1719 	public final KmerTableSetU tables;
1720 
1721 	/** Normal kmer length */
1722 	final int ksmall;
1723 
1724 }
1725