1 package align2;
2 
3 import java.util.ArrayList;
4 import java.util.Arrays;
5 import java.util.HashMap;
6 
7 import dna.AminoAcid;
8 import dna.Data;
9 import shared.KillSwitch;
10 import shared.Shared;
11 import shared.Tools;
12 import stream.SiteScore;
13 import structures.LongM;
14 
15 
16 /**
17  * Based on Index11f
18  * Index stored in single array per block.
19  *
20  *
21  *
22  * @author Brian Bushnell
23  * @date Dec 22, 2012
24  *
25  */
26 public final class BBIndex extends AbstractIndex {
27 
28 
main(String[] args)29 	public static void main(String[] args){
30 
31 		int k=13;
32 
33 		for(int i=0; i<args.length; i++){
34 			String s=args[i].toLowerCase();
35 			if(s.contains("=")){
36 				String[] split=s.split("=");
37 				String a=split[0];
38 				String b=split[1];
39 				if(a.equals("build") || a.equals("b")){
40 					Data.setGenome(Integer.parseInt(b));
41 				}else if(a.equals("minchrom")){
42 					MINCHROM=Integer.parseInt(b);
43 				}else if(a.equals("maxchrom")){
44 					MAXCHROM=Integer.parseInt(b);
45 				}else if(a.equals("keylen") || a.equals("k")){
46 					k=Integer.parseInt(b);
47 				}
48 			}
49 		}
50 
51 		if(MINCHROM==-1){MINCHROM=1;}
52 		if(MAXCHROM==-1){
53 			assert(Data.numChroms<=Byte.MAX_VALUE) : "TODO";
54 			MAXCHROM=Data.numChroms;
55 		}
56 
57 
58 		System.err.println("Writing build "+Data.GENOME_BUILD+" "+
59 				"BASESPACE index, keylen="+k+", chrom bits="+NUM_CHROM_BITS);
60 
61 
62 		int first=(NUM_CHROM_BITS==0 ? 1 : 0);
63 
64 
65 		Data.sysout.println("Loading index for chunk "+first+"-"+MAXCHROM+", build "+Data.GENOME_BUILD);
66 		index=IndexMaker4.makeIndex(Data.GENOME_BUILD, first, MAXCHROM,
67 				k, NUM_CHROM_BITS, MAX_ALLOWED_CHROM_INDEX, CHROM_MASK_LOW, CHROM_MASK_HIGH, SITE_MASK, SHIFT_LENGTH, true, false, index);
68 
69 
70 		System.err.println("Finished all chroms, may still be writing.");
71 	}
72 
73 
BBIndex(int k_, int minChrom_, int maxChrom_, int kfilter_, MSA msa_)74 	public BBIndex(int k_, int minChrom_, int maxChrom_, int kfilter_, MSA msa_){
75 		super(k_, kfilter_, BASE_HIT_SCORE, minChrom_, maxChrom_, msa_);
76 		INV_BASE_KEY_HIT_SCORE=1f/BASE_KEY_HIT_SCORE;
77 		INDEL_PENALTY=(BASE_KEY_HIT_SCORE/2)-1; //default (HIT_SCORE/2)-1
78 		INDEL_PENALTY_MULT=20; //default 20; penalty for indel length
79 		MAX_PENALTY_FOR_MISALIGNED_HIT=BASE_KEY_HIT_SCORE-(1+BASE_KEY_HIT_SCORE/8);
80 		SCOREZ_1KEY=Z_SCORE_MULT*KEYLEN;
81 		{
82 			int cyc=0;
83 			for(int chrom=minChrom; chrom<=maxChrom; chrom=((chrom&CHROM_MASK_HIGH)+CHROMS_PER_BLOCK)){cyc+=2;}
84 			cycles=cyc;
85 		}
86 		prescoreArray=new int[cycles];
87 		precountArray=new int[cycles];
88 	}
89 
90 	/** Load or generate index from minChrom to maxChrom, inclusive, with keylength k.
91 	 * This range can encompass multiple blocks.
92 	 * Should only be called once in a process. */
loadIndex(int minChrom, int maxChrom, int k, boolean writeToDisk, boolean diskInvalid)93 	public static final synchronized void loadIndex(int minChrom, int maxChrom, int k, boolean writeToDisk, boolean diskInvalid){
94 		if(minChrom<1){minChrom=1;}
95 		if(maxChrom>Data.numChroms){maxChrom=Data.numChroms;}
96 		assert(minChrom<=maxChrom) : minChrom+", "+maxChrom+"\nThe reference file appears to be empty.";
97 		Data.sysout.println("Loading index for chunk "+minChrom+"-"+maxChrom+", build "+Data.GENOME_BUILD);
98 		index=IndexMaker4.makeIndex(Data.GENOME_BUILD, minChrom, maxChrom,
99 				k, NUM_CHROM_BITS, MAX_ALLOWED_CHROM_INDEX, CHROM_MASK_LOW, CHROM_MASK_HIGH, SITE_MASK, SHIFT_LENGTH, writeToDisk, diskInvalid, index);
100 	}
101 
102 	/** Calculate statistics of index, such as list lengths, and find clumpy keys */
analyzeIndex(int minChrom, int maxChrom, float fractionToExclude, int k)103 	public static final synchronized void analyzeIndex(int minChrom, int maxChrom, float fractionToExclude, int k){
104 		assert(lengthHistogram==null);
105 		assert(COUNTS==null);
106 
107 		int KEYSPACE=1<<(2*k);
108 		COUNTS=new int[KEYSPACE];
109 		maxChrom=maxChrom(maxChrom);
110 
111 		HashMap<Integer, LongM> cmap=new HashMap<Integer, LongM>();
112 
113 		for(int chrom=minChrom; chrom<=maxChrom; chrom=((chrom&CHROM_MASK_HIGH)+CHROMS_PER_BLOCK)){
114 			Block b=index[chrom];
115 			final int[] sites=b.sites;
116 			final int[] starts=b.starts;
117 
118 			for(int key=0; key<KEYSPACE; key++){
119 
120 				long clumps=0;
121 
122 				final int start1=starts[key];
123 				final int stop1=starts[key+1];
124 				final int len1=stop1-start1;
125 				COUNTS[key]=Tools.min(Integer.MAX_VALUE, COUNTS[key]+len1);
126 
127 				if(REMOVE_CLUMPY){
128 					for(int i=start1+1; i<stop1; i++){
129 						int dif=sites[i]-sites[i-1];
130 						assert(dif!=0);
131 						if(dif>0 && dif<=CLUMPY_MAX_DIST){
132 							clumps++;
133 						}
134 					}
135 					if(clumps>0){
136 						final int x=Tools.min(key, AminoAcid.reverseComplementBinaryFast(key, k));
137 						final Integer ko=x;
138 						LongM lm=cmap.get(ko);
139 						if(lm==null){
140 							lm=new LongM(0);
141 							cmap.put(ko, lm);
142 						}
143 						lm.increment(clumps);
144 					}
145 				}
146 			}
147 		}
148 
149 		for(int key=0; key<COUNTS.length; key++){
150 			int rkey=AminoAcid.reverseComplementBinaryFast(key, k);
151 			if(key<rkey){
152 				int x=(int)Tools.min(Integer.MAX_VALUE, COUNTS[key]+(long)COUNTS[rkey]);
153 				COUNTS[key]=COUNTS[rkey]=x;
154 			}
155 		}
156 
157 		if(REMOVE_CLUMPY){
158 			Integer[] keys=cmap.keySet().toArray(new Integer[cmap.size()]);
159 			Arrays.sort(keys);
160 
161 			for(Integer key : keys){
162 				long clumps=cmap.get(key).value();
163 				long len=COUNTS[key];
164 				if((len>CLUMPY_MIN_LENGTH_INDEX && clumps>CLUMPY_FRACTION*len)/* || (len>8*CLUMPY_MIN_LENGTH_INDEX && clumps>.75f*CLUMPY_FRACTION*len)*/){
165 					int rkey=AminoAcid.reverseComplementBinaryFast(key, k);
166 					assert(key<=rkey);
167 					assert(key==KeyRing.reverseComplementKey(rkey, k));
168 					COUNTS[key]=0;
169 					COUNTS[rkey]=0;
170 				}
171 			}
172 		}
173 
174 		lengthHistogram=Tools.makeLengthHistogram3(COUNTS, 1000, verbose2);
175 
176 		//if(verbose2){System.err.println("lengthHistogram: "+Arrays.toString(lengthHistogram));}
177 
178 		if(REMOVE_FREQUENT_GENOME_FRACTION){
179 
180 			int lengthLimitIndex=(int)((1-fractionToExclude)*(lengthHistogram.length-1));
181 			int lengthLimitIndex2=(int)((1-fractionToExclude*DOUBLE_SEARCH_THRESH_MULT)*(lengthHistogram.length-1));
182 
183 			MAX_USABLE_LENGTH=Tools.max(2*SMALL_GENOME_LIST, lengthHistogram[lengthLimitIndex]);
184 			MAX_USABLE_LENGTH2=Tools.max(6*SMALL_GENOME_LIST, lengthHistogram[lengthLimitIndex2]);
185 
186 			if(verbose2){System.err.println("MAX_USABLE_LENGTH:  "+MAX_USABLE_LENGTH+"\nMAX_USABLE_LENGTH2: "+MAX_USABLE_LENGTH2);}
187 		}
188 
189 		Solver.POINTS_PER_SITE=(int)Math.floor((Solver.BASE_POINTS_PER_SITE*4000f)/Tools.max(2*SMALL_GENOME_LIST, lengthHistogram[MAX_AVERAGE_LIST_TO_SEARCH]));
190 		if(Solver.POINTS_PER_SITE==0){Solver.POINTS_PER_SITE=-1;}
191 		if(verbose2){System.err.println("POINTS_PER_SITE:  "+Solver.POINTS_PER_SITE);}
192 		assert(Solver.POINTS_PER_SITE<0) : Solver.POINTS_PER_SITE;
193 	}
194 
195 
196 	/** Returns the filename for the block holding this chrom */
197 	public static final String fname(int chrom, int k){
198 		return IndexMaker4.fname(minChrom(chrom), maxChrom(chrom), k, NUM_CHROM_BITS);
199 	}
200 
201 	/** Ensure key offsets are strictly ascending. */
202 	private static boolean checkOffsets(int[] offsets){
203 		for(int i=1; i<offsets.length; i++){
204 			if(offsets[i]<=offsets[i-1]){return false;}
205 		}
206 		return true;
207 	}
208 
209 	@Deprecated
210 	private final int trimExcessHitLists(int[] keys, int[][] hits){
211 
212 		assert(false) : "Needs to be redone because hits are no longer sorted by length.";
213 
214 		assert(hits.length==keys.length);
215 //		assert(false) : "modify this function so that it gives more weight to trimming lists over highly covered baits";
216 		//And also, incorporate the "remove the longest list" function
217 
218 		final int limit=Tools.max(SMALL_GENOME_LIST, lengthHistogram[MAX_AVERAGE_LIST_TO_SEARCH])*keys.length;
219 		final int limit2=Tools.max(SMALL_GENOME_LIST, lengthHistogram[MAX_AVERAGE_LIST_TO_SEARCH2]);
220 		final int limit3=Tools.max(SMALL_GENOME_LIST, lengthHistogram[MAX_SHORTEST_LIST_TO_SEARCH]);
221 
222 		int sum=0;
223 		int initialHitCount=0;
224 
225 		int shortest=Integer.MAX_VALUE-1;
226 		int shortest2=Integer.MAX_VALUE;
227 
228 		for(int i=0; i<keys.length; i++){
229 			int key=keys[i];
230 			int x=count(key);
231 			sum+=x;
232 			initialHitCount+=(x==0 ? 0 : 1);
233 			if(x>0){
234 				if(x<shortest2){
235 					shortest2=x;
236 					if(shortest2<shortest){
237 						shortest2=shortest;
238 						shortest=x;
239 					}
240 				}
241 			}
242 		}
243 		assert(shortest2>=shortest);
244 		if(initialHitCount<MIN_APPROX_HITS_TO_KEEP){return initialHitCount;}
245 		if(shortest>limit3 && !SLOW){
246 			for(int i=0; i<hits.length; i++){hits[i]=null;}
247 			return 0;
248 		}
249 		if(sum<=limit && sum/initialHitCount<=limit2){return initialHitCount;}
250 
251 		Pointer[] ptrs=Pointer.loadMatrix(hits);
252 //		ptrs[0].value/=2;
253 //		ptrs[ptrs.length-1].value/=2;
254 		Arrays.sort(ptrs);
255 
256 		int finalHitCount=initialHitCount;
257 		for(int i=ptrs.length-1; sum>limit || sum/finalHitCount>limit2; i--){
258 			Pointer p=ptrs[i];
259 			sum-=hits[p.key].length;
260 			hits[p.key]=null;
261 			finalHitCount--;
262 		}
263 
264 		return finalHitCount;
265 	}
266 
267 	/** Remove least useful keys to accelerate search */
268 	private final int trimExcessHitListsByGreedy(int[] offsets, int[] keyScores, int maxHitLists, int[] keys){
269 
270 		float[] keyWeights=getKeyWeightArray(keyScores.length);
271 		for(int i=0; i<keyScores.length; i++){
272 			keyWeights[i]=keyScores[i]*INV_BASE_KEY_HIT_SCORE;
273 		}
274 
275 //		assert(false) : "modify this function so that it gives more weight to trimming lists over highly covered baits";
276 		//And also, incorporate the "remove the longest list" function
277 
278 		//Remove lists until the length sum is below this value
279 		final int limit=Tools.max(SMALL_GENOME_LIST, lengthHistogram[MAX_AVERAGE_LIST_TO_SEARCH])*keys.length;
280 		final int limit2=Tools.max(SMALL_GENOME_LIST, lengthHistogram[MAX_AVERAGE_LIST_TO_SEARCH2]);
281 
282 		//Do not search if the shortest list is longer than this
283 		final int limit3=Tools.max(SMALL_GENOME_LIST, lengthHistogram[MAX_SHORTEST_LIST_TO_SEARCH]);
284 //		final int limitS=lengthHistogram[chrom][MAX_SINGLE_LIST_TO_SEARCH];
285 
286 		int sum=0;
287 		int initialHitCount=0;
288 
289 		//Shortest list found
290 		int shortest=Integer.MAX_VALUE-1;
291 		//Second shortest list found
292 		int shortest2=Integer.MAX_VALUE;
293 
294 //		for(int i=0; i<hits.length; i++){
295 //			if(hits[i]!=null && hits[i].length>limitS){hits[i]=null;}
296 //		}
297 
298 		final int[] lengths=getGenericArray(keys.length);
299 
300 		//Find shortest lists
301 		for(int i=0; i<keys.length; i++){
302 			int key=keys[i];
303 			int x=count(key);
304 			lengths[i]=x;
305 			sum+=x;
306 			initialHitCount+=(x==0 ? 0 : 1);
307 			if(x>0){
308 				if(x<shortest2){
309 					shortest2=x;
310 					if(shortest2<shortest){
311 						shortest2=shortest;
312 						shortest=x;
313 					}
314 				}
315 			}
316 		}
317 //		assert(false) : limit+", "+limit2+", "+limit3+", "+shortest2+", "+shortest+", "+initialHitCount+", "+MIN_APPROX_HITS_TO_KEEP+"\n"+Arrays.toString(lengths);
318 		assert(shortest2>=shortest);
319 		if(initialHitCount<MIN_APPROX_HITS_TO_KEEP){return initialHitCount;}
320 
321 		//Don't search if the shortest list is too long
322 		if(shortest>limit3 && !SLOW){
323 			for(int i=0; i<keys.length; i++){keys[i]=-1;}
324 			return 0;
325 		}
326 
327 		int hitsCount=initialHitCount;
328 		int worstValue=Integer.MIN_VALUE;
329 
330 		while(hitsCount>=MIN_APPROX_HITS_TO_KEEP && (sum>limit || sum/initialHitCount>limit2 || hitsCount>maxHitLists/* || worstValue<0*/)){
331 			final int[] lists=getGreedyListArray(hitsCount);
332 			for(int i=0, j=0; j<lists.length; i++){
333 				if(lengths[i]>0){
334 					lists[j]=i;
335 					j++;
336 				}
337 			}
338 
339 			Solver.findWorstGreedy(offsets, lengths, keyWeights, KEYLEN, lists, greedyReturn);
340 			int worstIndex=greedyReturn[0];
341 			int worst=lists[worstIndex];
342 			worstValue=greedyReturn[1];
343 			sum-=lengths[worst];
344 
345 //			if(worstValue>0 && (hitsCount<=maxHitLists || lengths[worst]<excessListLimit)){return hitsCount;}
346 			if(worstValue>0 || lengths[worst]<SMALL_GENOME_LIST){return hitsCount;} //This line increases accuracy at expense of speed.  Lower constant = more accurate, default 0.
347 			hitsCount--;
348 			lengths[worst]=0;
349 			keys[worst]=-1;
350 		}
351 		return hitsCount;
352 	}
353 
354 
355 	private final int getHits(final int[] keys, final int chrom, final int maxLen, final int[] starts, final int[] stops){
356 		int numHits=0;
357 		final Block b=index[chrom];
358 		for(int i=0; i<keys.length; i++){
359 			final int key=keys[i];
360 			starts[i]=-1;
361 			stops[i]=-1;
362 			if(key>=0){
363 				final int len=count(key);
364 				if(len>0 && len<maxLen){
365 					final int len2=b.length(key);
366 					if(len2>0){
367 						starts[i]=b.starts[key];
368 						stops[i]=starts[i]+len2;
369 						numHits++;
370 					}
371 				}
372 			}
373 		}
374 		return numHits;
375 	}
376 
377 
378 	private final int countHits(final int[] keys, final int maxLen, boolean clearBadKeys){
379 		int numHits=0;
380 		for(int i=0; i<keys.length; i++){
381 			final int key=keys[i];
382 			if(key>=0){
383 				final int len=count(key);
384 //				System.err.println(len);
385 				if(len>0 && len<maxLen){
386 					numHits++;
387 				}else if(clearBadKeys){
388 					keys[i]=-1;
389 				}
390 			}
391 		}
392 		return numHits;
393 	}
394 
395 
396 	@Override
397 	public final ArrayList<SiteScore> findAdvanced(byte[] basesP, byte[] basesM, byte[] qual, byte[] baseScoresP, int[] keyScoresP, int[] offsets, long id){
398 		assert(minChrom<=maxChrom && minChrom>=0);
399 		ArrayList<SiteScore> result=find(basesP, basesM, qual, baseScoresP, keyScoresP, offsets, true, id);
400 		if(DOUBLE_SEARCH_NO_HIT && (result==null || result.isEmpty())){result=find(basesP, basesM, qual, baseScoresP, keyScoresP, offsets, false, id);}
401 
402 		return result;
403 	}
404 
405 
406 	public final ArrayList<SiteScore> find(byte[] basesP, byte[] basesM, byte[] qual,  byte[] baseScoresP, int[] keyScoresP, int[] offsetsP, boolean obeyLimits, long id){
407 
408 		assert(checkOffsets(offsetsP)) : Arrays.toString(offsetsP);
409 		final int[] keysOriginal=KeyRing.makeKeys(basesP, offsetsP, KEYLEN);
410 		int[] keysP=Arrays.copyOf(keysOriginal, keysOriginal.length);
411 
412 		initialKeys+=offsetsP.length;
413 		initialKeyIterations++;
414 
415 		final int maxLen=(obeyLimits ? MAX_USABLE_LENGTH : MAX_USABLE_LENGTH2);
416 
417 		int numHits=0;
418 		numHits=countHits(keysP, maxLen, true);
419 
420 		if(verbose){
421 			System.err.println("initial hits: "+numHits);
422 			System.err.println("initial keys: "+keysP.length+"\n"+Arrays.toString(keysP));
423 		}
424 
425 		if(numHits>0){ //TODO: Change these to higher numbers
426 			int trigger=(3*keysP.length)/4;
427 			if(numHits<4 && numHits<trigger){
428 				for(int i=0; i<keysP.length; i++){keysP[i]=keysOriginal[i];}
429 				numHits=countHits(keysP, (maxLen*3)/2, true);
430 			}
431 			if(numHits<3 && numHits<trigger){
432 				for(int i=0; i<keysP.length; i++){keysP[i]=keysOriginal[i];}
433 				numHits=countHits(keysP, maxLen*2, true);
434 			}
435 			if(numHits<3 && numHits<trigger){
436 				for(int i=0; i<keysP.length; i++){keysP[i]=keysOriginal[i];}
437 				numHits=countHits(keysP, maxLen*3, true);
438 			}
439 			if(numHits<2 && numHits<trigger){
440 				for(int i=0; i<keysP.length; i++){keysP[i]=keysOriginal[i];}
441 				numHits=countHits(keysP, maxLen*5, true);
442 			}
443 		}
444 //		assert(checkOffsets(offsetsP)) : Arrays.toString(offsetsP);
445 		if(numHits<keysP.length){
446 			int[][] r=shrink2(offsetsP, keysP, keyScoresP);
447 			assert(r!=null);
448 			if(r!=null){
449 				offsetsP=r[0];
450 				keysP=r[1];
451 				keyScoresP=r[2];
452 			}
453 		}else{
454 			assert(shrink2(offsetsP, keysP, keyScoresP)==null);
455 		}
456 		initialKeys2+=numHits;
457 		//assert(checkOffsets(offsetsP)) : Arrays.toString(offsetsP);
458 
459 //		assert(checkOffsets(offsets)) : Arrays.toString(offsets);
460 		if(TRIM_BY_GREEDY && obeyLimits){
461 			int maxLists=Tools.max((int)(HIT_FRACTION_TO_RETAIN*keysP.length), MIN_HIT_LISTS_TO_RETAIN);
462 			numHits=trimExcessHitListsByGreedy(offsetsP, keyScoresP, maxLists, keysP);
463 		}
464 //		System.out.println("After greedy: numHits = "+numHits);
465 
466 		if(verbose){
467 			System.err.println("final hits: "+numHits);
468 			System.err.println("final keys: "+keysP.length+"\n"+Arrays.toString(keysP));
469 		}
470 
471 		if(TRIM_BY_TOTAL_SITE_COUNT && obeyLimits){
472 			throw new RuntimeException("Needs to be redone.");
473 //			numHits=trimExcessHitLists(keys, hits);
474 		}
475 
476 		if(TRIM_LONG_HIT_LISTS && obeyLimits && numHits>MIN_APPROX_HITS_TO_KEEP){
477 			int cutoffIndex=((int) (HIT_FRACTION_TO_RETAIN*(keysP.length)-0.01f))+(keysP.length-numHits);
478 
479 			int zeroes=keysP.length-numHits;
480 			int altMinIndex=(zeroes+(MIN_HIT_LISTS_TO_RETAIN-1));
481 			cutoffIndex=Tools.max(cutoffIndex, altMinIndex);
482 
483 			assert(cutoffIndex>0) : cutoffIndex+"\n"+numHits;
484 
485 			if(cutoffIndex<(keysP.length-1)){
486 				int[] lens=getGenericArray(keysP.length);
487 				for(int i=0; i<keysP.length; i++){lens[i]=count(keysP[i]);}
488 				Arrays.sort(lens);
489 				int cutoff=lens[cutoffIndex];
490 
491 				cutoff=Tools.max(lengthHistogram[MIN_INDEX_TO_DROP_LONG_HIT_LIST], cutoff);
492 
493 				int removed=0;
494 
495 				for(int i=0; i<keysP.length; i++){
496 					int key=keysP[i];
497 					if(count(key)>cutoff){
498 						keysP[i]=-1;
499 						removed++;
500 						numHits--;
501 					}
502 				}
503 			}
504 		}
505 //		assert(checkOffsets(offsets)) : Arrays.toString(offsets);
506 
507 		final ArrayList<SiteScore> result=new ArrayList<SiteScore>(8);
508 		if(numHits<MIN_APPROX_HITS_TO_KEEP){return result;}
509 		//assert(checkOffsets(offsetsP)) : Arrays.toString(offsetsP);
510 		if(numHits<keysP.length){
511 			int[][] r=shrink2(offsetsP, keysP, keyScoresP);
512 			assert(r!=null);
513 			if(r!=null){
514 				offsetsP=r[0];
515 				keysP=r[1];
516 				keyScoresP=r[2];
517 			}
518 		}else{
519 			assert(shrink2(offsetsP, keysP, keyScoresP)==null);
520 		}
521 		assert(keysP.length==numHits);
522 		//assert(checkOffsets(offsetsP)) : Arrays.toString(offsetsP);
523 		//Reverse the offsets for minus-strand mapping, since they are generated based on quality
524 		int[] offsetsM=KeyRing.reverseOffsets(offsetsP, KEYLEN, basesP.length);
525 		if(verbose){
526 			System.err.println("Reversed offsets: \n"+Arrays.toString(offsetsP)+"  ->\n"+Arrays.toString(offsetsM));
527 		}
528 		final int[] keysM=KeyRing.reverseComplementKeys(keysP, KEYLEN);
529 
530 //		assert(checkOffsets(offsetsP)) : Arrays.toString(offsetsP);
531 //		assert(checkOffsets(offsetsM)) : Arrays.toString(offsetsM);
532 
533 		assert(!USE_EXTENDED_SCORE || (baseScoresP!=null && (qual==null || baseScoresP.length==qual.length)));
534 		assert(keyScoresP!=null);
535 		assert(keyScoresP.length==offsetsP.length) : keyScoresP.length+", "+offsetsP.length+", "+Arrays.toString(keyScoresP);
536 		final byte[] baseScoresM=Tools.reverseAndCopy(baseScoresP, getBaseScoreArray(baseScoresP.length, 1));
537 		final int[] keyScoresM=Tools.reverseAndCopy(keyScoresP, getKeyScoreArray(keyScoresP.length, 1));
538 		final int maxQuickScore=maxQuickScore(offsetsP, keyScoresP);
539 
540 		assert(offsetsM.length==offsetsP.length);
541 		assert(maxQuickScore==maxQuickScore(offsetsM, keyScoresM));
542 
543 		/*
544 		 * bestScores:
545 		 *
546 		 * bestScores[0]	currentTopScore
547 		 * bestScores[1]	maxHits
548 		 * bestScores[2]	qcutoff
549 		 * bestScores[3]	bestqscore
550 		 * bestScores[4]	maxQuickScore
551 		 * bestScores[5]	perfectsFound
552 		 */
553 		final int[] bestScores=KillSwitch.allocInt1D(6);
554 
555 		//This prevents filtering by qscore when a low-quality read only uses a few keys.
556 		//In that case, extending is more important.
557 		final boolean prescan_qscore=(PRESCAN_QSCORE && numHits>=5);
558 
559 		int[][] prescanResults=null;
560 		int[] precounts=null;
561 		int[] prescores=null;
562 
563 		int hitsCutoff=0;
564 		int qscoreCutoff=(int)(MIN_QSCORE_MULT*maxQuickScore);
565 
566 		boolean allBasesCovered=true;
567 		{
568 			if(offsetsP[0]!=0){allBasesCovered=false;}
569 			else if(offsetsP[offsetsP.length-1]!=(basesP.length-KEYLEN)){allBasesCovered=false;}
570 			else{
571 				for(int i=1; i<offsetsP.length; i++){
572 					if(offsetsP[i]>offsetsP[i-1]+KEYLEN){
573 						allBasesCovered=false;
574 						break;
575 					}
576 				}
577 			}
578 		}
579 
580 		//TODO I don't understand this logic
581 		final boolean pretendAllBasesAreCovered=//false;
582 					(allBasesCovered ||
583 					keysP.length>=keysOriginal.length-4 ||
584 					(keysP.length>=9 && (offsetsP[offsetsP.length-1]-offsetsP[0]+KEYLEN)>Tools.max(40, (int)(basesP.length*.75f))));
585 
586 //		System.err.println(allBasesCovered+"\t"+Arrays.toString(offsetsP));
587 //		assert(allBasesCovered);
588 
589 		if(prescan_qscore){
590 			prescanResults=prescanAllBlocks(bestScores,
591 					keysP, keyScoresP, offsetsP,
592 					keysM, keyScoresM, offsetsM,
593 					pretendAllBasesAreCovered);
594 
595 			if(prescanResults!=null){
596 				precounts=prescanResults[0];
597 				prescores=prescanResults[1];
598 			}
599 
600 			if(bestScores[1]<MIN_APPROX_HITS_TO_KEEP){return result;}
601 			if(bestScores[3]<maxQuickScore*MIN_QSCORE_MULT2){return result;} //if(bestScores[3]<maxQuickScore(offsetsP, keyScoresP)*.10f){return result;}
602 
603 			if(bestScores[3]>=maxQuickScore && pretendAllBasesAreCovered){
604 				assert(bestScores[3]==maxQuickScore);
605 				assert(bestScores[1]==numHits);
606 
607 				hitsCutoff=calcApproxHitsCutoff(keysP.length, bestScores[1], MIN_APPROX_HITS_TO_KEEP, true);
608 				qscoreCutoff=Tools.max(qscoreCutoff, (int)(bestScores[3]*DYNAMIC_QSCORE_THRESH_PERFECT));
609 			}else{
610 				hitsCutoff=calcApproxHitsCutoff(keysP.length, bestScores[1], MIN_APPROX_HITS_TO_KEEP, false);
611 				qscoreCutoff=Tools.max(qscoreCutoff, (int)(bestScores[3]*PRESCAN_QSCORE_THRESH));
612 			}
613 		}
614 
615 		final int maxScore=maxScore(offsetsP, baseScoresP, keyScoresP, basesP.length, true);
616 		final boolean fullyDefined=AminoAcid.isFullyDefined(basesP);
617 		assert(bestScores[2]<=0) : Arrays.toString(bestScores);
618 
619 		int cycle=0;
620 		for(int chrom=minChrom; chrom<=maxChrom; chrom=((chrom&CHROM_MASK_HIGH)+CHROMS_PER_BLOCK)){
621 			if(precounts==null || precounts[cycle]>=hitsCutoff || prescores[cycle]>=qscoreCutoff){
622 				find(keysP, basesP, baseScoresP, keyScoresP, chrom, Shared.PLUS,
623 						offsetsP, obeyLimits, result, bestScores, allBasesCovered, maxScore, fullyDefined);
624 			}
625 			if(QUIT_AFTER_TWO_PERFECTS){
626 				if(bestScores[5]>=2){break;} //if(bestScores[5]>=3 || (bestScores[5]>=2 && chrom<24)){break;} //for human
627 			}
628 			cycle++;
629 			if(precounts==null || precounts[cycle]>=hitsCutoff || prescores[cycle]>=qscoreCutoff){
630 				find(keysM, basesM, baseScoresM, keyScoresM, chrom, Shared.MINUS,
631 						offsetsM, obeyLimits, result, bestScores, allBasesCovered, maxScore, fullyDefined);
632 			}
633 			if(QUIT_AFTER_TWO_PERFECTS){
634 				if(bestScores[5]>=2){break;} //if(bestScores[5]>=3 || (bestScores[5]>=2 && chrom<24)){break;} //for human
635 			}
636 			cycle++;
637 		}
638 
639 //		assert(Read.CHECKSITES(result, basesP, basesM, id, false)); //TODO: Comment out once checked
640 
641 		return result;
642 	}
643 
644 	/** Search blocks rapidly to find max hits, and perfect sites.  May indicate some blocks can be skipped. */
645 	private final int[][] prescanAllBlocks(int[] bestScores,
646 			int[] keysP, int[] keyScoresP, int[] offsetsP,
647 			int[] keysM, int[] keyScoresM, int[] offsetsM,
648 			final boolean allBasesCovered){
649 
650 		int[][][] pm=new int[][][] {{keysP, keyScoresP, offsetsP}, {keysM, keyScoresM, offsetsM}};
651 
652 		int bestqscore=0;
653 		int maxHits=0;
654 		int minHitsToScore=MIN_APPROX_HITS_TO_KEEP;
655 
656 		final int maxQuickScore=maxQuickScore(offsetsP, keyScoresP);
657 
658 		final int[] counts=precountArray;
659 		final int[] scores=prescoreArray;
660 		final int[][] ret=prescanReturn;
661 		Arrays.fill(counts, keysP.length);
662 		Arrays.fill(scores, maxQuickScore);
663 		ret[0]=counts;
664 		ret[1]=scores;
665 
666 		int cycle=0;
667 		for(int chrom=minChrom; chrom<=maxChrom; chrom=((chrom&CHROM_MASK_HIGH)+CHROMS_PER_BLOCK)){
668 			final int baseChrom=baseChrom(chrom);
669 			for(int pmi=0; pmi<2; pmi++, cycle++){
670 
671 				int[] keys=pm[pmi][0];
672 				int[] keyScores=pm[pmi][1];
673 				int[] offsets=pm[pmi][2];
674 //				int[][] hits=getHitArray(offsets.length);
675 
676 				int[] starts=startArray;
677 				int[] stops=stopArray;
678 				int numHits=getHits(keys, chrom, Integer.MAX_VALUE, starts, stops);
679 
680 				if(numHits<minHitsToScore){
681 					scores[cycle]=-9999;
682 					counts[cycle]=0;
683 				}else{
684 
685 //					final int maxQuickScore=maxQuickScore(offsets, keyScores);
686 					//				System.err.println("maxScore = "+maxScore);
687 
688 					if(numHits<keys.length){
689 						int[][] r=shrink(starts, stops, offsets, keyScores, offsets.length);
690 						if(r!=null){
691 							starts=r[0];
692 							stops=r[1];
693 							offsets=r[2];
694 							keyScores=r[4];
695 						}
696 					}
697 
698 					assert(numHits==offsets.length);
699 					assert(numHits==keyScores.length);
700 					heap.clear();
701 					final Quad[] triples=tripleStorage;
702 					final int[] values=valueArray;
703 
704 					int[] temp=findMaxQscore2(starts, stops, offsets, keyScores, baseChrom, triples, values, minHitsToScore, true,
705 							bestqscore>=maxQuickScore && allBasesCovered);
706 
707 					scores[cycle]=temp[0];
708 					counts[cycle]=temp[1];
709 
710 					bestqscore=Tools.max(temp[0], bestqscore);
711 					maxHits=Tools.max(maxHits, temp[1]);
712 					if(bestqscore>=maxQuickScore && allBasesCovered){
713 						assert(bestqscore==maxQuickScore);
714 						assert(maxHits==keysP.length) :
715 							"\nTemp: \t"+Arrays.toString(temp)+", cycle="+cycle+"\n" +
716 							"Scores: \t"+Arrays.toString(scores)+
717 							"Counts: \t"+Arrays.toString(counts)+
718 							"bestqscore: \t"+bestqscore+
719 							"maxHits: \t"+maxHits+
720 							"maxQuickScore: \t"+maxQuickScore+
721 							"numHits: \t"+numHits+
722 							"minHitsToScore: \t"+minHitsToScore+
723 							"keys.length: \t"+keys.length;
724 
725 						minHitsToScore=Tools.max(minHitsToScore, maxHits);
726 
727 						{
728 							//This early exit is optional.  Does not seem to impact speed much either way.
729 							bestScores[1]=Tools.max(bestScores[1], maxHits);
730 							bestScores[3]=Tools.max(bestScores[3], bestqscore);
731 							return ret;
732 						}
733 					}
734 				}
735 			}
736 		}
737 
738 		bestScores[1]=Tools.max(bestScores[1], maxHits);
739 		bestScores[3]=Tools.max(bestScores[3], bestqscore);
740 
741 		if(!RETAIN_BEST_QCUTOFF){bestScores[2]=-9999;}
742 
743 		return ret;
744 	}
745 
746 
747 	/** Search a single block and strand */
748 	public final ArrayList<SiteScore> find(int[] keys, final byte[] bases, final byte[] baseScores, int[] keyScores,
749 			final int chrom, final byte strand,
750 			int[] offsets, final boolean obeyLimits, ArrayList<SiteScore> ssl, int[] bestScores,
751 			final boolean allBasesCovered, final int maxScore, final boolean fullyDefined){
752 
753 		assert(checkOffsets(offsets)) : Arrays.toString(offsets);
754 
755 		//Index of first location of each key
756 		int[] starts=startArray;
757 		//Index of first location of next key (i.e., (last location of key)+1)
758 		int[] stops=stopArray;
759 
760 		int numHits=getHits(keys, chrom, Integer.MAX_VALUE, starts, stops);
761 		if(numHits<MIN_APPROX_HITS_TO_KEEP){return ssl;}
762 //		assert(false) : "\n"+Data.getChromosome(1).minIndex+"\n"+Data.getChromosome(1).maxIndex+"\n"+Data.getChromosome(1).array.length;
763 //		assert(false) : "\n"+Data.getChromosome(1).minIndex+"\n"+Data.getChromosome(1).maxIndex+"\n"+Data.getChromosome(1).array.length+"\n"+(char)Data.getChromosome(1).array[0];
764 //		assert(false) : numHits+"\n"+Arrays.toString(starts)+"\n"+Arrays.toString(stops)+"\n"+Arrays.toString(index[0].getHitList(starts[0], stops[0]))+"\n";
765 		if(USE_CAMELWALK){
766 			if(USE_SLOWALK3){
767 				if(!RETAIN_BEST_SCORES){Arrays.fill(bestScores, 0);}
768 				ssl=camelWalk3(starts, stops, bases, baseScores, keyScores, offsets, chrom, strand, obeyLimits, ssl, bestScores, allBasesCovered, maxScore, fullyDefined);
769 			}else{
770 				assert(false) : "TODO";
771 				ssl=slowWalk2(starts, stops, bases, baseScores, keyScores, offsets, chrom, strand, obeyLimits, ssl, fullyDefined);
772 			}
773 		}else{
774 			if(USE_SLOWALK3){
775 				if(!RETAIN_BEST_SCORES){Arrays.fill(bestScores, 0);}
776 				ssl=slowWalk3(starts, stops, bases, baseScores, keyScores, offsets, chrom, strand, obeyLimits, ssl, bestScores, allBasesCovered, maxScore, fullyDefined);
777 			}else{
778 				ssl=slowWalk2(starts, stops, bases, baseScores, keyScores, offsets, chrom, strand, obeyLimits, ssl, fullyDefined);
779 			}
780 		}
781 
782 		return ssl;
783 	}
784 
785 	/** Compress arrays by removing null/empty lists */
786 	private final int[][] shrink(int[] starts, int[] stops, int[] offsets, int[] keyScores, final int len){
787 		int numHits=0;
788 		for(int i=0; i<len; i++){
789 			if(starts[i]>=0){numHits++;}
790 		}
791 
792 		if(numHits==offsets.length){
793 			return null;
794 		}
795 		int[][] r=shrinkReturn3;
796 		int[] starts2=startArray;
797 		int[] stops2=stopArray;
798 		int[] offsets2=getOffsetArray(numHits);
799 		int[] keyScores2=new int[numHits];
800 
801 		for(int i=0, j=0; i<len; i++){
802 			if(starts[i]>=0){
803 				starts2[j]=starts[i];
804 				stops2[j]=stops[i];
805 				offsets2[j]=offsets[i];
806 				keyScores2[j]=keyScores[i];
807 				j++;
808 			}
809 		}
810 		r[0]=starts2;
811 		r[1]=stops2;
812 		r[2]=offsets2;
813 		r[4]=keyScores2;
814 		return r;
815 	}
816 
817 	/** Removes "-1" keys. */
818 	private final int[][] shrink2(int[] offsets, int[] keys, int[] keyScores){
819 
820 
821 		int numHits=0;
822 		for(int i=0; i<keys.length; i++){
823 			if(keys[i]>=0){numHits++;}
824 		}
825 
826 
827 		assert(checkOffsets(offsets)) : Arrays.toString(offsets);
828 		if(numHits==keys.length){
829 			return null;
830 		}else{
831 			int[][] r=shrinkReturn2;
832 			int[] offsets2=getOffsetArray(numHits);
833 			assert(offsets2!=offsets);
834 			assert(offsets2.length<offsets.length);
835 			int[] keys2=new int[numHits];
836 			int[] keyScores2=new int[numHits];
837 
838 			for(int i=0, j=0; i<keys.length; i++){
839 				if(keys[i]>=0){
840 					offsets2[j]=offsets[i];
841 					keys2[j]=keys[i];
842 					keyScores2[j]=keyScores[i];
843 					j++;
844 				}
845 			}
846 			assert(checkOffsets(offsets2)) : "\nnumHits="+numHits+"\n"+Arrays.toString(offsets)+" -> \n"+Arrays.toString(offsets2)+"\n"+
847 				"\n"+Arrays.toString(keys)+" -> \n"+Arrays.toString(keys2)+"\n";
848 			r[0]=offsets2;
849 			r[1]=keys2;
850 			r[2]=keyScores2;
851 			return r;
852 		}
853 	}
854 
855 
856 	/** This uses a heap to track next column to increment */
857 	private final ArrayList<SiteScore> slowWalk2(int[] starts, int[] stops, final byte[] bases,
858 			final byte[] baseScores, int[] keyScores, int[] offsets,
859 			final int baseChrom_, final byte strand, final boolean obeyLimits, ArrayList<SiteScore> ssl, final boolean fullyDefined){
860 
861 		if(SHRINK_BEFORE_WALK){
862 			int[][] r=shrink(starts, stops, offsets, keyScores, offsets.length);
863 			if(r!=null){
864 				starts=r[0];
865 				stops=r[1];
866 				offsets=r[2];
867 				keyScores=r[4];
868 			}
869 		}
870 
871 		final int numHits=offsets.length; //After shrink
872 
873 		assert(numHits==offsets.length);
874 		assert(numHits==keyScores.length);
875 
876 		assert(!(!SHRINK_BEFORE_WALK && ADD_SCORE_Z));
877 
878 		//This can be done before or after shrinking, but the results will change depending on MIN_SCORE_MULT and etc.
879 		final int maxScore=maxScore(offsets, baseScores, keyScores, bases.length, true);
880 //		final int maxQuickScore=(!USE_EXTENDED_SCORE ? maxScore : maxQuickScore(offsets));
881 //		System.err.println("maxScore = "+maxScore);
882 
883 //		final int minScore=(obeyLimits ? (int)(MIN_SCORE_MULT*maxScore) : (int)(MIN_SCORE_MULT*0.85f*maxScore));
884 		final int minScore=(obeyLimits ? (int)(MIN_SCORE_MULT*maxScore) : (int)(MIN_SCORE_MULT*1.25f*maxScore));
885 //		final int minQuickScore=(!USE_EXTENDED_SCORE ? minScore : (int)(maxQuickScore*0.15f));
886 //		final int minScore=(int)(MIN_SCORE_MULT*maxScore);
887 //		System.err.println("minScore = "+minScore);
888 
889 		final int baseChrom=baseChrom(baseChrom_);
890 
891 
892 		heap.clear();
893 		final Quad[] triples=tripleStorage;
894 
895 		final Block b=index[baseChrom];
896 		final int[] values=valueArray;
897 		final int[] sizes=sizeArray;
898 		final int[] locArray=(USE_EXTENDED_SCORE ? getLocArray(bases.length) : null);
899 
900 		for(int i=0; i<numHits; i++){
901 			final int[] sites=b.sites;
902 			final int start=starts[i];
903 			sizes[i]=b.length(start, stops[i]);
904 			assert(sizes[i]>0);
905 
906 			int a=sites[start];
907 			int a2;
908 			if((a&SITE_MASK)>=offsets[i]){
909 				a2=a-offsets[i];
910 			}else{
911 				int ch=numberToChrom(a, baseChrom);
912 				int st=numberToSite(a);
913 				int st2=Tools.max(st-offsets[i], 0);
914 				a2=toNumber(st2, ch);
915 			}
916 			assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom));
917 
918 			Quad t=triples[i];
919 			assert(t!=null) : "Should be using tripleStorage";
920 			assert(i==t.column);
921 			t.row=start;
922 			t.site=a2;
923 			t.list=sites;
924 			values[i]=a2;
925 
926 			heap.add(t);
927 		}
928 
929 		if(ssl==null){ssl=new ArrayList<SiteScore>(8);}
930 
931 		int currentTopScore=-999999999;
932 
933 		int cutoff=minScore;
934 
935 		int maxHits=0;
936 		int approxHitsCutoff=MIN_APPROX_HITS_TO_KEEP;
937 
938 //		System.out.println("\nEntering SS loop:");
939 //		System.out.println("maxScore="+maxScore+"\tminScore="+minScore+"\tcurrentTopScore="+currentTopScore+"\n" +
940 //				"cutoff="+cutoff+"\tmaxHits="+maxHits+"\tapproxHitsCutoff="+approxHitsCutoff);
941 //		System.out.println();
942 
943 		SiteScore prevSS=null;
944 		while(!heap.isEmpty()){
945 			Quad t=heap.peek();
946 			final int site=t.site;
947 			final int centerIndex=t.column;
948 
949 			int maxNearbySite=site;
950 			int approxHits=0;
951 
952 			{//Inner loop
953 				final int minsite=site-MAX_INDEL, maxsite=site+MAX_INDEL2;
954 				for(int column=0, chances=numHits-approxHitsCutoff; column<numHits && chances>=0; column++){
955 					final int x=values[column];
956 					assert(x==triples[column].site);
957 					if(x>=minsite && x<=maxsite){
958 						maxNearbySite=(x>maxNearbySite ? x : maxNearbySite);
959 						approxHits++;
960 					}else{chances--;}
961 				}
962 			}
963 			hist_hits[Tools.min(HIT_HIST_LEN, approxHits)]++;
964 
965 			assert(centerIndex>=0) : centerIndex;
966 
967 			//I don't remember what this assertion was for or why, but it's causing trouble.
968 			//assert(approxHits>=1 || approxHitsCutoff>1) : approxHits+", "+approxHitsCutoff+", "+numHits+", "+t.column;
969 			if(approxHits>=approxHitsCutoff){
970 
971 				int score;
972 
973 				int mapStart=site, mapStop=maxNearbySite;
974 
975 				if(USE_EXTENDED_SCORE){
976 					final int chrom=numberToChrom(site, baseChrom);
977 					score=extendScore(bases, baseScores, offsets, values, chrom, centerIndex, locArray, numHits, approxHits);
978 					if(true/*USE_AFFINE_SCORE*/){
979 						//Correct begin and end positions if they changed.
980 						int min=Integer.MAX_VALUE;
981 						int max=Integer.MIN_VALUE;
982 						for(int i=0; i<locArray.length; i++){
983 							int x=locArray[i];
984 							if(x>-1){
985 								if(x<min){min=x;}
986 								if(x>max){max=x;}
987 							}
988 						}
989 
990 //						assert(min>-1 && max>-1) : Arrays.toString(locArray); //TODO: How did this assertion trigger?
991 						if(min<0 || max<0){
992 							//Note: This error can trigger if minChrom and maxChrom do not align to block boundaries
993 							if(!Shared.anomaly){
994 								Shared.anomaly=true;
995 								System.err.println("Anomaly in "+getClass().getName()+".slowWalk: "+
996 										chrom+", "+mapStart+", "+mapStop+", "+centerIndex+", "+
997 										Arrays.toString(locArray)+"\n"+
998 										Arrays.toString(values)+"\n"+
999 										new String(bases)+"\nstrand="+strand+"\n");
1000 								System.err.println();
1001 							}
1002 							score=-99999;
1003 						}
1004 
1005 
1006 						mapStart=toNumber(min, chrom);
1007 						mapStop=toNumber(max, chrom);
1008 
1009 
1010 
1011 //						System.err.println("site="+site+", maxNearbySite="+maxNearbySite+", min="+min+", max="+max+
1012 //								", mapStart="+mapStart+", mapStop="+mapStop);
1013 
1014 //						if(chrom==17 && absdif(min, 30354420)<2000){
1015 //							System.err.println("\n*****\n");
1016 //							System.err.println("site="+site+" ("+numberToSite(site)+"), maxNearbySite="+maxNearbySite+
1017 //									" ("+numberToSite(maxNearbySite)+"), min="+min+", max="+max+
1018 //									", mapStart="+mapStart+", mapStop="+mapStop);
1019 //							System.err.println();
1020 //							System.err.println(Arrays.toString(locArray));
1021 //							System.err.println();
1022 //							System.err.println("chrom="+chrom);
1023 //							System.err.println("score="+score);
1024 //						}
1025 					}
1026 				}else{
1027 					score=quickScore(values, keyScores, centerIndex, offsets, sizes, true, approxHits, numHits);
1028 					if(ADD_SCORE_Z){
1029 						int scoreZ=scoreZ2(values, centerIndex, offsets, approxHits, numHits);
1030 						score+=scoreZ;
1031 					}
1032 				}
1033 
1034 				if(score>=cutoff){
1035 
1036 					if(score>currentTopScore){
1037 //						System.err.println("New top score!");
1038 
1039 						if(DYNAMICALLY_TRIM_LOW_SCORES){
1040 
1041 							maxHits=Tools.max(approxHits, maxHits);
1042 //							approxHitsCutoff=Tools.max(approxHitsCutoff, maxHits);
1043 							approxHitsCutoff=Tools.max(approxHitsCutoff, maxHits-1); //More sensitive, but slower
1044 
1045 							cutoff=Tools.max(cutoff, (int)(score*DYNAMIC_SCORE_THRESH));
1046 
1047 							//									cutoff=Tools.max(cutoff, minScore+(int)((score-minScore)*DYNAMIC_SCORE_THRESH));
1048 							if(USE_EXTENDED_SCORE && score>=maxScore){
1049 								cutoff=Tools.max(cutoff, (int)(score*0.95f));
1050 							}
1051 						}
1052 
1053 						currentTopScore=score;
1054 
1055 //						System.out.println("New top score: "+currentTopScore+" \t("+cutoff+")");
1056 					}
1057 
1058 					final int chrom=numberToChrom(mapStart, baseChrom);
1059 					final int site2=numberToSite(mapStart);
1060 					final int site3=numberToSite(mapStop)+bases.length-1;
1061 
1062 					assert(NUM_CHROM_BITS==0 || site2<SITE_MASK-1000) : "chrom="+chrom+", strand="+strand+", site="+site+
1063 						", maxNearbySite="+maxNearbySite+", site2="+site2+", site3="+site3+", read.length="+bases.length+
1064 						"\n\n"+Arrays.toString(b.getHitList(centerIndex));
1065 					assert(site2<site3) : "chrom="+chrom+", strand="+strand+", site="+site+
1066 						", maxNearbySite="+maxNearbySite+", site2="+site2+", site3="+site3+", read.length="+bases.length;
1067 
1068 
1069 					//Note:  I could also do this as soon as score is calculated.
1070 //					if(ADD_SCORE_Z){
1071 //						int scoreZ=scoreZ2(values, centerIndex, offsets);
1072 //						score+=scoreZ;
1073 //					}
1074 
1075 					//This block is optional, but tries to eliminate multiple identical alignments
1076 //					SiteScore prevSS=(ssl.size()<1 ? null : ssl.get(ssl.size()-1));
1077 
1078 					SiteScore ss=null;
1079 					final boolean perfect1=USE_EXTENDED_SCORE && score==maxScore && fullyDefined;
1080 
1081 					int[] gapArray=null;
1082 					if(site3-site2>=MINGAP+bases.length){
1083 						gapArray=makeGapArray(locArray, mapStart, MINGAP);
1084 						if(gapArray!=null){
1085 							int sub=site2-mapStart;//thus site2=mapStart+sub
1086 							for(int i=0; i<gapArray.length; i++){
1087 								gapArray[i]+=sub;
1088 							}
1089 							assert(gapArray[0]==mapStart) : gapArray[0]+", "+mapStart;
1090 							assert(gapArray[gapArray.length-1]==mapStop);
1091 						}
1092 						assert(false) : Arrays.toString(locArray);
1093 					}
1094 
1095 					if(gapArray==null && prevSS!=null && prevSS.gaps==null &&
1096 							prevSS.chrom==chrom && prevSS.strand==strand && overlap(prevSS.start, prevSS.stop, site2, site3)){
1097 
1098 						int betterScore=Tools.max(score, prevSS.score);
1099 						int minStart=Tools.min(prevSS.start, site2);
1100 						int maxStop=Tools.max(prevSS.stop, site3);
1101 						final boolean perfect2=USE_EXTENDED_SCORE && prevSS.score==maxScore && fullyDefined;
1102 						assert(!USE_EXTENDED_SCORE || perfect2==prevSS.perfect);
1103 
1104 						boolean shortEnough=(!LIMIT_SUBSUMPTION_LENGTH_TO_2X  || (maxStop-minStart<2*bases.length));
1105 
1106 						if(prevSS.start==site2 && prevSS.stop==site3){
1107 							prevSS.score=prevSS.quickScore=betterScore;
1108 						}else if(SUBSUME_SAME_START_SITES && shortEnough && prevSS.start==site2){
1109 							if(perfect2){
1110 								//do nothing
1111 							}else if(perfect1){
1112 								prevSS.setStop(site3);
1113 								prevSS.setPerfect();
1114 							}else{
1115 								prevSS.setStop(maxStop);
1116 							}
1117 							prevSS.score=prevSS.quickScore=betterScore;
1118 						}else if(SUBSUME_SAME_STOP_SITES && shortEnough && prevSS.stop==site3){
1119 							if(perfect2){
1120 								//do nothing
1121 							}else if(perfect1){
1122 								prevSS.setStart(site2);
1123 								prevSS.setPerfect();
1124 							}else{
1125 								prevSS.setStart(minStart);
1126 							}
1127 							prevSS.score=prevSS.quickScore=betterScore;
1128 						}else if(SUBSUME_OVERLAPPING_SITES && shortEnough && (maxStop-minStart<=bases.length+MAX_SUBSUMPTION_LENGTH)
1129 								&& !perfect1 && !perfect2){
1130 							prevSS.setLimits(minStart, maxStop);
1131 							prevSS.score=prevSS.quickScore=betterScore;
1132 						}else{
1133 							ss=new SiteScore(chrom, strand, site2, site3, approxHits, score, false, perfect1);
1134 							if(!perfect1){ss.setPerfect(bases);}
1135 							assert(!perfect1 || ss.stop-ss.start==bases.length-1);
1136 						}
1137 						assert(!perfect2 || prevSS.stop-prevSS.start==bases.length-1);
1138 					}else{
1139 						ss=new SiteScore(chrom, strand, site2, site3, approxHits, score, false, perfect1);
1140 						if(!perfect1){ss.setPerfect(bases);}
1141 						ss.gaps=gapArray;
1142 						if(verbose && gapArray!=null){
1143 							System.err.println(ss.toText()+"\t"+Arrays.toString(gapArray)+"\n"+Arrays.toString(locArray)+"\n");
1144 						}
1145 					}
1146 
1147 					if(ss!=null){
1148 //						System.out.println("Added site "+ss.toText());
1149 						ssl.add(ss);
1150 						prevSS=ss;
1151 					}else{
1152 //						System.out.println("Subsumed site "+new SiteScore(chrom, strand, site2, site3, score).toText());
1153 					}
1154 
1155 //					if(prevSS!=null && prevSS.chrom==chrom && prevSS.strand==strand && overlap(prevSS.start, prevSS.stop, site2, site3)){
1156 //						int betterScore=Tools.max(score, prevSS.score);
1157 //						if(prevSS.start==site2 && prevSS.stop==site3){
1158 //							prevSS.score=prevSS.quickScore=betterScore;
1159 //						}else if(prevSS.start==site2
1160 //								/*isWithin(prevSS.start, prevSS.stop, site2, site3) ||
1161 //								isWithin(site2, site3, prevSS.start, prevSS.stop)*/){
1162 //							prevSS.score=prevSS.quickScore=betterScore;
1163 //							assert(prevSS.start<prevSS.stop);
1164 ////							prevSS.start=Tools.min(prevSS.start, site2);
1165 //							prevSS.stop=Tools.max(prevSS.stop, site3);
1166 //							assert(prevSS.start<prevSS.stop);
1167 //						}else{
1168 //							SiteScore ss=new SiteScore(chrom, strand, site2, site3, score);
1169 //							ssl.add(ss);
1170 //							prevSS=ss;
1171 //						}
1172 //					}else{
1173 //						SiteScore ss=new SiteScore(chrom, strand, site2, site3, score);
1174 //						ssl.add(ss);
1175 //						prevSS=ss;
1176 //					}
1177 
1178 				}
1179 			}
1180 
1181 			while(heap.peek().site==site){ //Remove all identical elements, and add subsequent elements
1182 				final Quad t2=heap.poll();
1183 				final int row=t2.row+1, col=t2.column;
1184 				if(row<stops[col]){
1185 					t2.row=row;
1186 
1187 					int a=t2.list[row];
1188 					int a2;
1189 					if((a&SITE_MASK)>=offsets[col]){
1190 						a2=a-offsets[col];
1191 
1192 						assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) :
1193 							"baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", strand="+strand+", site="+site+
1194 							", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col];
1195 					}else{
1196 						int ch=numberToChrom(a, baseChrom);
1197 						int st=numberToSite(a);
1198 						int st2=Tools.max(st-offsets[col], 0);
1199 						a2=toNumber(st2, ch);
1200 
1201 						assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) :
1202 							"baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", strand="+strand+", site="+site+
1203 							", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col];
1204 					}
1205 
1206 					assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) :
1207 						"baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", strand="+strand+", site="+site+
1208 						", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col];
1209 
1210 					t2.site=a2;
1211 					values[col]=a2;
1212 					heap.add(t2);
1213 				}
1214 				if(heap.isEmpty()){break;}
1215 			}
1216 
1217 		}
1218 
1219 		return ssl;
1220 	}
1221 
1222 	/** This uses a heap to track next column to increment */
1223 	private final ArrayList<SiteScore> slowWalk3(int[] starts, int[] stops, final byte[] bases,
1224 			final byte[] baseScores, int[] keyScores, int[] offsets,
1225 			final int baseChrom_, final byte strand, final boolean obeyLimits, ArrayList<SiteScore> ssl,
1226 			int[] bestScores, final boolean allBasesCovered, final int maxScore, final boolean fullyDefined){
1227 		assert(USE_EXTENDED_SCORE);
1228 
1229 		final int numKeys=offsets.length; //Before shrink
1230 
1231 		//This can be done before or after shrinking, but the results will change depending on MIN_SCORE_MULT and etc.
1232 		final int maxQuickScore=maxQuickScore(offsets, keyScores);
1233 
1234 		if(SHRINK_BEFORE_WALK){
1235 			int[][] r=shrink(starts, stops, offsets, keyScores, offsets.length);
1236 			if(r!=null){
1237 				starts=r[0];
1238 				stops=r[1];
1239 				offsets=r[2];
1240 				keyScores=r[4];
1241 			}
1242 		}
1243 
1244 		final int numHits=offsets.length; //After shrink
1245 
1246 
1247 		assert(numHits==offsets.length);
1248 		assert(numHits==keyScores.length);
1249 
1250 		usedKeys+=numHits;
1251 		usedKeyIterations++;
1252 
1253 		final boolean filter_by_qscore=(FILTER_BY_QSCORE && numKeys>=5);
1254 
1255 		assert(!(!SHRINK_BEFORE_WALK && ADD_SCORE_Z));
1256 
1257 
1258 //		final int minScore=(obeyLimits ? (int)(MIN_SCORE_MULT*maxScore) : (int)(MIN_SCORE_MULT*0.85f*maxScore));
1259 		final int minScore=(obeyLimits ? (int)(MIN_SCORE_MULT*maxScore) : (int)(MIN_SCORE_MULT*1.25f*maxScore));
1260 		final int minQuickScore=(int)(MIN_QSCORE_MULT*maxQuickScore);
1261 
1262 		final int baseChrom=baseChrom(baseChrom_);
1263 
1264 		heap.clear();
1265 
1266 		final Quad[] triples=tripleStorage;
1267 
1268 		final int[] values=valueArray;
1269 		final int[] sizes=sizeArray;
1270 		final int[] locArray=(USE_EXTENDED_SCORE ? getLocArray(bases.length) : null);
1271 		final Block b=index[baseChrom];
1272 
1273 		if(ssl==null){ssl=new ArrayList<SiteScore>(8);}
1274 
1275 		int currentTopScore=bestScores[0];
1276 		int cutoff=Tools.max(minScore, (int)(currentTopScore*DYNAMIC_SCORE_THRESH));
1277 
1278 		int qcutoff=Tools.max(bestScores[2], minQuickScore);
1279 		int bestqscore=bestScores[3];
1280 		int maxHits=bestScores[1];
1281 		int perfectsFound=bestScores[5];
1282 		assert((currentTopScore>=maxScore) == (perfectsFound>0)) : currentTopScore+", "+maxScore+", "+perfectsFound+", "+maxHits+", "+numHits+", "+new String(bases);
1283 		int approxHitsCutoff=calcApproxHitsCutoff(numKeys, maxHits, MIN_APPROX_HITS_TO_KEEP, currentTopScore>=maxScore);
1284 		if(approxHitsCutoff>numHits){return ssl;}
1285 
1286 		final boolean shortCircuit=(allBasesCovered && numKeys==numHits && filter_by_qscore);
1287 
1288 
1289 		assert(USE_EXTENDED_SCORE);
1290 
1291 		if(currentTopScore>=maxScore){
1292 			assert(currentTopScore==maxScore);
1293 			qcutoff=Tools.max(qcutoff, (int)(maxQuickScore*DYNAMIC_QSCORE_THRESH_PERFECT));
1294 		}
1295 
1296 
1297 		for(int i=0; i<numHits; i++){
1298 			final int[] sites=b.sites;
1299 			final int start=starts[i];
1300 			sizes[i]=b.length(start, stops[i]);
1301 			assert(sizes[i]>0);
1302 
1303 			int a=sites[start];
1304 			int a2;
1305 			if((a&SITE_MASK)>=offsets[i]){
1306 				a2=a-offsets[i];
1307 			}else{
1308 				int ch=numberToChrom(a, baseChrom);
1309 				int st=numberToSite(a);
1310 				int st2=Tools.max(st-offsets[i], 0);
1311 				a2=toNumber(st2, ch);
1312 			}
1313 			assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom));
1314 
1315 			Quad t=triples[i];
1316 			assert(t!=null) : "Should be using tripleStorage";
1317 			assert(i==t.column);
1318 			t.row=start;
1319 			t.site=a2;
1320 			t.list=sites;
1321 			values[i]=a2;
1322 
1323 			heap.add(t);
1324 		}
1325 
1326 //		System.out.println("\nEntering SS loop:");
1327 //		System.out.println("maxScore="+maxScore+"\tminScore="+minScore+"\tcurrentTopScore="+currentTopScore+"\n" +
1328 //				"cutoff="+cutoff+"\tmaxHits="+maxHits+"\tapproxHitsCutoff="+approxHitsCutoff);
1329 //		System.out.println("maxQuickScore="+maxQuickScore+"\tminQuickScore="+minQuickScore+"\tqcutoff="+qcutoff);
1330 
1331 
1332 		SiteScore prevSS=null;
1333 		while(!heap.isEmpty()){
1334 			Quad t=heap.peek();
1335 			final int site=t.site;
1336 			final int centerIndex=t.column;
1337 
1338 			int maxNearbySite=site;
1339 
1340 
1341 			int approxHits=0;
1342 
1343 			{//Inner loop
1344 				final int minsite=site-MAX_INDEL, maxsite=site+MAX_INDEL2;
1345 				for(int column=0, chances=numHits-approxHitsCutoff; column<numHits && chances>=0; column++){
1346 					final int x=values[column];
1347 					assert(x==triples[column].site);
1348 					if(x>=minsite && x<=maxsite){
1349 						maxNearbySite=(x>maxNearbySite ? x : maxNearbySite);
1350 						approxHits++;
1351 					}else{chances--;}
1352 				}
1353 			}
1354 			hist_hits[Tools.min(HIT_HIST_LEN, approxHits)]++;
1355 
1356 			assert(centerIndex>=0) : centerIndex;
1357 
1358 			//I don't remember what this assertion was for or why, but it's causing trouble.
1359 			//assert(approxHits>=1 || approxHitsCutoff>1) : approxHits+", "+approxHitsCutoff+", "+numHits+", "+t.column;
1360 			if(approxHits>=approxHitsCutoff){
1361 
1362 				int score;
1363 				int qscore=(filter_by_qscore ? quickScore(values, keyScores, centerIndex, offsets, sizes, true, approxHits, numHits) : qcutoff);
1364 				if(ADD_SCORE_Z){
1365 					int scoreZ=scoreZ2(values, centerIndex, offsets, approxHits, numHits);
1366 					qscore+=scoreZ;
1367 				}
1368 
1369 				int mapStart=site, mapStop=maxNearbySite;
1370 
1371 				assert(USE_EXTENDED_SCORE);
1372 
1373 				boolean locArrayValid=false;
1374 				if(qscore<qcutoff){
1375 					score=-1;
1376 				}else{
1377 
1378 					final int chrom=numberToChrom(site, baseChrom);
1379 
1380 					//TODO Note that disabling the shortCircuit code seems to make things run 2% faster (with identical results).
1381 					//However, theoretically, shortCircuit should be far more efficient.  Test both ways on cluster and on a larger run.
1382 					//May have something to do with compiler loop optimizations.
1383 					if(shortCircuit && qscore==maxQuickScore){
1384 						assert(approxHits==numKeys);
1385 						score=maxScore;
1386 					}else{
1387 						if(verbose){
1388 							System.err.println("numHits="+numHits+", approxHits="+approxHits+", keys="+numKeys+", centerIndex="+centerIndex+
1389 									", qscore="+qscore+", qcutoff="+qcutoff+", filter_by_qscore="+filter_by_qscore);
1390 							System.err.println("Extending "+Arrays.toString(values));
1391 						}
1392 						score=extendScore(bases, baseScores, offsets, values, chrom, centerIndex, locArray, numHits, approxHits);
1393 						locArrayValid=true;
1394 
1395 						if(verbose){
1396 							System.err.println("score: "+score);
1397 							System.err.println("locArray: "+Arrays.toString(locArray));
1398 						}
1399 
1400 						//Correct begin and end positions if they changed.
1401 						int min=Integer.MAX_VALUE;
1402 						int max=Integer.MIN_VALUE;
1403 						for(int i=0; i<locArray.length; i++){
1404 							int x=locArray[i];
1405 							if(x>-1){
1406 								if(x<min){min=x;}
1407 								if(x>max){max=x;}
1408 							}
1409 						}
1410 
1411 						if(score>=maxScore){
1412 							assert(min==max && min>-1) : "\n"+score+", "+maxScore+", "+min+", "+max+
1413 							", "+(max-min)+", "+bases.length+"\n"+Arrays.toString(locArray)+"\n";
1414 						}
1415 
1416 						//							assert(min>-1 && max>-1) : Arrays.toString(locArray); //TODO: How did this assertion trigger?
1417 						if(min<0 || max<0){
1418 							if(!Shared.anomaly){
1419 								Shared.anomaly=true;
1420 								System.err.println("\nAnomaly in "+getClass().getName()+".slowWalk:\n"+
1421 										"chrom="+chrom+", mapStart="+mapStart+", mapStop="+mapStop+", centerIndex="+centerIndex+", strand="+strand+"\n"+
1422 										"score="+score+", maxScore="+maxScore+", qscore="+qscore+", filter_by_qscore="+filter_by_qscore+"\n"+
1423 										"numHits="+approxHits+", approxHits="+approxHits+"\n"+
1424 										"min="+min+", max="+max+", (max-min)="+(max-min)+"\n"+
1425 										"bases.length="+bases.length+"\n"+Arrays.toString(locArray)+"\n"+
1426 										"locArray:\t"+Arrays.toString(locArray)+"\n"+
1427 										"values:\t"+Arrays.toString(values)+"\n"+
1428 										"bases:\t"+new String(bases));
1429 								System.err.println();
1430 								assert(false);
1431 							}
1432 							score=-99999;
1433 						}
1434 
1435 						//mapStart and mapStop are indices
1436 						mapStart=toNumber(min, chrom);
1437 						mapStop=toNumber(max, chrom);
1438 
1439 						if(score>=maxScore){
1440 							assert(mapStop-mapStart==0) : "\n"+score+", "+maxScore+", "+min+", "+max+
1441 							", "+(max-min)+", "+(mapStop-mapStart)+", "+bases.length+"\n"+Arrays.toString(locArray)+"\n";
1442 						}
1443 					}
1444 
1445 					if(score==maxScore){
1446 						qcutoff=Tools.max(qcutoff, (int)(maxQuickScore*DYNAMIC_QSCORE_THRESH_PERFECT));
1447 						approxHitsCutoff=calcApproxHitsCutoff(numKeys, maxHits, MIN_APPROX_HITS_TO_KEEP, true);
1448 					}
1449 
1450 					if(score>=cutoff){
1451 						qcutoff=Tools.max(qcutoff, (int)(qscore*DYNAMIC_QSCORE_THRESH));
1452 						bestqscore=Tools.max(qscore, bestqscore);
1453 					}
1454 				}
1455 
1456 				if(score>=cutoff){
1457 
1458 					if(score>currentTopScore){
1459 //						System.err.println("New top score!");
1460 
1461 						if(DYNAMICALLY_TRIM_LOW_SCORES){
1462 
1463 							maxHits=Tools.max(approxHits, maxHits);
1464 							approxHitsCutoff=calcApproxHitsCutoff(numKeys, maxHits, approxHitsCutoff, currentTopScore>=maxScore);
1465 
1466 							cutoff=Tools.max(cutoff, (int)(score*DYNAMIC_SCORE_THRESH));
1467 							if(score>=maxScore){
1468 								assert(USE_EXTENDED_SCORE);
1469 								cutoff=Tools.max(cutoff, (int)(score*0.95f));
1470 							}
1471 						}
1472 
1473 						currentTopScore=score;
1474 
1475 //						System.out.println("New top score: "+currentTopScore+" \t("+cutoff+")");
1476 					}
1477 
1478 					final int chrom=numberToChrom(mapStart, baseChrom);
1479 					final int site2=numberToSite(mapStart);
1480 					final int site3=numberToSite(mapStop)+bases.length-1;
1481 
1482 					assert(site2!=site3) : site2+", "+site3+", "+mapStart+", "+mapStop;
1483 
1484 					assert(NUM_CHROM_BITS==0 || site2<SITE_MASK-1000) : "chrom="+chrom+", strand="+strand+", site="+site+
1485 						", maxNearbySite="+maxNearbySite+", site2="+site2+", site3="+site3+", read.length="+bases.length+
1486 						"\n\n"+Arrays.toString(b.getHitList(centerIndex));
assert(site2<site3) : R+chrom+R+strand+R+site+ R+maxNearbySite+R+site2+R+site3+R+bases.length; int[] gapArray=null; if(site3-site2>=MINGAP+bases.length)1487 					assert(site2<site3) : "chrom="+chrom+", strand="+strand+", site="+site+
1488 						", maxNearbySite="+maxNearbySite+", site2="+site2+", site3="+site3+", read.length="+bases.length;
1489 
1490 
1491 					int[] gapArray=null;
1492 					if(site3-site2>=MINGAP+bases.length){
1493 						assert(locArrayValid) : "Loc array was not filled.";
1494 //						System.err.println("****\n"+Arrays.toString(locArray)+"\n");
1495 //						int[] clone=locArray.clone();
1496 						gapArray=makeGapArray(locArray, site2, MINGAP);
1497 						if(gapArray!=null){
1498 //							System.err.println(Arrays.toString(locArray)+"\n");
1499 //							System.err.println(Arrays.toString(gapArray));
1500 //
1501 ////							int sub=site2-mapStart;//thus site2=mapStart+sub
1502 ////							for(int i=0; i<gapArray.length; i++){
1503 ////								gapArray[i]+=sub;
1504 ////							}
1505 ////							System.err.println(Arrays.toString(gapArray));
1506 //
1507 //							System.err.println(mapStart+" -> "+site2);
1508 //							System.err.println(mapStop+" -> "+site3);
1509 
1510 							assert(gapArray[0]>=site2 && gapArray[0]-site2<bases.length);
1511 							assert(gapArray[gapArray.length-1]<=site3 && site3-gapArray[gapArray.length-1]<bases.length) : "\n"+
1512 								mapStart+" -> "+site2+"\n"+
1513 								mapStop+" -> "+site3+"\n\n"+
1514 								Arrays.toString(gapArray)+"\n\n"+
1515 //								Arrays.toString(clone)+"\n\n"+
1516 								Arrays.toString(locArray)+"\n"+
1517 								"numHits="+numHits+", "+
1518 								"heap.size="+heap.size()+", "+
1519 								"numHits="+numHits+", "+
1520 								"approxHits="+approxHits+"\n";
1521 							gapArray[0]=Tools.min(gapArray[0], site2);
1522 							gapArray[gapArray.length-1]=Tools.max(gapArray[gapArray.length-1], site3);
1523 						}
1524 						if(verbose){System.err.println("@ site "+site2+", made gap array: "+Arrays.toString(gapArray));}
1525 //						assert(false) : Arrays.toString(locArray);
1526 					}
1527 
1528 
1529 					//This block is optional, but tries to eliminate multiple identical alignments
1530 
1531 					SiteScore ss=null;
1532 					final boolean perfect1=USE_EXTENDED_SCORE && score==maxScore && fullyDefined;
1533 					final boolean inbounds=(site2>=0 && site3<Data.chromLengths[chrom]);
1534 //					if(!inbounds){System.err.println("Index tossed out-of-bounds site chr"+chrom+", "+site2+"-"+site3);}
1535 
1536 					if(verbose){
1537 						System.err.println("Considering site chr"+chrom+", site2="+site2+", site3="+site3+", mapStart="+mapStart+", mapStop="+mapStop);
1538 						if(!inbounds){System.err.println("Index tossed out-of-bounds site.");}
1539 						assert(site2!=site3) : site2+", "+site3+", "+mapStart+", "+mapStop;
1540 					}
1541 					//assert((ss==null || !ss.semiperfect) && (prevSS==null || !prevSS.semiperfect)) : (ss==null ? false : ss.semiperfect)+", "+(prevSS==null ? false : prevSS.semiperfect); //***
1542 
1543 					if(inbounds && !SEMIPERFECTMODE && !PERFECTMODE && gapArray==null && prevSS!=null &&
1544 							prevSS.chrom==chrom && prevSS.strand==strand && overlap(prevSS.start, prevSS.stop, site2, site3)){
1545 
1546 						if(verbose){System.err.println("Considering overlapping site chr"+chrom+", "+site2+"-"+site3);}
1547 
1548 						final int betterScore=Tools.max(score, prevSS.score);
1549 						final int minStart=Tools.min(prevSS.start, site2);
1550 						final int maxStop=Tools.max(prevSS.stop, site3);
1551 						final boolean perfect2=USE_EXTENDED_SCORE && prevSS.score==maxScore && fullyDefined;
1552 						assert(!USE_EXTENDED_SCORE || perfect2==prevSS.perfect);
1553 
1554 						final boolean shortEnough=(!LIMIT_SUBSUMPTION_LENGTH_TO_2X  || (maxStop-minStart<2*bases.length));
1555 
1556 						if(prevSS.start==site2 && prevSS.stop==site3){
1557 							if(verbose){System.err.println("Class 1: Same bounds as last site.");}
1558 							prevSS.score=prevSS.quickScore=betterScore;
1559 							prevSS.perfect=(prevSS.perfect || perfect1 || perfect2);
1560 							if(prevSS.perfect){prevSS.semiperfect=true;}
1561 							//assert((ss==null || !ss.semiperfect) && (prevSS==null || !prevSS.semiperfect)) : (ss==null ? false : ss.semiperfect)+", "+(prevSS==null ? false : prevSS.semiperfect); //***
1562 						}else if(SUBSUME_SAME_START_SITES && shortEnough && prevSS.start==site2 && !prevSS.semiperfect){
1563 							if(verbose){System.err.println("Class 2: Same start as last site.");}
1564 							if(perfect2){
1565 								//do nothing
1566 							}else if(perfect1){
1567 								prevSS.setStop(site3);
1568 								if(!prevSS.perfect){perfectsFound++;}//***$
1569 								prevSS.perfect=prevSS.semiperfect=true;
1570 							}else{
1571 								prevSS.setStop(maxStop);
1572 								prevSS.setPerfect(bases);
1573 							}
1574 							//assert((ss==null || !ss.semiperfect) && (prevSS==null || !prevSS.semiperfect)) : (ss==null ? false : ss.semiperfect)+", "+(prevSS==null ? false : prevSS.semiperfect); //***
1575 							prevSS.score=prevSS.quickScore=betterScore;
1576 						}else if(SUBSUME_SAME_STOP_SITES && shortEnough && prevSS.stop==site3 && !prevSS.semiperfect){
1577 							if(verbose){System.err.println("Class 3: Same stop as last site.");}
1578 							if(perfect2){
1579 								//do nothing
1580 							}else if(perfect1){
1581 								prevSS.setStart(site2);
1582 								if(!prevSS.perfect){perfectsFound++;}//***$
1583 								prevSS.perfect=prevSS.semiperfect=true;
1584 							}else{
1585 								prevSS.setStart(minStart);
1586 								prevSS.setPerfect(bases);
1587 							}
1588 							//assert((ss==null || !ss.semiperfect) && (prevSS==null || !prevSS.semiperfect)) : (ss==null ? false : ss.semiperfect)+", "+(prevSS==null ? false : prevSS.semiperfect); //***
1589 							prevSS.score=prevSS.quickScore=betterScore;
1590 						}else if(SUBSUME_OVERLAPPING_SITES && shortEnough && (maxStop-minStart<=bases.length+MAX_SUBSUMPTION_LENGTH)
1591 								&& !perfect1 && !perfect2 && !prevSS.semiperfect){
1592 							if(verbose){System.err.println("Class 4.");}
1593 							prevSS.setLimits(minStart, maxStop);
1594 							prevSS.score=prevSS.quickScore=betterScore;
1595 							prevSS.setPerfect(bases);
1596 							//assert((ss==null || !ss.semiperfect) && (prevSS==null || !prevSS.semiperfect)) : (ss==null ? false : ss.semiperfect)+", "+(prevSS==null ? false : prevSS.semiperfect); //***
1597 						}else{
1598 							if(verbose){System.err.println("Class 5: Making new site");}
1599 							ss=new SiteScore(chrom, strand, site2, site3, approxHits, score, false, perfect1);
1600 							if(!perfect1){ss.setPerfect(bases);}
1601 							//assert((ss==null || !ss.semiperfect) && (prevSS==null || !prevSS.semiperfect)) : (ss==null ? false : ss.semiperfect)+", "+(prevSS==null ? false : prevSS.semiperfect); //***
1602 //							assert(Read.CHECKSITE(ss, bases));
1603 							if(verbose){System.err.println("A) Index made SiteScore "+ss.toText()+", "+Arrays.toString(ss.gaps));}
1604 							assert(!perfect1 || ss.stop-ss.start==bases.length-1);
1605 						}
1606 						assert(!perfect2 || prevSS.stop-prevSS.start==bases.length-1);
1607 					}else if(inbounds){
1608 						if(verbose){System.err.println("Considering new site chr"+chrom+", "+site2+"-"+site3);}
1609 						ss=new SiteScore(chrom, strand, site2, site3, approxHits, score, false, perfect1);
1610 						if(!perfect1){ss.setPerfect(bases);}
1611 						//assert((ss==null || !ss.semiperfect) && (prevSS==null || !prevSS.semiperfect)) : (ss==null ? false : ss.semiperfect)+", "+(prevSS==null ? false : prevSS.semiperfect); //***
1612 //						assert(Read.CHECKSITE(ss, bases));
1613 						ss.gaps=gapArray;
1614 						if(verbose){System.err.println("B) Index made SiteScore "+ss.toText()+", "+Arrays.toString(ss.gaps));}
1615 					}
1616 					//assert((ss==null || !ss.semiperfect) && (prevSS==null || !prevSS.semiperfect)) : (ss==null ? false : ss.semiperfect)+", "+(prevSS==null ? false : prevSS.semiperfect); //***
1617 					assert(ss==null || !ss.perfect || ss.semiperfect) : ss;
1618 					assert(prevSS==null || !prevSS.perfect || prevSS.semiperfect) : "\n"+SiteScore.header()+"\n"+ss+"\n"+prevSS;
1619 					if(ss!=null && ((SEMIPERFECTMODE && !ss.semiperfect) || (PERFECTMODE && !ss.perfect))){ss=null;}
1620 
1621 
1622 					if(ss!=null){
1623 //						System.out.println("Added site "+ss.toText()+", qscore="+qscore);
1624 						ssl.add(ss);
1625 						if(ss.perfect){
1626 
1627 							if(prevSS==null || !prevSS.perfect || !ss.overlaps(prevSS)){
1628 								if(prevSS==null){assert ssl.size()<2 || !ss.overlaps(ssl.get(ssl.size()-2));}
1629 								perfectsFound++;
1630 
1631 								//Human-specific code
1632 //								if(QUIT_AFTER_TWO_PERFECTS){
1633 //									if(perfectsFound>=3 || (perfectsFound>=2 && chrom<24)){break;}
1634 //								}
1635 
1636 								if(QUIT_AFTER_TWO_PERFECTS && perfectsFound>=2){break;}
1637 							}
1638 						}
1639 
1640 						prevSS=ss;
1641 					}else{
1642 //						System.out.println("Subsumed site "+new SiteScore(chrom, strand, site2, site3, score).toText());
1643 					}
1644 					//assert((ss==null || !ss.semiperfect) && (prevSS==null || !prevSS.semiperfect)) : (ss==null ? false : ss.semiperfect)+", "+(prevSS==null ? false : prevSS.semiperfect); //***
1645 				}
1646 			}
1647 
1648 			while(heap.peek().site==site){ //Remove all identical elements, and add subsequent elements
1649 				final Quad t2=heap.poll();
1650 				final int row=t2.row+1, col=t2.column;
1651 				if(row<stops[col]){
1652 					t2.row=row;
1653 
1654 					int a=t2.list[row];
1655 					int a2;
1656 					if((a&SITE_MASK)>=offsets[col]){
1657 						a2=a-offsets[col];
1658 
1659 						assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) :
1660 							"baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", strand="+strand+", site="+site+
1661 							", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col];
1662 					}else{
1663 						int ch=numberToChrom(a, baseChrom);
1664 						int st=numberToSite(a);
1665 						int st2=Tools.max(st-offsets[col], 0);
1666 						a2=toNumber(st2, ch);
1667 
1668 						assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) :
1669 							"baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", strand="+strand+", site="+site+
1670 							", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col];
1671 					}
1672 
1673 					assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) :
1674 						"baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", strand="+strand+", site="+site+
1675 						", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col];
1676 
1677 					t2.site=a2;
1678 					values[col]=a2;
1679 					heap.add(t2);
1680 				}else if(heap.size()<approxHitsCutoff || PERFECTMODE){
1681 					assert(USE_EXTENDED_SCORE);
1682 					bestScores[0]=Tools.max(bestScores[0], currentTopScore);
1683 					bestScores[1]=Tools.max(bestScores[1], maxHits);
1684 					bestScores[2]=Tools.max(bestScores[2], qcutoff);
1685 					bestScores[3]=Tools.max(bestScores[3], bestqscore);
1686 
1687 					bestScores[4]=maxQuickScore;
1688 					bestScores[5]=perfectsFound; //***$ fixed by adding this line
1689 					if(!RETAIN_BEST_QCUTOFF){bestScores[2]=-9999;}
1690 
1691 					return ssl;
1692 				}
1693 				if(heap.isEmpty()){
1694 					assert(false) : heap.size()+", "+approxHitsCutoff;
1695 					break;
1696 				}
1697 			}
1698 
1699 		}
1700 
1701 		assert(USE_EXTENDED_SCORE);
1702 		bestScores[0]=Tools.max(bestScores[0], currentTopScore);
1703 		bestScores[1]=Tools.max(bestScores[1], maxHits);
1704 		bestScores[2]=Tools.max(bestScores[2], qcutoff);
1705 		bestScores[3]=Tools.max(bestScores[3], bestqscore);
1706 
1707 		bestScores[4]=maxQuickScore;
1708 		bestScores[5]=perfectsFound;
1709 		if(!RETAIN_BEST_QCUTOFF){bestScores[2]=-9999;}
1710 
1711 		return ssl;
1712 	}
1713 
1714 
1715 	/** Uses dual heaps - reserve (big) and active (small). */
camelWalk3(int[] starts, int[] stops, final byte[] bases, final byte[] baseScores, int[] keyScores, int[] offsets, final int baseChrom_, final byte strand, final boolean obeyLimits, ArrayList<SiteScore> ssl, int[] bestScores, final boolean allBasesCovered, final int maxScore, final boolean fullyDefined)1716 	private final ArrayList<SiteScore> camelWalk3(int[] starts, int[] stops, final byte[] bases,
1717 			final byte[] baseScores, int[] keyScores, int[] offsets,
1718 			final int baseChrom_, final byte strand, final boolean obeyLimits, ArrayList<SiteScore> ssl,
1719 			int[] bestScores, final boolean allBasesCovered, final int maxScore, final boolean fullyDefined){
1720 		assert(USE_EXTENDED_SCORE);
1721 
1722 		final int numKeys=offsets.length; //Before shrink
1723 
1724 		//This can be done before or after shrinking, but the results will change depending on MIN_SCORE_MULT and etc.
1725 		final int maxQuickScore=maxQuickScore(offsets, keyScores);
1726 
1727 		if(SHRINK_BEFORE_WALK){
1728 			int[][] r=shrink(starts, stops, offsets, keyScores, offsets.length);
1729 			if(r!=null){
1730 				starts=r[0];
1731 				stops=r[1];
1732 				offsets=r[2];
1733 				keyScores=r[4];
1734 			}
1735 		}
1736 
1737 		final int numHits=offsets.length; //After shrink
1738 
1739 
1740 		assert(numHits==offsets.length);
1741 		assert(numHits==keyScores.length);
1742 
1743 		usedKeys+=numHits;
1744 		usedKeyIterations++;
1745 
1746 		final boolean filter_by_qscore=(FILTER_BY_QSCORE && numKeys>=5);
1747 
1748 		assert(!(!SHRINK_BEFORE_WALK && ADD_SCORE_Z));
1749 
1750 
1751 //		final int minScore=(obeyLimits ? (int)(MIN_SCORE_MULT*maxScore) : (int)(MIN_SCORE_MULT*0.85f*maxScore));
1752 		final int minScore=(obeyLimits ? (int)(MIN_SCORE_MULT*maxScore) : (int)(MIN_SCORE_MULT*1.25f*maxScore));
1753 		final int minQuickScore=(int)(MIN_QSCORE_MULT*maxQuickScore);
1754 
1755 		final int baseChrom=baseChrom(baseChrom_);
1756 
1757 		heap.clear();
1758 		active.clear();
1759 
1760 		final Quad[] triples=tripleStorage;
1761 
1762 		final int[] values=valueArray;
1763 		final int[] sizes=sizeArray;
1764 		final int[] locArray=(USE_EXTENDED_SCORE ? getLocArray(bases.length) : null);
1765 		final Block b=index[baseChrom];
1766 
1767 		if(ssl==null){ssl=new ArrayList<SiteScore>(8);}
1768 
1769 		int currentTopScore=bestScores[0];
1770 		int cutoff=Tools.max(minScore, (int)(currentTopScore*DYNAMIC_SCORE_THRESH));
1771 
1772 		int qcutoff=Tools.max(bestScores[2], minQuickScore);
1773 		int bestqscore=bestScores[3];
1774 		int maxHits=bestScores[1];
1775 		int perfectsFound=bestScores[5];
1776 		assert((currentTopScore>=maxScore) == (perfectsFound>0)) : currentTopScore+", "+maxScore+", "+perfectsFound+", "+maxHits+", "+numHits;
1777 		int approxHitsCutoff=calcApproxHitsCutoff(numKeys, maxHits, MIN_APPROX_HITS_TO_KEEP, currentTopScore>=maxScore);
1778 		if(approxHitsCutoff>numHits){return ssl;}
1779 
1780 		final boolean shortCircuit=(allBasesCovered && numKeys==numHits && filter_by_qscore);
1781 
1782 		if(currentTopScore>=maxScore){
1783 			assert(currentTopScore==maxScore);
1784 			qcutoff=Tools.max(qcutoff, (int)(maxQuickScore*DYNAMIC_QSCORE_THRESH_PERFECT));
1785 		}
1786 
1787 
1788 		for(int i=0; i<numHits; i++){
1789 			final int[] sites=b.sites;
1790 			final int start=starts[i];
1791 			sizes[i]=b.length(start, stops[i]);
1792 			assert(sizes[i]>0);
1793 
1794 			int a=sites[start];
1795 			int a2;
1796 			if((a&SITE_MASK)>=offsets[i]){
1797 				a2=a-offsets[i];
1798 			}else{
1799 				int ch=numberToChrom(a, baseChrom);
1800 				int st=numberToSite(a);
1801 				int st2=Tools.max(st-offsets[i], 0);
1802 				a2=toNumber(st2, ch);
1803 			}
1804 			assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom));
1805 
1806 			Quad t=triples[i];
1807 			assert(t!=null) : "Should be using tripleStorage";
1808 			assert(i==t.column);
1809 			t.row=start;
1810 			t.site=a2;
1811 			t.list=sites;
1812 			values[i]=a2;
1813 
1814 			heap.add(t);
1815 		}
1816 
1817 		assert(numHits>0);
1818 		assert(heap.size()==numHits);
1819 
1820 		/* Tracks largest element allowed in 'active' */
1821 
1822 //		System.err.println("\nEntering SS loop:");
1823 //		System.err.println("maxScore="+maxScore+"\tminScore="+minScore+"\tcurrentTopScore="+currentTopScore+"\n" +
1824 //				"cutoff="+cutoff+"\tmaxHits="+maxHits+"\tapproxHitsCutoff="+approxHitsCutoff);
1825 //		System.err.println("maxQuickScore="+maxQuickScore+"\tminQuickScore="+minQuickScore+"\tqcutoff="+qcutoff);
1826 
1827 //		int iter=0;
1828 		SiteScore prevSS=null;
1829 		int maxNearbySite=0;
1830 		int site=0;
1831 		int horizon=0;
1832 		assert(active.isEmpty());
1833 		while(!heap.isEmpty() || !active.isEmpty()){
1834 //			iter++;
1835 
1836 			do{
1837 				while(!active.isEmpty() && active.peek().site==site){ //Remove all identical elements, and add subsequent elements
1838 					final Quad t2=active.poll();
1839 					final int row=t2.row+1, col=t2.column;
1840 
1841 					//This is called the "increment" operation.  Very messy and slow due to rare cases at beginning of a chrom.
1842 					if(row<stops[col]){//then increment and return to the heap
1843 						t2.row=row;
1844 
1845 						int a=t2.list[row];
1846 						int a2;
1847 						if((a&SITE_MASK)>=offsets[col]){
1848 							a2=a-offsets[col];
1849 
1850 							assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) :
1851 								"baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", strand="+strand+", site="+site+
1852 								", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col];
1853 						}else{
1854 							int ch=numberToChrom(a, baseChrom);
1855 							int st=numberToSite(a);
1856 							int st2=Tools.max(st-offsets[col], 0);
1857 							a2=toNumber(st2, ch);
1858 
1859 							assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) :
1860 								"baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", strand="+strand+", site="+site+
1861 								", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col];
1862 						}
1863 
1864 						assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) :
1865 							"baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", strand="+strand+", site="+site+
1866 							", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col];
1867 
1868 						t2.site=a2;
1869 						values[col]=a2;
1870 						if(a2<=horizon){
1871 							active.add(t2);
1872 							maxNearbySite=Tools.max(t2.site, maxNearbySite);
1873 						}else{heap.add(t2);}
1874 					}else if((heap.size()+active.size())<approxHitsCutoff || PERFECTMODE){ //Then there are not enough keys remaining for a site
1875 						assert(USE_EXTENDED_SCORE);
1876 						bestScores[0]=Tools.max(bestScores[0], currentTopScore);
1877 						bestScores[1]=Tools.max(bestScores[1], maxHits);
1878 						bestScores[2]=Tools.max(bestScores[2], qcutoff);
1879 						bestScores[3]=Tools.max(bestScores[3], bestqscore);
1880 
1881 						bestScores[4]=maxQuickScore;
1882 						bestScores[5]=perfectsFound; //***$ fixed by adding this line
1883 						if(!RETAIN_BEST_QCUTOFF){bestScores[2]=-9999;}
1884 
1885 						return ssl;
1886 					}
1887 //					if(heap.isEmpty() && active.isEmpty()){
1888 //						assert(false) : heap.size()+", "+active.size()+", "+approxHitsCutoff;
1889 //						break;
1890 //					}
1891 				}
1892 
1893 				final Quad t;
1894 				if(active.isEmpty()){
1895 					t=heap.poll();
1896 					active.add(t);
1897 					maxNearbySite=t.site;
1898 				}else{
1899 					t=active.peek();
1900 				}
1901 				site=t.site;
1902 				horizon=(int)Tools.min(Integer.MAX_VALUE, site+(long)MAX_INDEL2);
1903 				while(!heap.isEmpty() && heap.peek().site<=horizon){
1904 					Quad t2=heap.poll();
1905 					active.add(t2);
1906 					maxNearbySite=t2.site;
1907 				}
1908 
1909 				if(verbose){System.err.println("\nFinished loop iteration. active="+active+"\nheap="+heap+"\nfloor="+site+", horizon="+horizon+", maxNearbySite="+maxNearbySite);}
1910 
1911 			}while(active.size()<approxHitsCutoff);
1912 
1913 			final int approxHits=active.size();
1914 			final Quad t=active.peek();
1915 			final int centerIndex=t.column;
1916 
1917 			if(verbose){
1918 				System.err.println("\nLeft loop. active="+active+"\nheap="+heap+"\n" +
1919 						"floor="+site+", maxNearbySite="+maxNearbySite+", approxHits="+approxHits+", approxHitsCutoff="+approxHitsCutoff+"\n");
1920 			}
1921 
1922 
1923 
1924 //			approxHits=0;
1925 //			{//Inner loop
1926 //				final int minsite=site, maxsite=site+MAX_INDEL2;
1927 //				for(int column=0, chances=numHits-approxHitsCutoff; column<numHits && chances>=0; column++){
1928 //					final int x=values[column];
1929 //					assert(x==triples[column].site);
1930 //					if(x>=minsite && x<=maxsite){
1931 //						maxNearbySite=(x>maxNearbySite ? x : maxNearbySite);
1932 //						approxHits++;
1933 //					}else{chances--;}
1934 ////					if(verbose){
1935 ////						System.err.println("column="+column+", numHits="+numHits+", approxHits="+approxHits+
1936 ////								", approxHitsCutoff="+approxHitsCutoff+", chances="+chances);
1937 ////					}
1938 //				}
1939 //				//Invalid assertion due to loop early exit
1940 ////				assert(approxHits>0) : "\niter="+iter+", maxHits="+maxHits+", numHits="+numHits+", approxHitsCutoff="+approxHitsCutoff+
1941 ////					"\nheap.size()="+heap.size()+", minsite="+minsite+", maxsite="+maxsite+", values[center]="+values[centerIndex]+", t="+t;
1942 //			}
1943 //			assert(approxHits<=active.size()) : "approxHits="+approxHits+", active.size()="+active.size()+", maxNearbySite="+maxNearbySite+"\nvalues="+Arrays.toString(values);
1944 
1945 			hist_hits[Tools.min(HIT_HIST_LEN, approxHits)]++;
1946 
1947 			assert(centerIndex>=0) : centerIndex;
1948 
1949 			//I don't remember what this assertion was for or why, but it's causing trouble.
1950 			//assert(approxHits>=1 || approxHitsCutoff>1) : approxHits+", "+approxHitsCutoff+", "+numHits+", "+t.column;
1951 			if(approxHits>=approxHitsCutoff){
1952 
1953 //				if(verbose){System.err.println("A");}
1954 
1955 				int score;
1956 				int qscore=(filter_by_qscore ? quickScore(values, keyScores, centerIndex, offsets, sizes, true, approxHits, numHits) : qcutoff);
1957 				if(ADD_SCORE_Z){
1958 					int scoreZ=scoreZ2(values, centerIndex, offsets, approxHits, numHits);
1959 					qscore+=scoreZ;
1960 				}
1961 
1962 				int mapStart=site, mapStop=maxNearbySite;
1963 				assert(mapStart<=mapStop);
1964 
1965 				assert(USE_EXTENDED_SCORE);
1966 
1967 				boolean locArrayValid=false;
1968 				if(qscore<qcutoff){
1969 					score=-1;
1970 //					if(verbose){System.err.println("B");}
1971 				}else{
1972 //					if(verbose){System.err.println("C");}
1973 					final int chrom=numberToChrom(site, baseChrom);
1974 
1975 					//TODO Note that disabling the shortCircuit code seems to make things run 2% faster (with identical results).
1976 					//However, theoretically, shortCircuit should be far more efficient.  Test both ways on cluster and on a larger run.
1977 					//May have something to do with compiler loop optimizations.
1978 					if(shortCircuit && qscore==maxQuickScore){
1979 //						if(verbose){System.err.println("D");}
1980 						assert(approxHits==numKeys);
1981 						score=maxScore;
1982 					}else{
1983 //						if(verbose){System.err.println("E");}
1984 						if(verbose){
1985 							System.err.println("numHits="+numHits+", approxHits="+approxHits+", qscore="+qscore+", qcutoff="+qcutoff+", filter_by_qscore="+filter_by_qscore);
1986 							System.err.println("Extending "+Arrays.toString(values));
1987 						}
1988 						score=extendScore(bases, baseScores, offsets, values, chrom, centerIndex, locArray, numHits, approxHits);
1989 						locArrayValid=true;
1990 
1991 						if(verbose){
1992 							System.err.println("score: "+score);
1993 							System.err.println("locArray: "+Arrays.toString(locArray));
1994 						}
1995 
1996 						//Correct begin and end positions if they changed.
1997 						int min=Integer.MAX_VALUE;
1998 						int max=Integer.MIN_VALUE;
1999 						for(int i=0; i<locArray.length; i++){
2000 							int x=locArray[i];
2001 							if(x>-1){
2002 								if(x<min){min=x;}
2003 								if(x>max){max=x;}
2004 							}
2005 						}
2006 
2007 						if(score>=maxScore){
2008 							assert(min==max && min>-1) : "\n"+score+", "+maxScore+", "+min+", "+max+
2009 							", "+(max-min)+", "+bases.length+"\n"+Arrays.toString(locArray)+"\n";
2010 						}
2011 
2012 						//							assert(min>-1 && max>-1) : Arrays.toString(locArray); //TODO: How did this assertion trigger?
2013 						if(min<0 || max<0){
2014 							if(!Shared.anomaly){
2015 								Shared.anomaly=true;
2016 								System.err.println("\nAnomaly in "+getClass().getName()+".slowWalk:\n"+
2017 										"chrom="+chrom+", mapStart="+mapStart+", mapStop="+mapStop+", centerIndex="+centerIndex+", strand="+strand+"\n"+
2018 										"score="+score+", maxScore="+maxScore+", qscore="+qscore+", filter_by_qscore="+filter_by_qscore+"\n"+
2019 										"numHits="+approxHits+", approxHits="+approxHits+"\n"+
2020 										"min="+min+", max="+max+", (max-min)="+(max-min)+"\n"+
2021 										"bases.length="+bases.length+"\n"+Arrays.toString(locArray)+"\n"+
2022 										"locArray:\t"+Arrays.toString(locArray)+"\n"+
2023 										"values:\t"+Arrays.toString(values)+"\n"+
2024 										"bases:\t"+new String(bases));
2025 								System.err.println();
2026 								assert(false);
2027 							}
2028 							score=-99999;
2029 						}
2030 
2031 						//mapStart and mapStop are indices
2032 						mapStart=toNumber(min, chrom);
2033 						mapStop=toNumber(max, chrom);
2034 						assert(mapStart<=mapStop);
2035 
2036 						if(score>=maxScore){
2037 							assert(mapStop-mapStart==0) : "\n"+score+", "+maxScore+", "+min+", "+max+
2038 							", "+(max-min)+", "+(mapStop-mapStart)+", "+bases.length+"\n"+Arrays.toString(locArray)+"\n";
2039 						}
2040 					}
2041 //					if(verbose){System.err.println("F");}
2042 
2043 					if(score==maxScore){
2044 						qcutoff=Tools.max(qcutoff, (int)(maxQuickScore*DYNAMIC_QSCORE_THRESH_PERFECT));
2045 						approxHitsCutoff=calcApproxHitsCutoff(numKeys, maxHits, MIN_APPROX_HITS_TO_KEEP, true);
2046 					}
2047 
2048 					if(score>=cutoff){
2049 						qcutoff=Tools.max(qcutoff, (int)(qscore*DYNAMIC_QSCORE_THRESH));
2050 						bestqscore=Tools.max(qscore, bestqscore);
2051 					}
2052 				}
2053 //				if(verbose){System.err.println("G");}
2054 
2055 				if(score>=cutoff){
2056 //					if(verbose){System.err.println("H");}
2057 
2058 					if(score>currentTopScore){
2059 //						if(verbose){System.err.println("I");}
2060 //						System.err.println("New top score!");
2061 
2062 						if(DYNAMICALLY_TRIM_LOW_SCORES){
2063 
2064 							maxHits=Tools.max(approxHits, maxHits);
2065 							approxHitsCutoff=calcApproxHitsCutoff(numKeys, maxHits, approxHitsCutoff, currentTopScore>=maxScore);
2066 
2067 							cutoff=Tools.max(cutoff, (int)(score*DYNAMIC_SCORE_THRESH));
2068 							if(score>=maxScore){
2069 								assert(USE_EXTENDED_SCORE);
2070 								cutoff=Tools.max(cutoff, (int)(score*0.95f));
2071 							}
2072 						}
2073 
2074 						currentTopScore=score;
2075 
2076 //						System.err.println("New top score: "+currentTopScore+" \t("+cutoff+")");
2077 					}
2078 //					if(verbose){System.err.println("J");}
2079 
2080 					final int chrom=numberToChrom(mapStart, baseChrom);
2081 					final int site2=numberToSite(mapStart);
2082 					final int site3=numberToSite(mapStop)+bases.length-1;
2083 
2084 					assert(NUM_CHROM_BITS==0 || site2<SITE_MASK-1000) : "chrom="+chrom+", strand="+strand+", site="+site+
2085 						", maxNearbySite="+maxNearbySite+", site2="+site2+", site3="+site3+", read.length="+bases.length+
2086 						"\n\n"+Arrays.toString(b.getHitList(centerIndex));
2087 					assert(site2<site3) : "\nchrom="+chrom+", strand="+strand+", site="+site+", maxNearbySite="+maxNearbySite+"\n"+
2088 						"mapStart="+mapStart+", mapStop="+mapStop+", site2="+site2+", site3="+site3+", read.length="+bases.length+"\n"+
2089 						"numberToSite("+mapStart+")="+numberToSite(mapStart)+", numberToSite("+mapStop+")="+numberToSite(mapStop)+"\n"+
2090 						"\n"+new String(bases)+"\n";
2091 
2092 					if(verbose){
2093 						System.err.println("chrom="+chrom+", strand="+strand+", site="+site+", maxNearbySite="+maxNearbySite+"\n"+
2094 								"mapStart="+mapStart+", mapStop="+mapStop+", site2="+site2+", site3="+site3+", read.length="+bases.length);
2095 					}
2096 
2097 					int[] gapArray=null;
2098 					if(site3-site2>=MINGAP+bases.length){
2099 						assert(locArrayValid) : "Loc array was not filled.";
2100 //						System.err.println("****\n"+Arrays.toString(locArray)+"\n");
2101 //						int[] clone=locArray.clone();
2102 						gapArray=makeGapArray(locArray, site2, MINGAP);
2103 						if(gapArray!=null){
2104 //							System.err.println(Arrays.toString(locArray)+"\n");
2105 //							System.err.println(Arrays.toString(gapArray));
2106 //
2107 ////							int sub=site2-mapStart;//thus site2=mapStart+sub
2108 ////							for(int i=0; i<gapArray.length; i++){
2109 ////								gapArray[i]+=sub;
2110 ////							}
2111 ////							System.err.println(Arrays.toString(gapArray));
2112 //
2113 //							System.err.println(mapStart+" -> "+site2);
2114 //							System.err.println(mapStop+" -> "+site3);
2115 
2116 							assert(gapArray[0]>=site2 && gapArray[0]-site2<bases.length);
2117 							assert(gapArray[gapArray.length-1]<=site3 && site3-gapArray[gapArray.length-1]<bases.length) : "\n"+
2118 								mapStart+" -> "+site2+"\n"+
2119 								mapStop+" -> "+site3+"\n\n"+
2120 								Arrays.toString(gapArray)+"\n\n"+
2121 //								Arrays.toString(clone)+"\n\n"+
2122 								Arrays.toString(locArray)+"\n"+
2123 								"numHits="+numHits+", "+
2124 								"heap.size="+heap.size()+", "+
2125 								"numHits="+numHits+", "+
2126 								"approxHits="+approxHits+"\n";
2127 							gapArray[0]=Tools.min(gapArray[0], site2);
2128 							gapArray[gapArray.length-1]=Tools.max(gapArray[gapArray.length-1], site3);
2129 						}
2130 						if(verbose){System.err.println("@ site "+site2+", made gap array: "+Arrays.toString(gapArray));}
2131 //						assert(false) : Arrays.toString(locArray);
2132 					}
2133 
2134 
2135 					//This block is optional, but tries to eliminate multiple identical alignments
2136 
2137 					SiteScore ss=null;
2138 					final boolean perfect1=USE_EXTENDED_SCORE && score==maxScore && fullyDefined;
2139 					final boolean inbounds=(site2>=0 && site3<Data.chromLengths[chrom]);
2140 //					if(!inbounds){System.err.println("Index tossed out-of-bounds site chr"+chrom+", "+site2+"-"+site3);}
2141 
2142 					if(inbounds && !SEMIPERFECTMODE && !PERFECTMODE && gapArray==null && prevSS!=null &&
2143 							prevSS.chrom==chrom && prevSS.strand==strand && overlap(prevSS.start, prevSS.stop, site2, site3)){
2144 
2145 						final int betterScore=Tools.max(score, prevSS.score);
2146 						final int minStart=Tools.min(prevSS.start, site2);
2147 						final int maxStop=Tools.max(prevSS.stop, site3);
2148 						final boolean perfect2=USE_EXTENDED_SCORE && prevSS.score==maxScore && fullyDefined;
2149 						assert(!USE_EXTENDED_SCORE || perfect2==prevSS.perfect);
2150 
2151 						final boolean shortEnough=(!LIMIT_SUBSUMPTION_LENGTH_TO_2X  || (maxStop-minStart<2*bases.length));
2152 
2153 						if(prevSS.start==site2 && prevSS.stop==site3){
2154 							prevSS.score=prevSS.quickScore=betterScore;
2155 							prevSS.perfect=(prevSS.perfect || perfect1 || perfect2);
2156 							if(prevSS.perfect){prevSS.semiperfect=true;}
2157 						}else if(SUBSUME_SAME_START_SITES && shortEnough && prevSS.start==site2 && !prevSS.semiperfect){
2158 							if(perfect2){
2159 								//do nothing
2160 							}else if(perfect1){
2161 								prevSS.setStop(site3);
2162 								if(!prevSS.perfect){perfectsFound++;}//***$
2163 								prevSS.perfect=prevSS.semiperfect=true;
2164 							}else{
2165 								prevSS.setStop(maxStop);
2166 								prevSS.setPerfect(bases);
2167 							}
2168 							prevSS.score=prevSS.quickScore=betterScore;
2169 						}else if(SUBSUME_SAME_STOP_SITES && shortEnough && prevSS.stop==site3 && !prevSS.semiperfect){
2170 							if(perfect2){
2171 								//do nothing
2172 							}else if(perfect1){
2173 								prevSS.setStart(site2);
2174 								if(!prevSS.perfect){perfectsFound++;}//***$
2175 								prevSS.perfect=prevSS.semiperfect=true;
2176 							}else{
2177 								prevSS.setStart(minStart);
2178 								prevSS.setPerfect(bases);
2179 							}
2180 							prevSS.score=prevSS.quickScore=betterScore;
2181 						}else if(SUBSUME_OVERLAPPING_SITES && shortEnough && (maxStop-minStart<=bases.length+MAX_SUBSUMPTION_LENGTH)
2182 								&& !perfect1 && !perfect2 && !prevSS.semiperfect){
2183 							prevSS.setLimits(minStart, maxStop);
2184 							prevSS.score=prevSS.quickScore=betterScore;
2185 							prevSS.setPerfect(bases);
2186 						}else{
2187 							ss=new SiteScore(chrom, strand, site2, site3, approxHits, score, false, perfect1);
2188 							if(!perfect1){ss.setPerfect(bases);}
2189 //							assert(Read.CHECKSITE(ss, bases));
2190 							if(verbose){System.err.println("A) Index made SiteScore "+ss.toText()+", "+Arrays.toString(ss.gaps));}
2191 							assert(!perfect1 || ss.stop-ss.start==bases.length-1);
2192 						}
2193 						assert(!perfect2 || prevSS.stop-prevSS.start==bases.length-1);
2194 					}else if(inbounds){
2195 						ss=new SiteScore(chrom, strand, site2, site3, approxHits, score, false, perfect1);
2196 						if(!perfect1){ss.setPerfect(bases);}
2197 //						assert(Read.CHECKSITE(ss, bases));
2198 						ss.gaps=gapArray;
2199 						if(verbose){System.err.println("B) Index made SiteScore "+ss.toText()+", "+Arrays.toString(ss.gaps));}
2200 					}
2201 
2202 					assert(ss==null || !ss.perfect || ss.semiperfect) : ss;
2203 					assert(prevSS==null || !prevSS.perfect || prevSS.semiperfect) : "\n"+SiteScore.header()+"\n"+ss+"\n"+prevSS;
2204 					if(ss!=null && ((SEMIPERFECTMODE && !ss.semiperfect) || (PERFECTMODE && !ss.perfect))){ss=null;}
2205 
2206 
2207 					if(ss!=null){
2208 //						System.err.println("Added site "+ss.toText()+", qscore="+qscore);
2209 						ssl.add(ss);
2210 						if(ss.perfect){
2211 
2212 							if(prevSS==null || !prevSS.perfect || !ss.overlaps(prevSS)){
2213 								if(prevSS==null){assert ssl.size()<2 || !ss.overlaps(ssl.get(ssl.size()-2));}
2214 								perfectsFound++;
2215 
2216 								//Human-specific code
2217 //								if(QUIT_AFTER_TWO_PERFECTS){
2218 //									if(perfectsFound>=3 || (perfectsFound>=2 && chrom<24)){break;}
2219 //								}
2220 
2221 								if(QUIT_AFTER_TWO_PERFECTS && perfectsFound>=2){break;}
2222 							}
2223 						}
2224 
2225 						prevSS=ss;
2226 					}else{
2227 //						System.err.println("Subsumed site "+new SiteScore(chrom, strand, site2, site3, score).toText());
2228 					}
2229 				}
2230 			}
2231 
2232 //			while(!active.isEmpty() && active.peek().site==site){ //Remove all identical elements, and add subsequent elements
2233 //				final Quad t2=active.poll();
2234 //				final int row=t2.row+1, col=t2.column;
2235 //
2236 //				//This is called the "increment" operation.  Very messy and slow due to rare cases at beginning of a chrom.
2237 //				if(row<stops[col]){//then increment and return to the heap
2238 //					t2.row=row;
2239 //
2240 //					int a=t2.list[row];
2241 //					int a2;
2242 //					if((a&SITE_MASK)>=offsets[col]){
2243 //						a2=a-offsets[col];
2244 //
2245 //						assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) :
2246 //							"baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", strand="+strand+", site="+site+
2247 //							", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col];
2248 //					}else{
2249 //						int ch=numberToChrom(a, baseChrom);
2250 //						int st=numberToSite(a);
2251 //						int st2=Tools.max(st-offsets[col], 0);
2252 //						a2=toNumber(st2, ch);
2253 //
2254 //						assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) :
2255 //							"baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", strand="+strand+", site="+site+
2256 //							", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col];
2257 //					}
2258 //
2259 //					assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) :
2260 //						"baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", strand="+strand+", site="+site+
2261 //						", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col];
2262 //
2263 //					t2.site=a2;
2264 //					values[col]=a2;
2265 //					if()
2266 //					heap.add(t2);
2267 //				}else if((heap.size()+active.size())<approxHitsCutoff || PERFECTMODE){ //Then there are not enough keys remaining for a site
2268 //					assert(USE_EXTENDED_SCORE);
2269 //					bestScores[0]=Tools.max(bestScores[0], currentTopScore);
2270 //					bestScores[1]=Tools.max(bestScores[1], maxHits);
2271 //					bestScores[2]=Tools.max(bestScores[2], qcutoff);
2272 //					bestScores[3]=Tools.max(bestScores[3], bestqscore);
2273 //
2274 //					bestScores[4]=maxQuickScore;
2275 //					bestScores[5]=perfectsFound; //***$ fixed by adding this line
2276 //					if(!RETAIN_BEST_QCUTOFF){bestScores[2]=-9999;}
2277 //
2278 //					return ssl;
2279 //				}
2280 //				if(heap.isEmpty() && active.isEmpty()){
2281 //					assert(false) : heap.size()+", "+active.size()+", "+approxHitsCutoff;
2282 //					break;
2283 //				}
2284 //			}
2285 
2286 		}
2287 
2288 		assert(USE_EXTENDED_SCORE);
2289 		bestScores[0]=Tools.max(bestScores[0], currentTopScore);
2290 		bestScores[1]=Tools.max(bestScores[1], maxHits);
2291 		bestScores[2]=Tools.max(bestScores[2], qcutoff);
2292 		bestScores[3]=Tools.max(bestScores[3], bestqscore);
2293 
2294 		bestScores[4]=maxQuickScore;
2295 		bestScores[5]=perfectsFound;
2296 		if(!RETAIN_BEST_QCUTOFF){bestScores[2]=-9999;}
2297 
2298 		return ssl;
2299 	}
2300 
2301 
findMaxQscore2(final int[] starts, final int[] stops, final int[] offsets, final int[] keyScores, final int baseChrom_, final Quad[] triples, final int[] values, final int prevMaxHits, boolean earlyExit, boolean perfectOnly)2302 	private final int[] findMaxQscore2(final int[] starts, final int[] stops, final int[] offsets, final int[] keyScores,
2303 			final int baseChrom_, final Quad[] triples, final int[] values, final int prevMaxHits,
2304 			boolean earlyExit, boolean perfectOnly){
2305 
2306 		final int numHits=offsets.length;
2307 		assert(numHits>=prevMaxHits);
2308 
2309 		final int baseChrom=baseChrom(baseChrom_);
2310 		final Block b=index[baseChrom];
2311 		final int[] sizes=sizeArray;
2312 
2313 		heap.clear();
2314 		for(int i=0; i<numHits; i++){
2315 			final int[] sites=b.sites;
2316 			final int start=starts[i];
2317 			sizes[i]=b.length(start, stops[i]);
2318 			assert(sizes[i]>0);
2319 
2320 			int a=sites[start];
2321 			int a2;
2322 			if((a&SITE_MASK)>=offsets[i]){
2323 				a2=a-offsets[i];
2324 			}else{
2325 				int ch=numberToChrom(a, baseChrom);
2326 				int st=numberToSite(a);
2327 				int st2=Tools.max(st-offsets[i], 0);
2328 				a2=toNumber(st2, ch);
2329 			}
2330 			assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom));
2331 
2332 			Quad t=triples[i];
2333 			assert(t!=null) : "Should be using tripleStorage";
2334 			assert(i==t.column);
2335 			t.row=start;
2336 			t.site=a2;
2337 			t.list=sites;
2338 			values[i]=a2;
2339 
2340 			heap.add(t);
2341 		}
2342 
2343 		final int maxQuickScore=maxQuickScore(offsets, keyScores);
2344 
2345 		int topQscore=-999999999;
2346 
2347 		int maxHits=0;
2348 //		int approxHitsCutoff=MIN_APPROX_HITS_TO_KEEP;
2349 
2350 
2351 		int approxHitsCutoff;
2352 		final int indelCutoff;
2353 		if(perfectOnly){
2354 			approxHitsCutoff=numHits;
2355 			indelCutoff=0;
2356 		}else{
2357 			approxHitsCutoff=Tools.max(prevMaxHits, Tools.min(MIN_APPROX_HITS_TO_KEEP, numHits-1)); //Faster, same accuracy
2358 			indelCutoff=MAX_INDEL2;
2359 		}
2360 
2361 
2362 		while(!heap.isEmpty()){
2363 			Quad t=heap.peek();
2364 			final int site=t.site;
2365 			final int centerIndex=t.column;
2366 
2367 			int maxNearbySite=site;
2368 
2369 
2370 			int approxHits=0;
2371 			{//Inner loop
2372 				final int minsite=site-Tools.min(MAX_INDEL, indelCutoff), maxsite=site+MAX_INDEL2;
2373 				for(int column=0, chances=numHits-approxHitsCutoff; column<numHits && chances>=0; column++){
2374 					final int x=values[column];
2375 					assert(x==triples[column].site);
2376 					if(x>=minsite && x<=maxsite){
2377 						maxNearbySite=(x>maxNearbySite ? x : maxNearbySite);
2378 						approxHits++;
2379 					}else{chances--;}
2380 				}
2381 			}
2382 			hist_hits[Tools.min(HIT_HIST_LEN, approxHits)]++;
2383 
2384 			assert(centerIndex>=0) : centerIndex;
2385 
2386 			//I don't remember what this assertion was for or why, but it's causing trouble.
2387 			//assert(approxHits>=1 || approxHitsCutoff>1) : approxHits+", "+approxHitsCutoff+", "+numHits+", "+t.column;
2388 			if(approxHits>=approxHitsCutoff){
2389 
2390 				int qscore=quickScore(values, keyScores, centerIndex, offsets, sizes, true, approxHits, numHits);
2391 
2392 				if(ADD_SCORE_Z){
2393 					int scoreZ=scoreZ2(values, centerIndex, offsets, approxHits, numHits);
2394 					qscore+=scoreZ;
2395 				}
2396 
2397 				if(qscore>topQscore){
2398 
2399 //					maxHits=Tools.max(approxHits, maxHits);
2400 //					approxHitsCutoff=Tools.max(approxHitsCutoff, maxHits); //Best setting for pre-scan
2401 
2402 					maxHits=Tools.max(approxHits, maxHits);
2403 					approxHitsCutoff=Tools.max(approxHitsCutoff, approxHits-1); //Best setting for pre-scan
2404 
2405 					topQscore=qscore;
2406 
2407 					if(qscore>=maxQuickScore){
2408 						assert(qscore==maxQuickScore);
2409 						assert(approxHits==numHits);
2410 						if(earlyExit){
2411 							return new int[] {topQscore, maxHits};
2412 						}
2413 					}
2414 				}
2415 			}
2416 
2417 			while(heap.peek().site==site){ //Remove all identical elements, and add subsequent elements
2418 				final Quad t2=heap.poll();
2419 				final int row=t2.row+1, col=t2.column;
2420 				if(row<stops[col]){
2421 					t2.row=row;
2422 
2423 					int a=t2.list[row];
2424 					int a2;
2425 					if((a&SITE_MASK)>=offsets[col]){
2426 						a2=a-offsets[col];
2427 
2428 						assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) :
2429 							"baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", site="+site+
2430 							", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col];
2431 					}else{
2432 						int ch=numberToChrom(a, baseChrom);
2433 						int st=numberToSite(a);
2434 						int st2=Tools.max(st-offsets[col], 0);
2435 						a2=toNumber(st2, ch);
2436 
2437 						assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) :
2438 							"baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", site="+site+
2439 							", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col];
2440 					}
2441 
2442 					assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) :
2443 						"baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", site="+site+
2444 						", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col];
2445 
2446 					t2.site=a2;
2447 					values[col]=a2;
2448 					heap.add(t2);
2449 				}else if(earlyExit && (perfectOnly || heap.size()<approxHitsCutoff)){
2450 					return new int[] {topQscore, maxHits};
2451 				}
2452 				if(heap.isEmpty()){break;}
2453 			}
2454 
2455 		}
2456 
2457 
2458 
2459 		return new int[] {topQscore, maxHits};
2460 	}
2461 
2462 
absdif(int a, int b)2463 	private static final int absdif(int a, int b){
2464 		return a>b ? a-b : b-a;
2465 	}
2466 
2467 
2468 	@Override
maxScore(int[] offsets, byte[] baseScores, int[] keyScores, int readlen, boolean useQuality)2469 	final int maxScore(int[] offsets, byte[] baseScores, int[] keyScores, int readlen, boolean useQuality){
2470 
2471 		if(useQuality){
2472 			//These lines apparently MUST be used if quality is used later on for slow align.
2473 			if(USE_AFFINE_SCORE){return msa.maxQuality(baseScores);}
2474 			if(USE_EXTENDED_SCORE){return readlen*(BASE_HIT_SCORE+BASE_HIT_SCORE/5)+Tools.sumInt(baseScores);}
2475 		}else{
2476 			if(USE_AFFINE_SCORE){return msa.maxQuality(readlen);}
2477 			if(USE_EXTENDED_SCORE){return readlen*(BASE_HIT_SCORE+BASE_HIT_SCORE/5);}
2478 		}
2479 
2480 		return maxQuickScore(offsets, keyScores);
2481 	}
2482 
2483 
maxQuickScore(int[] offsets, int[] keyScores)2484 	public final int maxQuickScore(int[] offsets, int[] keyScores){
2485 
2486 //		int x=offsets.length*BASE_KEY_HIT_SCORE;
2487 		int x=Tools.intSum(keyScores);
2488 		int y=Y_SCORE_MULT*(offsets[offsets.length-1]-offsets[0]);
2489 //		if(ADD_LIST_SIZE_BONUS){x+=(LIST_SIZE_BONUS[1]*offsets.length);}
2490 //		assert(!ADD_SCORE_Z) : "Need to make sure this is correct...";
2491 
2492 //		if(ADD_SCORE_Z){x+=((offsets[offsets.length-1]+CHUNKSIZE)*Z_SCORE_MULT);}
2493 		if(ADD_SCORE_Z){x+=maxScoreZ(offsets);}
2494 
2495 		return x+y;
2496 //		int bonus=(2*(HIT_SCORE/2)); //For matching both ends
2497 //		return x+y+bonus;
2498 	}
2499 
2500 
quickScore(final int[] locs, final int[] keyScores, final int centerIndex, final int offsets[], int[] sizes, final boolean penalizeIndels, final int numApproxHits, final int numHits)2501 	private final int quickScore(final int[] locs, final int[] keyScores, final int centerIndex, final int offsets[],
2502 			int[] sizes, final boolean penalizeIndels, final int numApproxHits, final int numHits){
2503 
2504 		hist_hits_score[Tools.min(HIT_HIST_LEN, numApproxHits)]++;
2505 		if(numApproxHits==1){return keyScores[centerIndex];}
2506 
2507 		//Done!
2508 		//Correct way to calculate score:
2509 		//Find the first chunk that exactly hits the center.
2510 		//Then, align leftward of it, and align rightward of it, and sum the scores.
2511 
2512 		//"-centerIndex" is a disambiguating term that, given otherwise identical match patterns
2513 		//(for example, a small indel will generate two valid site candidates), choose the lower site.
2514 
2515 		int x=keyScores[centerIndex]+scoreLeft(locs, keyScores, centerIndex, sizes, penalizeIndels)+
2516 			scoreRight(locs, keyScores, centerIndex, sizes, penalizeIndels, numHits)-centerIndex;
2517 
2518 		int y=Y_SCORE_MULT*scoreY(locs, centerIndex, offsets);
2519 		if(ADD_LIST_SIZE_BONUS){x+=calcListSizeBonus(sizes[centerIndex]);}
2520 //		int z=scoreZ(locs, hits);
2521 		return x+y;
2522 	}
2523 
2524 
2525 //	/** Generates a term that increases score with how many bases in the read match the ref. */
2526 //	private static final int scoreZ(int[] locs, int centerIndex, int offsets[]){
2527 //		final int center=locs[centerIndex];
2528 //
2529 //		final int[] refLoc=new int[offsets[offsets.length-1]+CHUNKSIZE];
2530 //
2531 //		final int maxLoc=center+MAX_INDEL2;
2532 //		final int minLoc=Tools.max(0, center-MAX_INDEL);
2533 //
2534 //		int score=0;
2535 //
2536 //		for(int i=0; i<locs.length; i++){
2537 //			int loc=locs[i];
2538 ////			int dif=absdif(loc, center);
2539 //			if(loc>=minLoc && loc<=maxLoc){
2540 ////				assert(loc>=center) : "loc="+loc+"\ni="+i+"\ncenterIndex="+centerIndex+
2541 ////					"\nmaxLoc="+maxLoc+"\nlocs:\t"+Arrays.toString(locs)+"\noffsets:\t"+Arrays.toString(offsets);
2542 //
2543 //				int offset=offsets[i];
2544 //				int max=CHUNKSIZE+offset;
2545 //
2546 //				for(int j=offset; j<max; j++){
2547 //					int old=refLoc[j];
2548 //					if(old==0){
2549 //						refLoc[j]=loc;
2550 //						score+=4;
2551 //					}else if(old>loc){
2552 //						refLoc[j]=loc;
2553 //						score-=2;
2554 //					}else if(old==loc){
2555 //						score-=1;
2556 //						//do nothing, perhaps, or add 1?
2557 //					}else{
2558 //						score-=2;
2559 //						assert(old<loc);
2560 //					}
2561 //				}
2562 //			}
2563 //		}
2564 //		return score;
2565 //	}
2566 
2567 
2568 
extendScore(final byte[] bases, final byte[] baseScores, final int[] offsets, final int[] values, final int chrom, final int centerIndex, final int[] locArray, final int numHits, final int numApproxHits)2569 	private final int extendScore(final byte[] bases, final byte[] baseScores, final int[] offsets, final int[] values,
2570 			final int chrom, final int centerIndex, final int[] locArray, final int numHits, final int numApproxHits){
2571 		callsToExtendScore++;
2572 		hist_hits_extend[Tools.min(HIT_HIST_LEN, numApproxHits)]++;
2573 
2574 		final int centerVal=values[centerIndex];
2575 		final int centerLoc=numberToSite(centerVal);
2576 
2577 		final int minLoc=Tools.max(0, centerLoc-MAX_INDEL); //Legacy, for assertions
2578 		final int maxLoc=centerLoc+MAX_INDEL2; //Legacy, for assertions
2579 
2580 		final int minVal=centerVal-MAX_INDEL;
2581 		final int maxVal=centerVal+MAX_INDEL2;
2582 
2583 		final byte[] ref=Data.getChromosome(chrom).array;
2584 
2585 		if(verbose){
2586 			System.err.println("\n");
2587 			System.err.println("minLoc="+minLoc+", maxLoc="+ maxLoc+", centerIndex="+centerIndex+", centerVal="+centerVal+", centerLoc="+centerLoc);
2588 			System.err.println("minVal="+minVal+", maxVal="+ maxVal+", numHits="+numHits+", numApproxHits="+numApproxHits);
2589 			System.err.println("offsets:\t"+Arrays.toString(offsets));
2590 			System.err.println("values:\t"+Arrays.toString(values));
2591 			System.err.println();
2592 			int centerOffset=offsets[centerIndex];
2593 
2594 			for(int i=0; i<centerOffset; i++){System.err.print(" ");}
2595 			for(int i=centerOffset; i<centerOffset+KEYLEN; i++){System.err.print((char)bases[i]);}
2596 			System.err.println();
2597 
2598 			System.err.println(new String(bases));
2599 			System.err.println(new String(KillSwitch.copyOfRange(ref, centerLoc, centerLoc+bases.length)));
2600 			System.err.println();
2601 		}
2602 
2603 //		int[] locArray=new int[bases.length];
2604 		Arrays.fill(locArray, -1);
2605 
2606 
2607 //		if(verbose){
2608 //			System.err.println("Reverse fill:");
2609 //		}
2610 
2611 		//First fill in reverse
2612 		for(int i=0, keynum=0; i<numHits; i++){
2613 			final int value=values[i];
2614 //			if(verbose){System.err.println("values["+i+"]="+value);}
2615 
2616 			if(value>=minVal && value<=maxVal){
2617 				final int refbase=numberToSite(value);
2618 //				if(verbose){System.err.println("refbase="+refbase);}
2619 
2620 //				Exception in thread "Thread-23" java.lang.AssertionError: 71543, 536356470, 536956470
2621 //		        at align2.BBIndex.extendScore(BBIndex.java:2620)
2622 //		        at align2.BBIndex.slowWalk3(BBIndex.java:1393)
2623 //		        at align2.BBIndex.find(BBIndex.java:777)
2624 //		        at align2.BBIndex.find(BBIndex.java:623)
2625 //		        at align2.BBIndex.findAdvanced(BBIndex.java:400)
2626 //		        at align2.AbstractMapThread.quickMap(AbstractMapThread.java:761)
2627 //		        at align2.BBMapThread.processRead(BBMapThread.java:408)
2628 //		        at align2.AbstractMapThread.run(AbstractMapThread.java:519)
2629 
2630 				//TODO - figure out why this assertion fires
2631 //				assert(refbase>=minLoc && refbase<=maxLoc) : refbase+", "+minLoc+", "+maxLoc; //Apparently not a correct assumption
2632 
2633 				//			System.err.println("Reverse: Trying key "+refbase+" @ "+offsets[i]);
2634 				//				System.err.println("Passed!");
2635 				keynum++;
2636 				final int callbase=offsets[i];
2637 
2638 				int misses=0;
2639 				for(int cloc=callbase+KEYLEN-1, rloc=refbase+cloc; cloc>=0 && rloc>=0 && rloc<ref.length; cloc--, rloc--){
2640 					int old=locArray[cloc];
2641 					if(old==refbase){
2642 //						if(verbose){System.err.println("Broke because old="+old+", refbase="+refbase);}
2643 						break;
2644 					} //Already filled with present value
2645 					if(misses>0 && old>=0){
2646 //						if(verbose){System.err.println("Broke because old="+old+", misses="+misses);}
2647 						break;
2648 					} //Already filled with something that has no errors
2649 					byte c=bases[cloc];
2650 					byte r=ref[rloc];
2651 
2652 					if(c==r){
2653 						if(old<0 || refbase==centerLoc){ //If the cell is empty or this key corresponds to center
2654 							locArray[cloc]=refbase;
2655 						}
2656 					}else{
2657 						misses++;
2658 						//Only extends first key all the way back.  Others stop at the first error.
2659 						if(old>=0 || keynum>1){
2660 //							if(verbose){System.err.println("Broke because old="+old+", keynum="+keynum);}
2661 							break;
2662 						}
2663 					}
2664 				}
2665 			}
2666 		}
2667 
2668 //		if(verbose){
2669 //			System.err.println("locArray:\t"+Arrays.toString(locArray));
2670 //			System.err.println("Forward fill:");
2671 //		}
2672 
2673 		//Then fill forward
2674 		for(int i=0; i<numHits; i++){
2675 			final int value=values[i];
2676 //			if(verbose){System.err.println("values["+i+"]="+value);}
2677 
2678 			if(value>=minVal && value<=maxVal){
2679 				final int refbase=numberToSite(value);
2680 //				if(verbose){System.err.println("refbase="+refbase);}
2681 				//TODO - figure out why this assertion fires
2682 //				assert(refbase>=minLoc && refbase<=maxLoc) : refbase+", "+minLoc+", "+maxLoc; //Apparently not a correct assumption
2683 				final int callbase=offsets[i];
2684 
2685 				int misses=0;
2686 				for(int cloc=callbase+KEYLEN, rloc=refbase+cloc; cloc<bases.length && rloc<ref.length; cloc++, rloc++){
2687 					int old=locArray[cloc];
2688 					if(old==refbase){break;} //Already filled with present value
2689 					if(misses>0 && old>=0){break;} //Already filled with something that has no errors
2690 					byte c=bases[cloc];
2691 					byte r=ref[rloc];
2692 
2693 					if(c==r){
2694 						if(old<0 || refbase==centerLoc){ //If the cell is empty or this key corresponds to center
2695 							locArray[cloc]=refbase;
2696 						}
2697 					}else{
2698 						misses++;
2699 						if(old>=0){break;} //Already filled with something that has no errors
2700 					}
2701 				}
2702 			}
2703 		}
2704 
2705 		//Try to subsume out-of-order locs where higher numbers come before lower numbers. Made things worse.
2706 //		for(int i=1; i<locArray.length; i++){
2707 //			final int loc=locArray[i];
2708 //			final int last=locArray[i-1];
2709 //			if(loc<last && last>-1){
2710 //				final byte c=bases[i];
2711 ////				final int rloc1=loc+i;
2712 //				final int rloc2=last+i;
2713 //				byte r2=ref[rloc2];
2714 //				if(r2==c){
2715 //					locArray[i]=last;
2716 //				}
2717 //			}
2718 //		}
2719 
2720 //		//Change 'N' to -2.  A bit slow.
2721 //		{
2722 //			int firstMatch=0;
2723 //			while(firstMatch<locArray.length && locArray[firstMatch]<0){firstMatch++;}
2724 //			assert(firstMatch<locArray.length) : new String(bases);
2725 //			int last=locArray[firstMatch];
2726 //			for(int i=firstMatch-1; i>=0; i--){
2727 //				final byte c=bases[i];
2728 //				if(c=='N'){locArray[i]=-2;}
2729 //				else{
2730 //					assert(locArray[i]==-1);
2731 //					final int rloc=last+i;
2732 //					byte r=ref[rloc];
2733 //					if(r=='N'){locArray[i]=-2;}
2734 //				}
2735 //			}
2736 //			for(int i=firstMatch; i<locArray.length; i++){
2737 //				final int loc=locArray[i];
2738 //				if(last<1){last=loc;}
2739 //				final byte c=bases[i];
2740 //				if(c=='N'){locArray[i]=-2;}
2741 //				else if(loc==-1 && last>0){
2742 //					final int rloc=last+i;
2743 //					byte r=ref[rloc];
2744 //					if(r=='N'){locArray[i]=-2;}
2745 //				}
2746 //			}
2747 //		}
2748 
2749 		//Change 'N' to -2, but only for nocalls, not norefs.  Much faster.
2750 		{
2751 			final byte nb=(byte)'N';
2752 			for(int i=0; i<bases.length; i++){
2753 				if(bases[i]==nb){locArray[i]=-2;}
2754 			}
2755 		}
2756 
2757 //
2758 //		{
2759 //			int last=locArray[0];
2760 //			for(int i=1; i<locArray.length; i++){
2761 //				final int loc=locArray[i];
2762 //				if(loc>0){
2763 //					if(last<1){last=loc;}
2764 //				}else{
2765 //					if(last>0){
2766 //						final byte c=bases[i];
2767 //						final int rloc2=last+i;
2768 //						byte r2=ref[rloc2];
2769 //						if(c=='N'){
2770 //
2771 //						}else if(c==r2){
2772 //							locArray[i]=last;
2773 //						}
2774 //					}
2775 //					last=-1;
2776 //				}
2777 //			}
2778 //		}
2779 //		{
2780 //			int last=locArray[locArray.length-1];
2781 //			for(int i=locArray.length-2; i>=0; i--){
2782 //				final int loc=locArray[i];
2783 //				if(loc>0){
2784 //					if(last<1){last=loc;}
2785 //				}else{
2786 //					if(last>0){
2787 //						final byte c=bases[i];
2788 //						final int rloc2=last+i;
2789 //						byte r2=ref[rloc2];
2790 //						if(c=='N'){
2791 //
2792 //						}else if(c==r2){
2793 //							locArray[i]=last;
2794 //						}
2795 //					}
2796 //					last=-1;
2797 //				}
2798 //			}
2799 //		}
2800 
2801 //		for(int i=locArray.length-2; i>=0; i--){
2802 //			final int loc=locArray[i];
2803 //			final int last=locArray[i+1];
2804 //			if(loc>last && last>-1){
2805 //				final byte c=bases[i];
2806 ////				final int rloc1=loc+i;
2807 //				final int rloc2=last+i;
2808 //				byte r2=ref[rloc2];
2809 //				if(r2==c){
2810 //					locArray[i]=last;
2811 //				}
2812 //			}
2813 //		}
2814 
2815 		if(verbose){
2816 //			System.err.println("locArray:\t"+Arrays.toString(locArray));
2817 
2818 			int centerOffset=offsets[centerIndex];
2819 			int lim=centerOffset+KEYLEN;
2820 			for(int i=centerOffset; i<lim; i++){
2821 				assert(locArray[i]!=-1) : " ( "+centerOffset+" < "+i+" < "+lim+" ) "+
2822 						"\nlocArray: "+Arrays.toString(locArray)+
2823 						"\nvalues: "+Arrays.toString(values)+
2824 						"\noffsets: "+Arrays.toString(offsets);
2825 			}
2826 		}
2827 
2828 		if(USE_AFFINE_SCORE){
2829 			/* TODO - sometimes returns a higher score than actual alignment.  This should never happen. */
2830 			int score=(KFILTER<2 ? msa.calcAffineScore(locArray, baseScores, bases) :
2831 				msa.calcAffineScore(locArray, baseScores, bases, KFILTER));
2832 			return score;
2833 		}
2834 
2835 		int score=0;
2836 		int lastLoc=-1;
2837 		int centerBonus=BASE_HIT_SCORE/5;
2838 		for(int i=0; i<locArray.length; i++){
2839 			int loc=locArray[i];
2840 			if(loc>=0){
2841 				score+=BASE_HIT_SCORE+baseScores[i];
2842 				if(loc==centerLoc){score+=centerBonus;}
2843 				if(loc!=lastLoc && lastLoc>=0){
2844 					int dif=absdif(loc, lastLoc);
2845 					int penalty=Tools.min(INDEL_PENALTY+INDEL_PENALTY_MULT*dif, MAX_PENALTY_FOR_MISALIGNED_HIT);
2846 					score-=penalty;
2847 				}
2848 				lastLoc=loc;
2849 			}
2850 		}
2851 
2852 //		System.err.println("Extended score: "+score);
2853 //		System.err.println(Arrays.toString(locArray));
2854 
2855 
2856 		return score;
2857 	}
2858 
2859 
2860 	/** NOTE!  This destroys the locArray, so use a copy if needed. */
makeGapArray(int[] locArray, int minLoc, int minGap)2861 	private static final int[] makeGapArray(int[] locArray, int minLoc, int minGap){
2862 		int gaps=0;
2863 		boolean doSort=false;
2864 
2865 		if(locArray[0]<0){locArray[0]=minLoc;}
2866 		for(int i=1; i<locArray.length; i++){
2867 			if(locArray[i]<0){locArray[i]=locArray[i-1]+1;}
2868 			else{locArray[i]+=i;}
2869 			if(locArray[i]<locArray[i-1]){doSort=true;}
2870 		}
2871 
2872 //		System.err.println(Arrays.toString(locArray)+"\n");
2873 
2874 		if(doSort){
2875 //			System.err.println("*");
2876 			Arrays.sort(locArray);
2877 		}
2878 //		System.err.println(Arrays.toString(locArray)+"\n");
2879 
2880 		for(int i=1; i<locArray.length; i++){
2881 			int dif=locArray[i]-locArray[i-1];
2882 			assert(dif>=0);
2883 			if(dif>minGap){
2884 				gaps++;
2885 			}
2886 		}
2887 		if(gaps<1){return null;}
2888 		int[] out=new int[2+gaps*2];
2889 		out[0]=locArray[0];
2890 		out[out.length-1]=locArray[locArray.length-1];
2891 
2892 		for(int i=1, j=1; i<locArray.length; i++){
2893 			int dif=locArray[i]-locArray[i-1];
2894 			assert(dif>=0);
2895 			if(dif>minGap){
2896 				out[j]=locArray[i-1];
2897 				out[j+1]=locArray[i];
2898 				j+=2;
2899 			}
2900 		}
2901 		return out;
2902 	}
2903 
2904 
2905 	/** Generates a term that increases score with how many bases in the read match the ref. */
scoreZ2(int[] locs, int centerIndex, int offsets[], int numApproxHits, int numHits)2906 	private final int scoreZ2(int[] locs, int centerIndex, int offsets[], int numApproxHits, int numHits){
2907 
2908 		if(numApproxHits==1){return SCOREZ_1KEY;}
2909 
2910 		final int center=locs[centerIndex];
2911 
2912 		final int maxLoc=center+MAX_INDEL2;
2913 		final int minLoc=Tools.max(0, center-MAX_INDEL);
2914 
2915 		int score=0;
2916 
2917 		int a0=-1, b0=-1;
2918 
2919 		for(int i=0; i<numHits; i++){
2920 			int loc=locs[i];
2921 //			int dif=absdif(loc, center);
2922 			if(loc>=minLoc && loc<=maxLoc){
2923 //				assert(loc>=center) : "loc="+loc+"\ni="+i+"\ncenterIndex="+centerIndex+
2924 //					"\nmaxLoc="+maxLoc+"\nlocs:\t"+Arrays.toString(locs)+"\noffsets:\t"+Arrays.toString(offsets);
2925 				int a=offsets[i];
2926 
2927 				if(b0<a){
2928 					score+=b0-a0;
2929 					a0=a;
2930 				}
2931 				b0=a+KEYLEN;
2932 			}
2933 		}
2934 		score+=b0-a0;
2935 		score=score*Z_SCORE_MULT;
2936 //		assert(score==scoreZslow(locs, centerIndex, offsets, false)) : scoreZslow(locs, centerIndex, offsets, true)+" != "+score;
2937 		return score;
2938 	}
2939 
2940 	@Deprecated
2941 	/** This was just to verify scoreZ2. */
scoreZslow(int[] locs, int centerIndex, int offsets[], boolean display, int numHits)2942 	private final int scoreZslow(int[] locs, int centerIndex, int offsets[], boolean display, int numHits){
2943 		final int center=locs[centerIndex];
2944 
2945 		final int maxLoc=center+MAX_INDEL2;
2946 		final int minLoc=Tools.max(0, center-MAX_INDEL);
2947 
2948 		byte[] array=new byte[offsets[offsets.length-1]+KEYLEN];
2949 		int score=0;
2950 
2951 		for(int i=0; i<numHits; i++){
2952 			int loc=locs[i];
2953 //			int dif=absdif(loc, center);
2954 			if(loc>=minLoc && loc<=maxLoc){
2955 				int pos=offsets[i];
2956 //				if(true){
2957 //					System.err.println("\ni="+i+", pos="+pos+", array=["+array.length+"], limit="+(pos+CHUNKSIZE-1));
2958 //				}
2959 				for(int j=pos; j<pos+KEYLEN; j++){
2960 					if(array[j]==0){score++;}
2961 					array[j]=1;
2962 				}
2963 			}
2964 		}
2965 
2966 		if(display){System.err.println("\n"+Arrays.toString(array)+"\n");}
2967 
2968 		return score*Z_SCORE_MULT;
2969 	}
2970 
2971 	/** Generates a term that increases score with how many bases in the read match the ref. */
maxScoreZ(int offsets[])2972 	private final int maxScoreZ(int offsets[]){
2973 		int score=0;
2974 		int a0=-1, b0=-1;
2975 
2976 		for(int i=0; i<offsets.length; i++){
2977 			int a=offsets[i];
2978 
2979 			if(b0<a){
2980 				score+=b0-a0;
2981 				a0=a;
2982 			}
2983 			b0=a+KEYLEN;
2984 
2985 		}
2986 		score+=b0-a0;
2987 		return score*Z_SCORE_MULT;
2988 	}
2989 
2990 
scoreRight(int[] locs, int[] keyScores, int centerIndex, int[] sizes, boolean penalizeIndels, int numHits)2991 	private final int scoreRight(int[] locs, int[] keyScores, int centerIndex, int[] sizes, boolean penalizeIndels, int numHits){
2992 
2993 		int score=0;
2994 
2995 		int prev, loc=locs[centerIndex];
2996 
2997 		for(int i=centerIndex+1; i<numHits; i++){
2998 
2999 			if(locs[i]>=0){
3000 				prev=loc;
3001 				loc=locs[i];
3002 
3003 				int offset=absdif(loc, prev);
3004 
3005 				if(offset<=MAX_INDEL){
3006 					score+=keyScores[i];
3007 					if(ADD_LIST_SIZE_BONUS){score+=calcListSizeBonus(sizes[i]);}
3008 
3009 //					if(i==locs.length-1){score+=HIT_SCORE/2;} //Adds a bonus for matching the first or last key
3010 
3011 					if(penalizeIndels && offset!=0){
3012 						int penalty=Tools.min(INDEL_PENALTY+INDEL_PENALTY_MULT*offset, MAX_PENALTY_FOR_MISALIGNED_HIT);
3013 						score-=penalty;
3014 //						score-=(INDEL_PENALTY+Tools.min(INDEL_PENALTY_MULT*offset, 1+HIT_SCORE/4));
3015 					}
3016 				}else{
3017 					loc=prev;
3018 				}
3019 			}
3020 
3021 		}
3022 		return score;
3023 
3024 	}
3025 
scoreLeft(int[] locs, int[] keyScores, int centerIndex, int[] sizes, boolean penalizeIndels)3026 	private final int scoreLeft(int[] locs, int[] keyScores, int centerIndex, int[] sizes, boolean penalizeIndels){
3027 
3028 		callsToScore++;
3029 
3030 		int score=0;
3031 
3032 		int prev, loc=locs[centerIndex];
3033 
3034 		for(int i=centerIndex-1; i>=0; i--){
3035 
3036 			if(locs[i]>=0){
3037 				prev=loc;
3038 				loc=locs[i];
3039 
3040 				int offset=absdif(loc, prev);
3041 
3042 				if(offset<=MAX_INDEL){
3043 					score+=keyScores[i];
3044 					if(ADD_LIST_SIZE_BONUS){score+=calcListSizeBonus(sizes[i]);}
3045 
3046 //					if(i==0){score+=HIT_SCORE/2;} //Adds a bonus for matching the first or last key
3047 					if(penalizeIndels && offset!=0){
3048 						int penalty=Tools.min(INDEL_PENALTY+INDEL_PENALTY_MULT*offset, MAX_PENALTY_FOR_MISALIGNED_HIT);
3049 						score-=penalty;
3050 					}
3051 				}else{
3052 					loc=prev;
3053 				}
3054 			}
3055 
3056 		}
3057 		return score;
3058 
3059 	}
3060 
3061 	/** Encode a (location, chrom) pair to an index */
toNumber(int site, int chrom)3062 	private static final int toNumber(int site, int chrom){
3063 		int out=(chrom&CHROM_MASK_LOW);
3064 		out=out<<SHIFT_LENGTH;
3065 		out=(out|site);
3066 		return out;
3067 	}
3068 
3069 	/** Decode an (index, baseChrom) pair to a chromosome */
numberToChrom(int number, int baseChrom)3070 	private static final int numberToChrom(int number, int baseChrom){
3071 		assert((baseChrom&CHROM_MASK_LOW)==0) : Integer.toHexString(number)+", baseChrom="+baseChrom;
3072 		assert(baseChrom>=0) : Integer.toHexString(number)+", baseChrom="+baseChrom;
3073 		int out=(number>>>SHIFT_LENGTH);
3074 		out=out+(baseChrom&CHROM_MASK_HIGH);
3075 		return out;
3076 	}
3077 
3078 	/** Decode an index to a location */
numberToSite(int number)3079 	private static final int numberToSite(int number){
3080 		return (number&SITE_MASK);
3081 	}
3082 
minChrom(int chrom)3083 	private static final int minChrom(int chrom){return Tools.max(MINCHROM, chrom&CHROM_MASK_HIGH);}
baseChrom(int chrom)3084 	private static final int baseChrom(int chrom){return Tools.max(0, chrom&CHROM_MASK_HIGH);}
maxChrom(int chrom)3085 	private static final int maxChrom(int chrom){return Tools.max(MINCHROM, Tools.min(MAXCHROM, chrom|CHROM_MASK_LOW));}
3086 
3087 
getOffsetArray(int len)3088 	private final int[] getOffsetArray(int len){
3089 		if(offsetArrays[len]==null){offsetArrays[len]=new int[len];}
3090 		return offsetArrays[len];
3091 	}
getLocArray(int len)3092 	private final int[] getLocArray(int len){
3093 		if(len>=locArrays.length){return new int[len];}
3094 		if(locArrays[len]==null){locArrays[len]=new int[len];}
3095 		return locArrays[len];
3096 	}
getGreedyListArray(int len)3097 	private final int[] getGreedyListArray(int len){
3098 		if(greedyListArrays[len]==null){greedyListArrays[len]=new int[len];}
3099 		return greedyListArrays[len];
3100 	}
getGenericArray(int len)3101 	private final int[] getGenericArray(int len){
3102 		if(genericArrays[len]==null){genericArrays[len]=new int[len];}
3103 		return genericArrays[len];
3104 	}
3105 
3106 	@Override
getBaseScoreArray(int len, int strand)3107 	final byte[] getBaseScoreArray(int len, int strand){
3108 		if(len>=baseScoreArrays[0].length){return new byte[len];}
3109 		if(baseScoreArrays[strand][len]==null){baseScoreArrays[strand][len]=new byte[len];}
3110 		return baseScoreArrays[strand][len];
3111 	}
3112 	@Override
getKeyScoreArray(int len, int strand)3113 	final int[] getKeyScoreArray(int len, int strand){
3114 		if(len>=keyScoreArrays.length){return new int[len];}
3115 		if(keyScoreArrays[strand][len]==null){keyScoreArrays[strand][len]=new int[len];}
3116 		return keyScoreArrays[strand][len];
3117 	}
getKeyWeightArray(int len)3118 	private final float[] getKeyWeightArray(int len){
3119 		if(len>=keyWeightArrays.length){return new float[len];}
3120 		if(keyWeightArrays[len]==null){keyWeightArrays[len]=new float[len];}
3121 		return keyWeightArrays[len];
3122 	}
3123 	@Override
keyProbArray()3124 	float[] keyProbArray() {
3125 		return keyProbArray;
3126 	}
3127 
3128 	private static final int KEY_BUFFER_LENGTH=256; //Boosted from 128 to 256 to deal with crashes from short kmers, long reads, and vslow.
3129 
3130 	private final int[][] locArrays=new int[601][];
3131 	private final int[] valueArray=new int[KEY_BUFFER_LENGTH];
3132 	private final int[] sizeArray=new int[KEY_BUFFER_LENGTH];
3133 	private final int[][] offsetArrays=new int[KEY_BUFFER_LENGTH][];
3134 	private final int[][] greedyListArrays=new int[KEY_BUFFER_LENGTH][];
3135 	private final int[][] genericArrays=new int[KEY_BUFFER_LENGTH][];
3136 	private final int[] startArray=new int[KEY_BUFFER_LENGTH];
3137 	private final int[] stopArray=new int[KEY_BUFFER_LENGTH];
3138 	private final Quad[] tripleStorage=makeQuadStorage(KEY_BUFFER_LENGTH);
3139 	private final int[] greedyReturn=KillSwitch.allocInt1D(2);
3140 	private final int[][] shrinkReturn2=new int[3][];
3141 	private final int[][] shrinkReturn3=new int[5][];
3142 	private final int[][] prescanReturn=new int[2][];
3143 	private final int[] prescoreArray;
3144 	private final int[] precountArray;
3145 
3146 	private final byte[][][] baseScoreArrays=new byte[2][601][];
3147 	private final int[][][] keyScoreArrays=new int[2][KEY_BUFFER_LENGTH][];
3148 	final float[] keyProbArray=new float[601];
3149 	private final float[][] keyWeightArrays=new float[KEY_BUFFER_LENGTH][];
3150 
3151 
makeQuadStorage(int number)3152 	private final static Quad[] makeQuadStorage(int number){
3153 		Quad[] r=new Quad[number];
3154 		for(int i=0; i<number; i++){r[i]=new Quad(i, 0, 0);}
3155 		return r;
3156 	}
3157 
3158 
3159 	private final QuadHeap heap=new QuadHeap(KEY_BUFFER_LENGTH-1);
3160 	private final QuadHeap active=new QuadHeap(KEY_BUFFER_LENGTH-1);
3161 
3162 	static int SHIFT_LENGTH=(32-1-NUM_CHROM_BITS);
3163 	static int MAX_ALLOWED_CHROM_INDEX=~((-1)<<SHIFT_LENGTH);
3164 
3165 	/** Mask the number to get the site, which is in the lower bits */
3166 	static int SITE_MASK=((-1)>>>(NUM_CHROM_BITS+1));
3167 
3168 	/** Mask the chromosome's high bits to get the low bits */
3169 	static int CHROM_MASK_LOW=CHROMS_PER_BLOCK-1;
3170 
3171 	/** Mask the chromosome's lower bits to get the high bits */
3172 	static int CHROM_MASK_HIGH=~CHROM_MASK_LOW;
3173 
setChromBits(int x)3174 	static void setChromBits(int x){
3175 
3176 		NUM_CHROM_BITS=x;
3177 		CHROMS_PER_BLOCK=(1<<(NUM_CHROM_BITS));
3178 		SHIFT_LENGTH=(32-1-NUM_CHROM_BITS);
3179 		MAX_ALLOWED_CHROM_INDEX=~((-1)<<SHIFT_LENGTH);
3180 		SITE_MASK=((-1)>>>(NUM_CHROM_BITS+1));
3181 		CHROM_MASK_LOW=CHROMS_PER_BLOCK-1;
3182 		CHROM_MASK_HIGH=~CHROM_MASK_LOW;
3183 
3184 //		assert(NUM_CHROM_BITS<30);
3185 		assert(NUM_CHROM_BITS>=0); //max is 3 for human; perhaps more for other organisms
3186 //		assert((1<<(NUM_CHROM_BITS))>=CHROMSPERBLOCK) : (1<<(NUM_CHROM_BITS))+" < "+CHROMSPERBLOCK;
3187 		assert((1<<(NUM_CHROM_BITS))==CHROMS_PER_BLOCK) : (1<<(NUM_CHROM_BITS))+" < "+CHROMS_PER_BLOCK;
3188 		assert(Integer.bitCount(CHROMS_PER_BLOCK)==1);
3189 		assert(Integer.numberOfLeadingZeros(SITE_MASK)==(NUM_CHROM_BITS+1)) : Integer.toHexString(SITE_MASK);
3190 	}
3191 
3192 	private final int cycles;
3193 
3194 	public static final int BASE_HIT_SCORE=100;
3195 	public static final int ALIGN_COLUMNS=3000;
3196 	public static int MAX_INDEL=16000; //Max indel length, min 0, default 400; longer is more accurate
3197 	public static int MAX_INDEL2=2*MAX_INDEL;
3198 
3199 	private final float INV_BASE_KEY_HIT_SCORE;
3200 	private final int INDEL_PENALTY; //default (HIT_SCORE/2)-1
3201 	private final int INDEL_PENALTY_MULT; //default 20; penalty for indel length
3202 	private final int MAX_PENALTY_FOR_MISALIGNED_HIT;
3203 	private final int SCOREZ_1KEY;
3204 
3205 	public static final boolean ADD_SCORE_Z=true; //Increases quality, decreases speed
3206 	public static final int Z_SCORE_MULT=20;
3207 	public static final int Y_SCORE_MULT=10;
3208 
3209 
3210 	/**
3211 	 * Return only sites that match completely or with partial no-reference
3212 	 */
setSemiperfectMode()3213 	public static void setSemiperfectMode() {
3214 		assert(!PERFECTMODE);
3215 		SEMIPERFECTMODE=true;
3216 		PRESCAN_QSCORE=false;
3217 //		MIN_APPROX_HITS_TO_KEEP++;
3218 
3219 
3220 
3221 		MAX_INDEL=0;
3222 		MAX_INDEL2=0;
3223 	}
3224 
3225 	/**
3226 	 * Return only sites that match completely
3227 	 */
setPerfectMode()3228 	public static void setPerfectMode() {
3229 		assert(!SEMIPERFECTMODE);
3230 		PERFECTMODE=true;
3231 		PRESCAN_QSCORE=false;
3232 //		MIN_APPROX_HITS_TO_KEEP++;
3233 
3234 
3235 
3236 		MAX_INDEL=0;
3237 		MAX_INDEL2=0;
3238 	}
3239 
3240 	static float FRACTION_GENOME_TO_EXCLUDE=0.03f; //Default .03; lower is slower and more accurate.  For perfect reads and small genomes, lower is FASTER.
3241 
setFractionToExclude(float f)3242 	public static final void setFractionToExclude(float f){
3243 		assert(f>=0 && f<1);
3244 		FRACTION_GENOME_TO_EXCLUDE=f;
3245 		MIN_INDEX_TO_DROP_LONG_HIT_LIST=(int)(1000*(1-3.5*FRACTION_GENOME_TO_EXCLUDE)); //default 810
3246 		MAX_AVERAGE_LIST_TO_SEARCH=(int)(1000*(1-2.3*FRACTION_GENOME_TO_EXCLUDE)); //lower is faster, default 840
3247 		MAX_AVERAGE_LIST_TO_SEARCH2=(int)(1000*(1-1.4*FRACTION_GENOME_TO_EXCLUDE)); //default 910
3248 		MAX_SINGLE_LIST_TO_SEARCH=(int)(1000*(1-1.0*FRACTION_GENOME_TO_EXCLUDE)); //default 935
3249 		MAX_SHORTEST_LIST_TO_SEARCH=(int)(1000*(1-2.8*FRACTION_GENOME_TO_EXCLUDE)); //Default 860
3250 	}
3251 
3252 
3253 	/** Default .75.  Range: 0 to 1 (but 0 will break everything).  Lower is faster and less accurate. */
3254 	static final float HIT_FRACTION_TO_RETAIN=0.85f; //default: .8
3255 	/** Range: 0 to 1000.  Lower should be faster and less accurate. */
3256 	static int MIN_INDEX_TO_DROP_LONG_HIT_LIST=(int)(1000*(1-3.5*FRACTION_GENOME_TO_EXCLUDE)); //default 810
3257 	/** Range: 2 to infinity.  Lower should be faster and less accurate. */
3258 	static final int MIN_HIT_LISTS_TO_RETAIN=6;
3259 
3260 	static int MAX_AVERAGE_LIST_TO_SEARCH=(int)(1000*(1-2.3*FRACTION_GENOME_TO_EXCLUDE)); //lower is faster, default 840
3261 	//lower is faster
3262 	static int MAX_AVERAGE_LIST_TO_SEARCH2=(int)(1000*(1-1.4*FRACTION_GENOME_TO_EXCLUDE)); //default 910
3263 	//lower is faster
3264 	static int MAX_SINGLE_LIST_TO_SEARCH=(int)(1000*(1-1.0*FRACTION_GENOME_TO_EXCLUDE)); //default 935
3265 	//lower is faster
3266 	static int MAX_SHORTEST_LIST_TO_SEARCH=(int)(1000*(1-2.8*FRACTION_GENOME_TO_EXCLUDE)); //Default 860
3267 
3268 	/** To increase accuracy on small genomes, override greedy list dismissal when the list is at most this long. */
3269 	public static final int SMALL_GENOME_LIST=20;
3270 
3271 	static{assert(!(TRIM_BY_GREEDY && TRIM_BY_TOTAL_SITE_COUNT)) : "Pick one.";}
3272 
3273 	static final int CLUMPY_MAX_DIST=5; //Keys repeating over intervals of this or less are clumpy.
3274 
3275 	/** Minimum length of list before clumpiness is considered. This is an index in the length histogram, from 0 to 1000. */
3276 	static final int CLUMPY_MIN_LENGTH_INDEX=2000;
3277 	static final float CLUMPY_FRACTION=0.75f; //0 to 1; higher is slower but more stringent. 0.5 means the median distance is clumpy.
3278 
3279 	static final int MAX_SUBSUMPTION_LENGTH=MAX_INDEL2;
3280 
3281 	/** approxHitsCutoff=maxHits-MAX_HITS_REDUCTION1 when slowWalk3 is first entered */
3282 	public static final int MAX_HITS_REDUCTION1=0;
3283 
3284 	/** approxHitsCutoff=maxHits-MAX_HITS_REDUCTION2 dynamically when best score is exceeded */
3285 	public static int MAX_HITS_REDUCTION2=2; //default 1; higher is more accurate (more mapping and less FP) but slower
3286 
3287 	/** approxHitsCutoff=maxHits-MAX_HITS_REDUCTION_PERFECT when perfect score is found */
3288 	public static final int MAX_HITS_REDUCTION_PERFECT=0;
3289 
3290 	public static int MAXIMUM_MAX_HITS_REDUCTION=3;
3291 	public static int HIT_REDUCTION_DIV=5;
3292 
calcApproxHitsCutoff(final int keys, final int hits, int currentCutoff, final boolean perfect)3293 	private static final int calcApproxHitsCutoff(final int keys, final int hits, int currentCutoff, final boolean perfect){ //***$
3294 		assert(keys>=hits) : keys+", "+hits;
3295 		assert(hits>=0);
3296 
3297 		int mahtk=MIN_APPROX_HITS_TO_KEEP;
3298 		if(SEMIPERFECTMODE || PERFECTMODE){
3299 			if(keys==1){return 1;}
3300 			else if(MIN_APPROX_HITS_TO_KEEP<keys){
3301 				mahtk++;
3302 				if(currentCutoff==MIN_APPROX_HITS_TO_KEEP){currentCutoff++;}
3303 			}
3304 		}
3305 
3306 		int reduction=Tools.min(Tools.max((hits)/HIT_REDUCTION_DIV, MAX_HITS_REDUCTION2), Tools.max(MAXIMUM_MAX_HITS_REDUCTION, keys/8));
3307 //		if(hits<3){reduction=0;}
3308 //		else if(hits<4){reduction=Tools.min(1, reduction);}
3309 //		else if(hits<5){reduction=Tools.min(2, reduction);}
3310 //		else if(hits<6){reduction=Tools.min(3, reduction);}
3311 		assert(reduction>=0);
3312 		int r=hits-reduction;
3313 
3314 		r=Tools.max(mahtk, currentCutoff, r);
3315 
3316 		if(perfect){
3317 			r=Tools.max(r, keys-MAX_HITS_REDUCTION_PERFECT);
3318 		}
3319 		return r;
3320 	}
3321 
3322 	public static final boolean USE_SLOWALK3=true && USE_EXTENDED_SCORE;
3323 	public static boolean PRESCAN_QSCORE=true && USE_EXTENDED_SCORE; //Decrease quality and increase speed
3324 	public static final boolean FILTER_BY_QSCORE=true; //Slightly lower quality, but very fast.
3325 	public static final float MIN_SCORE_MULT=(USE_AFFINE_SCORE ? 0.15f : USE_EXTENDED_SCORE ? .3f : 0.10f);  //Fraction of max score to use as cutoff.  Default 0.15, max is 1; lower is more accurate
3326 	public static final float MIN_QSCORE_MULT=0.025f;  //Fraction of max score to use as cutoff.  Default 0.15, max is 1; lower is more accurate
3327 	public static final float MIN_QSCORE_MULT2=0.1f;
3328 	static final float DYNAMIC_SCORE_THRESH=(USE_AFFINE_SCORE ? 0.84f : USE_EXTENDED_SCORE ? .84f : 0.6f); //Default .85f; lower is more accurate
3329 	static final float DYNAMIC_QSCORE_THRESH=0.6f; //default .58f
3330 	static final float DYNAMIC_QSCORE_THRESH_PERFECT=0.8f; //***$
3331 	static final float PRESCAN_QSCORE_THRESH=DYNAMIC_QSCORE_THRESH*.95f; //default 1.0f; lower is more accurate and 0 essentially sets PRESCAN_QSCORE=false
3332 	static{
3333 		assert(MIN_SCORE_MULT>=0 && MIN_SCORE_MULT<1);
3334 		assert(DYNAMIC_SCORE_THRESH>=0 && DYNAMIC_SCORE_THRESH<1);
3335 	}
3336 
3337 
3338 }
3339