1 package align2; 2 3 import java.util.ArrayList; 4 import java.util.Arrays; 5 6 import dna.AminoAcid; 7 import dna.Data; 8 import shared.KillSwitch; 9 import shared.Shared; 10 import shared.Tools; 11 import stream.Read; 12 import stream.SiteScore; 13 14 15 /** 16 * Based on Index11f 17 * Index stored in single array per block. 18 * Supports 32-bit unsigned index. 19 * 20 * @author Brian Bushnell 21 * @date Jan 3, 2013 22 * 23 */ 24 public final class BBIndex5 extends AbstractIndex { 25 26 main(String[] args)27 public static void main(String[] args){ 28 29 int k=13; 30 31 for(int i=0; i<args.length; i++){ 32 String s=args[i].toLowerCase(); 33 if(s.contains("=")){ 34 String[] split=s.split("="); 35 String a=split[0]; 36 String b=split[1]; 37 if(a.equals("build") || a.equals("b")){ 38 Data.setGenome(Integer.parseInt(b)); 39 }else if(a.equals("minchrom")){ 40 MINCHROM=Integer.parseInt(b); 41 }else if(a.equals("maxchrom")){ 42 MAXCHROM=Integer.parseInt(b); 43 }else if(a.equals("keylen") || a.equals("k")){ 44 k=Integer.parseInt(b); 45 } 46 } 47 } 48 49 if(MINCHROM==-1){MINCHROM=1;} 50 if(MAXCHROM==-1){ 51 assert(Data.numChroms<=Byte.MAX_VALUE) : "TODO"; 52 MAXCHROM=Data.numChroms; 53 } 54 55 56 System.err.println("Writing build "+Data.GENOME_BUILD+" "+ 57 "BASESPACE index, keylen="+k+", chrom bits="+NUM_CHROM_BITS); 58 59 60 int first=(NUM_CHROM_BITS==0 ? 1 : 0); 61 // first=24; 62 63 Data.sysout.println("Loading index for chunk "+first+"-"+MAXCHROM+", build "+Data.GENOME_BUILD); 64 index=IndexMaker5.makeIndex(Data.GENOME_BUILD, first, MAXCHROM, 65 k, NUM_CHROM_BITS, MAX_ALLOWED_CHROM_INDEX, CHROM_MASK_LOW, CHROM_MASK_HIGH, SITE_MASK, SHIFT_LENGTH, true, false, index); 66 67 68 System.err.println("Finished all chroms, may still be writing."); 69 } 70 71 BBIndex5(int k_, int minChrom_, int maxChrom_, int kfilter_, MSA msa_)72 public BBIndex5(int k_, int minChrom_, int maxChrom_, int kfilter_, MSA msa_){ 73 super(k_, kfilter_, BASE_HIT_SCORE, minChrom_, maxChrom_, msa_); 74 INV_BASE_KEY_HIT_SCORE=1f/BASE_KEY_HIT_SCORE; 75 INDEL_PENALTY=(BASE_KEY_HIT_SCORE/2)-1; //default (HIT_SCORE/2)-1 76 INDEL_PENALTY_MULT=20; //default 20; penalty for indel length 77 MAX_PENALTY_FOR_MISALIGNED_HIT=BASE_KEY_HIT_SCORE-(1+BASE_KEY_HIT_SCORE/8); 78 SCOREZ_1KEY=Z_SCORE_MULT*KEYLEN; 79 { 80 int cyc=0; 81 for(int chrom=minChrom; chrom<=maxChrom; chrom=((chrom&CHROM_MASK_HIGH)+CHROMS_PER_BLOCK)){cyc+=2;} 82 cycles=cyc; 83 } 84 prescoreArray=new int[cycles]; 85 precountArray=new int[cycles]; 86 } 87 88 /** Load or generate index from minChrom to maxChrom, inclusive, with keylength k. 89 * This range can encompass multiple blocks. 90 * Should only be called once in a process. */ loadIndex(int minChrom, int maxChrom, int k, boolean writeToDisk, boolean diskInvalid)91 public static final synchronized void loadIndex(int minChrom, int maxChrom, int k, boolean writeToDisk, boolean diskInvalid){ 92 if(minChrom<1){minChrom=1;} 93 if(maxChrom>Data.numChroms){maxChrom=Data.numChroms;} 94 assert(minChrom<=maxChrom); 95 Data.sysout.println("Loading index for chunk "+minChrom+"-"+maxChrom+", build "+Data.GENOME_BUILD); 96 index=IndexMaker5.makeIndex(Data.GENOME_BUILD, minChrom, maxChrom, 97 k, NUM_CHROM_BITS, MAX_ALLOWED_CHROM_INDEX, CHROM_MASK_LOW, CHROM_MASK_HIGH, SITE_MASK, SHIFT_LENGTH, writeToDisk, diskInvalid, index); 98 99 } 100 101 /** Calculate statistics of index, such as list lengths, and find clumpy keys */ analyzeIndex(int minChrom, int maxChrom, float fractionToExclude, int k)102 public static final synchronized void analyzeIndex(int minChrom, int maxChrom, float fractionToExclude, int k){ 103 104 assert(lengthHistogram==null); 105 assert(COUNTS==null); 106 107 int KEYSPACE=1<<(2*k); 108 COUNTS=new int[KEYSPACE]; 109 110 maxChrom=maxChrom(maxChrom); 111 112 for(int key=0; key<KEYSPACE; key++){ 113 int rkey=KeyRing.reverseComplementKey(key, k); 114 assert(key==KeyRing.reverseComplementKey(rkey, k)); 115 116 if(key<=rkey){ 117 118 long clumps=0; 119 long len=0; 120 121 for(int chrom=minChrom; chrom<=maxChrom; chrom=((chrom&CHROM_MASK_HIGH)+CHROMS_PER_BLOCK)){ 122 Block b=index[chrom]; 123 124 final int[] sites=b.sites; 125 final int start1=b.starts[key]; 126 final int stop1=start1+b.length(key); 127 final int start2=(rkey==key ? -1 : b.starts[rkey]); 128 final int stop2=(rkey==key ? -1 : start2+b.length(rkey)); 129 final int len1=stop1-start1; 130 final int len2=stop2-start2; 131 132 len=len+len1+len2; 133 // System.out.println(len+", len1="+len1+", len2="+len2+", start1="+start1+", stop1="+stop1+", start2="+start2+", stop2="+stop2); 134 135 if(REMOVE_CLUMPY){ 136 for(int i=start1+1; i<stop1; i++){ 137 int dif=sites[i]-sites[i-1]; 138 assert(dif!=0); 139 if(dif>0 && dif<=CLUMPY_MAX_DIST){ 140 clumps++; 141 } 142 } 143 144 for(int i=start2+1; i<stop2; i++){ 145 int dif=sites[i]-sites[i-1]; 146 assert(dif!=0); 147 if(dif>0 && dif<=CLUMPY_MAX_DIST){ 148 clumps++; 149 } 150 } 151 } 152 153 } 154 155 COUNTS[key]=(int)Tools.min(Integer.MAX_VALUE, COUNTS[key]+len); 156 if(key!=rkey){COUNTS[rkey]=(int)Tools.min(Integer.MAX_VALUE, COUNTS[rkey]+len);} 157 assert(COUNTS[key]==COUNTS[rkey]) : key+", "+rkey; 158 159 if(REMOVE_CLUMPY && len>CLUMPY_MIN_LENGTH_INDEX && clumps>(CLUMPY_FRACTION*len)){ 160 COUNTS[key]=0; 161 COUNTS[rkey]=0; 162 for(int chrom=minChrom; chrom<=maxChrom; chrom=((chrom&CHROM_MASK_HIGH)+CHROMS_PER_BLOCK)){ 163 Block b=index[chrom]; 164 final int[] sites=b.sites; 165 sites[b.starts[key]]=-1; 166 sites[b.starts[rkey]]=-1; 167 } 168 } 169 170 // System.err.println("COUNTS["+key+"] = "+COUNTS[key]+", COUNTS["+rkey+"] = "+COUNTS[rkey]); 171 } 172 } 173 174 lengthHistogram=Tools.makeLengthHistogram3(COUNTS, 1000, verbose2); 175 176 //if(verbose2){System.err.println("lengthHistogram: "+Arrays.toString(lengthHistogram));} 177 178 if(REMOVE_FREQUENT_GENOME_FRACTION){ 179 180 int lengthLimitIndex=(int)((1-fractionToExclude)*(lengthHistogram.length-1)); 181 int lengthLimitIndex2=(int)((1-fractionToExclude*DOUBLE_SEARCH_THRESH_MULT)*(lengthHistogram.length-1)); 182 183 MAX_USABLE_LENGTH=Tools.max(2*SMALL_GENOME_LIST, lengthHistogram[lengthLimitIndex]); 184 MAX_USABLE_LENGTH2=Tools.max(6*SMALL_GENOME_LIST, lengthHistogram[lengthLimitIndex2]); 185 186 if(verbose2){System.err.println("MAX_USABLE_LENGTH: "+MAX_USABLE_LENGTH+"\nMAX_USABLE_LENGTH2: "+MAX_USABLE_LENGTH2);} 187 } 188 189 Solver.POINTS_PER_SITE=(int)Math.floor((Solver.BASE_POINTS_PER_SITE*4000f)/Tools.max(2*SMALL_GENOME_LIST, lengthHistogram[MAX_AVERAGE_LIST_TO_SEARCH])); 190 if(Solver.POINTS_PER_SITE==0){Solver.POINTS_PER_SITE=-1;} 191 if(verbose2){System.err.println("POINTS_PER_SITE: "+Solver.POINTS_PER_SITE);} 192 assert(Solver.POINTS_PER_SITE<0) : Solver.POINTS_PER_SITE; 193 } 194 195 196 /** Returns the filename for the block holding this chrom */ 197 public static final String fname(int chrom, int k){ 198 return IndexMaker5.fname(minChrom(chrom), maxChrom(chrom), k, NUM_CHROM_BITS); 199 } 200 201 /** Ensure key offsets are strictly ascending. */ 202 private static boolean checkOffsets(int[] offsets){ 203 for(int i=1; i<offsets.length; i++){ 204 if(offsets[i]<=offsets[i-1]){return false;} 205 } 206 return true; 207 } 208 209 @Deprecated 210 private final int trimExcessHitLists(int[] keys, int[][] hits){ 211 212 assert(false) : "Needs to be redone because hits are no longer sorted by length."; 213 214 assert(hits.length==keys.length); 215 // assert(false) : "modify this function so that it gives more weight to trimming lists over highly covered baits"; 216 //And also, incorporate the "remove the longest list" function 217 218 final int limit=Tools.max(SMALL_GENOME_LIST, lengthHistogram[MAX_AVERAGE_LIST_TO_SEARCH])*keys.length; 219 final int limit2=Tools.max(SMALL_GENOME_LIST, lengthHistogram[MAX_AVERAGE_LIST_TO_SEARCH2]); 220 final int limit3=Tools.max(SMALL_GENOME_LIST, lengthHistogram[MAX_SHORTEST_LIST_TO_SEARCH]); 221 222 int sum=0; 223 int initialHitCount=0; 224 225 int shortest=Integer.MAX_VALUE-1; 226 int shortest2=Integer.MAX_VALUE; 227 228 for(int i=0; i<keys.length; i++){ 229 int key=keys[i]; 230 int x=count(key); 231 sum+=x; 232 initialHitCount+=(x==0 ? 0 : 1); 233 if(x>0){ 234 if(x<shortest2){ 235 shortest2=x; 236 if(shortest2<shortest){ 237 shortest2=shortest; 238 shortest=x; 239 } 240 } 241 } 242 } 243 assert(shortest2>=shortest); 244 if(initialHitCount<MIN_APPROX_HITS_TO_KEEP){return initialHitCount;} 245 if(shortest>limit3 && !SLOW){ 246 for(int i=0; i<hits.length; i++){hits[i]=null;} 247 return 0; 248 } 249 if(sum<=limit && sum/initialHitCount<=limit2){return initialHitCount;} 250 251 Pointer[] ptrs=Pointer.loadMatrix(hits); 252 // ptrs[0].value/=2; 253 // ptrs[ptrs.length-1].value/=2; 254 Arrays.sort(ptrs); 255 256 int finalHitCount=initialHitCount; 257 for(int i=ptrs.length-1; sum>limit || sum/finalHitCount>limit2; i--){ 258 Pointer p=ptrs[i]; 259 sum-=hits[p.key].length; 260 hits[p.key]=null; 261 finalHitCount--; 262 } 263 264 return finalHitCount; 265 } 266 267 /** Remove least useful keys to accelerate search */ 268 private final int trimExcessHitListsByGreedy(int[] offsets, int[] keyScores, int maxHitLists, int[] keys){ 269 270 float[] keyWeights=getKeyWeightArray(keyScores.length); 271 for(int i=0; i<keyScores.length; i++){ 272 keyWeights[i]=keyScores[i]*INV_BASE_KEY_HIT_SCORE; 273 } 274 275 // assert(false) : "modify this function so that it gives more weight to trimming lists over highly covered baits"; 276 //And also, incorporate the "remove the longest list" function 277 278 final int limit=Tools.max(SMALL_GENOME_LIST, lengthHistogram[MAX_AVERAGE_LIST_TO_SEARCH])*keys.length; 279 final int limit2=Tools.max(SMALL_GENOME_LIST, lengthHistogram[MAX_AVERAGE_LIST_TO_SEARCH2]); 280 final int limit3=Tools.max(SMALL_GENOME_LIST, lengthHistogram[MAX_SHORTEST_LIST_TO_SEARCH]); 281 // final int limitS=lengthHistogram[chrom][MAX_SINGLE_LIST_TO_SEARCH]; 282 283 int sum=0; 284 int initialHitCount=0; 285 286 int shortest=Integer.MAX_VALUE-1; 287 int shortest2=Integer.MAX_VALUE; 288 289 // for(int i=0; i<hits.length; i++){ 290 // if(hits[i]!=null && hits[i].length>limitS){hits[i]=null;} 291 // } 292 293 final int[] lengths=getGenericArray(keys.length); 294 295 for(int i=0; i<keys.length; i++){ 296 int key=keys[i]; 297 int x=count(key); 298 lengths[i]=x; 299 sum+=x; 300 initialHitCount+=(x==0 ? 0 : 1); 301 if(x>0){ 302 if(x<shortest2){ 303 shortest2=x; 304 if(shortest2<shortest){ 305 shortest2=shortest; 306 shortest=x; 307 } 308 } 309 } 310 } 311 assert(shortest2>=shortest); 312 if(initialHitCount<MIN_APPROX_HITS_TO_KEEP){return initialHitCount;} 313 if(shortest>limit3 && !SLOW){ 314 for(int i=0; i<keys.length; i++){keys[i]=-1;} 315 return 0; 316 } 317 318 int hitsCount=initialHitCount; 319 int worstValue=Integer.MIN_VALUE; 320 321 while(hitsCount>=MIN_APPROX_HITS_TO_KEEP && (sum>limit || sum/initialHitCount>limit2 || hitsCount>maxHitLists/* || worstValue<0*/)){ 322 final int[] lists=getGreedyListArray(hitsCount); 323 for(int i=0, j=0; j<lists.length; i++){ 324 if(lengths[i]>0){ 325 lists[j]=i; 326 j++; 327 } 328 } 329 330 Solver.findWorstGreedy(offsets, lengths, keyWeights, KEYLEN, lists, greedyReturn); 331 int worstIndex=greedyReturn[0]; 332 int worst=lists[worstIndex]; 333 worstValue=greedyReturn[1]; 334 sum-=lengths[worst]; 335 336 // if(worstValue>0 && (hitsCount<=maxHitLists || lengths[worst]<excessListLimit)){return hitsCount;} 337 if(worstValue>0 || lengths[worst]<SMALL_GENOME_LIST){return hitsCount;} //This line increases accuracy at expense of speed. Lower constant = more accurate, default 0. 338 hitsCount--; 339 lengths[worst]=0; 340 keys[worst]=-1; 341 } 342 return hitsCount; 343 } 344 345 346 private final int getHits(final int[] keys, final int chrom, final int maxLen, final int[] starts, final int[] stops){ 347 int numHits=0; 348 final Block b=index[chrom]; 349 for(int i=0; i<keys.length; i++){ 350 final int key=keys[i]; 351 starts[i]=-1; 352 stops[i]=-1; 353 if(key>=0){ 354 final int len=count(key); 355 if(len>0 && len<maxLen){ 356 final int len2=b.length(key); 357 if(len2>0){ 358 starts[i]=b.starts[key]; 359 stops[i]=starts[i]+len2; 360 numHits++; 361 } 362 } 363 } 364 } 365 return numHits; 366 } 367 368 369 private final int countHits(final int[] keys, final int maxLen, boolean clearBadKeys){ 370 int numHits=0; 371 for(int i=0; i<keys.length; i++){ 372 final int key=keys[i]; 373 if(key>=0){ 374 final int len=count(key); 375 if(len>0 && len<maxLen){ 376 numHits++; 377 }else if(clearBadKeys){ 378 keys[i]=-1; 379 } 380 } 381 } 382 return numHits; 383 } 384 385 386 @Override 387 public final ArrayList<SiteScore> findAdvanced(byte[] basesP, byte[] basesM, byte[] qual, byte[] baseScoresP, int[] keyScoresP, int[] offsets, long id){ 388 assert(minChrom<=maxChrom && minChrom>=0); 389 ArrayList<SiteScore> result=find(basesP, basesM, qual, baseScoresP, keyScoresP, offsets, true, id); 390 if(DOUBLE_SEARCH_NO_HIT && (result==null || result.isEmpty())){result=find(basesP, basesM, qual, baseScoresP, keyScoresP, offsets, false, id);} 391 392 return result; 393 } 394 395 396 public final ArrayList<SiteScore> find(byte[] basesP, byte[] basesM, byte[] qual, byte[] baseScoresP, int[] keyScoresP, int[] offsetsP, boolean obeyLimits, long id){ 397 398 assert(checkOffsets(offsetsP)) : Arrays.toString(offsetsP); 399 final int[] keysOriginal=KeyRing.makeKeys(basesP, offsetsP, KEYLEN); 400 int[] keysP=Arrays.copyOf(keysOriginal, keysOriginal.length); 401 402 initialKeys+=offsetsP.length; 403 initialKeyIterations++; 404 405 final int maxLen=(obeyLimits ? MAX_USABLE_LENGTH : MAX_USABLE_LENGTH2); 406 407 int numHits=0; 408 numHits=countHits(keysP, maxLen, true); 409 if(numHits>0){ //TODO: Change these to higher numbers 410 int trigger=(3*keysP.length)/4; 411 if(numHits<4 && numHits<trigger){ 412 for(int i=0; i<keysP.length; i++){keysP[i]=keysOriginal[i];} 413 numHits=countHits(keysP, (maxLen*3)/2, true); 414 } 415 if(numHits<3 && numHits<trigger){ 416 for(int i=0; i<keysP.length; i++){keysP[i]=keysOriginal[i];} 417 numHits=countHits(keysP, maxLen*2, true); 418 } 419 if(numHits<3 && numHits<trigger){ 420 for(int i=0; i<keysP.length; i++){keysP[i]=keysOriginal[i];} 421 numHits=countHits(keysP, maxLen*3, true); 422 } 423 if(numHits<2 && numHits<trigger){ 424 for(int i=0; i<keysP.length; i++){keysP[i]=keysOriginal[i];} 425 numHits=countHits(keysP, maxLen*5, true); 426 } 427 } 428 // assert(checkOffsets(offsetsP)) : Arrays.toString(offsetsP); 429 if(numHits<keysP.length){ 430 int[][] r=shrink2(offsetsP, keysP, keyScoresP); 431 assert(r!=null); 432 if(r!=null){ 433 offsetsP=r[0]; 434 keysP=r[1]; 435 keyScoresP=r[2]; 436 } 437 }else{ 438 assert(shrink2(offsetsP, keysP, keyScoresP)==null); 439 } 440 initialKeys2+=numHits; 441 //assert(checkOffsets(offsetsP)) : Arrays.toString(offsetsP); 442 443 // assert(checkOffsets(offsets)) : Arrays.toString(offsets); 444 if(TRIM_BY_GREEDY && obeyLimits){ 445 int maxLists=Tools.max((int)(HIT_FRACTION_TO_RETAIN*keysP.length), MIN_HIT_LISTS_TO_RETAIN); 446 numHits=trimExcessHitListsByGreedy(offsetsP, keyScoresP, maxLists, keysP); 447 } 448 // System.out.println("After greedy: numHits = "+numHits); 449 450 if(TRIM_BY_TOTAL_SITE_COUNT && obeyLimits){ 451 throw new RuntimeException("Needs to be redone."); 452 // numHits=trimExcessHitLists(keys, hits); 453 } 454 455 if(TRIM_LONG_HIT_LISTS && obeyLimits && numHits>MIN_APPROX_HITS_TO_KEEP){ 456 int cutoffIndex=((int) (HIT_FRACTION_TO_RETAIN*(keysP.length)-0.01f))+(keysP.length-numHits); 457 458 int zeroes=keysP.length-numHits; 459 int altMinIndex=(zeroes+(MIN_HIT_LISTS_TO_RETAIN-1)); 460 cutoffIndex=Tools.max(cutoffIndex, altMinIndex); 461 462 assert(cutoffIndex>0) : cutoffIndex+"\n"+numHits; 463 464 if(cutoffIndex<(keysP.length-1)){ 465 int[] lens=getGenericArray(keysP.length); 466 for(int i=0; i<keysP.length; i++){lens[i]=count(keysP[i]);} 467 Arrays.sort(lens); 468 int cutoff=lens[cutoffIndex]; 469 470 cutoff=Tools.max(lengthHistogram[MIN_INDEX_TO_DROP_LONG_HIT_LIST], cutoff); 471 472 int removed=0; 473 474 for(int i=0; i<keysP.length; i++){ 475 int key=keysP[i]; 476 if(count(key)>cutoff){ 477 keysP[i]=-1; 478 removed++; 479 numHits--; 480 } 481 } 482 } 483 } 484 // assert(checkOffsets(offsets)) : Arrays.toString(offsets); 485 486 final ArrayList<SiteScore> result=new ArrayList<SiteScore>(8); 487 if(numHits<MIN_APPROX_HITS_TO_KEEP){return result;} 488 //assert(checkOffsets(offsetsP)) : Arrays.toString(offsetsP); 489 if(numHits<keysP.length){ 490 int[][] r=shrink2(offsetsP, keysP, keyScoresP); 491 assert(r!=null); 492 if(r!=null){ 493 offsetsP=r[0]; 494 keysP=r[1]; 495 keyScoresP=r[2]; 496 } 497 }else{ 498 assert(shrink2(offsetsP, keysP, keyScoresP)==null); 499 } 500 assert(keysP.length==numHits); 501 //assert(checkOffsets(offsetsP)) : Arrays.toString(offsetsP); 502 //Reverse the offsets for minus-strand mapping, since they are generated based on quality 503 int[] offsetsM=KeyRing.reverseOffsets(offsetsP, KEYLEN, basesP.length); 504 final int[] keysM=KeyRing.reverseComplementKeys(keysP, KEYLEN); 505 506 // assert(checkOffsets(offsetsP)) : Arrays.toString(offsetsP); 507 // assert(checkOffsets(offsetsM)) : Arrays.toString(offsetsM); 508 509 assert(!USE_EXTENDED_SCORE || (baseScoresP!=null && (qual==null || baseScoresP.length==qual.length))); 510 assert(keyScoresP!=null); 511 assert(keyScoresP.length==offsetsP.length) : keyScoresP.length+", "+offsetsP.length+", "+Arrays.toString(keyScoresP); 512 final byte[] baseScoresM=Tools.reverseAndCopy(baseScoresP, getBaseScoreArray(baseScoresP.length, 1)); 513 final int[] keyScoresM=Tools.reverseAndCopy(keyScoresP, getKeyScoreArray(keyScoresP.length, 1)); 514 final int maxQuickScore=maxQuickScore(offsetsP, keyScoresP); 515 516 assert(offsetsM.length==offsetsP.length); 517 assert(maxQuickScore==maxQuickScore(offsetsM, keyScoresM)); 518 519 final int[] bestScores=KillSwitch.allocInt1D(6); 520 521 //This prevents filtering by qscore when a low-quality read only uses a few keys. 522 //In that case, extending is more important. 523 final boolean prescan_qscore=(PRESCAN_QSCORE && numHits>=5); 524 525 int[][] prescanResults=null; 526 int[] precounts=null; 527 int[] prescores=null; 528 529 int hitsCutoff=0; 530 int qscoreCutoff=(int)(MIN_QSCORE_MULT*maxQuickScore); 531 532 boolean allBasesCovered=true; 533 { 534 if(offsetsP[0]!=0){allBasesCovered=false;} 535 else if(offsetsP[offsetsP.length-1]!=(basesP.length-KEYLEN)){allBasesCovered=false;} 536 else{ 537 for(int i=1; i<offsetsP.length; i++){ 538 if(offsetsP[i]>offsetsP[i-1]+KEYLEN){ 539 allBasesCovered=false; 540 break; 541 } 542 } 543 } 544 } 545 546 //TODO I don't understand this logic 547 final boolean pretendAllBasesAreCovered=(allBasesCovered || 548 keysP.length>=keysOriginal.length-4 || 549 (keysP.length>=9 && (offsetsP[offsetsP.length-1]-offsetsP[0]+KEYLEN)>Tools.max(40, (int)(basesP.length*.75f)))); 550 551 // System.err.println(allBasesCovered+"\t"+Arrays.toString(offsetsP)); 552 // assert(allBasesCovered); 553 554 if(prescan_qscore){ 555 prescanResults=prescanAllBlocks(bestScores, 556 keysP, keyScoresP, offsetsP, 557 keysM, keyScoresM, offsetsM, 558 pretendAllBasesAreCovered); 559 560 if(prescanResults!=null){ 561 precounts=prescanResults[0]; 562 prescores=prescanResults[1]; 563 } 564 565 if(bestScores[1]<MIN_APPROX_HITS_TO_KEEP){return result;} 566 if(bestScores[3]<maxQuickScore*MIN_QSCORE_MULT2){return result;} //if(bestScores[3]<maxQuickScore(offsetsP, keyScoresP)*.10f){return result;} 567 568 if(bestScores[3]>=maxQuickScore && pretendAllBasesAreCovered){ 569 assert(bestScores[3]==maxQuickScore); 570 assert(bestScores[1]==numHits); 571 572 hitsCutoff=calcApproxHitsCutoff(keysP.length, bestScores[1], MIN_APPROX_HITS_TO_KEEP, true); 573 qscoreCutoff=Tools.max(qscoreCutoff, (int)(bestScores[3]*DYNAMIC_QSCORE_THRESH_PERFECT)); 574 }else{ 575 hitsCutoff=calcApproxHitsCutoff(keysP.length, bestScores[1], MIN_APPROX_HITS_TO_KEEP, false); 576 qscoreCutoff=Tools.max(qscoreCutoff, (int)(bestScores[3]*PRESCAN_QSCORE_THRESH)); 577 } 578 } 579 580 final int maxScore=maxScore(offsetsP, baseScoresP, keyScoresP, basesP.length, true); 581 final boolean fullyDefined=AminoAcid.isFullyDefined(basesP); 582 assert(bestScores[2]<=0) : Arrays.toString(bestScores); 583 584 int cycle=0; 585 for(int chrom=minChrom; chrom<=maxChrom; chrom=((chrom&CHROM_MASK_HIGH)+CHROMS_PER_BLOCK)){ 586 if(precounts==null || precounts[cycle]>=hitsCutoff || prescores[cycle]>=qscoreCutoff){ 587 find(keysP, basesP, baseScoresP, keyScoresP, chrom, Shared.PLUS, 588 offsetsP, obeyLimits, result, bestScores, allBasesCovered, maxScore, fullyDefined); 589 } 590 if(QUIT_AFTER_TWO_PERFECTS){ 591 if(bestScores[5]>=2){break;} //if(bestScores[5]>=3 || (bestScores[5]>=2 && chrom<24)){break;} //for human 592 } 593 cycle++; 594 if(precounts==null || precounts[cycle]>=hitsCutoff || prescores[cycle]>=qscoreCutoff){ 595 find(keysM, basesM, baseScoresM, keyScoresM, chrom, Shared.MINUS, 596 offsetsM, obeyLimits, result, bestScores, allBasesCovered, maxScore, fullyDefined); 597 } 598 if(QUIT_AFTER_TWO_PERFECTS){ 599 if(bestScores[5]>=2){break;} //if(bestScores[5]>=3 || (bestScores[5]>=2 && chrom<24)){break;} //for human 600 } 601 cycle++; 602 } 603 604 assert(Read.CHECKSITES(result, basesP, basesM, id, false)); //TODO: Comment out once checked 605 606 return result; 607 } 608 609 /** Search blocks rapidly to find max hits, and perfect sites. May indicate some blocks can be skipped. */ 610 private final int[][] prescanAllBlocks(int[] bestScores, 611 int[] keysP, int[] keyScoresP, int[] offsetsP, 612 int[] keysM, int[] keyScoresM, int[] offsetsM, 613 final boolean allBasesCovered){ 614 615 int[][][] pm=new int[][][] {{keysP, keyScoresP, offsetsP}, {keysM, keyScoresM, offsetsM}}; 616 617 int bestqscore=0; 618 int maxHits=0; 619 int minHitsToScore=MIN_APPROX_HITS_TO_KEEP; 620 621 final int maxQuickScore=maxQuickScore(offsetsP, keyScoresP); 622 623 final int[] counts=precountArray; 624 final int[] scores=prescoreArray; 625 final int[][] ret=prescanReturn; 626 Arrays.fill(counts, keysP.length); 627 Arrays.fill(scores, maxQuickScore); 628 ret[0]=counts; 629 ret[1]=scores; 630 631 int cycle=0; 632 for(int chrom=minChrom; chrom<=maxChrom; chrom=((chrom&CHROM_MASK_HIGH)+CHROMS_PER_BLOCK)){ 633 final int baseChrom=baseChrom(chrom); 634 for(int pmi=0; pmi<2; pmi++, cycle++){ 635 636 int[] keys=pm[pmi][0]; 637 int[] keyScores=pm[pmi][1]; 638 int[] offsets=pm[pmi][2]; 639 // int[][] hits=getHitArray(offsets.length); 640 641 int[] starts=startArray; 642 int[] stops=stopArray; 643 final int numHits=getHits(keys, chrom, Integer.MAX_VALUE, starts, stops); 644 645 if(numHits<minHitsToScore){ 646 scores[cycle]=-9999; 647 counts[cycle]=0; 648 }else{ 649 650 // final int maxQuickScore=maxQuickScore(offsets, keyScores); 651 // System.err.println("maxScore = "+maxScore); 652 653 if(numHits<keys.length){ 654 int[][] r=shrink(starts, stops, offsets, keyScores, offsets.length); 655 if(r!=null){ 656 starts=r[0]; 657 stops=r[1]; 658 offsets=r[2]; 659 keyScores=r[4]; 660 // assert(numHits==offsets.length) : "\n"+Arrays.toString(starts)+"\n\n"+Arrays.toString(starts2)+"\n\n"+Arrays.toString(stops2) 661 // +"\n\n"+numHits+", "+offsets.length; 662 } 663 } 664 665 assert(numHits==offsets.length) : numHits+", "+offsets.length+", "+starts.length+", "+keys.length; 666 assert(numHits==keyScores.length); 667 heap.clear(); 668 final Quad64[] triples=tripleStorage; 669 final int[] values=valueArray; 670 671 int[] temp=findMaxQscore2(starts, stops, offsets, keyScores, baseChrom, triples, values, minHitsToScore, true, 672 bestqscore>=maxQuickScore && allBasesCovered); 673 674 scores[cycle]=temp[0]; 675 counts[cycle]=temp[1]; 676 677 bestqscore=Tools.max(temp[0], bestqscore); 678 maxHits=Tools.max(maxHits, temp[1]); 679 if(bestqscore>=maxQuickScore && allBasesCovered){ 680 assert(bestqscore==maxQuickScore); 681 assert(maxHits==keysP.length) : 682 "\nTemp: \t"+Arrays.toString(temp)+", cycle="+cycle+"\n" + 683 "Scores: \t"+Arrays.toString(scores)+ 684 "Counts: \t"+Arrays.toString(counts)+ 685 "bestqscore: \t"+bestqscore+ 686 "maxHits: \t"+maxHits+ 687 "maxQuickScore: \t"+maxQuickScore+ 688 "numHits: \t"+numHits+ 689 "minHitsToScore: \t"+minHitsToScore+ 690 "keys.length: \t"+keys.length; 691 692 minHitsToScore=Tools.max(minHitsToScore, maxHits); 693 694 { 695 //This early exit is optional. Does not seem to impact speed much either way. 696 bestScores[1]=Tools.max(bestScores[1], maxHits); 697 bestScores[3]=Tools.max(bestScores[3], bestqscore); 698 return ret; 699 } 700 } 701 } 702 } 703 } 704 705 bestScores[1]=Tools.max(bestScores[1], maxHits); 706 bestScores[3]=Tools.max(bestScores[3], bestqscore); 707 708 if(!RETAIN_BEST_QCUTOFF){bestScores[2]=-9999;} 709 710 return ret; 711 } 712 713 714 /** Search a single block and strand */ 715 public final ArrayList<SiteScore> find(int[] keys, final byte[] bases, final byte[] baseScores, int[] keyScores, 716 final int chrom, final byte strand, 717 int[] offsets, final boolean obeyLimits, ArrayList<SiteScore> ssl, int[] bestScores, 718 final boolean allBasesCovered, final int maxScore, final boolean fullyDefined){ 719 720 assert(checkOffsets(offsets)) : Arrays.toString(offsets); 721 722 int[] starts=startArray; 723 int[] stops=stopArray; 724 725 int numHits=getHits(keys, chrom, Integer.MAX_VALUE, starts, stops); 726 if(numHits<MIN_APPROX_HITS_TO_KEEP){return ssl;} 727 728 if(USE_SLOWALK3){ 729 if(!RETAIN_BEST_SCORES){Arrays.fill(bestScores, 0);} 730 ssl=slowWalk3(starts, stops, bases, baseScores, keyScores, offsets, chrom, strand, obeyLimits, ssl, bestScores, allBasesCovered, maxScore, fullyDefined); 731 }else{ 732 ssl=slowWalk2(starts, stops, bases, baseScores, keyScores, offsets, chrom, strand, obeyLimits, ssl, fullyDefined); 733 } 734 735 return ssl; 736 737 } 738 739 /** Compress arrays by removing null/empty lists */ 740 private final int[][] shrink(int[] starts, int[] stops, int[] offsets, int[] keyScores, final int len){ 741 742 743 int numHits=0; 744 for(int i=0; i<len; i++){ 745 if(starts[i]>=0){numHits++;} 746 } 747 748 if(numHits==offsets.length){ 749 return null; 750 } 751 int[][] r=shrinkReturn3; 752 int[] starts2=startArray; 753 int[] stops2=stopArray; 754 int[] offsets2=getOffsetArray(numHits); 755 int[] keyScores2=new int[numHits]; 756 757 for(int i=0, j=0; i<len; i++){ 758 if(starts[i]>=0){ 759 starts2[j]=starts[i]; 760 stops2[j]=stops[i]; 761 offsets2[j]=offsets[i]; 762 keyScores2[j]=keyScores[i]; 763 j++; 764 } 765 } 766 r[0]=starts2; 767 r[1]=stops2; 768 r[2]=offsets2; 769 r[4]=keyScores2; 770 return r; 771 } 772 773 /** Removes "-1" keys. */ 774 private final int[][] shrink2(int[] offsets, int[] keys, int[] keyScores){ 775 776 777 int numHits=0; 778 for(int i=0; i<keys.length; i++){ 779 if(keys[i]>=0){numHits++;} 780 } 781 782 783 assert(checkOffsets(offsets)) : Arrays.toString(offsets); 784 if(numHits==keys.length){ 785 return null; 786 } 787 int[][] r=shrinkReturn2; 788 int[] offsets2=getOffsetArray(numHits); 789 assert(offsets2!=offsets); 790 assert(offsets2.length<offsets.length); 791 int[] keys2=new int[numHits]; 792 int[] keyScores2=new int[numHits]; 793 794 for(int i=0, j=0; i<keys.length; i++){ 795 if(keys[i]>=0){ 796 offsets2[j]=offsets[i]; 797 keys2[j]=keys[i]; 798 keyScores2[j]=keyScores[i]; 799 j++; 800 } 801 } 802 assert(checkOffsets(offsets2)) : "\nnumHits="+numHits+"\n"+Arrays.toString(offsets)+" -> \n"+Arrays.toString(offsets2)+"\n"+ 803 "\n"+Arrays.toString(keys)+" -> \n"+Arrays.toString(keys2)+"\n"; 804 r[0]=offsets2; 805 r[1]=keys2; 806 r[2]=keyScores2; 807 return r; 808 } 809 810 811 /** This uses a heap to track next column to increment */ 812 private final ArrayList<SiteScore> slowWalk2(int[] starts, int[] stops, final byte[] bases, 813 final byte[] baseScores, int[] keyScores, int[] offsets, 814 final int baseChrom_, final byte strand, final boolean obeyLimits, ArrayList<SiteScore> ssl, final boolean fullyDefined){ 815 816 if(SHRINK_BEFORE_WALK){ 817 int[][] r=shrink(starts, stops, offsets, keyScores, offsets.length); 818 if(r!=null){ 819 starts=r[0]; 820 stops=r[1]; 821 offsets=r[2]; 822 keyScores=r[4]; 823 } 824 } 825 826 final int numHits=offsets.length; //After shrink 827 828 829 assert(numHits==offsets.length); 830 assert(numHits==keyScores.length); 831 832 //// System.out.println("After SHRINK_BEFORE_WALK: numHits = "+hits.length); 833 // Block b=index[baseChrom_]; 834 // int[][] hits=b.getHitLists(starts, stops); 835 // if(SHRINK_BEFORE_WALK){ 836 // Object[] r=shrink(hits, offsets, keyScores); 837 // if(r!=null){ 838 // hits=(int[][])r[0]; 839 // offsets=(int[])r[1]; 840 // keyScores=(int[])r[3]; 841 // } 842 // } 843 // 844 // final int numHits=hits.length; //After shrink 845 846 847 assert(numHits==offsets.length); 848 assert(numHits==keyScores.length); 849 850 assert(!(!SHRINK_BEFORE_WALK && ADD_SCORE_Z)); 851 852 //This can be done before or after shrinking, but the results will change depending on MIN_SCORE_MULT and etc. 853 final int maxScore=maxScore(offsets, baseScores, keyScores, bases.length, true); 854 // final int maxQuickScore=(!USE_EXTENDED_SCORE ? maxScore : maxQuickScore(offsets)); 855 // System.err.println("maxScore = "+maxScore); 856 857 // final int minScore=(obeyLimits ? (int)(MIN_SCORE_MULT*maxScore) : (int)(MIN_SCORE_MULT*0.85f*maxScore)); 858 final int minScore=(obeyLimits ? (int)(MIN_SCORE_MULT*maxScore) : (int)(MIN_SCORE_MULT*1.25f*maxScore)); 859 // final int minQuickScore=(!USE_EXTENDED_SCORE ? minScore : (int)(maxQuickScore*0.15f)); 860 // final int minScore=(int)(MIN_SCORE_MULT*maxScore); 861 // System.err.println("minScore = "+minScore); 862 863 final int baseChrom=baseChrom(baseChrom_); 864 865 866 // final PriorityQueue<Quad64> heap=new PriorityQueue<Quad64>(numHits); 867 heap.clear(); 868 // final Quad64[] triples=new Quad64[numHits]; 869 final Quad64[] triples=tripleStorage; 870 871 final Block b=index[baseChrom]; 872 final int[] values=valueArray; 873 final int[] sizes=sizeArray; 874 final int[] locArray=(USE_EXTENDED_SCORE ? getLocArray(bases.length) : null); 875 876 for(int i=0; i<numHits; i++){ 877 final int[] sites=b.sites; 878 final int start=starts[i]; 879 sizes[i]=b.length(start, stops[i]); 880 assert(sizes[i]>0); 881 882 int a=sites[start]; 883 int a2; 884 if((a&SITE_MASK)>=offsets[i]){ 885 a2=a-offsets[i]; 886 }else{ 887 int ch=numberToChrom(a, baseChrom); 888 int st=numberToSite(a); 889 int st2=Tools.max(st-offsets[i], 0); 890 a2=toNumber(st2, ch); 891 } 892 assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)); 893 894 Quad64 t=triples[i]; 895 assert(t!=null) : "Should be using tripleStorage"; 896 assert(i==t.column); 897 t.row=start; 898 t.site=((long)a2)&0xFFFFFFFFL; 899 t.list=sites; 900 values[i]=a2; 901 902 heap.add(t); 903 } 904 905 if(ssl==null){ssl=new ArrayList<SiteScore>(8);} 906 907 int currentTopScore=-999999999; 908 909 int cutoff=minScore; 910 911 int maxHits=0; 912 int approxHitsCutoff=MIN_APPROX_HITS_TO_KEEP; 913 914 // System.out.println("\nEntering SS loop:"); 915 // System.out.println("maxScore="+maxScore+"\tminScore="+minScore+"\tcurrentTopScore="+currentTopScore+"\n" + 916 // "cutoff="+cutoff+"\tmaxHits="+maxHits+"\tapproxHitsCutoff="+approxHitsCutoff); 917 // System.out.println(); 918 919 SiteScore prevSS=null; 920 while(!heap.isEmpty()){ 921 Quad64 t=heap.peek(); 922 final int site=(int)t.site; //*** TODO 923 final int centerIndex=t.column; 924 925 int maxNearbySite=site; 926 927 928 int approxHits=0; 929 930 {//Inner loop 931 final int minsite=subUnsigned(site, MAX_INDEL); 932 final int maxsite=addUnsigned(site, MAX_INDEL2); 933 for(int column=0, chances=numHits-approxHitsCutoff; column<numHits && chances>=0; column++){ 934 final int x=values[column]; 935 assert(x==(int)triples[column].site); 936 if(x>=minsite && x<=maxsite){ 937 maxNearbySite=(x>maxNearbySite ? x : maxNearbySite); 938 approxHits++; 939 }else{chances--;} 940 } 941 } 942 943 assert(centerIndex>=0) : centerIndex; 944 assert(approxHits>=1 || approxHitsCutoff>1) : approxHits+", "+approxHitsCutoff+", "+numHits+", "+t.column; 945 if(approxHits>=approxHitsCutoff){ 946 947 int score; 948 949 int mapStart=site, mapStop=maxNearbySite; 950 951 if(USE_EXTENDED_SCORE){ 952 final int chrom=numberToChrom(site, baseChrom); 953 score=extendScore(bases, baseScores, offsets, values, chrom, centerIndex, locArray, numHits, approxHits); 954 if(true/*USE_AFFINE_SCORE*/){ 955 //Correct begin and end positions if they changed. 956 int min=Integer.MAX_VALUE; 957 int max=Integer.MIN_VALUE; 958 for(int i=0; i<locArray.length; i++){ 959 int x=locArray[i]; 960 if(x>-1){ 961 if(x<min){min=x;} 962 if(x>max){max=x;} 963 } 964 } 965 966 // assert(min>-1 && max>-1) : Arrays.toString(locArray); //TODO: How did this assertion trigger? 967 if(min<0 || max<0){ 968 //Note: This error can trigger if minChrom and maxChrom do not align to block boundaries 969 if(!Shared.anomaly){ 970 Shared.anomaly=true; 971 System.err.println("Anomaly in "+getClass().getName()+".slowWalk: "+ 972 chrom+", "+mapStart+", "+mapStop+", "+centerIndex+", "+ 973 Arrays.toString(locArray)+"\n"+ 974 Arrays.toString(values)+"\n"+ 975 new String(bases)+"\nstrand="+strand+"\n"); 976 System.err.println(); 977 } 978 score=-99999; 979 } 980 981 982 mapStart=toNumber(min, chrom); 983 mapStop=toNumber(max, chrom); 984 985 986 987 // System.err.println("site="+site+", maxNearbySite="+maxNearbySite+", min="+min+", max="+max+ 988 // ", mapStart="+mapStart+", mapStop="+mapStop); 989 990 // if(chrom==17 && absdif(min, 30354420)<2000){ 991 // System.err.println("\n*****\n"); 992 // System.err.println("site="+site+" ("+numberToSite(site)+"), maxNearbySite="+maxNearbySite+ 993 // " ("+numberToSite(maxNearbySite)+"), min="+min+", max="+max+ 994 // ", mapStart="+mapStart+", mapStop="+mapStop); 995 // System.err.println(); 996 // System.err.println(Arrays.toString(locArray)); 997 // System.err.println(); 998 // System.err.println("chrom="+chrom); 999 // System.err.println("score="+score); 1000 // } 1001 } 1002 }else{ 1003 score=quickScore(values, keyScores, centerIndex, offsets, sizes, true, approxHits, numHits); 1004 if(ADD_SCORE_Z){ 1005 int scoreZ=scoreZ2(values, centerIndex, offsets, approxHits, numHits); 1006 score+=scoreZ; 1007 } 1008 } 1009 1010 1011 // score=score(values, centerIndex, offsets, hits); 1012 // if(ADD_SCORE_Z){ 1013 // int scoreZ=scoreZ2(values, centerIndex, offsets); 1014 // score+=scoreZ; 1015 // } 1016 // 1017 // if(USE_EXTENDED_SCORE){ 1018 // if(score>minQuickScore){ 1019 //// System.out.println(score+" > "+minQuickScore); 1020 // score=extendScore(read, offsets, values, numberToChrom(site, baseChrom), centerIndex, locArray); 1021 // }else{ 1022 //// System.out.print("."); 1023 // score=-1; 1024 // } 1025 // } 1026 1027 1028 // System.err.println("maxScore = "+maxScore); 1029 // System.err.println("hits = "+approxHits+" / "+approxHitsCutoff); 1030 // System.err.println("score = "+score+" / "+cutoff); 1031 1032 if(score>=cutoff){ 1033 1034 // System.err.println("Passed!"); 1035 1036 // System.out.println("approxHits="+approxHits+" / "+approxHitsCutoff); 1037 // System.out.println("score="+score+" / "+cutoff); 1038 // System.out.println("strand="+Gene.strandCodes[strand]); 1039 // System.out.println("center="+values[centerIndex]); 1040 // System.out.println("values="+Arrays.toString(values)); 1041 // extendScore(read, offsets, values, numberToChrom(site, baseChrom), centerIndex); 1042 // System.out.println(); 1043 1044 if(score>currentTopScore){ 1045 // System.err.println("New top score!"); 1046 1047 if(DYNAMICALLY_TRIM_LOW_SCORES){ 1048 1049 maxHits=Tools.max(approxHits, maxHits); 1050 // approxHitsCutoff=Tools.max(approxHitsCutoff, maxHits); 1051 approxHitsCutoff=Tools.max(approxHitsCutoff, maxHits-1); //More sensitive, but slower 1052 1053 cutoff=Tools.max(cutoff, (int)(score*DYNAMIC_SCORE_THRESH)); 1054 1055 // cutoff=Tools.max(cutoff, minScore+(int)((score-minScore)*DYNAMIC_SCORE_THRESH)); 1056 if(USE_EXTENDED_SCORE && score>=maxScore){ 1057 cutoff=Tools.max(cutoff, (int)(score*0.95f)); 1058 } 1059 } 1060 1061 currentTopScore=score; 1062 1063 // System.out.println("New top score: "+currentTopScore+" \t("+cutoff+")"); 1064 } 1065 1066 // final int chrom=numberToChrom(site, baseChrom); 1067 // final int site2=numberToSite(site); 1068 // final int site3=numberToSite(maxNearbySite)+read.length; 1069 1070 final int chrom=numberToChrom(mapStart, baseChrom); 1071 final int site2=numberToSite(mapStart); 1072 final int site3=numberToSite(mapStop)+bases.length-1; 1073 1074 assert(NUM_CHROM_BITS==0 || site2<SITE_MASK-1000) : "chrom="+chrom+", strand="+strand+", site="+site+ 1075 ", maxNearbySite="+maxNearbySite+", site2="+site2+", site3="+site3+", read.length="+bases.length+ 1076 "\n\n"+Arrays.toString(b.getHitList(centerIndex)); 1077 assert(site2<site3) : "chrom="+chrom+", strand="+strand+", site="+site+ 1078 ", maxNearbySite="+maxNearbySite+", site2="+site2+", site3="+site3+", read.length="+bases.length; 1079 1080 1081 //Note: I could also do this as soon as score is calculated. 1082 // if(ADD_SCORE_Z){ 1083 // int scoreZ=scoreZ2(values, centerIndex, offsets); 1084 // score+=scoreZ; 1085 // } 1086 1087 //This block is optional, but tries to eliminate multiple identical alignments 1088 // SiteScore prevSS=(ssl.size()<1 ? null : ssl.get(ssl.size()-1)); 1089 1090 SiteScore ss=null; 1091 final boolean perfect1=USE_EXTENDED_SCORE && score==maxScore && fullyDefined; 1092 1093 int[] gapArray=null; 1094 if(site3-site2>=MINGAP+bases.length){ 1095 gapArray=makeGapArray(locArray, mapStart, MINGAP); 1096 if(gapArray!=null){ 1097 int sub=site2-mapStart;//thus site2=mapStart+sub 1098 for(int i=0; i<gapArray.length; i++){ 1099 gapArray[i]+=sub; 1100 } 1101 assert(gapArray[0]==mapStart); 1102 assert(gapArray[gapArray.length-1]==mapStop); 1103 } 1104 assert(false) : Arrays.toString(locArray); 1105 } 1106 1107 if(gapArray==null && prevSS!=null && prevSS.gaps==null && 1108 prevSS.chrom==chrom && prevSS.strand==strand && overlap(prevSS.start, prevSS.stop, site2, site3)){ 1109 1110 int betterScore=Tools.max(score, prevSS.score); 1111 int minStart=Tools.min(prevSS.start, site2); 1112 int maxStop=Tools.max(prevSS.stop, site3); 1113 final boolean perfect2=USE_EXTENDED_SCORE && prevSS.score==maxScore && fullyDefined; 1114 assert(!USE_EXTENDED_SCORE || perfect2==prevSS.perfect); 1115 1116 boolean shortEnough=(!LIMIT_SUBSUMPTION_LENGTH_TO_2X || (maxStop-minStart<2*bases.length)); 1117 1118 if(prevSS.start==site2 && prevSS.stop==site3){ 1119 prevSS.score=prevSS.quickScore=betterScore; 1120 }else if(SUBSUME_SAME_START_SITES && shortEnough && prevSS.start==site2){ 1121 if(perfect2){ 1122 //do nothing 1123 }else if(perfect1){ 1124 prevSS.setStop(site3); 1125 prevSS.setPerfect(); 1126 }else{ 1127 prevSS.setStop(maxStop); 1128 } 1129 prevSS.score=prevSS.quickScore=betterScore; 1130 }else if(SUBSUME_SAME_STOP_SITES && shortEnough && prevSS.stop==site3){ 1131 if(perfect2){ 1132 //do nothing 1133 }else if(perfect1){ 1134 prevSS.setStart(site2); 1135 prevSS.setPerfect(); 1136 }else{ 1137 prevSS.setStart(minStart); 1138 } 1139 prevSS.score=prevSS.quickScore=betterScore; 1140 }else if(SUBSUME_OVERLAPPING_SITES && shortEnough && (maxStop-minStart<=bases.length+MAX_SUBSUMPTION_LENGTH) 1141 && !perfect1 && !perfect2){ 1142 prevSS.setLimits(minStart, maxStop); 1143 prevSS.score=prevSS.quickScore=betterScore; 1144 }else{ 1145 ss=new SiteScore(chrom, strand, site2, site3, approxHits, score, false, perfect1); 1146 if(!perfect1){ss.setPerfect(bases);} 1147 assert(!perfect1 || ss.stop-ss.start==bases.length-1); 1148 } 1149 assert(!perfect2 || prevSS.stop-prevSS.start==bases.length-1); 1150 }else{ 1151 ss=new SiteScore(chrom, strand, site2, site3, approxHits, score, false, perfect1); 1152 if(!perfect1){ss.setPerfect(bases);} 1153 ss.gaps=gapArray; 1154 if(gapArray!=null){ 1155 System.err.println(ss.toText()+"\t"+Arrays.toString(gapArray)+"\n"+Arrays.toString(locArray)+"\n"); 1156 } 1157 } 1158 1159 if(ss!=null){ 1160 // System.out.println("Added site "+ss.toText()); 1161 ssl.add(ss); 1162 prevSS=ss; 1163 }else{ 1164 // System.out.println("Subsumed site "+new SiteScore(chrom, strand, site2, site3, score).toText()); 1165 } 1166 1167 // if(prevSS!=null && prevSS.chrom==chrom && prevSS.strand==strand && overlap(prevSS.start, prevSS.stop, site2, site3)){ 1168 // int betterScore=Tools.max(score, prevSS.score); 1169 // if(prevSS.start==site2 && prevSS.stop==site3){ 1170 // prevSS.score=prevSS.quickScore=betterScore; 1171 // }else if(prevSS.start==site2 1172 // /*isWithin(prevSS.start, prevSS.stop, site2, site3) || 1173 // isWithin(site2, site3, prevSS.start, prevSS.stop)*/){ 1174 // prevSS.score=prevSS.quickScore=betterScore; 1175 // assert(prevSS.start<prevSS.stop); 1176 //// prevSS.start=Tools.min(prevSS.start, site2); 1177 // prevSS.stop=Tools.max(prevSS.stop, site3); 1178 // assert(prevSS.start<prevSS.stop); 1179 // }else{ 1180 // SiteScore ss=new SiteScore(chrom, strand, site2, site3, score); 1181 // ssl.add(ss); 1182 // prevSS=ss; 1183 // } 1184 // }else{ 1185 // SiteScore ss=new SiteScore(chrom, strand, site2, site3, score); 1186 // ssl.add(ss); 1187 // prevSS=ss; 1188 // } 1189 1190 } 1191 } 1192 1193 while(site==(int)heap.peek().site){ //Remove all identical elements, and add subsequent elements 1194 final Quad64 t2=heap.poll(); 1195 final int row=t2.row+1, col=t2.column; 1196 if(row<stops[col]){ 1197 t2.row=row; 1198 1199 int a=t2.list[row]; 1200 int a2; 1201 if((a&SITE_MASK)>=offsets[col]){ 1202 a2=a-offsets[col]; 1203 1204 assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) : 1205 "baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", strand="+strand+", site="+site+ 1206 ", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col]; 1207 }else{ 1208 int ch=numberToChrom(a, baseChrom); 1209 int st=numberToSite(a); 1210 int st2=Tools.max(st-offsets[col], 0); 1211 a2=toNumber(st2, ch); 1212 1213 assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) : 1214 "baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", strand="+strand+", site="+site+ 1215 ", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col]; 1216 } 1217 1218 assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) : 1219 "baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", strand="+strand+", site="+site+ 1220 ", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col]; 1221 1222 t2.site=((long)a2)&0xFFFFFFFFL; 1223 values[col]=a2; 1224 heap.add(t2); 1225 } 1226 if(heap.isEmpty()){break;} 1227 } 1228 1229 } 1230 1231 return ssl; 1232 } 1233 1234 /** This uses a heap to track next column to increment */ 1235 private final ArrayList<SiteScore> slowWalk3(int[] starts, int[] stops, final byte[] bases, 1236 final byte[] baseScores, int[] keyScores, int[] offsets, 1237 final int baseChrom_, final byte strand, final boolean obeyLimits, ArrayList<SiteScore> ssl, 1238 int[] bestScores, final boolean allBasesCovered, final int maxScore, final boolean fullyDefined){ 1239 assert(USE_EXTENDED_SCORE); 1240 1241 final int numKeys=offsets.length; //Before shrink 1242 1243 //This can be done before or after shrinking, but the results will change depending on MIN_SCORE_MULT and etc. 1244 final int maxQuickScore=maxQuickScore(offsets, keyScores); 1245 1246 if(SHRINK_BEFORE_WALK){ 1247 int[][] r=shrink(starts, stops, offsets, keyScores, offsets.length); 1248 if(r!=null){ 1249 starts=r[0]; 1250 stops=r[1]; 1251 offsets=r[2]; 1252 keyScores=r[4]; 1253 } 1254 } 1255 1256 final int numHits=offsets.length; //After shrink 1257 1258 1259 assert(numHits==offsets.length); 1260 assert(numHits==keyScores.length); 1261 1262 usedKeys+=numHits; 1263 usedKeyIterations++; 1264 1265 final boolean filter_by_qscore=(FILTER_BY_QSCORE && numKeys>=5); 1266 1267 assert(!(!SHRINK_BEFORE_WALK && ADD_SCORE_Z)); 1268 1269 1270 // final int minScore=(obeyLimits ? (int)(MIN_SCORE_MULT*maxScore) : (int)(MIN_SCORE_MULT*0.85f*maxScore)); 1271 final int minScore=(obeyLimits ? (int)(MIN_SCORE_MULT*maxScore) : (int)(MIN_SCORE_MULT*1.25f*maxScore)); 1272 final int minQuickScore=(int)(MIN_QSCORE_MULT*maxQuickScore); 1273 1274 final int baseChrom=baseChrom(baseChrom_); 1275 1276 heap.clear(); 1277 1278 final Quad64[] triples=tripleStorage; 1279 1280 final int[] values=valueArray; 1281 final int[] sizes=sizeArray; 1282 final int[] locArray=(USE_EXTENDED_SCORE ? getLocArray(bases.length) : null); 1283 final Block b=index[baseChrom]; 1284 1285 if(ssl==null){ssl=new ArrayList<SiteScore>(8);} 1286 1287 int currentTopScore=bestScores[0]; 1288 int cutoff=Tools.max(minScore, (int)(currentTopScore*DYNAMIC_SCORE_THRESH)); 1289 1290 int qcutoff=Tools.max(bestScores[2], minQuickScore); 1291 int bestqscore=bestScores[3]; 1292 int maxHits=bestScores[1]; 1293 int perfectsFound=bestScores[5]; 1294 assert((currentTopScore>=maxScore) == (perfectsFound>0)) : currentTopScore+", "+maxScore+", "+perfectsFound+", "+maxHits+", "+numHits; 1295 int approxHitsCutoff=calcApproxHitsCutoff(numKeys, maxHits, MIN_APPROX_HITS_TO_KEEP, currentTopScore>=maxScore); 1296 if(approxHitsCutoff>numHits){return ssl;} 1297 1298 final boolean shortCircuit=(allBasesCovered && numKeys==numHits && filter_by_qscore); 1299 1300 1301 assert(USE_EXTENDED_SCORE); 1302 1303 if(currentTopScore>=maxScore){ 1304 assert(currentTopScore==maxScore); 1305 qcutoff=Tools.max(qcutoff, (int)(maxQuickScore*DYNAMIC_QSCORE_THRESH_PERFECT)); 1306 } 1307 1308 1309 for(int i=0; i<numHits; i++){ 1310 final int[] sites=b.sites; 1311 final int start=starts[i]; 1312 sizes[i]=b.length(start, stops[i]); 1313 assert(sizes[i]>0); 1314 1315 int a=sites[start]; 1316 int a2; 1317 if((a&SITE_MASK)>=offsets[i]){ 1318 a2=a-offsets[i]; 1319 }else{ 1320 int ch=numberToChrom(a, baseChrom); 1321 int st=numberToSite(a); 1322 int st2=Tools.max(st-offsets[i], 0); 1323 a2=toNumber(st2, ch); 1324 } 1325 assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)); 1326 1327 Quad64 t=triples[i]; 1328 assert(t!=null) : "Should be using tripleStorage"; 1329 assert(i==t.column); 1330 t.row=start; 1331 t.site=((long)a2)&0xFFFFFFFFL; 1332 t.list=sites; 1333 values[i]=a2; 1334 1335 heap.add(t); 1336 } 1337 1338 // System.out.println("\nEntering SS loop:"); 1339 // System.out.println("maxScore="+maxScore+"\tminScore="+minScore+"\tcurrentTopScore="+currentTopScore+"\n" + 1340 // "cutoff="+cutoff+"\tmaxHits="+maxHits+"\tapproxHitsCutoff="+approxHitsCutoff); 1341 // System.out.println("maxQuickScore="+maxQuickScore+"\tminQuickScore="+minQuickScore+"\tqcutoff="+qcutoff); 1342 1343 1344 SiteScore prevSS=null; 1345 while(!heap.isEmpty()){ 1346 Quad64 t=heap.peek(); 1347 final int site=(int)t.site; 1348 final int centerIndex=t.column; 1349 1350 int maxNearbySite=site; 1351 1352 1353 int approxHits=0; 1354 1355 {//Inner loop 1356 final int minsite=subUnsigned(site, MAX_INDEL); 1357 final int maxsite=addUnsigned(site, MAX_INDEL2); 1358 for(int column=0, chances=numHits-approxHitsCutoff; column<numHits && chances>=0; column++){ 1359 final int x=values[column]; 1360 assert(x==(int)triples[column].site); 1361 if(x>=minsite && x<=maxsite){ 1362 maxNearbySite=(x>maxNearbySite ? x : maxNearbySite); 1363 approxHits++; 1364 }else{chances--;} 1365 } 1366 } 1367 1368 assert(centerIndex>=0) : centerIndex; 1369 assert(approxHits>=1 || approxHitsCutoff>1) : approxHits+", "+approxHitsCutoff+", "+numHits+", "+t.column; 1370 if(approxHits>=approxHitsCutoff){ 1371 1372 int score; 1373 int qscore=(filter_by_qscore ? quickScore(values, keyScores, centerIndex, offsets, sizes, true, approxHits, numHits) : qcutoff); 1374 if(ADD_SCORE_Z){ 1375 int scoreZ=scoreZ2(values, centerIndex, offsets, approxHits, numHits); 1376 qscore+=scoreZ; 1377 } 1378 1379 int mapStart=site, mapStop=maxNearbySite; 1380 1381 assert(USE_EXTENDED_SCORE); 1382 1383 boolean locArrayValid=false; 1384 if(qscore<qcutoff){ 1385 score=-1; 1386 }else{ 1387 1388 final int chrom=numberToChrom(site, baseChrom); 1389 1390 //TODO Note that disabling the shortCircuit code seems to make things run 2% faster (with identical results). 1391 //However, theoretically, shortCircuit should be far more efficient. Test both ways on cluster and on a larger run. 1392 //May have something to do with compiler loop optimizations. 1393 if(shortCircuit && qscore==maxQuickScore){ 1394 assert(approxHits==numKeys); 1395 // assert(maxScore==extendScore(bases, baseScores, offsets, values, chrom, centerIndex, locArray)) : 1396 // maxScore+", "+extendScore(bases, baseScores, offsets, values, chrom, centerIndex, locArray)+ 1397 // ", "+approxHits+", "+numKeys+", "+qscore+", "+maxQuickScore+", "+ 1398 // (scoreZ2(values, centerIndex, offsets, approxHits)+quickScore(values, keyScores, centerIndex, offsets, hits, false, approxHits))+", "+ 1399 // (scoreZ2(values, centerIndex, offsets, approxHits)+quickScore(values, keyScores, centerIndex, offsets, hits, true, approxHits)) 1400 // +"\noffsets="+Arrays.toString(offsets)+"\n"; 1401 score=maxScore; 1402 }else{ 1403 if(verbose){ 1404 System.err.println("Extending "+Arrays.toString(values)); 1405 } 1406 score=extendScore(bases, baseScores, offsets, values, chrom, centerIndex, locArray, numHits, approxHits); 1407 locArrayValid=true; 1408 1409 if(verbose){ 1410 System.err.println("score: "+score); 1411 System.err.println("locArray: "+Arrays.toString(locArray)); 1412 } 1413 1414 //Correct begin and end positions if they changed. 1415 int min=Integer.MAX_VALUE; 1416 int max=Integer.MIN_VALUE; 1417 for(int i=0; i<locArray.length; i++){ 1418 int x=locArray[i]; 1419 if(x>-1){ 1420 if(x<min){min=x;} 1421 if(x>max){max=x;} 1422 } 1423 } 1424 1425 if(score>=maxScore){ 1426 assert(min==max && min>-1) : "\n"+score+", "+maxScore+", "+min+", "+max+ 1427 ", "+(max-min)+", "+bases.length+"\n"+Arrays.toString(locArray)+"\n"; 1428 } 1429 1430 // assert(min>-1 && max>-1) : Arrays.toString(locArray); //TODO: How did this assertion trigger? 1431 if(min<0 || max<0){ 1432 if(!Shared.anomaly){ 1433 Shared.anomaly=true; 1434 System.err.println("Anomaly in "+getClass().getName()+".slowWalk: "+ 1435 chrom+", "+mapStart+", "+mapStop+", "+centerIndex+", "+ 1436 Arrays.toString(locArray)+"\n"+ 1437 Arrays.toString(values)+"\n"+ 1438 new String(bases)+"\nstrand="+strand+"\n"); 1439 System.err.println(); 1440 } 1441 score=-99999; 1442 } 1443 1444 //mapStart and mapStop are indices 1445 mapStart=toNumber(min, chrom); 1446 mapStop=toNumber(max, chrom); 1447 1448 if(score>=maxScore){ 1449 assert(mapStop-mapStart==0) : "\n"+score+", "+maxScore+", "+min+", "+max+ 1450 ", "+(max-min)+", "+(mapStop-mapStart)+", "+bases.length+"\n"+Arrays.toString(locArray)+"\n"; 1451 } 1452 } 1453 1454 if(score==maxScore){ 1455 qcutoff=Tools.max(qcutoff, (int)(maxQuickScore*DYNAMIC_QSCORE_THRESH_PERFECT)); 1456 approxHitsCutoff=calcApproxHitsCutoff(numKeys, maxHits, MIN_APPROX_HITS_TO_KEEP, true); 1457 } 1458 1459 if(score>=cutoff){ 1460 qcutoff=Tools.max(qcutoff, (int)(qscore*DYNAMIC_QSCORE_THRESH)); 1461 bestqscore=Tools.max(qscore, bestqscore); 1462 } 1463 } 1464 1465 // System.err.println("maxScore = "+maxScore); 1466 // System.err.println("hits = "+approxHits+" / "+approxHitsCutoff); 1467 // System.err.println("score = "+score+" / "+cutoff); 1468 1469 if(score>=cutoff){ 1470 1471 // System.err.println("Passed!"); 1472 1473 // System.out.println("approxHits="+approxHits+" / "+approxHitsCutoff); 1474 // System.out.println("score="+score+" / "+cutoff); 1475 // System.out.println("strand="+Gene.strandCodes[strand]); 1476 // System.out.println("center="+values[centerIndex]); 1477 // System.out.println("values="+Arrays.toString(values)); 1478 // extendScore(read, offsets, values, numberToChrom(site, baseChrom), centerIndex); 1479 // System.out.println(); 1480 1481 if(score>currentTopScore){ 1482 // System.err.println("New top score!"); 1483 1484 if(DYNAMICALLY_TRIM_LOW_SCORES){ 1485 1486 maxHits=Tools.max(approxHits, maxHits); 1487 approxHitsCutoff=calcApproxHitsCutoff(numKeys, maxHits, approxHitsCutoff, currentTopScore>=maxScore); 1488 1489 cutoff=Tools.max(cutoff, (int)(score*DYNAMIC_SCORE_THRESH)); 1490 if(score>=maxScore){ 1491 assert(USE_EXTENDED_SCORE); 1492 cutoff=Tools.max(cutoff, (int)(score*0.95f)); 1493 } 1494 } 1495 1496 currentTopScore=score; 1497 1498 // System.out.println("New top score: "+currentTopScore+" \t("+cutoff+")"); 1499 } 1500 1501 final int chrom=numberToChrom(mapStart, baseChrom); 1502 final int site2=numberToSite(mapStart); 1503 final int site3=numberToSite(mapStop)+bases.length-1; 1504 1505 assert(NUM_CHROM_BITS==0 || site2<SITE_MASK-1000) : "chrom="+chrom+", strand="+strand+", site="+site+ 1506 ", maxNearbySite="+maxNearbySite+", site2="+site2+", site3="+site3+", read.length="+bases.length+ 1507 "\n\n"+Arrays.toString(b.getHitList(centerIndex)); assert(site2<site3) : R+chrom+R+strand+R+site+ R+maxNearbySite+R+site2+R+site3+R+bases.length; int[] gapArray=null; if(site3-site2>=MINGAP+bases.length)1508 assert(site2<site3) : "chrom="+chrom+", strand="+strand+", site="+site+ 1509 ", maxNearbySite="+maxNearbySite+", site2="+site2+", site3="+site3+", read.length="+bases.length; 1510 1511 1512 int[] gapArray=null; 1513 if(site3-site2>=MINGAP+bases.length){ 1514 assert(locArrayValid) : "Loc array was not filled."; 1515 // System.err.println("****\n"+Arrays.toString(locArray)+"\n"); 1516 // int[] clone=locArray.clone(); 1517 gapArray=makeGapArray(locArray, site2, MINGAP); 1518 if(gapArray!=null){ 1519 // System.err.println(Arrays.toString(locArray)+"\n"); 1520 // System.err.println(Arrays.toString(gapArray)); 1521 // 1522 //// int sub=site2-mapStart;//thus site2=mapStart+sub 1523 //// for(int i=0; i<gapArray.length; i++){ 1524 //// gapArray[i]+=sub; 1525 //// } 1526 //// System.err.println(Arrays.toString(gapArray)); 1527 // 1528 // System.err.println(mapStart+" -> "+site2); 1529 // System.err.println(mapStop+" -> "+site3); 1530 1531 assert(gapArray[0]>=site2 && gapArray[0]-site2<bases.length); 1532 assert(gapArray[gapArray.length-1]<=site3 && site3-gapArray[gapArray.length-1]<bases.length) : "\n"+ 1533 mapStart+" -> "+site2+"\n"+ 1534 mapStop+" -> "+site3+"\n\n"+ 1535 Arrays.toString(gapArray)+"\n\n"+ 1536 // Arrays.toString(clone)+"\n\n"+ 1537 Arrays.toString(locArray)+"\n"+ 1538 "numHits="+numHits+", "+ 1539 "heap.size="+heap.size()+", "+ 1540 "numHits="+numHits+", "+ 1541 "approxHits="+approxHits+"\n"; 1542 gapArray[0]=Tools.min(gapArray[0], site2); 1543 gapArray[gapArray.length-1]=Tools.max(gapArray[gapArray.length-1], site3); 1544 } 1545 if(verbose){System.err.println("@ site "+site2+", made gap array: "+Arrays.toString(gapArray));} 1546 // assert(false) : Arrays.toString(locArray); 1547 } 1548 1549 1550 //This block is optional, but tries to eliminate multiple identical alignments 1551 1552 SiteScore ss=null; 1553 final boolean perfect1=USE_EXTENDED_SCORE && score==maxScore && fullyDefined; 1554 final boolean inbounds=(site2>=0 && site3<Data.chromLengths[chrom]); 1555 // if(!inbounds){System.err.println("Index tossed out-of-bounds site chr"+chrom+", "+site2+"-"+site3);} 1556 1557 if(inbounds && !SEMIPERFECTMODE && !PERFECTMODE && gapArray==null && prevSS!=null && 1558 prevSS.chrom==chrom && prevSS.strand==strand && overlap(prevSS.start, prevSS.stop, site2, site3)){ 1559 1560 final int betterScore=Tools.max(score, prevSS.score); 1561 final int minStart=Tools.min(prevSS.start, site2); 1562 final int maxStop=Tools.max(prevSS.stop, site3); 1563 final boolean perfect2=USE_EXTENDED_SCORE && prevSS.score==maxScore && fullyDefined; 1564 assert(!USE_EXTENDED_SCORE || perfect2==prevSS.perfect); 1565 1566 final boolean shortEnough=(!LIMIT_SUBSUMPTION_LENGTH_TO_2X || (maxStop-minStart<2*bases.length)); 1567 1568 if(prevSS.start==site2 && prevSS.stop==site3){ 1569 prevSS.score=prevSS.quickScore=betterScore; 1570 prevSS.perfect=(prevSS.perfect || perfect1 || perfect2); 1571 if(prevSS.perfect){prevSS.semiperfect=true;} 1572 }else if(SUBSUME_SAME_START_SITES && shortEnough && prevSS.start==site2 && !prevSS.semiperfect){ 1573 if(perfect2){ 1574 //do nothing 1575 }else if(perfect1){ 1576 prevSS.setStop(site3); 1577 if(!prevSS.perfect){perfectsFound++;}//***$ 1578 prevSS.perfect=prevSS.semiperfect=true; 1579 }else{ 1580 prevSS.setStop(maxStop); 1581 prevSS.setPerfect(bases); 1582 } 1583 prevSS.score=prevSS.quickScore=betterScore; 1584 }else if(SUBSUME_SAME_STOP_SITES && shortEnough && prevSS.stop==site3 && !prevSS.semiperfect){ 1585 if(perfect2){ 1586 //do nothing 1587 }else if(perfect1){ 1588 prevSS.setStart(site2); 1589 if(!prevSS.perfect){perfectsFound++;}//***$ 1590 prevSS.perfect=prevSS.semiperfect=true; 1591 }else{ 1592 prevSS.setStart(minStart); 1593 prevSS.setPerfect(bases); 1594 } 1595 prevSS.score=prevSS.quickScore=betterScore; 1596 }else if(SUBSUME_OVERLAPPING_SITES && shortEnough && (maxStop-minStart<=bases.length+MAX_SUBSUMPTION_LENGTH) 1597 && !perfect1 && !perfect2 && !prevSS.semiperfect){ 1598 prevSS.setLimits(minStart, maxStop); 1599 prevSS.score=prevSS.quickScore=betterScore; 1600 prevSS.setPerfect(bases); 1601 }else{ 1602 ss=new SiteScore(chrom, strand, site2, site3, approxHits, score, false, perfect1); 1603 if(!perfect1){ss.setPerfect(bases);} 1604 if(verbose){System.err.println("A) Index made SiteScore "+ss.toText()+", "+Arrays.toString(ss.gaps));} 1605 assert(!perfect1 || ss.stop-ss.start==bases.length-1); 1606 } 1607 assert(!perfect2 || prevSS.stop-prevSS.start==bases.length-1); 1608 }else if(inbounds){ 1609 ss=new SiteScore(chrom, strand, site2, site3, approxHits, score, false, perfect1); 1610 if(!perfect1){ss.setPerfect(bases);} 1611 ss.gaps=gapArray; 1612 if(verbose){System.err.println("B) Index made SiteScore "+ss.toText()+", "+Arrays.toString(ss.gaps));} 1613 } 1614 1615 assert(ss==null || !ss.perfect || ss.semiperfect) : ss; 1616 assert(prevSS==null || !prevSS.perfect || prevSS.semiperfect) : "\n"+SiteScore.header()+"\n"+ss+"\n"+prevSS; 1617 if(ss!=null && ((SEMIPERFECTMODE && !ss.semiperfect) || (PERFECTMODE && !ss.perfect))){ss=null;} 1618 1619 1620 if(ss!=null){ 1621 // System.out.println("Added site "+ss.toText()+", qscore="+qscore); 1622 ssl.add(ss); 1623 if(ss.perfect){ 1624 1625 if(prevSS==null || !prevSS.perfect || !ss.overlaps(prevSS)){ 1626 if(prevSS==null){assert ssl.size()<2 || !ss.overlaps(ssl.get(ssl.size()-2));} 1627 perfectsFound++; 1628 1629 //Human-specific code 1630 // if(QUIT_AFTER_TWO_PERFECTS){ 1631 // if(perfectsFound>=3 || (perfectsFound>=2 && chrom<24)){break;} 1632 // } 1633 1634 if(QUIT_AFTER_TWO_PERFECTS && perfectsFound>=2){break;} 1635 } 1636 } 1637 1638 prevSS=ss; 1639 }else{ 1640 // System.out.println("Subsumed site "+new SiteScore(chrom, strand, site2, site3, score).toText()); 1641 } 1642 } 1643 } 1644 1645 while(site==(int)heap.peek().site){ //Remove all identical elements, and add subsequent elements 1646 final Quad64 t2=heap.poll(); 1647 final int row=t2.row+1, col=t2.column; 1648 if(row<stops[col]){ 1649 t2.row=row; 1650 1651 int a=t2.list[row]; 1652 int a2; 1653 if((a&SITE_MASK)>=offsets[col]){ 1654 a2=a-offsets[col]; 1655 1656 assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) : 1657 "baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", strand="+strand+", site="+site+ 1658 ", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col]; 1659 }else{ 1660 int ch=numberToChrom(a, baseChrom); 1661 int st=numberToSite(a); 1662 int st2=Tools.max(st-offsets[col], 0); 1663 a2=toNumber(st2, ch); 1664 1665 assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) : 1666 "baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", strand="+strand+", site="+site+ 1667 ", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col]; 1668 } 1669 1670 assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) : 1671 "baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", strand="+strand+", site="+site+ 1672 ", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col]; 1673 1674 t2.site=((long)a2)&0xFFFFFFFFL; 1675 values[col]=a2; 1676 heap.add(t2); 1677 }else if(heap.size()<approxHitsCutoff || PERFECTMODE){ 1678 assert(USE_EXTENDED_SCORE); 1679 bestScores[0]=Tools.max(bestScores[0], currentTopScore); 1680 bestScores[1]=Tools.max(bestScores[1], maxHits); 1681 bestScores[2]=Tools.max(bestScores[2], qcutoff); 1682 bestScores[3]=Tools.max(bestScores[3], bestqscore); 1683 1684 bestScores[4]=maxQuickScore; 1685 bestScores[5]=perfectsFound; //***$ fixed by adding this line 1686 if(!RETAIN_BEST_QCUTOFF){bestScores[2]=-9999;} 1687 1688 return ssl; 1689 } 1690 if(heap.isEmpty()){ 1691 assert(false) : heap.size()+", "+approxHitsCutoff; 1692 break; 1693 } 1694 } 1695 1696 } 1697 1698 assert(USE_EXTENDED_SCORE); 1699 bestScores[0]=Tools.max(bestScores[0], currentTopScore); 1700 bestScores[1]=Tools.max(bestScores[1], maxHits); 1701 bestScores[2]=Tools.max(bestScores[2], qcutoff); 1702 bestScores[3]=Tools.max(bestScores[3], bestqscore); 1703 1704 bestScores[4]=maxQuickScore; 1705 bestScores[5]=perfectsFound; 1706 if(!RETAIN_BEST_QCUTOFF){bestScores[2]=-9999;} 1707 1708 // bestScores[0]=Tools.max(bestScores[0], currentTopScore); 1709 // bestScores[1]=Tools.max(bestScores[1], maxHits); 1710 // bestScores[2]=Tools.max(bestScores[2], qcutoff); 1711 return ssl; 1712 } 1713 1714 findMaxQscore2(final int[] starts, final int[] stops, final int[] offsets, final int[] keyScores, final int baseChrom_, final Quad64[] triples, final int[] values, final int prevMaxHits, boolean earlyExit, boolean perfectOnly)1715 private final int[] findMaxQscore2(final int[] starts, final int[] stops, final int[] offsets, final int[] keyScores, 1716 final int baseChrom_, final Quad64[] triples, final int[] values, final int prevMaxHits, 1717 boolean earlyExit, boolean perfectOnly){ 1718 1719 final int numHits=offsets.length; 1720 assert(numHits>=prevMaxHits); 1721 1722 final int baseChrom=baseChrom(baseChrom_); 1723 final Block b=index[baseChrom]; 1724 final int[] sizes=sizeArray; 1725 1726 heap.clear(); 1727 for(int i=0; i<numHits; i++){ 1728 final int[] sites=b.sites; 1729 final int start=starts[i]; 1730 sizes[i]=b.length(start, stops[i]); 1731 assert(sizes[i]>0); 1732 1733 int a=sites[start]; 1734 int a2; 1735 if((a&SITE_MASK)>=offsets[i]){ 1736 a2=a-offsets[i]; 1737 }else{ 1738 int ch=numberToChrom(a, baseChrom); 1739 int st=numberToSite(a); 1740 int st2=Tools.max(st-offsets[i], 0); 1741 a2=toNumber(st2, ch); 1742 } 1743 assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)); 1744 1745 Quad64 t=triples[i]; 1746 assert(t!=null) : "Should be using tripleStorage"; 1747 assert(i==t.column); 1748 t.row=start; 1749 t.site=((long)a2)&0xFFFFFFFFL; 1750 t.list=sites; 1751 values[i]=a2; 1752 1753 heap.add(t); 1754 } 1755 1756 final int maxQuickScore=maxQuickScore(offsets, keyScores); 1757 1758 int topQscore=-999999999; 1759 1760 int maxHits=0; 1761 // int approxHitsCutoff=MIN_APPROX_HITS_TO_KEEP; 1762 1763 1764 int approxHitsCutoff; 1765 final int indelCutoff; 1766 if(perfectOnly){ 1767 approxHitsCutoff=numHits; 1768 indelCutoff=0; 1769 }else{ 1770 approxHitsCutoff=Tools.max(prevMaxHits, Tools.min(MIN_APPROX_HITS_TO_KEEP, numHits-1)); //Faster, same accuracy 1771 indelCutoff=MAX_INDEL2; 1772 } 1773 1774 1775 while(!heap.isEmpty()){ 1776 Quad64 t=heap.peek(); 1777 final int site=(int)t.site; //*** TODO 1778 final int centerIndex=t.column; 1779 1780 int maxNearbySite=site; 1781 1782 1783 int approxHits=0; 1784 1785 {//Inner loop 1786 final int minsite=subUnsigned(site, MAX_INDEL); 1787 final int maxsite=addUnsigned(site, MAX_INDEL2); 1788 for(int column=0, chances=numHits-approxHitsCutoff; column<numHits && chances>=0; column++){ 1789 final int x=values[column]; 1790 assert(x==(int)triples[column].site); 1791 if(x>=minsite && x<=maxsite){ 1792 maxNearbySite=(x>maxNearbySite ? x : maxNearbySite); 1793 approxHits++; 1794 }else{chances--;} 1795 } 1796 } 1797 1798 assert(centerIndex>=0) : centerIndex; 1799 assert(approxHits>=1 || approxHitsCutoff>1) : approxHits+", "+approxHitsCutoff+", "+numHits+", "+t.column; 1800 if(approxHits>=approxHitsCutoff){ 1801 1802 int qscore=quickScore(values, keyScores, centerIndex, offsets, sizes, true, approxHits, numHits); 1803 1804 if(ADD_SCORE_Z){ 1805 int scoreZ=scoreZ2(values, centerIndex, offsets, approxHits, numHits); 1806 qscore+=scoreZ; 1807 } 1808 1809 if(qscore>topQscore){ 1810 1811 // maxHits=Tools.max(approxHits, maxHits); 1812 // approxHitsCutoff=Tools.max(approxHitsCutoff, maxHits); //Best setting for pre-scan 1813 1814 maxHits=Tools.max(approxHits, maxHits); 1815 approxHitsCutoff=Tools.max(approxHitsCutoff, approxHits-1); //Best setting for pre-scan 1816 1817 topQscore=qscore; 1818 1819 if(qscore>=maxQuickScore){ 1820 assert(qscore==maxQuickScore); 1821 assert(approxHits==numHits); 1822 if(earlyExit){ 1823 return new int[] {topQscore, maxHits}; 1824 } 1825 } 1826 } 1827 } 1828 1829 while(site==(int)heap.peek().site){ //Remove all identical elements, and add subsequent elements 1830 final Quad64 t2=heap.poll(); 1831 final int row=t2.row+1, col=t2.column; 1832 if(row<stops[col]){ 1833 t2.row=row; 1834 1835 int a=t2.list[row]; 1836 int a2; 1837 if((a&SITE_MASK)>=offsets[col]){ 1838 a2=a-offsets[col]; 1839 1840 assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) : 1841 "baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", site="+site+ 1842 ", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col]; 1843 }else{ 1844 int ch=numberToChrom(a, baseChrom); 1845 int st=numberToSite(a); 1846 int st2=Tools.max(st-offsets[col], 0); 1847 a2=toNumber(st2, ch); 1848 1849 assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) : 1850 "baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", site="+site+ 1851 ", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col]; 1852 } 1853 1854 assert(numberToChrom(a, baseChrom) == numberToChrom(a2, baseChrom)) : 1855 "baseChrom="+baseChrom+", chrom="+numberToChrom(a, baseChrom)+", site="+site+ 1856 ", maxNearbySite="+maxNearbySite+", a="+a+", a2="+a2+", offsets["+col+"]="+offsets[col]; 1857 1858 t2.site=((long)a2)&0xFFFFFFFFL; 1859 values[col]=a2; 1860 heap.add(t2); 1861 }else if(earlyExit && (perfectOnly || heap.size()<approxHitsCutoff)){ 1862 return new int[] {topQscore, maxHits}; 1863 } 1864 if(heap.isEmpty()){break;} 1865 } 1866 1867 } 1868 1869 1870 1871 return new int[] {topQscore, maxHits}; 1872 } 1873 1874 1875 /** Uses unsigned math */ absdif(int a, int b)1876 private static final int absdif(int a, int b){ 1877 // return a>b ? a-b : b-a; 1878 return (a<0 == b<0) ? a>b ? a-b : b-a : Integer.MAX_VALUE; 1879 } 1880 1881 1882 @Override maxScore(int[] offsets, byte[] baseScores, int[] keyScores, int readlen, boolean useQuality)1883 final int maxScore(int[] offsets, byte[] baseScores, int[] keyScores, int readlen, boolean useQuality){ 1884 1885 if(useQuality){ 1886 //These lines apparently MUST be used if quality is used later on for slow align. 1887 if(USE_AFFINE_SCORE){return msa.maxQuality(baseScores);} 1888 if(USE_EXTENDED_SCORE){return readlen*(BASE_HIT_SCORE+BASE_HIT_SCORE/5)+Tools.sumInt(baseScores);} 1889 }else{ 1890 if(USE_AFFINE_SCORE){return msa.maxQuality(readlen);} 1891 if(USE_EXTENDED_SCORE){return readlen*(BASE_HIT_SCORE+BASE_HIT_SCORE/5);} 1892 } 1893 1894 return maxQuickScore(offsets, keyScores); 1895 } 1896 1897 maxQuickScore(int[] offsets, int[] keyScores)1898 public final int maxQuickScore(int[] offsets, int[] keyScores){ 1899 1900 // int x=offsets.length*BASE_KEY_HIT_SCORE; 1901 int x=Tools.intSum(keyScores); 1902 int y=Y_SCORE_MULT*(offsets[offsets.length-1]-offsets[0]); 1903 // if(ADD_LIST_SIZE_BONUS){x+=(LIST_SIZE_BONUS[1]*offsets.length);} 1904 // assert(!ADD_SCORE_Z) : "Need to make sure this is correct..."; 1905 1906 // if(ADD_SCORE_Z){x+=((offsets[offsets.length-1]+CHUNKSIZE)*Z_SCORE_MULT);} 1907 if(ADD_SCORE_Z){x+=maxScoreZ(offsets);} 1908 1909 return x+y; 1910 // int bonus=(2*(HIT_SCORE/2)); //For matching both ends 1911 // return x+y+bonus; 1912 } 1913 1914 quickScore(final int[] values, final int[] keyScores, final int centerIndex, final int offsets[], int[] sizes, final boolean penalizeIndels, final int numApproxHits, final int numHits)1915 private final int quickScore(final int[] values, final int[] keyScores, final int centerIndex, final int offsets[], 1916 int[] sizes, final boolean penalizeIndels, final int numApproxHits, final int numHits){ 1917 1918 if(numApproxHits==1){return keyScores[centerIndex];} 1919 1920 //Done! 1921 //Correct way to calculate score: 1922 //Find the first chunk that exactly hits the center. 1923 //Then, align leftward of it, and align rightward of it, and sum the scores. 1924 1925 //"-centerIndex" is a disambiguating term that, given otherwise identical match patterns 1926 //(for example, a small indel will generate two valid site candidates), choose the lower site. 1927 1928 int x=keyScores[centerIndex]+scoreLeft(values, keyScores, centerIndex, sizes, penalizeIndels)+ 1929 scoreRight(values, keyScores, centerIndex, sizes, penalizeIndels, numHits)-centerIndex; 1930 1931 int y=Y_SCORE_MULT*scoreY(values, centerIndex, offsets); 1932 if(ADD_LIST_SIZE_BONUS){x+=calcListSizeBonus(sizes[centerIndex]);} 1933 // int z=scoreZ(locs, hits); 1934 return x+y; 1935 } 1936 1937 1938 // /** Generates a term that increases score with how many bases in the read match the ref. */ 1939 // private static final int scoreZ(int[] locs, int centerIndex, int offsets[]){ 1940 // final int center=locs[centerIndex]; 1941 // 1942 // final int[] refLoc=new int[offsets[offsets.length-1]+CHUNKSIZE]; 1943 // 1944 // final int maxLoc=center+MAX_INDEL2; 1945 // final int minLoc=Tools.max(0, center-MAX_INDEL); 1946 // 1947 // int score=0; 1948 // 1949 // for(int i=0; i<locs.length; i++){ 1950 // int loc=locs[i]; 1951 //// int dif=absdif(loc, center); 1952 // if(loc>=minLoc && loc<=maxLoc){ 1953 //// assert(loc>=center) : "loc="+loc+"\ni="+i+"\ncenterIndex="+centerIndex+ 1954 //// "\nmaxLoc="+maxLoc+"\nlocs:\t"+Arrays.toString(locs)+"\noffsets:\t"+Arrays.toString(offsets); 1955 // 1956 // int offset=offsets[i]; 1957 // int max=CHUNKSIZE+offset; 1958 // 1959 // for(int j=offset; j<max; j++){ 1960 // int old=refLoc[j]; 1961 // if(old==0){ 1962 // refLoc[j]=loc; 1963 // score+=4; 1964 // }else if(old>loc){ 1965 // refLoc[j]=loc; 1966 // score-=2; 1967 // }else if(old==loc){ 1968 // score-=1; 1969 // //do nothing, perhaps, or add 1? 1970 // }else{ 1971 // score-=2; 1972 // assert(old<loc); 1973 // } 1974 // } 1975 // } 1976 // } 1977 // return score; 1978 // } 1979 1980 1981 extendScore(final byte[] bases, final byte[] baseScores, final int[] offsets, final int[] values, final int chrom, final int centerIndex, final int[] locArray, final int numHits, final int numApproxHits)1982 private final int extendScore(final byte[] bases, final byte[] baseScores, final int[] offsets, final int[] values, 1983 final int chrom, final int centerIndex, final int[] locArray, final int numHits, final int numApproxHits){ 1984 callsToExtendScore++; 1985 1986 final int centerVal=values[centerIndex]; 1987 final int centerLoc=numberToSite(centerVal); 1988 1989 final int minLoc=Tools.max(0, centerLoc-MAX_INDEL); //Legacy, for assertions 1990 final int maxLoc=centerLoc+MAX_INDEL2; //Legacy, for assertions 1991 1992 final int minVal=subUnsigned(centerVal, MAX_INDEL); 1993 final int maxVal=addUnsigned(centerVal, MAX_INDEL2); 1994 1995 // System.out.println("Min, center, max = "+minLoc+", "+center+", "+ maxLoc); 1996 // System.out.println("centerIndex = "+centerIndex); 1997 1998 final byte[] ref=Data.getChromosome(chrom).array; 1999 2000 // int[] locArray=new int[bases.length]; 2001 Arrays.fill(locArray, -1); 2002 2003 2004 //First fill in reverse 2005 for(int i=0, keynum=0; i<numHits; i++){ 2006 final int value=values[i]; 2007 2008 if(value>=minVal && value<=maxVal){ 2009 final int refbase=numberToSite(value); 2010 assert(refbase>=minLoc && refbase<=maxLoc); 2011 2012 // System.out.println("Reverse: Trying key "+refbase+" @ "+offsets[i]); 2013 // System.out.println("Passed!"); 2014 keynum++; 2015 final int callbase=offsets[i]; 2016 2017 int misses=0; 2018 for(int cloc=callbase+KEYLEN-1, rloc=refbase+cloc; cloc>=0 && rloc>=0 && rloc<ref.length; cloc--, rloc--){ 2019 int old=locArray[cloc]; 2020 if(old==refbase){ 2021 // System.out.println("Broke because old="+old+", refbase="+refbase); 2022 break; 2023 } //Already filled with present value 2024 if(misses>0 && old>=0){ 2025 // System.out.println("Broke because old="+old+", misses="+misses); 2026 break; 2027 } //Already filled with something that has no errors 2028 byte c=bases[cloc]; 2029 byte r=ref[rloc]; 2030 2031 if(c==r){ 2032 if(old<0 || refbase==centerLoc){ //If the cell is empty or this key corresponds to center 2033 locArray[cloc]=refbase; 2034 } 2035 }else{ 2036 misses++; 2037 //Only extends first key all the way back. Others stop at the first error. 2038 if(old>=0 || keynum>1){ 2039 // System.out.println("Broke because old="+old+", keynum="+keynum); 2040 break; 2041 } 2042 } 2043 } 2044 } 2045 } 2046 2047 2048 2049 //Then fill forward 2050 for(int i=0; i<numHits; i++){ 2051 final int value=values[i]; 2052 2053 if(value>=minVal && value<=maxVal){ 2054 final int refbase=numberToSite(value); 2055 assert(refbase>=minLoc && refbase<=maxLoc); 2056 final int callbase=offsets[i]; 2057 2058 int misses=0; 2059 for(int cloc=callbase+KEYLEN, rloc=refbase+cloc; cloc<bases.length && rloc<ref.length; cloc++, rloc++){ 2060 int old=locArray[cloc]; 2061 if(old==refbase){break;} //Already filled with present value 2062 if(misses>0 && old>=0){break;} //Already filled with something that has no errors 2063 byte c=bases[cloc]; 2064 byte r=ref[rloc]; 2065 2066 if(c==r){ 2067 if(old<0 || refbase==centerLoc){ //If the cell is empty or this key corresponds to center 2068 locArray[cloc]=refbase; 2069 } 2070 }else{ 2071 misses++; 2072 if(old>=0){break;} //Already filled with something that has no errors 2073 } 2074 } 2075 } 2076 } 2077 2078 // //Change 'N' to -2. A bit slow. 2079 // { 2080 // int firstMatch=0; 2081 // while(firstMatch<locArray.length && locArray[firstMatch]<0){firstMatch++;} 2082 // assert(firstMatch<locArray.length) : new String(bases); 2083 // int last=locArray[firstMatch]; 2084 // for(int i=firstMatch-1; i>=0; i--){ 2085 // final byte c=bases[i]; 2086 // if(c=='N'){locArray[i]=-2;} 2087 // else{ 2088 // assert(locArray[i]==-1); 2089 // final int rloc=last+i; 2090 // byte r=ref[rloc]; 2091 // if(r=='N'){locArray[i]=-2;} 2092 // } 2093 // } 2094 // for(int i=firstMatch; i<locArray.length; i++){ 2095 // final int loc=locArray[i]; 2096 // if(last<1){last=loc;} 2097 // final byte c=bases[i]; 2098 // if(c=='N'){locArray[i]=-2;} 2099 // else if(loc==-1 && last>0){ 2100 // final int rloc=last+i; 2101 // byte r=ref[rloc]; 2102 // if(r=='N'){locArray[i]=-2;} 2103 // } 2104 // } 2105 // } 2106 2107 //Change 'N' to -2, but only for nocalls, not norefs. Much faster. 2108 { 2109 final byte nb=(byte)'N'; 2110 for(int i=0; i<bases.length; i++){ 2111 if(bases[i]==nb){locArray[i]=-2;} 2112 } 2113 } 2114 2115 if(USE_AFFINE_SCORE){ 2116 /* TODO - sometimes returns a higher score than actual alignment. This should never happen. */ 2117 int score=(KFILTER<2 ? msa.calcAffineScore(locArray, baseScores, bases) : 2118 msa.calcAffineScore(locArray, baseScores, bases, KFILTER)); 2119 return score; 2120 } 2121 2122 int score=0; 2123 int lastLoc=-1; 2124 int centerBonus=BASE_HIT_SCORE/5; 2125 for(int i=0; i<locArray.length; i++){ 2126 int loc=locArray[i]; 2127 if(loc>=0){ 2128 score+=BASE_HIT_SCORE+baseScores[i]; 2129 if(loc==centerLoc){score+=centerBonus;} 2130 if(loc!=lastLoc && lastLoc>=0){ 2131 int dif=absdif(loc, lastLoc); 2132 int penalty=Tools.min(INDEL_PENALTY+INDEL_PENALTY_MULT*dif, MAX_PENALTY_FOR_MISALIGNED_HIT); 2133 score-=penalty; 2134 } 2135 lastLoc=loc; 2136 } 2137 } 2138 2139 // System.err.println("Extended score: "+score); 2140 // System.err.println(Arrays.toString(locArray)); 2141 2142 2143 return score; 2144 } 2145 2146 2147 /** NOTE! This destroys the locArray, so use a copy if needed. */ makeGapArray(int[] locArray, int minLoc, int minGap)2148 private static final int[] makeGapArray(int[] locArray, int minLoc, int minGap){ 2149 int gaps=0; 2150 boolean doSort=false; 2151 2152 if(locArray[0]<0){locArray[0]=minLoc;} 2153 for(int i=1; i<locArray.length; i++){ 2154 if(locArray[i]<0){locArray[i]=locArray[i-1]+1;} 2155 else{locArray[i]+=i;} 2156 if(locArray[i]<locArray[i-1]){doSort=true;} 2157 } 2158 2159 // System.err.println(Arrays.toString(locArray)+"\n"); 2160 2161 if(doSort){ 2162 // System.err.println("*"); 2163 Arrays.sort(locArray); 2164 } 2165 // System.err.println(Arrays.toString(locArray)+"\n"); 2166 2167 for(int i=1; i<locArray.length; i++){ 2168 int dif=locArray[i]-locArray[i-1]; 2169 assert(dif>=0); 2170 if(dif>minGap){ 2171 gaps++; 2172 } 2173 } 2174 if(gaps<1){return null;} 2175 int[] out=new int[2+gaps*2]; 2176 out[0]=locArray[0]; 2177 out[out.length-1]=locArray[locArray.length-1]; 2178 2179 for(int i=1, j=1; i<locArray.length; i++){ 2180 int dif=locArray[i]-locArray[i-1]; 2181 assert(dif>=0); 2182 if(dif>minGap){ 2183 out[j]=locArray[i-1]; 2184 out[j+1]=locArray[i]; 2185 j+=2; 2186 } 2187 } 2188 return out; 2189 } 2190 2191 2192 /** Generates a term that increases score with how many bases in the read match the ref. */ scoreZ2(int[] values, int centerIndex, int offsets[], int numApproxHits, int numHits)2193 private final int scoreZ2(int[] values, int centerIndex, int offsets[], int numApproxHits, int numHits){ 2194 2195 if(numApproxHits==1){return SCOREZ_1KEY;} 2196 2197 final int center=values[centerIndex]; 2198 2199 final int maxLoc=center+MAX_INDEL2; 2200 final int minLoc=Tools.max(0, center-MAX_INDEL); 2201 2202 2203 2204 // final int minVal=subUnsigned(centerVal, MAX_INDEL); 2205 // final int maxVal=addUnsigned(centerVal, MAX_INDEL2); 2206 2207 int score=0; 2208 2209 int a0=-1, b0=-1; 2210 2211 for(int i=0; i<numHits; i++){ 2212 int loc=values[i]; 2213 // int dif=absdif(loc, center); 2214 if(loc>=minLoc && loc<=maxLoc){ 2215 // assert(loc>=center) : "loc="+loc+"\ni="+i+"\ncenterIndex="+centerIndex+ 2216 // "\nmaxLoc="+maxLoc+"\nlocs:\t"+Arrays.toString(locs)+"\noffsets:\t"+Arrays.toString(offsets); 2217 int a=offsets[i]; 2218 2219 if(b0<a){ 2220 score+=b0-a0; 2221 a0=a; 2222 } 2223 b0=a+KEYLEN; 2224 } 2225 } 2226 score+=b0-a0; 2227 score=score*Z_SCORE_MULT; 2228 // assert(score==scoreZslow(locs, centerIndex, offsets, false)) : scoreZslow(locs, centerIndex, offsets, true)+" != "+score; 2229 return score; 2230 } 2231 2232 @Deprecated 2233 /** This was just to verify scoreZ2. */ scoreZslow(int[] locs, int centerIndex, int offsets[], boolean display, int numHits)2234 private final int scoreZslow(int[] locs, int centerIndex, int offsets[], boolean display, int numHits){ 2235 final int center=locs[centerIndex]; 2236 2237 final int maxLoc=center+MAX_INDEL2; 2238 final int minLoc=Tools.max(0, center-MAX_INDEL); 2239 2240 byte[] array=new byte[offsets[offsets.length-1]+KEYLEN]; 2241 int score=0; 2242 2243 for(int i=0; i<numHits; i++){ 2244 int loc=locs[i]; 2245 // int dif=absdif(loc, center); 2246 if(loc>=minLoc && loc<=maxLoc){ 2247 int pos=offsets[i]; 2248 // if(true){ 2249 // System.err.println("\ni="+i+", pos="+pos+", array=["+array.length+"], limit="+(pos+CHUNKSIZE-1)); 2250 // } 2251 for(int j=pos; j<pos+KEYLEN; j++){ 2252 if(array[j]==0){score++;} 2253 array[j]=1; 2254 } 2255 } 2256 } 2257 2258 if(display){System.err.println("\n"+Arrays.toString(array)+"\n");} 2259 2260 return score*Z_SCORE_MULT; 2261 } 2262 2263 /** Generates a term that increases score with how many bases in the read match the ref. */ maxScoreZ(int offsets[])2264 private final int maxScoreZ(int offsets[]){ 2265 int score=0; 2266 int a0=-1, b0=-1; 2267 2268 for(int i=0; i<offsets.length; i++){ 2269 int a=offsets[i]; 2270 2271 if(b0<a){ 2272 score+=b0-a0; 2273 a0=a; 2274 } 2275 b0=a+KEYLEN; 2276 2277 } 2278 score+=b0-a0; 2279 return score*Z_SCORE_MULT; 2280 } 2281 2282 scoreRight(int[] values, int[] keyScores, int centerIndex, int[] sizes, boolean penalizeIndels, int numHits)2283 private final int scoreRight(int[] values, int[] keyScores, int centerIndex, int[] sizes, boolean penalizeIndels, int numHits){ 2284 2285 int score=0; 2286 2287 int prev, loc=values[centerIndex]; 2288 2289 for(int i=centerIndex+1; i<numHits; i++){ 2290 2291 if(values[i]!=-1){ 2292 prev=loc; 2293 loc=values[i]; 2294 2295 int offset=absdif(loc, prev); 2296 2297 if(offset<=MAX_INDEL){ 2298 score+=keyScores[i]; 2299 if(ADD_LIST_SIZE_BONUS){score+=calcListSizeBonus(sizes[i]);} 2300 2301 // if(i==locs.length-1){score+=HIT_SCORE/2;} //Adds a bonus for matching the first or last key 2302 2303 if(penalizeIndels && offset!=0){ 2304 int penalty=Tools.min(INDEL_PENALTY+INDEL_PENALTY_MULT*offset, MAX_PENALTY_FOR_MISALIGNED_HIT); 2305 score-=penalty; 2306 // score-=(INDEL_PENALTY+Tools.min(INDEL_PENALTY_MULT*offset, 1+HIT_SCORE/4)); 2307 } 2308 }else{ 2309 loc=prev; 2310 } 2311 } 2312 2313 } 2314 return score; 2315 2316 } 2317 scoreLeft(int[] values, int[] keyScores, int centerIndex, int[] sizes, boolean penalizeIndels)2318 private final int scoreLeft(int[] values, int[] keyScores, int centerIndex, int[] sizes, boolean penalizeIndels){ 2319 2320 callsToScore++; 2321 2322 int score=0; 2323 2324 int prev, loc=values[centerIndex]; 2325 2326 for(int i=centerIndex-1; i>=0; i--){ 2327 2328 if(values[i]!=-1){ 2329 prev=loc; 2330 loc=values[i]; 2331 2332 int offset=absdif(loc, prev); 2333 2334 if(offset<=MAX_INDEL){ 2335 score+=keyScores[i]; 2336 if(ADD_LIST_SIZE_BONUS){score+=calcListSizeBonus(sizes[i]);} 2337 2338 // if(i==0){score+=HIT_SCORE/2;} //Adds a bonus for matching the first or last key 2339 if(penalizeIndels && offset!=0){ 2340 int penalty=Tools.min(INDEL_PENALTY+INDEL_PENALTY_MULT*offset, MAX_PENALTY_FOR_MISALIGNED_HIT); 2341 score-=penalty; 2342 } 2343 }else{ 2344 loc=prev; 2345 } 2346 } 2347 2348 } 2349 return score; 2350 2351 } 2352 2353 /** Encode a (location, chrom) pair to an index */ toNumber(int site, int chrom)2354 private static final int toNumber(int site, int chrom){ 2355 int out=(chrom&CHROM_MASK_LOW); 2356 out=out<<SHIFT_LENGTH; 2357 out=(out|site); 2358 return out; 2359 } 2360 2361 /** Decode an (index, baseChrom) pair to a chromosome */ numberToChrom(int number, int baseChrom)2362 private static final int numberToChrom(int number, int baseChrom){ 2363 assert((baseChrom&CHROM_MASK_LOW)==0) : Integer.toHexString(number)+", baseChrom="+baseChrom; 2364 assert(baseChrom>=0) : Integer.toHexString(number)+", baseChrom="+baseChrom; 2365 int out=(number>>>SHIFT_LENGTH); 2366 out=out+(baseChrom&CHROM_MASK_HIGH); 2367 return out; 2368 } 2369 2370 /** Decode an index to a location */ numberToSite(int number)2371 private static final int numberToSite(int number){ 2372 return (number&SITE_MASK); 2373 } 2374 subUnsigned(int a, int b)2375 private static int subUnsigned(int a, int b){ 2376 int c=a-b; 2377 if((a<0)==(c<0)){return c;} 2378 if(a>=0){return b>=0 ? 0 : Integer.MAX_VALUE-10000;} 2379 return b>=0 ? Integer.MIN_VALUE : -10000; 2380 } 2381 addUnsigned(int a, int b)2382 private static int addUnsigned(int a, int b){ 2383 int c=a+b; 2384 if((a<0)==(c<0)){return c;} 2385 if(a>=0){return b>=0 ? Integer.MAX_VALUE-10000 : 0;} 2386 return b>=0 ? -10000 : Integer.MIN_VALUE; 2387 } 2388 minChrom(int chrom)2389 private static final int minChrom(int chrom){return Tools.max(MINCHROM, chrom&CHROM_MASK_HIGH);} baseChrom(int chrom)2390 private static final int baseChrom(int chrom){return Tools.max(0, chrom&CHROM_MASK_HIGH);} maxChrom(int chrom)2391 private static final int maxChrom(int chrom){return Tools.max(MINCHROM, Tools.min(MAXCHROM, chrom|CHROM_MASK_LOW));} 2392 2393 getOffsetArray(int len)2394 private final int[] getOffsetArray(int len){ 2395 if(offsetArrays[len]==null){offsetArrays[len]=new int[len];} 2396 return offsetArrays[len]; 2397 } getLocArray(int len)2398 private final int[] getLocArray(int len){ 2399 if(len>=locArrays.length){return new int[len];} 2400 if(locArrays[len]==null){locArrays[len]=new int[len];} 2401 return locArrays[len]; 2402 } getGreedyListArray(int len)2403 private final int[] getGreedyListArray(int len){ 2404 if(greedyListArrays[len]==null){greedyListArrays[len]=new int[len];} 2405 return greedyListArrays[len]; 2406 } getGenericArray(int len)2407 private final int[] getGenericArray(int len){ 2408 if(genericArrays[len]==null){genericArrays[len]=new int[len];} 2409 return genericArrays[len]; 2410 } 2411 2412 @Override getBaseScoreArray(int len, int strand)2413 final byte[] getBaseScoreArray(int len, int strand){ 2414 if(len>=baseScoreArrays[0].length){return new byte[len];} 2415 if(baseScoreArrays[strand][len]==null){baseScoreArrays[strand][len]=new byte[len];} 2416 return baseScoreArrays[strand][len]; 2417 } 2418 @Override getKeyScoreArray(int len, int strand)2419 final int[] getKeyScoreArray(int len, int strand){ 2420 if(len>=keyScoreArrays.length){return new int[len];} 2421 if(keyScoreArrays[strand][len]==null){keyScoreArrays[strand][len]=new int[len];} 2422 return keyScoreArrays[strand][len]; 2423 } getKeyWeightArray(int len)2424 private final float[] getKeyWeightArray(int len){ 2425 if(len>=keyWeightArrays.length){return new float[len];} 2426 if(keyWeightArrays[len]==null){keyWeightArrays[len]=new float[len];} 2427 return keyWeightArrays[len]; 2428 } 2429 @Override keyProbArray()2430 float[] keyProbArray() { 2431 return keyProbArray; 2432 } 2433 2434 2435 private final int[][] locArrays=new int[601][]; 2436 private final int[] valueArray=new int[128]; 2437 private final int[] sizeArray=new int[128]; 2438 private final int[][] offsetArrays=new int[128][]; 2439 private final int[][] greedyListArrays=new int[128][]; 2440 private final int[][] genericArrays=new int[128][]; 2441 private final int[] startArray=new int[128]; 2442 private final int[] stopArray=new int[128]; 2443 private final Quad64[] tripleStorage=makeQuad64Storage(128); 2444 private final int[] greedyReturn=KillSwitch.allocInt1D(2); 2445 private final int[][] shrinkReturn2=new int[3][]; 2446 private final int[][] shrinkReturn3=new int[5][]; 2447 private final int[][] prescanReturn=new int[2][]; 2448 private final int[] prescoreArray; 2449 private final int[] precountArray; 2450 2451 private final byte[][][] baseScoreArrays=new byte[2][601][]; 2452 private final int[][][] keyScoreArrays=new int[2][128][]; 2453 final float[] keyProbArray=new float[601]; 2454 private final float[][] keyWeightArrays=new float[128][]; 2455 2456 makeQuad64Storage(int number)2457 private final static Quad64[] makeQuad64Storage(int number){ 2458 Quad64[] r=new Quad64[number]; 2459 for(int i=0; i<number; i++){r[i]=new Quad64(i, 0, 0);} 2460 return r; 2461 } 2462 2463 2464 private final Quad64Heap heap=new Quad64Heap(127); 2465 2466 static int SHIFT_LENGTH=(32-NUM_CHROM_BITS); 2467 static int MAX_ALLOWED_CHROM_INDEX=~((-1)<<SHIFT_LENGTH); 2468 2469 /** Mask the number to get the site, which is in the lower bits */ 2470 static int SITE_MASK=((-1)>>>(NUM_CHROM_BITS)); 2471 2472 /** Mask the chromosome's high bits to get the low bits */ 2473 static int CHROM_MASK_LOW=CHROMS_PER_BLOCK-1; 2474 2475 /** Mask the chromosome's lower bits to get the high bits */ 2476 static int CHROM_MASK_HIGH=~CHROM_MASK_LOW; 2477 setChromBits(int x)2478 static void setChromBits(int x){ 2479 2480 NUM_CHROM_BITS=x; 2481 CHROMS_PER_BLOCK=(1<<(NUM_CHROM_BITS)); 2482 SHIFT_LENGTH=(32-NUM_CHROM_BITS); 2483 MAX_ALLOWED_CHROM_INDEX=~((-1)<<SHIFT_LENGTH); 2484 SITE_MASK=((-1)>>>(NUM_CHROM_BITS)); 2485 CHROM_MASK_LOW=CHROMS_PER_BLOCK-1; 2486 CHROM_MASK_HIGH=~CHROM_MASK_LOW; 2487 2488 // assert(false) : Integer.bitCount(SITE_MASK)+", "+MAX_ALLOWED_CHROM_INDEX+", "+CHROMS_PER_BLOCK; 2489 2490 // assert(NUM_CHROM_BITS<30); 2491 assert(NUM_CHROM_BITS>=0); //max is 3 for human; perhaps more for other organisms 2492 // assert((1<<(NUM_CHROM_BITS))>=CHROMSPERBLOCK) : (1<<(NUM_CHROM_BITS))+" < "+CHROMSPERBLOCK; 2493 assert((1<<(NUM_CHROM_BITS))==CHROMS_PER_BLOCK) : (1<<(NUM_CHROM_BITS))+" < "+CHROMS_PER_BLOCK; 2494 assert(Integer.bitCount(CHROMS_PER_BLOCK)==1); 2495 assert(Integer.numberOfLeadingZeros(SITE_MASK)==(NUM_CHROM_BITS)) : Integer.toHexString(SITE_MASK); 2496 } 2497 2498 private final int cycles; 2499 2500 public static final int BASE_HIT_SCORE=100; 2501 public static final int ALIGN_COLUMNS=3000; 2502 public static int MAX_INDEL=16000; //Max indel length, min 0, default 400; longer is more accurate 2503 public static int MAX_INDEL2=2*MAX_INDEL; 2504 2505 private final float INV_BASE_KEY_HIT_SCORE; 2506 private final int INDEL_PENALTY; //default (HIT_SCORE/2)-1 2507 private final int INDEL_PENALTY_MULT; //default 20; penalty for indel length 2508 private final int MAX_PENALTY_FOR_MISALIGNED_HIT; 2509 private final int SCOREZ_1KEY; 2510 2511 public static final boolean ADD_SCORE_Z=true; //Increases quality, decreases speed 2512 public static final int Z_SCORE_MULT=20; 2513 public static final int Y_SCORE_MULT=10; 2514 2515 2516 /** 2517 * Return only sites that match completely or with partial no-reference 2518 */ setSemiperfectMode()2519 public static void setSemiperfectMode() { 2520 assert(!PERFECTMODE); 2521 SEMIPERFECTMODE=true; 2522 PRESCAN_QSCORE=false; 2523 // MIN_APPROX_HITS_TO_KEEP++; 2524 2525 2526 2527 MAX_INDEL=0; 2528 MAX_INDEL2=0; 2529 } 2530 2531 /** 2532 * Return only sites that match completely 2533 */ setPerfectMode()2534 public static void setPerfectMode() { 2535 assert(!SEMIPERFECTMODE); 2536 PERFECTMODE=true; 2537 PRESCAN_QSCORE=false; 2538 // MIN_APPROX_HITS_TO_KEEP++; 2539 2540 2541 2542 MAX_INDEL=0; 2543 MAX_INDEL2=0; 2544 } 2545 2546 static float FRACTION_GENOME_TO_EXCLUDE=0.03f; //Default .03; lower is slower and more accurate. For perfect reads and small genomes, lower is FASTER. 2547 setFractionToExclude(float f)2548 public static final void setFractionToExclude(float f){ 2549 assert(f>=0 && f<1); 2550 FRACTION_GENOME_TO_EXCLUDE=f; 2551 MIN_INDEX_TO_DROP_LONG_HIT_LIST=(int)(1000*(1-3.5*FRACTION_GENOME_TO_EXCLUDE)); //default 810 2552 MAX_AVERAGE_LIST_TO_SEARCH=(int)(1000*(1-2.3*FRACTION_GENOME_TO_EXCLUDE)); //lower is faster, default 840 2553 MAX_AVERAGE_LIST_TO_SEARCH2=(int)(1000*(1-1.4*FRACTION_GENOME_TO_EXCLUDE)); //default 910 2554 MAX_SINGLE_LIST_TO_SEARCH=(int)(1000*(1-1.0*FRACTION_GENOME_TO_EXCLUDE)); //default 935 2555 MAX_SHORTEST_LIST_TO_SEARCH=(int)(1000*(1-2.8*FRACTION_GENOME_TO_EXCLUDE)); //Default 860 2556 } 2557 2558 2559 /** Default .75. Range: 0 to 1 (but 0 will break everything). Lower is faster and less accurate. */ 2560 static final float HIT_FRACTION_TO_RETAIN=0.85f; //default: .8 2561 /** Range: 0 to 1000. Lower should be faster and less accurate. */ 2562 static int MIN_INDEX_TO_DROP_LONG_HIT_LIST=(int)(1000*(1-3.5*FRACTION_GENOME_TO_EXCLUDE)); //default 810 2563 /** Range: 2 to infinity. Lower should be faster and less accurate. */ 2564 static final int MIN_HIT_LISTS_TO_RETAIN=6; 2565 2566 static int MAX_AVERAGE_LIST_TO_SEARCH=(int)(1000*(1-2.3*FRACTION_GENOME_TO_EXCLUDE)); //lower is faster, default 840 2567 //lower is faster 2568 static int MAX_AVERAGE_LIST_TO_SEARCH2=(int)(1000*(1-1.4*FRACTION_GENOME_TO_EXCLUDE)); //default 910 2569 //lower is faster 2570 static int MAX_SINGLE_LIST_TO_SEARCH=(int)(1000*(1-1.0*FRACTION_GENOME_TO_EXCLUDE)); //default 935 2571 //lower is faster 2572 static int MAX_SHORTEST_LIST_TO_SEARCH=(int)(1000*(1-2.8*FRACTION_GENOME_TO_EXCLUDE)); //Default 860 2573 2574 /** To increase accuracy on small genomes, override greedy list dismissal when the list is at most this long. */ 2575 public static final int SMALL_GENOME_LIST=20; 2576 2577 static{assert(!(TRIM_BY_GREEDY && TRIM_BY_TOTAL_SITE_COUNT)) : "Pick one.";} 2578 2579 static final int CLUMPY_MAX_DIST=5; //Keys repeating over intervals of this or less are clumpy. 2580 2581 /** Minimum length of list before clumpiness is considered. This is an index in the length histogram, from 0 to 1000. */ 2582 static final int CLUMPY_MIN_LENGTH_INDEX=2000; 2583 static final float CLUMPY_FRACTION=0.75f; //0 to 1; higher is slower but more stringent. 0.5 means the median distance is clumpy. 2584 2585 static final int MAX_SUBSUMPTION_LENGTH=MAX_INDEL2; 2586 2587 /** approxHitsCutoff=maxHits-MAX_HITS_REDUCTION1 when slowWalk3 is first entered */ 2588 public static final int MAX_HITS_REDUCTION1=0; 2589 2590 /** approxHitsCutoff=maxHits-MAX_HITS_REDUCTION2 dynamically when best score is exceeded */ 2591 public static int MAX_HITS_REDUCTION2=2; //default 1; higher is more accurate (more mapping and less FP) but slower 2592 2593 /** approxHitsCutoff=maxHits-MAX_HITS_REDUCTION_PERFECT when perfect score is found */ 2594 public static final int MAX_HITS_REDUCTION_PERFECT=0; 2595 2596 public static int MAXIMUM_MAX_HITS_REDUCTION=3; 2597 public static int HIT_REDUCTION_DIV=5; 2598 calcApproxHitsCutoff(final int keys, final int hits, int currentCutoff, final boolean perfect)2599 private static final int calcApproxHitsCutoff(final int keys, final int hits, int currentCutoff, final boolean perfect){ //***$ 2600 assert(keys>=hits) : keys+", "+hits; 2601 assert(hits>=0); 2602 2603 int mahtk=MIN_APPROX_HITS_TO_KEEP; 2604 if(SEMIPERFECTMODE || PERFECTMODE){ 2605 if(keys==1){return 1;} 2606 else if(MIN_APPROX_HITS_TO_KEEP<keys){ 2607 mahtk++; 2608 if(currentCutoff==MIN_APPROX_HITS_TO_KEEP){currentCutoff++;} 2609 } 2610 } 2611 2612 int reduction=Tools.min(Tools.max((hits)/HIT_REDUCTION_DIV, MAX_HITS_REDUCTION2), Tools.max(MAXIMUM_MAX_HITS_REDUCTION, keys/8)); 2613 // if(hits<3){reduction=0;} 2614 // else if(hits<4){reduction=Tools.min(1, reduction);} 2615 // else if(hits<5){reduction=Tools.min(2, reduction);} 2616 // else if(hits<6){reduction=Tools.min(3, reduction);} 2617 assert(reduction>=0); 2618 int r=hits-reduction; 2619 2620 r=Tools.max(mahtk, currentCutoff, r); 2621 2622 if(perfect){ 2623 r=Tools.max(r, keys-MAX_HITS_REDUCTION_PERFECT); 2624 } 2625 return r; 2626 } 2627 2628 public static final boolean USE_SLOWALK3=true && USE_EXTENDED_SCORE; 2629 public static boolean PRESCAN_QSCORE=true && USE_EXTENDED_SCORE; //Decrease quality and increase speed 2630 public static final boolean FILTER_BY_QSCORE=true; //Slightly lower quality, but very fast. 2631 public static final float MIN_SCORE_MULT=(USE_AFFINE_SCORE ? 0.15f : USE_EXTENDED_SCORE ? .3f : 0.10f); //Fraction of max score to use as cutoff. Default 0.15, max is 1; lower is more accurate 2632 public static final float MIN_QSCORE_MULT=0.025f; //Fraction of max score to use as cutoff. Default 0.15, max is 1; lower is more accurate 2633 public static final float MIN_QSCORE_MULT2=0.1f; 2634 static final float DYNAMIC_SCORE_THRESH=(USE_AFFINE_SCORE ? 0.84f : USE_EXTENDED_SCORE ? .84f : 0.6f); //Default .85f; lower is more accurate 2635 static final float DYNAMIC_QSCORE_THRESH=0.6f; //default .58f 2636 static final float DYNAMIC_QSCORE_THRESH_PERFECT=0.8f; //***$ 2637 static final float PRESCAN_QSCORE_THRESH=DYNAMIC_QSCORE_THRESH*.95f; //default 1.0f; lower is more accurate and 0 essentially sets PRESCAN_QSCORE=false 2638 static{ 2639 assert(MIN_SCORE_MULT>=0 && MIN_SCORE_MULT<1); 2640 assert(DYNAMIC_SCORE_THRESH>=0 && DYNAMIC_SCORE_THRESH<1); 2641 } 2642 2643 2644 } 2645