1 package align2; 2 3 import java.util.Arrays; 4 5 import dna.ChromosomeArray; 6 import dna.Data; 7 import shared.Shared; 8 import shared.Tools; 9 import stream.Read; 10 import stream.SiteScore; 11 12 /** 13 * @author Brian Bushnell 14 * @date Jun 20, 2013 15 * 16 */ 17 public abstract class MSA { 18 makeMSA(int maxRows_, int maxColumns_, String classname)19 public static final MSA makeMSA(int maxRows_, int maxColumns_, String classname){ 20 flatMode=false; 21 if("MultiStateAligner9ts".equalsIgnoreCase(classname)){ 22 return new MultiStateAligner9ts(maxRows_, maxColumns_); 23 }else if("MultiStateAligner10ts".equalsIgnoreCase(classname)){ 24 return new MultiStateAligner10ts(maxRows_, maxColumns_); 25 }else if("MultiStateAligner11ts".equalsIgnoreCase(classname)){ 26 if(Shared.USE_JNI){ 27 return new MultiStateAligner11tsJNI(maxRows_, maxColumns_); 28 }else{ 29 return new MultiStateAligner11ts(maxRows_, maxColumns_); 30 } 31 }else if("MultiStateAligner11tsJNI".equalsIgnoreCase(classname)){ 32 return new MultiStateAligner11tsJNI(maxRows_, maxColumns_); 33 }else if("MultiStateAligner9PacBio".equalsIgnoreCase(classname)){ 34 return new MultiStateAligner9PacBio(maxRows_, maxColumns_); 35 }else if("MultiStateAligner9Flat".equalsIgnoreCase(classname)){ 36 return new MultiStateAligner9Flat(maxRows_, maxColumns_); 37 }else if("MultiStateAligner9XFlat".equalsIgnoreCase(classname)){ 38 flatMode=true; 39 return new MultiStateAligner9XFlat(maxRows_, maxColumns_); 40 }else{ 41 assert(false) : "Unhandled MSA type: "+classname; 42 return new MultiStateAligner11ts(maxRows_, maxColumns_); 43 } 44 } 45 MSA(int maxRows_, int maxColumns_)46 public MSA(int maxRows_, int maxColumns_){ 47 maxRows=maxRows_; 48 maxColumns=maxColumns_; 49 } 50 51 /** return new int[] {rows, maxC, maxS, max}; 52 * Will not fill areas that cannot match minScore */ fillLimited(byte[] read, byte[] ref, int refStartLoc, int refEndLoc, int minScore, int[] gaps)53 public abstract int[] fillLimited(byte[] read, byte[] ref, int refStartLoc, int refEndLoc, int minScore, int[] gaps); 54 55 56 /** return new int[] {rows, maxC, maxS, max}; 57 * Will not fill areas that cannot match minScore */ fillUnlimited(byte[] read, byte[] ref, int refStartLoc, int refEndLoc, int[] gaps)58 public abstract int[] fillUnlimited(byte[] read, byte[] ref, int refStartLoc, int refEndLoc, int[] gaps); 59 60 @Deprecated 61 /** return new int[] {rows, maxC, maxS, max}; */ fillQ(byte[] read, byte[] ref, byte[] baseScores, int refStartLoc, int refEndLoc)62 public abstract int[] fillQ(byte[] read, byte[] ref, byte[] baseScores, int refStartLoc, int refEndLoc); 63 64 65 /** @return {score, bestRefStart, bestRefStop} */ 66 /** Generates the match string */ traceback(byte[] read, byte[] ref, int refStartLoc, int refEndLoc, int row, int col, int state, boolean gapped)67 public abstract byte[] traceback(byte[] read, byte[] ref, int refStartLoc, int refEndLoc, int row, int col, int state, boolean gapped); 68 69 70 /** Generates the match string */ traceback2(byte[] read, byte[] ref, int refStartLoc, int refEndLoc, int row, int col, int state)71 public abstract byte[] traceback2(byte[] read, byte[] ref, int refStartLoc, int refEndLoc, int row, int col, int state); 72 73 /** @return {score, bestRefStart, bestRefStop} */ score(final byte[] read, final byte[] ref, final int refStartLoc, final int refEndLoc, final int maxRow, final int maxCol, final int maxState, boolean gapped)74 public abstract int[] score(final byte[] read, final byte[] ref, final int refStartLoc, final int refEndLoc, 75 final int maxRow, final int maxCol, final int maxState, boolean gapped); 76 77 /** @return {score, bestRefStart, bestRefStop}, or {score, bestRefStart, bestRefStop, padLeft, padRight} if more padding is needed */ score2(final byte[] read, final byte[] ref, final int refStartLoc, final int refEndLoc, final int maxRow, final int maxCol, final int maxState)78 public abstract int[] score2(final byte[] read, final byte[] ref, final int refStartLoc, final int refEndLoc, 79 final int maxRow, final int maxCol, final int maxState); 80 81 82 /** Will not fill areas that cannot match minScore. 83 * @return {score, bestRefStart, bestRefStop} */ fillAndScoreLimited(byte[] read, byte[] ref, int refStartLoc, int refEndLoc, int minScore, int[] gaps)84 public final int[] fillAndScoreLimited(byte[] read, byte[] ref, int refStartLoc, int refEndLoc, int minScore, int[] gaps){ 85 int a=Tools.max(0, refStartLoc); 86 int b=Tools.min(ref.length-1, refEndLoc); 87 assert(b>=a); 88 89 int[] score; 90 91 if(verbose && b-a<500){ 92 System.err.println(new String(read)); 93 System.err.println(new String(ref, a, b-a)); 94 } 95 96 if(gaps==null){ 97 if(verbose){ 98 System.err.println("no gaps"); 99 } 100 if(b-a>=maxColumns){ 101 System.err.println("Warning: Max alignment columns exceeded; restricting range. "+(b-a+1)+" > "+maxColumns); 102 assert(false) : refStartLoc+", "+refEndLoc; 103 b=Tools.min(ref.length-1, a+maxColumns-1); 104 } 105 int[] max=fillLimited(read, ref, a, b, minScore, gaps); 106 score=(max==null ? null : score(read, ref, a, b, max[0], max[1], max[2], false)); 107 }else{ 108 if(verbose){System.err.println("\ngaps: "+Arrays.toString(gaps)+"\n"+new String(read)+"\ncoords: "+refStartLoc+", "+refEndLoc);} 109 int[] max=fillLimited(read, ref, a, b, minScore, gaps); 110 if(verbose){System.err.println("max: "+Arrays.toString(max));} 111 // score=(max==null ? null : score(read, grefbuffer, 0, greflimit, max[0], max[1], max[2], true)); 112 score=(max==null ? null : score(read, ref, a, b, max[0], max[1], max[2], true)); 113 } 114 return score; 115 } 116 fillAndScoreLimited(byte[] read, SiteScore ss, int thresh, int minScore)117 public final int[] fillAndScoreLimited(byte[] read, SiteScore ss, int thresh, int minScore){ 118 return fillAndScoreLimited(read, ss.chrom, ss.start, ss.stop, thresh, minScore, ss.gaps); 119 } 120 121 // public final int[] translateScoreFromGappedCoordinate(int[] score) 122 fillAndScoreLimited(byte[] read, int chrom, int start, int stop, int thresh, int minScore, int[] gaps)123 public final int[] fillAndScoreLimited(byte[] read, int chrom, int start, int stop, int thresh, int minScore, int[] gaps){ 124 return fillAndScoreLimited(read, Data.getChromosome(chrom).array, start-thresh, stop+thresh, minScore, gaps); 125 } 126 127 @Deprecated fillAndScoreQ(byte[] read, byte[] ref, int refStartLoc, int refEndLoc, byte[] baseScores)128 public final int[] fillAndScoreQ(byte[] read, byte[] ref, int refStartLoc, int refEndLoc, byte[] baseScores){ 129 int a=Tools.max(0, refStartLoc); 130 int b=Tools.min(ref.length-1, refEndLoc); 131 assert(b>=a); 132 if(b-a>=maxColumns){ 133 System.err.println("Warning: Max alignment columns exceeded; restricting range. "+(b-a+1)+" > "+maxColumns); 134 b=Tools.min(ref.length-1, a+maxColumns-1); 135 } 136 int[] max=fillQ(read, ref, baseScores, a, b); 137 // int[] score=score(read, ref, a, b, max[0], max[1], max[2]); 138 // return score; 139 return null; 140 } 141 142 @Deprecated fillAndScoreQ(byte[] read, SiteScore ss, int thresh, byte[] baseScores)143 public final int[] fillAndScoreQ(byte[] read, SiteScore ss, int thresh, byte[] baseScores){ 144 return fillAndScoreQ(read, ss.chrom, ss.start, ss.stop, thresh, baseScores); 145 } 146 147 @Deprecated fillAndScoreQ(byte[] read, int chrom, int start, int stop, int thresh, byte[] baseScores)148 public final int[] fillAndScoreQ(byte[] read, int chrom, int start, int stop, int thresh, byte[] baseScores){ 149 return fillAndScoreQ(read, Data.getChromosome(chrom).array, start-thresh, stop+thresh, baseScores); 150 } 151 scoreNoIndels(byte[] read, SiteScore ss)152 public final int scoreNoIndels(byte[] read, SiteScore ss){ 153 ChromosomeArray cha=Data.getChromosome(ss.chrom); 154 return scoreNoIndels(read, cha.array, ss.start); 155 } 156 scoreNoIndels(byte[] read, final int chrom, final int refStart)157 public final int scoreNoIndels(byte[] read, final int chrom, final int refStart){ 158 ChromosomeArray cha=Data.getChromosome(chrom); 159 return scoreNoIndels(read, cha.array, refStart); 160 } 161 scoreNoIndels(byte[] read, SiteScore ss, byte[] baseScores)162 public final int scoreNoIndels(byte[] read, SiteScore ss, byte[] baseScores){ 163 ChromosomeArray cha=Data.getChromosome(ss.chrom); 164 return scoreNoIndels(read, cha.array, baseScores, ss.start); 165 } 166 scoreNoIndels(byte[] read, final int chrom, final int refStart, byte[] baseScores)167 public final int scoreNoIndels(byte[] read, final int chrom, final int refStart, byte[] baseScores){ 168 ChromosomeArray cha=Data.getChromosome(chrom); 169 return scoreNoIndels(read, cha.array, baseScores, refStart); 170 } 171 172 // public final int scoreNoIndels(byte[] read, final int chrom, final int refStart){ 173 174 /** Calculates score based on an array from Index. */ calcAffineScore(int[] locArray, byte[] baseScores, byte[] bases)175 public abstract int calcAffineScore(int[] locArray, byte[] baseScores, byte[] bases); 176 177 /** Calculates score based on an array from Index using a kfilter. Slightly slower. */ calcAffineScore(int[] locArray, byte[] baseScores, byte[] bases, int minContig)178 public abstract int calcAffineScore(int[] locArray, byte[] baseScores, byte[] bases, int minContig); 179 scoreNoIndels(byte[] read, byte[] ref, final int refStart)180 public abstract int scoreNoIndels(byte[] read, byte[] ref, final int refStart); scoreNoIndels(byte[] read, byte[] ref, final int refStart, final SiteScore ss)181 public int scoreNoIndels(byte[] read, byte[] ref, final int refStart, final SiteScore ss){ 182 throw new RuntimeException("Unimplemented method in class "+this.getClass()); 183 } 184 genMatchNoIndels(byte[] read, byte[] ref, final int refStart)185 public abstract byte[] genMatchNoIndels(byte[] read, byte[] ref, final int refStart); 186 scoreNoIndels(byte[] read, byte[] ref, byte[] baseScores, final int refStart)187 public abstract int scoreNoIndels(byte[] read, byte[] ref, byte[] baseScores, final int refStart); scoreNoIndels(byte[] read, byte[] ref, byte[] baseScores, final int refStart, SiteScore ss)188 public int scoreNoIndels(byte[] read, byte[] ref, byte[] baseScores, final int refStart, SiteScore ss){ 189 throw new RuntimeException("Unimplemented method in class "+this.getClass()); 190 } 191 scoreNoIndelsAndMakeMatchString(byte[] read, byte[] ref, byte[] baseScores, final int refStart, byte[][] matchReturn)192 public abstract int scoreNoIndelsAndMakeMatchString(byte[] read, byte[] ref, byte[] baseScores, final int refStart, byte[][] matchReturn); 193 scoreNoIndelsAndMakeMatchString(byte[] read, byte[] ref, final int refStart, byte[][] matchReturn)194 public abstract int scoreNoIndelsAndMakeMatchString(byte[] read, byte[] ref, final int refStart, byte[][] matchReturn); 195 196 /** Assumes match string is in long format */ toLocalAlignment(Read r, SiteScore ss, byte[] basesM, int minToClip, float matchPointsMult)197 public final boolean toLocalAlignment(Read r, SiteScore ss, byte[] basesM, int minToClip, float matchPointsMult){ 198 final byte[] match=r.match, bases=(r.strand()==Shared.PLUS ? r.bases : basesM); 199 if(match==null || match.length<1){return false;} 200 201 assert(match==ss.match); 202 assert(match==r.match); 203 assert(r.start==ss.start); 204 assert(r.stop==ss.stop); 205 206 if(r.containsXY2()){ 207 if(verbose){System.err.println("\nInitial0:");} 208 if(verbose){System.err.println("0: match="+new String(match));} 209 if(verbose){System.err.println("0: r.start="+r.start+", r.stop="+r.stop+"; len="+bases.length+"; reflen="+(r.stop-r.start+1));} 210 ss.fixXY(bases, false, this); 211 r.start=ss.start; 212 r.stop=ss.stop; 213 if(verbose){System.err.println("\nAfter fixXY:");} 214 if(verbose){System.err.println("0: match="+new String(match));} 215 if(verbose){System.err.println("0: r.start="+r.start+", r.stop="+r.stop+"; len="+bases.length+"; reflen="+(r.stop-r.start+1));} 216 assert(match==ss.match); 217 assert(match==r.match); 218 assert(r.start==ss.start); 219 assert(r.stop==ss.stop); 220 assert(ss.lengthsAgree()) : ss.mappedLength()+"!="+ss.matchLength()+"\n"+ss+"\n\n"+r+"\n"; 221 } 222 assert(ss.lengthsAgree()) : ss.mappedLength()+"!="+ss.matchLength()+"\n"+ss+"\n\n"+r+"\n"; 223 224 int maxScore=-1; 225 226 int startLocC=-1; 227 int stopLocC=-1; 228 int lastZeroC=0; 229 230 int startLocM=-1; 231 int stopLocM=-1; 232 int lastZeroM=0; 233 234 int startLocR=-1; 235 int stopLocR=-1; 236 int lastZeroR=0; 237 238 byte mode=match[0], prevMode='0'; 239 int current=0, prevStreak=0; 240 int cpos=0; 241 int rpos=r.start; 242 int score=0; 243 244 if(verbose){System.err.println("\nInitial:");} 245 if(verbose){System.err.println("A: r.start="+r.start+", r.stop="+r.stop+"; rpos="+rpos+"; len="+bases.length+"; reflen="+(r.stop-r.start+1));} 246 if(verbose){System.err.println("A: match=\n"+new String(match));} 247 if(verbose){System.err.println(new String(bases));} 248 if(verbose){System.err.println(Data.getChromosome(r.chrom).getString(r.start, Tools.max(r.stop, r.start+bases.length-1)));} 249 250 if(verbose){ 251 int calcscore=score(match); 252 System.err.println("A: score="+r.mapScore+", ss.slowScore="+ss.slowScore+", calcscore="+calcscore); 253 // assert(ss.slowScore<=calcscore); //May be lower due to ambig3. I found a case where this line fails, possibly due to long deletions? 254 } 255 256 for(int mpos=0; mpos<match.length; mpos++){ 257 byte c=match[mpos]; 258 259 if(mode==c){ 260 current++; 261 }else{ 262 if(mode=='m'){ 263 if(score<=0){ 264 score=0; 265 lastZeroC=cpos; 266 lastZeroM=mpos-current; 267 lastZeroR=rpos; 268 } 269 int add=calcMatchScore(current); 270 score+=(matchPointsMult*add); 271 // if(prevMode=='N' || prevMode=='R'){score=score+POINTS_MATCH2()-POINTS_MATCH();} //Don't penalize first match after N 272 cpos+=current; 273 rpos+=current; 274 if(score>maxScore){ 275 maxScore=score; 276 startLocC=lastZeroC; 277 startLocM=lastZeroM; 278 startLocR=lastZeroR; 279 stopLocC=cpos-1; 280 stopLocM=mpos-1; 281 stopLocR=rpos-1; 282 } 283 }else if(mode=='S'){ 284 score+=calcSubScore(current); 285 if(prevMode=='N' || prevMode=='R'){score=score+POINTS_SUB2()-POINTS_SUB();} //Don't penalize first sub after N 286 else if(prevMode=='m' && prevStreak<2){score=score+POINTS_SUBR()-POINTS_SUB();} 287 cpos+=current; 288 rpos+=current; 289 }else if(mode=='D'){ 290 score+=calcDelScore(current, true); 291 rpos+=current; 292 }else if(mode=='I'){ 293 score+=calcInsScore(current); 294 cpos+=current; 295 }else if(mode=='C'){ 296 cpos+=current; 297 rpos+=current; 298 }else if(mode=='X' || mode=='Y'){ 299 score+=calcInsScore(current);//TODO: Consider changing XY to subs 300 cpos+=current; 301 rpos+=current; 302 }else if(mode=='N'){ 303 score+=calcNocallScore(current); 304 cpos+=current; 305 rpos+=current; 306 }else if(mode=='R'){ 307 score+=calcNorefScore(current); 308 cpos+=current; 309 rpos+=current; 310 }else{ 311 assert(false) : "Unhandled symbol "+mode+"\n"+(char)mode+"\n"+new String(match)+"\n"+new String(bases); 312 } 313 if(verbose){System.err.println("mode "+(char)mode+"->"+(char)c+"; rpos="+rpos);} 314 prevMode=mode; 315 prevStreak=current; 316 mode=c; 317 current=1; 318 } 319 } 320 if(current>0){ 321 assert(mode==match[match.length-1]); 322 if(mode=='m'){ 323 if(score<=0){ 324 score=0; 325 lastZeroC=cpos; 326 lastZeroM=match.length-current; 327 lastZeroR=rpos; 328 } 329 int add=calcMatchScore(current); 330 score+=(matchPointsMult*add); 331 // if(prevMode=='N' || prevMode=='R'){score=score+POINTS_MATCH2()-POINTS_MATCH();} //Don't penalize first match after N 332 cpos+=current; 333 rpos+=current; 334 if(score>maxScore){ 335 maxScore=score; 336 startLocC=lastZeroC; 337 startLocM=lastZeroM; 338 startLocR=lastZeroR; 339 stopLocC=cpos-1; 340 stopLocM=match.length-1; 341 stopLocR=rpos-1; 342 } 343 }else if(mode=='S'){ 344 score+=calcSubScore(current); 345 if(prevMode=='N' || prevMode=='R'){score=score+POINTS_SUB2()-POINTS_SUB();} //Don't penalize first sub after N 346 else if(prevMode=='m' && prevStreak<2){score=score+POINTS_SUBR()-POINTS_SUB();} 347 cpos+=current; 348 rpos+=current; 349 }else if(mode=='D'){ 350 score+=calcDelScore(current, true); 351 rpos+=current; 352 }else if(mode=='I'){ 353 score+=calcInsScore(current); 354 cpos+=current; 355 }else if(mode=='C'){ 356 cpos+=current; 357 rpos+=current; 358 }else if(mode=='X' || mode=='Y'){ 359 score+=calcInsScore(current); 360 cpos+=current; 361 rpos+=current; 362 }else if(mode=='N'){ 363 score+=calcNocallScore(current); 364 cpos+=current; 365 rpos+=current; 366 }else if(mode=='R'){ 367 score+=calcNorefScore(current); 368 cpos+=current; 369 rpos+=current; 370 }else if(mode!=0){ 371 assert(false) : "Unhandled symbol "+mode+"\n"+(char)mode+"\n"+new String(match)+"\n"+new String(bases); 372 } 373 if(verbose){System.err.println("mode "+(char)mode+"->end; rpos="+rpos);} 374 } 375 376 if(startLocC<0 || stopLocC<0){ 377 //This can happen if there are zero matches. Which would be rare, but I have seen it occur. 378 r.clearMapping(); 379 // assert(false) : "Failed: "+startLocC+", "+stopLocC+"\n"+r+"\n"+r.mate+"\n"+r.toFastq()+"\n"+(r.mate==null ? "null" : r.mate.toFastq()); 380 return false; 381 } 382 383 384 if(verbose){System.err.println("A: r.start="+r.start+", r.stop="+r.stop+"; rpos="+rpos+"; len="+bases.length+"; reflen="+(r.stop-r.start+1));} 385 386 assert(rpos==r.stop+1) : "\n\n\n"+rpos+"!="+(r.stop+1)+"\n"+r+"\n\n"+ 387 (r.topSite()==null ? "null" : r.topSite().mappedLength()+", "+r.topSite().matchLength()+", "+r.topSite().start+", "+r.topSite().stop+"\n"+r.topSite()); 388 389 if(verbose){System.err.println("B: rpos="+rpos+", startLocR="+startLocR+", stopLocR="+stopLocR);} 390 391 int headTrimR=startLocC; 392 int headTrimM=startLocM; 393 int tailTrimR=bases.length-stopLocC-1; 394 int tailTrimM=match.length-stopLocM-1; 395 396 if(verbose){System.err.println("C: headTrimR="+headTrimR+", headTrimM="+headTrimM+", tailTrimR="+tailTrimR+", tailTrimM="+tailTrimM);} 397 398 if(headTrimR<=minToClip && headTrimM<=minToClip){ 399 headTrimR=headTrimM=0; 400 } 401 if(tailTrimR<=minToClip && tailTrimM<=minToClip){ 402 tailTrimR=tailTrimM=0; 403 } 404 if(headTrimR==0 && headTrimM==0 && tailTrimR==0 && tailTrimM==0){ 405 return false; 406 } 407 //Do trimming 408 final int headDelta=headTrimR-headTrimM; 409 final int tailDelta=tailTrimR-tailTrimM; 410 final byte[] match2; 411 412 if(verbose){System.err.println("D: headTrimR="+headTrimR+", headTrimM="+headTrimM+", tailTrimR="+tailTrimR+", tailTrimM="+tailTrimM);} 413 if(verbose){System.err.println("D: headDelta="+headDelta+", tailDelta="+tailDelta);} 414 415 if(headDelta==0 && tailDelta==0){ 416 //Length-neutral trimming 417 match2=match; 418 for(int i=0; i<headTrimM; i++){match[i]='C';} 419 for(int i=match.length-tailTrimM; i<match.length; i++){match[i]='C';} 420 }else{ 421 final int newlen=match.length-headTrimM-tailTrimM+headTrimR+tailTrimR; 422 match2=new byte[newlen]; 423 for(int i=0; i<headTrimR; i++){match2[i]='C';} 424 for(int i=match2.length-tailTrimR; i<match2.length; i++){match2[i]='C';} 425 for(int i=headTrimM, i2=headTrimR, lim=match2.length-tailTrimR; i2<lim; i++, i2++){ 426 match2[i2]=match[i]; 427 } 428 } 429 430 assert(ss==null || ((ss.start==r.start) && (ss.stop==r.stop) && (ss.strand==r.strand()) && (ss.chrom==r.chrom) && (ss.match==r.match))) : 431 "\nr="+r+"\nr2="+r.mate+"\nss=\n"+ss+"\n"+(ss==null ? "ss is null" : ((ss.start==r.start)+", "+(ss.stop==r.stop)+", "+ 432 (ss.strand==r.strand())+", "+(ss.chrom==r.chrom)+", "+(ss.match==r.match))); 433 434 if(headTrimR!=0){r.start=startLocR-headTrimR;} 435 if(tailTrimR!=0){r.stop=stopLocR+tailTrimR;} 436 r.match=match2; 437 438 if(matchPointsMult!=1f){ 439 maxScore=score(match); 440 } 441 if(ss!=null){maxScore=Tools.max(maxScore, ss.slowScore);} 442 r.mapScore=maxScore; 443 444 if(verbose){System.err.println("E: r.start="+r.start+", r.stop="+r.stop);} 445 446 if(ss!=null){ 447 assert(maxScore>=ss.slowScore) : maxScore+", "+ss.slowScore+"\n"+r.toFastq(); 448 ss.match=r.match; 449 ss.setLimits(r.start, r.stop); 450 int pairedScore=ss.pairedScore>0 ? Tools.max(ss.pairedScore+(maxScore-ss.slowScore), 0) : 0; 451 } 452 453 if(!ss.perfect && ss.isPerfect(bases)){ 454 ss.perfect=ss.semiperfect=true; 455 r.setPerfect(true); 456 Arrays.fill(r.match, (byte)'m'); 457 ss.setSlowScore(maxScore); 458 }else if(!ss.semiperfect && ss.isSemiPerfect(bases)){ 459 ss.semiperfect=true; 460 ChromosomeArray cha=Data.getChromosome(ss.chrom); 461 r.match=ss.match=genMatchNoIndels(bases, cha.array, ss.start); 462 return toLocalAlignment(r, ss, basesM, minToClip, matchPointsMult); 463 } 464 return true; 465 } 466 467 468 /** Works in short or long format. */ score(byte[] match)469 public final int score(byte[] match){ 470 if(match==null || match.length<1){return 0;} 471 472 byte mode=match[0], prevMode='0'; 473 int current=0, prevStreak=0; 474 int score=0; 475 boolean hasDigit=false; 476 477 for(int mpos=0; mpos<match.length; mpos++){ 478 byte c=match[mpos]; 479 480 if(mode==c){ 481 current++; 482 }else if(Tools.isDigit(c)){ 483 current=(hasDigit ? current : 0)*10+c-'0'; 484 hasDigit=true; 485 }else{ 486 if(mode=='m'){ 487 score+=calcMatchScore(current); 488 // if(prevMode=='N' || prevMode=='R'){score=score+POINTS_MATCH2()-POINTS_MATCH();} //Don't penalize first match after N 489 }else if(mode=='S'){ 490 score+=calcSubScore(current); 491 if(prevMode=='N' || prevMode=='R'){score=score+POINTS_SUB2()-POINTS_SUB();} //Don't penalize first sub after N 492 else if(prevMode=='m' && prevStreak<2){score=score+POINTS_SUBR()-POINTS_SUB();} 493 }else if(mode=='D'){ 494 score+=calcDelScore(current, true); 495 }else if(mode=='I'){ 496 score+=calcInsScore(current); 497 }else if(mode=='C'){ 498 //do nothing 499 }else if(mode=='X' || mode=='Y'){ 500 score+=calcInsScore(current); 501 }else if(mode=='N'){ 502 score+=calcNocallScore(current); 503 }else if(mode=='R'){ 504 score+=calcNorefScore(current); 505 }else{ 506 assert(false) : "Unhandled symbol "+mode+"\n"+(char)mode+"\n"+new String(match); 507 } 508 if(verbose){System.err.println("mode "+(char)mode+"->"+(char)c+"\tcurrent="+current+"\tscore="+score);} 509 prevMode=mode; 510 prevStreak=current; 511 mode=c; 512 current=1; 513 hasDigit=false; 514 } 515 } 516 if(current>0){ 517 assert(hasDigit || mode==match[match.length-1]); 518 if(mode=='m'){ 519 score+=calcMatchScore(current); 520 // if(prevMode=='N' || prevMode=='R'){score=score+POINTS_MATCH2()-POINTS_MATCH();} //Don't penalize first match after N 521 }else if(mode=='S'){ 522 score+=calcSubScore(current); 523 if(prevMode=='N' || prevMode=='R'){score=score+POINTS_SUB2()-POINTS_SUB();} //Don't penalize first sub after N 524 else if(prevMode=='m' && prevStreak<2){score=score+POINTS_SUBR()-POINTS_SUB();} 525 }else if(mode=='D'){ 526 score+=calcDelScore(current, true); 527 }else if(mode=='I'){ 528 score+=calcInsScore(current); 529 }else if(mode=='C'){ 530 //do nothing 531 }else if(mode=='X' || mode=='Y'){ 532 score+=calcInsScore(current); 533 }else if(mode=='N'){ 534 score+=calcNocallScore(current); 535 }else if(mode=='R'){ 536 score+=calcNorefScore(current); 537 }else if(mode!=0){ 538 assert(false) : "Unhandled symbol "+mode+"\n"+(char)mode+"\n"+new String(match); 539 } 540 if(verbose){System.err.println("mode "+(char)mode+"->end; score="+score);} 541 } 542 543 return score; 544 } 545 maxQuality(int numBases)546 public abstract int maxQuality(int numBases); 547 maxQuality(byte[] baseScores)548 public abstract int maxQuality(byte[] baseScores); 549 maxImperfectScore(int numBases)550 public abstract int maxImperfectScore(int numBases); 551 maxImperfectScore(byte[] baseScores)552 public abstract int maxImperfectScore(byte[] baseScores); 553 toString(int[] a)554 public static final String toString(int[] a){ 555 556 int width=7; 557 558 StringBuilder sb=new StringBuilder((a.length+1)*width+2); 559 for(int num : a){ 560 String s=" "+num; 561 int spaces=width-s.length(); 562 assert(spaces>=0) : width+", "+s.length()+", "+s+", "+num+", "+spaces; 563 for(int i=0; i<spaces; i++){sb.append(' ');} 564 sb.append(s); 565 } 566 567 return sb.toString(); 568 } 569 printMatrix(int[][][] packed, int readlen, int reflen, int TIMEMASK, int SCOREOFFSET)570 static void printMatrix(int[][][] packed, int readlen, int reflen, int TIMEMASK, int SCOREOFFSET){ 571 for(int mode=0; mode<packed.length; mode++){ 572 printMatrix(packed, readlen, reflen, TIMEMASK, SCOREOFFSET, mode); 573 } 574 } 575 printMatrix(int[][][] packed, int readlen, int reflen, int TIMEMASK, int SCOREOFFSET, int mode)576 static void printMatrix(int[][][] packed, int readlen, int reflen, int TIMEMASK, int SCOREOFFSET, int mode){ 577 final int ylim=Tools.min(readlen+1, packed[mode].length); 578 final int xlim=Tools.min(reflen+1, packed[mode].length); 579 for(int row=0; row<ylim; row++){ 580 System.out.println(toScorePacked(packed[mode][row], SCOREOFFSET, xlim)); 581 } 582 System.out.println(); 583 for(int row=0; row<ylim; row++){ 584 System.out.println(toTimePacked(packed[mode][row], TIMEMASK, xlim)); 585 } 586 System.out.println(); 587 } 588 toTimePacked(int[] a, int TIMEMASK, int lim)589 public static final String toTimePacked(int[] a, int TIMEMASK, int lim){ 590 int width=6; 591 lim=Tools.min(lim, a.length); 592 593 StringBuilder sb=new StringBuilder((a.length+1)*width+2); 594 for(int j=0; j<lim; j++){ 595 int num=a[j]&TIMEMASK; 596 String s=" "+num; 597 int spaces=width-s.length(); 598 assert(spaces>=0) : width+", "+s.length()+", "+s+", "+num+", "+spaces; 599 for(int i=0; i<spaces; i++){sb.append(' ');} 600 sb.append(s); 601 } 602 603 return sb.toString(); 604 } 605 toScorePacked(int[] a, int SCOREOFFSET, int lim)606 public static final String toScorePacked(int[] a, int SCOREOFFSET, int lim){ 607 int width=6; 608 lim=Tools.min(lim, a.length); 609 610 // String minString=" -"; 611 // String maxString=" "; 612 // while(minString.length()<width){minString+='9';} 613 // while(maxString.length()<width){maxString+='9';} 614 615 String minString=" -"; 616 String maxString=" +"; 617 while(minString.length()<width){minString=minString+' ';} 618 while(maxString.length()<width){maxString=maxString+' ';} 619 620 StringBuilder sb=new StringBuilder((a.length+1)*width+2); 621 for(int j=0; j<lim; j++){ 622 int num=a[j]>>SCOREOFFSET; 623 String s=" "+num; 624 if(s.length()>width){s=num>0 ? maxString : minString;} 625 int spaces=width-s.length(); 626 assert(spaces>=0) : width+", "+s.length()+", "+s+", "+num+", "+spaces; 627 for(int i=0; i<spaces; i++){sb.append(' ');} 628 sb.append(s); 629 } 630 631 return sb.toString(); 632 } 633 toString(byte[] a)634 public static final String toString(byte[] a){ 635 636 int width=6; 637 638 StringBuilder sb=new StringBuilder((a.length+1)*width+2); 639 for(int num : a){ 640 String s=" "+num; 641 int spaces=width-s.length(); 642 assert(spaces>=0); 643 for(int i=0; i<spaces; i++){sb.append(' ');} 644 sb.append(s); 645 } 646 647 return sb.toString(); 648 } 649 toString(byte[] ref, int startLoc, int stopLoc)650 public static final String toString(byte[] ref, int startLoc, int stopLoc){ 651 StringBuilder sb=new StringBuilder(stopLoc-startLoc+1); 652 for(int i=startLoc; i<=stopLoc; i++){sb.append((char)ref[i]);} 653 return sb.toString(); 654 } 655 calcMatchScore(int len)656 public final int calcMatchScore(int len){ 657 assert(len>0) : len; 658 return POINTS_MATCH()+(len-1)*POINTS_MATCH2(); 659 } 660 calcSubScore(int len)661 public final int calcSubScore(int len){ 662 assert(len>0) : len; 663 final int lim3=LIMIT_FOR_COST_3(); 664 int score=POINTS_SUB(); 665 if(len>lim3){ 666 score+=(len-lim3)*POINTS_SUB3(); 667 len=lim3; 668 } 669 if(len>1){ 670 score+=(len-1)*POINTS_SUB2(); 671 } 672 return score; 673 } 674 calcNorefScore(int len)675 public final int calcNorefScore(int len){return len*POINTS_NOREF();} 676 calcNocallScore(int len)677 public final int calcNocallScore(int len){return len*POINTS_NOCALL();} 678 calcDelScore(int len, boolean approximateGaps)679 public abstract int calcDelScore(int len, boolean approximateGaps); 680 calcInsScore(int len)681 public abstract int calcInsScore(int len); 682 minIdToMinRatio(double minid, String classname)683 public static final float minIdToMinRatio(double minid, String classname){ 684 if("MultiStateAligner9ts".equalsIgnoreCase(classname)){ 685 return MultiStateAligner9ts.minIdToMinRatio(minid); 686 }else if("MultiStateAligner10ts".equalsIgnoreCase(classname)){ 687 return MultiStateAligner10ts.minIdToMinRatio(minid); 688 }else if("MultiStateAligner11ts".equalsIgnoreCase(classname)){ 689 return MultiStateAligner11ts.minIdToMinRatio(minid); 690 }else if("MultiStateAligner9PacBio".equalsIgnoreCase(classname)){ 691 return MultiStateAligner9PacBio.minIdToMinRatio(minid); 692 }else if("MultiStateAligner9Flat".equalsIgnoreCase(classname)){ 693 return MultiStateAligner9Flat.minIdToMinRatio(minid); 694 }else if("MultiStateAligner9XFlat".equalsIgnoreCase(classname)){ 695 return MultiStateAligner9XFlat.minIdToMinRatio(minid); 696 }else{ 697 assert(false) : "Unhandled MSA type: "+classname; 698 return MultiStateAligner11ts.minIdToMinRatio(minid); 699 } 700 } 701 702 static final int GAPBUFFER=Shared.GAPBUFFER; 703 static final int GAPBUFFER2=Shared.GAPBUFFER2; 704 static final int GAPLEN=Shared.GAPLEN; 705 static final int MINGAP=Shared.MINGAP; 706 static final int GAPCOST=Shared.GAPCOST; 707 static final byte GAPC=Shared.GAPC; 708 709 /** Seemingly to clear out prior data from the gref. Not sure what else it's used for. */ 710 static final int GREFLIMIT2_CUSHION=128; //Tools.max(GAPBUFFER2, GAPLEN); 711 712 713 /**DO NOT MODIFY*/ getGrefbuffer()714 public abstract byte[] getGrefbuffer(); 715 716 // public final int[] vertLimit; 717 // public final int[] horizLimit; 718 showVertLimit()719 public abstract CharSequence showVertLimit(); showHorizLimit()720 public abstract CharSequence showHorizLimit(); 721 722 //// public static final int MODEBITS=2; 723 // public static final int TIMEBITS=11; 724 // public static final int SCOREBITS=32-TIMEBITS; 725 // public static final int MAX_TIME=((1<<TIMEBITS)-1); 726 // public static final int MAX_SCORE=((1<<(SCOREBITS-1))-1)-2000; 727 // public static final int MIN_SCORE=0-MAX_SCORE; //Keeps it 1 point above "BAD". 728 // 729 //// public static final int MODEOFFSET=0; //Always zero. 730 //// public static final int TIMEOFFSET=0; SCOREOFFSET()731 public abstract int SCOREOFFSET(); 732 // 733 //// public static final int MODEMASK=~((-1)<<MODEBITS); 734 //// public static final int TIMEMASK=(~((-1)<<TIMEBITS))<<TIMEOFFSET; 735 // public static final int TIMEMASK=~((-1)<<TIMEBITS); 736 // public static final int SCOREMASK=(~((-1)<<SCOREBITS))<<SCOREOFFSET; 737 738 static final byte MODE_MS=0; 739 static final byte MODE_DEL=1; 740 static final byte MODE_INS=2; 741 static final byte MODE_SUB=3; 742 743 //These are to allow constants to be overridden POINTS_NOREF()744 public abstract int POINTS_NOREF(); POINTS_NOCALL()745 public abstract int POINTS_NOCALL(); POINTS_MATCH()746 public abstract int POINTS_MATCH(); POINTS_MATCH2()747 public abstract int POINTS_MATCH2(); POINTS_COMPATIBLE()748 public abstract int POINTS_COMPATIBLE(); POINTS_SUB()749 public abstract int POINTS_SUB(); POINTS_SUBR()750 public abstract int POINTS_SUBR(); POINTS_SUB2()751 public abstract int POINTS_SUB2(); POINTS_SUB3()752 public abstract int POINTS_SUB3(); POINTS_MATCHSUB()753 public abstract int POINTS_MATCHSUB(); POINTS_INS()754 public abstract int POINTS_INS(); POINTS_INS2()755 public abstract int POINTS_INS2(); POINTS_INS3()756 public abstract int POINTS_INS3(); POINTS_INS4()757 public abstract int POINTS_INS4(); POINTS_DEL()758 public abstract int POINTS_DEL(); POINTS_DEL2()759 public abstract int POINTS_DEL2(); POINTS_DEL3()760 public abstract int POINTS_DEL3(); POINTS_DEL4()761 public abstract int POINTS_DEL4(); POINTS_DEL5()762 public abstract int POINTS_DEL5(); POINTS_DEL_REF_N()763 public abstract int POINTS_DEL_REF_N(); POINTS_GAP()764 public abstract int POINTS_GAP(); 765 TIMESLIP()766 public abstract int TIMESLIP(); MASK5()767 public abstract int MASK5(); 768 BARRIER_I1()769 abstract int BARRIER_I1(); BARRIER_D1()770 abstract int BARRIER_D1(); 771 LIMIT_FOR_COST_3()772 public abstract int LIMIT_FOR_COST_3(); LIMIT_FOR_COST_4()773 public abstract int LIMIT_FOR_COST_4(); LIMIT_FOR_COST_5()774 public abstract int LIMIT_FOR_COST_5(); 775 BAD()776 public abstract int BAD(); 777 778 779 public final int maxRows; 780 public final int maxColumns; 781 782 public long iterationsLimited=0; 783 public long iterationsUnlimited=0; 784 785 public boolean verbose=false; 786 public boolean verbose2=false; 787 788 public static int bandwidth=0; 789 public static float bandwidthRatio=0; 790 public static boolean flatMode=false; 791 792 public static final int MIN_SCORE_ADJUST=120; 793 794 } 795