1 package align2;
2 
3 import java.util.Arrays;
4 
5 import dna.ChromosomeArray;
6 import dna.Data;
7 import shared.Shared;
8 import shared.Tools;
9 import stream.Read;
10 import stream.SiteScore;
11 
12 /**
13  * @author Brian Bushnell
14  * @date Jun 20, 2013
15  *
16  */
17 public abstract class MSA {
18 
makeMSA(int maxRows_, int maxColumns_, String classname)19 	public static final MSA makeMSA(int maxRows_, int maxColumns_, String classname){
20 		flatMode=false;
21 		if("MultiStateAligner9ts".equalsIgnoreCase(classname)){
22 			return new MultiStateAligner9ts(maxRows_, maxColumns_);
23 		}else if("MultiStateAligner10ts".equalsIgnoreCase(classname)){
24 			return new MultiStateAligner10ts(maxRows_, maxColumns_);
25 		}else if("MultiStateAligner11ts".equalsIgnoreCase(classname)){
26 			if(Shared.USE_JNI){
27 				return new MultiStateAligner11tsJNI(maxRows_, maxColumns_);
28 			}else{
29 				return new MultiStateAligner11ts(maxRows_, maxColumns_);
30 			}
31 		}else if("MultiStateAligner11tsJNI".equalsIgnoreCase(classname)){
32 			return new MultiStateAligner11tsJNI(maxRows_, maxColumns_);
33 		}else if("MultiStateAligner9PacBio".equalsIgnoreCase(classname)){
34 			return new MultiStateAligner9PacBio(maxRows_, maxColumns_);
35 		}else if("MultiStateAligner9Flat".equalsIgnoreCase(classname)){
36 			return new MultiStateAligner9Flat(maxRows_, maxColumns_);
37 		}else if("MultiStateAligner9XFlat".equalsIgnoreCase(classname)){
38 			flatMode=true;
39 			return new MultiStateAligner9XFlat(maxRows_, maxColumns_);
40 		}else{
41 			assert(false) : "Unhandled MSA type: "+classname;
42 			return new MultiStateAligner11ts(maxRows_, maxColumns_);
43 		}
44 	}
45 
MSA(int maxRows_, int maxColumns_)46 	public MSA(int maxRows_, int maxColumns_){
47 		maxRows=maxRows_;
48 		maxColumns=maxColumns_;
49 	}
50 
51 	/** return new int[] {rows, maxC, maxS, max};
52 	 * Will not fill areas that cannot match minScore */
fillLimited(byte[] read, byte[] ref, int refStartLoc, int refEndLoc, int minScore, int[] gaps)53 	public abstract int[] fillLimited(byte[] read, byte[] ref, int refStartLoc, int refEndLoc, int minScore, int[] gaps);
54 
55 
56 	/** return new int[] {rows, maxC, maxS, max};
57 	 * Will not fill areas that cannot match minScore */
fillUnlimited(byte[] read, byte[] ref, int refStartLoc, int refEndLoc, int[] gaps)58 	public abstract int[] fillUnlimited(byte[] read, byte[] ref, int refStartLoc, int refEndLoc, int[] gaps);
59 
60 	@Deprecated
61 	/** return new int[] {rows, maxC, maxS, max}; */
fillQ(byte[] read, byte[] ref, byte[] baseScores, int refStartLoc, int refEndLoc)62 	public abstract int[] fillQ(byte[] read, byte[] ref, byte[] baseScores, int refStartLoc, int refEndLoc);
63 
64 
65 	/** @return {score, bestRefStart, bestRefStop} */
66 	/** Generates the match string */
traceback(byte[] read, byte[] ref, int refStartLoc, int refEndLoc, int row, int col, int state, boolean gapped)67 	public abstract byte[] traceback(byte[] read, byte[] ref, int refStartLoc, int refEndLoc, int row, int col, int state, boolean gapped);
68 
69 
70 	/** Generates the match string */
traceback2(byte[] read, byte[] ref, int refStartLoc, int refEndLoc, int row, int col, int state)71 	public abstract byte[] traceback2(byte[] read, byte[] ref, int refStartLoc, int refEndLoc, int row, int col, int state);
72 
73 	/** @return {score, bestRefStart, bestRefStop} */
score(final byte[] read, final byte[] ref, final int refStartLoc, final int refEndLoc, final int maxRow, final int maxCol, final int maxState, boolean gapped)74 	public abstract int[] score(final byte[] read, final byte[] ref, final int refStartLoc, final int refEndLoc,
75 			final int maxRow, final int maxCol, final int maxState, boolean gapped);
76 
77 	/** @return {score, bestRefStart, bestRefStop}, or {score, bestRefStart, bestRefStop, padLeft, padRight} if more padding is needed */
score2(final byte[] read, final byte[] ref, final int refStartLoc, final int refEndLoc, final int maxRow, final int maxCol, final int maxState)78 	public abstract int[] score2(final byte[] read, final byte[] ref, final int refStartLoc, final int refEndLoc,
79 			final int maxRow, final int maxCol, final int maxState);
80 
81 
82 	/** Will not fill areas that cannot match minScore.
83 	 * @return {score, bestRefStart, bestRefStop}  */
fillAndScoreLimited(byte[] read, byte[] ref, int refStartLoc, int refEndLoc, int minScore, int[] gaps)84 	public final int[] fillAndScoreLimited(byte[] read, byte[] ref, int refStartLoc, int refEndLoc, int minScore, int[] gaps){
85 		int a=Tools.max(0, refStartLoc);
86 		int b=Tools.min(ref.length-1, refEndLoc);
87 		assert(b>=a);
88 
89 		int[] score;
90 
91 		if(verbose && b-a<500){
92 			System.err.println(new String(read));
93 			System.err.println(new String(ref, a, b-a));
94 		}
95 
96 		if(gaps==null){
97 			if(verbose){
98 				System.err.println("no gaps");
99 			}
100 			if(b-a>=maxColumns){
101 				System.err.println("Warning: Max alignment columns exceeded; restricting range. "+(b-a+1)+" > "+maxColumns);
102 				assert(false) : refStartLoc+", "+refEndLoc;
103 				b=Tools.min(ref.length-1, a+maxColumns-1);
104 			}
105 			int[] max=fillLimited(read, ref, a, b, minScore, gaps);
106 			score=(max==null ? null : score(read, ref, a, b, max[0], max[1], max[2], false));
107 		}else{
108 			if(verbose){System.err.println("\ngaps: "+Arrays.toString(gaps)+"\n"+new String(read)+"\ncoords: "+refStartLoc+", "+refEndLoc);}
109 			int[] max=fillLimited(read, ref, a, b, minScore, gaps);
110 			if(verbose){System.err.println("max: "+Arrays.toString(max));}
111 //			score=(max==null ? null : score(read, grefbuffer, 0, greflimit, max[0], max[1], max[2], true));
112 			score=(max==null ? null : score(read, ref, a, b, max[0], max[1], max[2], true));
113 		}
114 		return score;
115 	}
116 
fillAndScoreLimited(byte[] read, SiteScore ss, int thresh, int minScore)117 	public final int[] fillAndScoreLimited(byte[] read, SiteScore ss, int thresh, int minScore){
118 		return fillAndScoreLimited(read, ss.chrom, ss.start, ss.stop, thresh, minScore, ss.gaps);
119 	}
120 
121 //	public final int[] translateScoreFromGappedCoordinate(int[] score)
122 
fillAndScoreLimited(byte[] read, int chrom, int start, int stop, int thresh, int minScore, int[] gaps)123 	public final int[] fillAndScoreLimited(byte[] read, int chrom, int start, int stop, int thresh, int minScore, int[] gaps){
124 		return fillAndScoreLimited(read, Data.getChromosome(chrom).array, start-thresh, stop+thresh, minScore, gaps);
125 	}
126 
127 	@Deprecated
fillAndScoreQ(byte[] read, byte[] ref, int refStartLoc, int refEndLoc, byte[] baseScores)128 	public final int[] fillAndScoreQ(byte[] read, byte[] ref, int refStartLoc, int refEndLoc, byte[] baseScores){
129 		int a=Tools.max(0, refStartLoc);
130 		int b=Tools.min(ref.length-1, refEndLoc);
131 		assert(b>=a);
132 		if(b-a>=maxColumns){
133 			System.err.println("Warning: Max alignment columns exceeded; restricting range. "+(b-a+1)+" > "+maxColumns);
134 			b=Tools.min(ref.length-1, a+maxColumns-1);
135 		}
136 		int[] max=fillQ(read, ref, baseScores, a, b);
137 //		int[] score=score(read, ref,  a, b, max[0], max[1], max[2]);
138 //		return score;
139 		return null;
140 	}
141 
142 	@Deprecated
fillAndScoreQ(byte[] read, SiteScore ss, int thresh, byte[] baseScores)143 	public final int[] fillAndScoreQ(byte[] read, SiteScore ss, int thresh, byte[] baseScores){
144 		return fillAndScoreQ(read, ss.chrom, ss.start, ss.stop, thresh, baseScores);
145 	}
146 
147 	@Deprecated
fillAndScoreQ(byte[] read, int chrom, int start, int stop, int thresh, byte[] baseScores)148 	public final int[] fillAndScoreQ(byte[] read, int chrom, int start, int stop, int thresh, byte[] baseScores){
149 		return fillAndScoreQ(read, Data.getChromosome(chrom).array, start-thresh, stop+thresh, baseScores);
150 	}
151 
scoreNoIndels(byte[] read, SiteScore ss)152 	public final int scoreNoIndels(byte[] read, SiteScore ss){
153 		ChromosomeArray cha=Data.getChromosome(ss.chrom);
154 		return scoreNoIndels(read, cha.array, ss.start);
155 	}
156 
scoreNoIndels(byte[] read, final int chrom, final int refStart)157 	public final int scoreNoIndels(byte[] read, final int chrom, final int refStart){
158 		ChromosomeArray cha=Data.getChromosome(chrom);
159 		return scoreNoIndels(read, cha.array, refStart);
160 	}
161 
scoreNoIndels(byte[] read, SiteScore ss, byte[] baseScores)162 	public final int scoreNoIndels(byte[] read, SiteScore ss, byte[] baseScores){
163 		ChromosomeArray cha=Data.getChromosome(ss.chrom);
164 		return scoreNoIndels(read, cha.array, baseScores, ss.start);
165 	}
166 
scoreNoIndels(byte[] read, final int chrom, final int refStart, byte[] baseScores)167 	public final int scoreNoIndels(byte[] read, final int chrom, final int refStart, byte[] baseScores){
168 		ChromosomeArray cha=Data.getChromosome(chrom);
169 		return scoreNoIndels(read, cha.array, baseScores, refStart);
170 	}
171 
172 //	public final int scoreNoIndels(byte[] read, final int chrom, final int refStart){
173 
174 	/** Calculates score based on an array from Index. */
calcAffineScore(int[] locArray, byte[] baseScores, byte[] bases)175 	public abstract int calcAffineScore(int[] locArray, byte[] baseScores, byte[] bases);
176 
177 	/** Calculates score based on an array from Index using a kfilter.  Slightly slower. */
calcAffineScore(int[] locArray, byte[] baseScores, byte[] bases, int minContig)178 	public abstract int calcAffineScore(int[] locArray, byte[] baseScores, byte[] bases, int minContig);
179 
scoreNoIndels(byte[] read, byte[] ref, final int refStart)180 	public abstract int scoreNoIndels(byte[] read, byte[] ref, final int refStart);
scoreNoIndels(byte[] read, byte[] ref, final int refStart, final SiteScore ss)181 	public int scoreNoIndels(byte[] read, byte[] ref, final int refStart, final SiteScore ss){
182 		throw new RuntimeException("Unimplemented method in class "+this.getClass());
183 	}
184 
genMatchNoIndels(byte[] read, byte[] ref, final int refStart)185 	public abstract byte[] genMatchNoIndels(byte[] read, byte[] ref, final int refStart);
186 
scoreNoIndels(byte[] read, byte[] ref, byte[] baseScores, final int refStart)187 	public abstract int scoreNoIndels(byte[] read, byte[] ref, byte[] baseScores, final int refStart);
scoreNoIndels(byte[] read, byte[] ref, byte[] baseScores, final int refStart, SiteScore ss)188 	public int scoreNoIndels(byte[] read, byte[] ref, byte[] baseScores, final int refStart, SiteScore ss){
189 		throw new RuntimeException("Unimplemented method in class "+this.getClass());
190 	}
191 
scoreNoIndelsAndMakeMatchString(byte[] read, byte[] ref, byte[] baseScores, final int refStart, byte[][] matchReturn)192 	public abstract int scoreNoIndelsAndMakeMatchString(byte[] read, byte[] ref, byte[] baseScores, final int refStart, byte[][] matchReturn);
193 
scoreNoIndelsAndMakeMatchString(byte[] read, byte[] ref, final int refStart, byte[][] matchReturn)194 	public abstract int scoreNoIndelsAndMakeMatchString(byte[] read, byte[] ref, final int refStart, byte[][] matchReturn);
195 
196 	/** Assumes match string is in long format */
toLocalAlignment(Read r, SiteScore ss, byte[] basesM, int minToClip, float matchPointsMult)197 	public final boolean toLocalAlignment(Read r, SiteScore ss, byte[] basesM, int minToClip, float matchPointsMult){
198 		final byte[] match=r.match, bases=(r.strand()==Shared.PLUS ? r.bases : basesM);
199 		if(match==null || match.length<1){return false;}
200 
201 		assert(match==ss.match);
202 		assert(match==r.match);
203 		assert(r.start==ss.start);
204 		assert(r.stop==ss.stop);
205 
206 		if(r.containsXY2()){
207 			if(verbose){System.err.println("\nInitial0:");}
208 			if(verbose){System.err.println("0: match="+new String(match));}
209 			if(verbose){System.err.println("0: r.start="+r.start+", r.stop="+r.stop+"; len="+bases.length+"; reflen="+(r.stop-r.start+1));}
210 			ss.fixXY(bases, false, this);
211 			r.start=ss.start;
212 			r.stop=ss.stop;
213 			if(verbose){System.err.println("\nAfter fixXY:");}
214 			if(verbose){System.err.println("0: match="+new String(match));}
215 			if(verbose){System.err.println("0: r.start="+r.start+", r.stop="+r.stop+"; len="+bases.length+"; reflen="+(r.stop-r.start+1));}
216 			assert(match==ss.match);
217 			assert(match==r.match);
218 			assert(r.start==ss.start);
219 			assert(r.stop==ss.stop);
220 			assert(ss.lengthsAgree()) : ss.mappedLength()+"!="+ss.matchLength()+"\n"+ss+"\n\n"+r+"\n";
221 		}
222 		assert(ss.lengthsAgree()) : ss.mappedLength()+"!="+ss.matchLength()+"\n"+ss+"\n\n"+r+"\n";
223 
224 		int maxScore=-1;
225 
226 		int startLocC=-1;
227 		int stopLocC=-1;
228 		int lastZeroC=0;
229 
230 		int startLocM=-1;
231 		int stopLocM=-1;
232 		int lastZeroM=0;
233 
234 		int startLocR=-1;
235 		int stopLocR=-1;
236 		int lastZeroR=0;
237 
238 		byte mode=match[0], prevMode='0';
239 		int current=0, prevStreak=0;
240 		int cpos=0;
241 		int rpos=r.start;
242 		int score=0;
243 
244 		if(verbose){System.err.println("\nInitial:");}
245 		if(verbose){System.err.println("A: r.start="+r.start+", r.stop="+r.stop+"; rpos="+rpos+"; len="+bases.length+"; reflen="+(r.stop-r.start+1));}
246 		if(verbose){System.err.println("A: match=\n"+new String(match));}
247 		if(verbose){System.err.println(new String(bases));}
248 		if(verbose){System.err.println(Data.getChromosome(r.chrom).getString(r.start, Tools.max(r.stop, r.start+bases.length-1)));}
249 
250 		if(verbose){
251 			int calcscore=score(match);
252 			System.err.println("A: score="+r.mapScore+", ss.slowScore="+ss.slowScore+", calcscore="+calcscore);
253 //			assert(ss.slowScore<=calcscore); //May be lower due to ambig3.  I found a case where this line fails, possibly due to long deletions?
254 		}
255 
256 		for(int mpos=0; mpos<match.length; mpos++){
257 			byte c=match[mpos];
258 
259 			if(mode==c){
260 				current++;
261 			}else{
262 				if(mode=='m'){
263 					if(score<=0){
264 						score=0;
265 						lastZeroC=cpos;
266 						lastZeroM=mpos-current;
267 						lastZeroR=rpos;
268 					}
269 					int add=calcMatchScore(current);
270 					score+=(matchPointsMult*add);
271 //					if(prevMode=='N' || prevMode=='R'){score=score+POINTS_MATCH2()-POINTS_MATCH();} //Don't penalize first match after N
272 					cpos+=current;
273 					rpos+=current;
274 					if(score>maxScore){
275 						maxScore=score;
276 						startLocC=lastZeroC;
277 						startLocM=lastZeroM;
278 						startLocR=lastZeroR;
279 						stopLocC=cpos-1;
280 						stopLocM=mpos-1;
281 						stopLocR=rpos-1;
282 					}
283 				}else if(mode=='S'){
284 					score+=calcSubScore(current);
285 					if(prevMode=='N' || prevMode=='R'){score=score+POINTS_SUB2()-POINTS_SUB();} //Don't penalize first sub after N
286 					else if(prevMode=='m' && prevStreak<2){score=score+POINTS_SUBR()-POINTS_SUB();}
287 					cpos+=current;
288 					rpos+=current;
289 				}else if(mode=='D'){
290 					score+=calcDelScore(current, true);
291 					rpos+=current;
292 				}else if(mode=='I'){
293 					score+=calcInsScore(current);
294 					cpos+=current;
295 				}else if(mode=='C'){
296 					cpos+=current;
297 					rpos+=current;
298 				}else if(mode=='X' || mode=='Y'){
299 					score+=calcInsScore(current);//TODO: Consider changing XY to subs
300 					cpos+=current;
301 					rpos+=current;
302 				}else if(mode=='N'){
303 					score+=calcNocallScore(current);
304 					cpos+=current;
305 					rpos+=current;
306 				}else if(mode=='R'){
307 					score+=calcNorefScore(current);
308 					cpos+=current;
309 					rpos+=current;
310 				}else{
311 					assert(false) : "Unhandled symbol "+mode+"\n"+(char)mode+"\n"+new String(match)+"\n"+new String(bases);
312 				}
313 				if(verbose){System.err.println("mode "+(char)mode+"->"+(char)c+"; rpos="+rpos);}
314 				prevMode=mode;
315 				prevStreak=current;
316 				mode=c;
317 				current=1;
318 			}
319 		}
320 		if(current>0){
321 			assert(mode==match[match.length-1]);
322 			if(mode=='m'){
323 				if(score<=0){
324 					score=0;
325 					lastZeroC=cpos;
326 					lastZeroM=match.length-current;
327 					lastZeroR=rpos;
328 				}
329 				int add=calcMatchScore(current);
330 				score+=(matchPointsMult*add);
331 //				if(prevMode=='N' || prevMode=='R'){score=score+POINTS_MATCH2()-POINTS_MATCH();} //Don't penalize first match after N
332 				cpos+=current;
333 				rpos+=current;
334 				if(score>maxScore){
335 					maxScore=score;
336 					startLocC=lastZeroC;
337 					startLocM=lastZeroM;
338 					startLocR=lastZeroR;
339 					stopLocC=cpos-1;
340 					stopLocM=match.length-1;
341 					stopLocR=rpos-1;
342 				}
343 			}else if(mode=='S'){
344 				score+=calcSubScore(current);
345 				if(prevMode=='N' || prevMode=='R'){score=score+POINTS_SUB2()-POINTS_SUB();} //Don't penalize first sub after N
346 				else if(prevMode=='m' && prevStreak<2){score=score+POINTS_SUBR()-POINTS_SUB();}
347 				cpos+=current;
348 				rpos+=current;
349 			}else if(mode=='D'){
350 				score+=calcDelScore(current, true);
351 				rpos+=current;
352 			}else if(mode=='I'){
353 				score+=calcInsScore(current);
354 				cpos+=current;
355 			}else if(mode=='C'){
356 				cpos+=current;
357 				rpos+=current;
358 			}else if(mode=='X' || mode=='Y'){
359 				score+=calcInsScore(current);
360 				cpos+=current;
361 				rpos+=current;
362 			}else if(mode=='N'){
363 				score+=calcNocallScore(current);
364 				cpos+=current;
365 				rpos+=current;
366 			}else if(mode=='R'){
367 				score+=calcNorefScore(current);
368 				cpos+=current;
369 				rpos+=current;
370 			}else if(mode!=0){
371 				assert(false) : "Unhandled symbol "+mode+"\n"+(char)mode+"\n"+new String(match)+"\n"+new String(bases);
372 			}
373 			if(verbose){System.err.println("mode "+(char)mode+"->end; rpos="+rpos);}
374 		}
375 
376 		if(startLocC<0 || stopLocC<0){
377 			//This can happen if there are zero matches.  Which would be rare, but I have seen it occur.
378 			r.clearMapping();
379 //			assert(false) : "Failed: "+startLocC+", "+stopLocC+"\n"+r+"\n"+r.mate+"\n"+r.toFastq()+"\n"+(r.mate==null ? "null" : r.mate.toFastq());
380 			return false;
381 		}
382 
383 
384 		if(verbose){System.err.println("A: r.start="+r.start+", r.stop="+r.stop+"; rpos="+rpos+"; len="+bases.length+"; reflen="+(r.stop-r.start+1));}
385 
386 		assert(rpos==r.stop+1) : "\n\n\n"+rpos+"!="+(r.stop+1)+"\n"+r+"\n\n"+
387 			(r.topSite()==null ? "null" : r.topSite().mappedLength()+", "+r.topSite().matchLength()+", "+r.topSite().start+", "+r.topSite().stop+"\n"+r.topSite());
388 
389 		if(verbose){System.err.println("B: rpos="+rpos+", startLocR="+startLocR+", stopLocR="+stopLocR);}
390 
391 		int headTrimR=startLocC;
392 		int headTrimM=startLocM;
393 		int tailTrimR=bases.length-stopLocC-1;
394 		int tailTrimM=match.length-stopLocM-1;
395 
396 		if(verbose){System.err.println("C: headTrimR="+headTrimR+", headTrimM="+headTrimM+", tailTrimR="+tailTrimR+", tailTrimM="+tailTrimM);}
397 
398 		if(headTrimR<=minToClip && headTrimM<=minToClip){
399 			headTrimR=headTrimM=0;
400 		}
401 		if(tailTrimR<=minToClip && tailTrimM<=minToClip){
402 			tailTrimR=tailTrimM=0;
403 		}
404 		if(headTrimR==0 && headTrimM==0 && tailTrimR==0 && tailTrimM==0){
405 			return false;
406 		}
407 		//Do trimming
408 		final int headDelta=headTrimR-headTrimM;
409 		final int tailDelta=tailTrimR-tailTrimM;
410 		final byte[] match2;
411 
412 		if(verbose){System.err.println("D: headTrimR="+headTrimR+", headTrimM="+headTrimM+", tailTrimR="+tailTrimR+", tailTrimM="+tailTrimM);}
413 		if(verbose){System.err.println("D: headDelta="+headDelta+", tailDelta="+tailDelta);}
414 
415 		if(headDelta==0 && tailDelta==0){
416 			//Length-neutral trimming
417 			match2=match;
418 			for(int i=0; i<headTrimM; i++){match[i]='C';}
419 			for(int i=match.length-tailTrimM; i<match.length; i++){match[i]='C';}
420 		}else{
421 			final int newlen=match.length-headTrimM-tailTrimM+headTrimR+tailTrimR;
422 			match2=new byte[newlen];
423 			for(int i=0; i<headTrimR; i++){match2[i]='C';}
424 			for(int i=match2.length-tailTrimR; i<match2.length; i++){match2[i]='C';}
425 			for(int i=headTrimM, i2=headTrimR, lim=match2.length-tailTrimR; i2<lim; i++, i2++){
426 				match2[i2]=match[i];
427 			}
428 		}
429 
430 		assert(ss==null || ((ss.start==r.start) && (ss.stop==r.stop) && (ss.strand==r.strand()) && (ss.chrom==r.chrom) && (ss.match==r.match))) :
431 			"\nr="+r+"\nr2="+r.mate+"\nss=\n"+ss+"\n"+(ss==null ? "ss is null" : ((ss.start==r.start)+", "+(ss.stop==r.stop)+", "+
432 			(ss.strand==r.strand())+", "+(ss.chrom==r.chrom)+", "+(ss.match==r.match)));
433 
434 		if(headTrimR!=0){r.start=startLocR-headTrimR;}
435 		if(tailTrimR!=0){r.stop=stopLocR+tailTrimR;}
436 		r.match=match2;
437 
438 		if(matchPointsMult!=1f){
439 			maxScore=score(match);
440 		}
441 		if(ss!=null){maxScore=Tools.max(maxScore, ss.slowScore);}
442 		r.mapScore=maxScore;
443 
444 		if(verbose){System.err.println("E: r.start="+r.start+", r.stop="+r.stop);}
445 
446 		if(ss!=null){
447 			assert(maxScore>=ss.slowScore) : maxScore+", "+ss.slowScore+"\n"+r.toFastq();
448 			ss.match=r.match;
449 			ss.setLimits(r.start, r.stop);
450 			int pairedScore=ss.pairedScore>0 ? Tools.max(ss.pairedScore+(maxScore-ss.slowScore), 0) : 0;
451 		}
452 
453 		if(!ss.perfect && ss.isPerfect(bases)){
454 			ss.perfect=ss.semiperfect=true;
455 			r.setPerfect(true);
456 			Arrays.fill(r.match, (byte)'m');
457 			ss.setSlowScore(maxScore);
458 		}else if(!ss.semiperfect && ss.isSemiPerfect(bases)){
459 			ss.semiperfect=true;
460 			ChromosomeArray cha=Data.getChromosome(ss.chrom);
461 			r.match=ss.match=genMatchNoIndels(bases, cha.array, ss.start);
462 			return toLocalAlignment(r, ss, basesM, minToClip, matchPointsMult);
463 		}
464 		return true;
465 	}
466 
467 
468 	/** Works in short or long format. */
score(byte[] match)469 	public final int score(byte[] match){
470 		if(match==null || match.length<1){return 0;}
471 
472 		byte mode=match[0], prevMode='0';
473 		int current=0, prevStreak=0;
474 		int score=0;
475 		boolean hasDigit=false;
476 
477 		for(int mpos=0; mpos<match.length; mpos++){
478 			byte c=match[mpos];
479 
480 			if(mode==c){
481 				current++;
482 			}else if(Tools.isDigit(c)){
483 				current=(hasDigit ? current : 0)*10+c-'0';
484 				hasDigit=true;
485 			}else{
486 				if(mode=='m'){
487 					score+=calcMatchScore(current);
488 //					if(prevMode=='N' || prevMode=='R'){score=score+POINTS_MATCH2()-POINTS_MATCH();} //Don't penalize first match after N
489 				}else if(mode=='S'){
490 					score+=calcSubScore(current);
491 					if(prevMode=='N' || prevMode=='R'){score=score+POINTS_SUB2()-POINTS_SUB();} //Don't penalize first sub after N
492 					else if(prevMode=='m' && prevStreak<2){score=score+POINTS_SUBR()-POINTS_SUB();}
493 				}else if(mode=='D'){
494 					score+=calcDelScore(current, true);
495 				}else if(mode=='I'){
496 					score+=calcInsScore(current);
497 				}else if(mode=='C'){
498 					//do nothing
499 				}else if(mode=='X' || mode=='Y'){
500 					score+=calcInsScore(current);
501 				}else if(mode=='N'){
502 					score+=calcNocallScore(current);
503 				}else if(mode=='R'){
504 					score+=calcNorefScore(current);
505 				}else{
506 					assert(false) : "Unhandled symbol "+mode+"\n"+(char)mode+"\n"+new String(match);
507 				}
508 				if(verbose){System.err.println("mode "+(char)mode+"->"+(char)c+"\tcurrent="+current+"\tscore="+score);}
509 				prevMode=mode;
510 				prevStreak=current;
511 				mode=c;
512 				current=1;
513 				hasDigit=false;
514 			}
515 		}
516 		if(current>0){
517 			assert(hasDigit || mode==match[match.length-1]);
518 			if(mode=='m'){
519 				score+=calcMatchScore(current);
520 //				if(prevMode=='N' || prevMode=='R'){score=score+POINTS_MATCH2()-POINTS_MATCH();} //Don't penalize first match after N
521 			}else if(mode=='S'){
522 				score+=calcSubScore(current);
523 				if(prevMode=='N' || prevMode=='R'){score=score+POINTS_SUB2()-POINTS_SUB();} //Don't penalize first sub after N
524 				else if(prevMode=='m' && prevStreak<2){score=score+POINTS_SUBR()-POINTS_SUB();}
525 			}else if(mode=='D'){
526 				score+=calcDelScore(current, true);
527 			}else if(mode=='I'){
528 				score+=calcInsScore(current);
529 			}else if(mode=='C'){
530 				//do nothing
531 			}else if(mode=='X' || mode=='Y'){
532 				score+=calcInsScore(current);
533 			}else if(mode=='N'){
534 				score+=calcNocallScore(current);
535 			}else if(mode=='R'){
536 				score+=calcNorefScore(current);
537 			}else if(mode!=0){
538 				assert(false) : "Unhandled symbol "+mode+"\n"+(char)mode+"\n"+new String(match);
539 			}
540 			if(verbose){System.err.println("mode "+(char)mode+"->end; score="+score);}
541 		}
542 
543 		return score;
544 	}
545 
maxQuality(int numBases)546 	public abstract int maxQuality(int numBases);
547 
maxQuality(byte[] baseScores)548 	public abstract int maxQuality(byte[] baseScores);
549 
maxImperfectScore(int numBases)550 	public abstract int maxImperfectScore(int numBases);
551 
maxImperfectScore(byte[] baseScores)552 	public abstract int maxImperfectScore(byte[] baseScores);
553 
toString(int[] a)554 	public static final String toString(int[] a){
555 
556 		int width=7;
557 
558 		StringBuilder sb=new StringBuilder((a.length+1)*width+2);
559 		for(int num : a){
560 			String s=" "+num;
561 			int spaces=width-s.length();
562 			assert(spaces>=0) : width+", "+s.length()+", "+s+", "+num+", "+spaces;
563 			for(int i=0; i<spaces; i++){sb.append(' ');}
564 			sb.append(s);
565 		}
566 
567 		return sb.toString();
568 	}
569 
printMatrix(int[][][] packed, int readlen, int reflen, int TIMEMASK, int SCOREOFFSET)570 	static void printMatrix(int[][][] packed, int readlen, int reflen, int TIMEMASK, int SCOREOFFSET){
571 		for(int mode=0; mode<packed.length; mode++){
572 			printMatrix(packed, readlen, reflen, TIMEMASK, SCOREOFFSET, mode);
573 		}
574 	}
575 
printMatrix(int[][][] packed, int readlen, int reflen, int TIMEMASK, int SCOREOFFSET, int mode)576 	static void printMatrix(int[][][] packed, int readlen, int reflen, int TIMEMASK, int SCOREOFFSET, int mode){
577 		final int ylim=Tools.min(readlen+1, packed[mode].length);
578 		final int xlim=Tools.min(reflen+1, packed[mode].length);
579 		for(int row=0; row<ylim; row++){
580 			System.out.println(toScorePacked(packed[mode][row], SCOREOFFSET, xlim));
581 		}
582 		System.out.println();
583 		for(int row=0; row<ylim; row++){
584 			System.out.println(toTimePacked(packed[mode][row], TIMEMASK, xlim));
585 		}
586 		System.out.println();
587 	}
588 
toTimePacked(int[] a, int TIMEMASK, int lim)589 	public static final String toTimePacked(int[] a, int TIMEMASK, int lim){
590 		int width=6;
591 		lim=Tools.min(lim, a.length);
592 
593 		StringBuilder sb=new StringBuilder((a.length+1)*width+2);
594 		for(int j=0; j<lim; j++){
595 			int num=a[j]&TIMEMASK;
596 			String s=" "+num;
597 			int spaces=width-s.length();
598 			assert(spaces>=0) : width+", "+s.length()+", "+s+", "+num+", "+spaces;
599 			for(int i=0; i<spaces; i++){sb.append(' ');}
600 			sb.append(s);
601 		}
602 
603 		return sb.toString();
604 	}
605 
toScorePacked(int[] a, int SCOREOFFSET, int lim)606 	public static final String toScorePacked(int[] a, int SCOREOFFSET, int lim){
607 		int width=6;
608 		lim=Tools.min(lim, a.length);
609 
610 //		String minString=" -";
611 //		String maxString="  ";
612 //		while(minString.length()<width){minString+='9';}
613 //		while(maxString.length()<width){maxString+='9';}
614 
615 		String minString=" -";
616 		String maxString=" +";
617 		while(minString.length()<width){minString=minString+' ';}
618 		while(maxString.length()<width){maxString=maxString+' ';}
619 
620 		StringBuilder sb=new StringBuilder((a.length+1)*width+2);
621 		for(int j=0; j<lim; j++){
622 			int num=a[j]>>SCOREOFFSET;
623 			String s=" "+num;
624 			if(s.length()>width){s=num>0 ? maxString : minString;}
625 			int spaces=width-s.length();
626 			assert(spaces>=0) : width+", "+s.length()+", "+s+", "+num+", "+spaces;
627 			for(int i=0; i<spaces; i++){sb.append(' ');}
628 			sb.append(s);
629 		}
630 
631 		return sb.toString();
632 	}
633 
toString(byte[] a)634 	public static final String toString(byte[] a){
635 
636 		int width=6;
637 
638 		StringBuilder sb=new StringBuilder((a.length+1)*width+2);
639 		for(int num : a){
640 			String s=" "+num;
641 			int spaces=width-s.length();
642 			assert(spaces>=0);
643 			for(int i=0; i<spaces; i++){sb.append(' ');}
644 			sb.append(s);
645 		}
646 
647 		return sb.toString();
648 	}
649 
toString(byte[] ref, int startLoc, int stopLoc)650 	public static final String toString(byte[] ref, int startLoc, int stopLoc){
651 		StringBuilder sb=new StringBuilder(stopLoc-startLoc+1);
652 		for(int i=startLoc; i<=stopLoc; i++){sb.append((char)ref[i]);}
653 		return sb.toString();
654 	}
655 
calcMatchScore(int len)656 	public final int calcMatchScore(int len){
657 		assert(len>0) : len;
658 		return POINTS_MATCH()+(len-1)*POINTS_MATCH2();
659 	}
660 
calcSubScore(int len)661 	public final int calcSubScore(int len){
662 		assert(len>0) : len;
663 		final int lim3=LIMIT_FOR_COST_3();
664 		int score=POINTS_SUB();
665 		if(len>lim3){
666 			score+=(len-lim3)*POINTS_SUB3();
667 			len=lim3;
668 		}
669 		if(len>1){
670 			score+=(len-1)*POINTS_SUB2();
671 		}
672 		return score;
673 	}
674 
calcNorefScore(int len)675 	public final int calcNorefScore(int len){return len*POINTS_NOREF();}
676 
calcNocallScore(int len)677 	public final int calcNocallScore(int len){return len*POINTS_NOCALL();}
678 
calcDelScore(int len, boolean approximateGaps)679 	public abstract int calcDelScore(int len, boolean approximateGaps);
680 
calcInsScore(int len)681 	public abstract int calcInsScore(int len);
682 
minIdToMinRatio(double minid, String classname)683 	public static final float minIdToMinRatio(double minid, String classname){
684 		if("MultiStateAligner9ts".equalsIgnoreCase(classname)){
685 			return MultiStateAligner9ts.minIdToMinRatio(minid);
686 		}else if("MultiStateAligner10ts".equalsIgnoreCase(classname)){
687 			return MultiStateAligner10ts.minIdToMinRatio(minid);
688 		}else if("MultiStateAligner11ts".equalsIgnoreCase(classname)){
689 			return MultiStateAligner11ts.minIdToMinRatio(minid);
690 		}else if("MultiStateAligner9PacBio".equalsIgnoreCase(classname)){
691 			return MultiStateAligner9PacBio.minIdToMinRatio(minid);
692 		}else if("MultiStateAligner9Flat".equalsIgnoreCase(classname)){
693 			return MultiStateAligner9Flat.minIdToMinRatio(minid);
694 		}else if("MultiStateAligner9XFlat".equalsIgnoreCase(classname)){
695 			return MultiStateAligner9XFlat.minIdToMinRatio(minid);
696 		}else{
697 			assert(false) : "Unhandled MSA type: "+classname;
698 			return MultiStateAligner11ts.minIdToMinRatio(minid);
699 		}
700 	}
701 
702 	static final int GAPBUFFER=Shared.GAPBUFFER;
703 	static final int GAPBUFFER2=Shared.GAPBUFFER2;
704 	static final int GAPLEN=Shared.GAPLEN;
705 	static final int MINGAP=Shared.MINGAP;
706 	static final int GAPCOST=Shared.GAPCOST;
707 	static final byte GAPC=Shared.GAPC;
708 
709 	/** Seemingly to clear out prior data from the gref.  Not sure what else it's used for. */
710 	static final int GREFLIMIT2_CUSHION=128; //Tools.max(GAPBUFFER2, GAPLEN);
711 
712 
713 	/**DO NOT MODIFY*/
getGrefbuffer()714 	public abstract byte[] getGrefbuffer();
715 
716 //	public final int[] vertLimit;
717 //	public final int[] horizLimit;
718 
showVertLimit()719 	public abstract CharSequence showVertLimit();
showHorizLimit()720 	public abstract CharSequence showHorizLimit();
721 
722 ////	public static final int MODEBITS=2;
723 //	public static final int TIMEBITS=11;
724 //	public static final int SCOREBITS=32-TIMEBITS;
725 //	public static final int MAX_TIME=((1<<TIMEBITS)-1);
726 //	public static final int MAX_SCORE=((1<<(SCOREBITS-1))-1)-2000;
727 //	public static final int MIN_SCORE=0-MAX_SCORE; //Keeps it 1 point above "BAD".
728 //
729 ////	public static final int MODEOFFSET=0; //Always zero.
730 ////	public static final int TIMEOFFSET=0;
SCOREOFFSET()731 	public abstract int SCOREOFFSET();
732 //
733 ////	public static final int MODEMASK=~((-1)<<MODEBITS);
734 ////	public static final int TIMEMASK=(~((-1)<<TIMEBITS))<<TIMEOFFSET;
735 //	public static final int TIMEMASK=~((-1)<<TIMEBITS);
736 //	public static final int SCOREMASK=(~((-1)<<SCOREBITS))<<SCOREOFFSET;
737 
738 	static final byte MODE_MS=0;
739 	static final byte MODE_DEL=1;
740 	static final byte MODE_INS=2;
741 	static final byte MODE_SUB=3;
742 
743 	//These are to allow constants to be overridden
POINTS_NOREF()744 	public abstract int POINTS_NOREF();
POINTS_NOCALL()745 	public abstract int POINTS_NOCALL();
POINTS_MATCH()746 	public abstract int POINTS_MATCH();
POINTS_MATCH2()747 	public abstract int POINTS_MATCH2();
POINTS_COMPATIBLE()748 	public abstract int POINTS_COMPATIBLE();
POINTS_SUB()749 	public abstract int POINTS_SUB();
POINTS_SUBR()750 	public abstract int POINTS_SUBR();
POINTS_SUB2()751 	public abstract int POINTS_SUB2();
POINTS_SUB3()752 	public abstract int POINTS_SUB3();
POINTS_MATCHSUB()753 	public abstract int POINTS_MATCHSUB();
POINTS_INS()754 	public abstract int POINTS_INS();
POINTS_INS2()755 	public abstract int POINTS_INS2();
POINTS_INS3()756 	public abstract int POINTS_INS3();
POINTS_INS4()757 	public abstract int POINTS_INS4();
POINTS_DEL()758 	public abstract int POINTS_DEL();
POINTS_DEL2()759 	public abstract int POINTS_DEL2();
POINTS_DEL3()760 	public abstract int POINTS_DEL3();
POINTS_DEL4()761 	public abstract int POINTS_DEL4();
POINTS_DEL5()762 	public abstract int POINTS_DEL5();
POINTS_DEL_REF_N()763 	public abstract int POINTS_DEL_REF_N();
POINTS_GAP()764 	public abstract int POINTS_GAP();
765 
TIMESLIP()766 	public abstract int TIMESLIP();
MASK5()767 	public abstract int MASK5();
768 
BARRIER_I1()769 	abstract int BARRIER_I1();
BARRIER_D1()770 	abstract int BARRIER_D1();
771 
LIMIT_FOR_COST_3()772 	public abstract int LIMIT_FOR_COST_3();
LIMIT_FOR_COST_4()773 	public abstract int LIMIT_FOR_COST_4();
LIMIT_FOR_COST_5()774 	public abstract int LIMIT_FOR_COST_5();
775 
BAD()776 	public abstract int BAD();
777 
778 
779 	public final int maxRows;
780 	public final int maxColumns;
781 
782 	public long iterationsLimited=0;
783 	public long iterationsUnlimited=0;
784 
785 	public boolean verbose=false;
786 	public boolean verbose2=false;
787 
788 	public static int bandwidth=0;
789 	public static float bandwidthRatio=0;
790 	public static boolean flatMode=false;
791 
792 	public static final int MIN_SCORE_ADJUST=120;
793 
794 }
795