1 package aligner;
2 
3 import java.util.Arrays;
4 
5 import dna.AminoAcid;
6 import dna.ChromosomeArray;
7 import dna.Data;
8 import shared.Tools;
9 import stream.Read;
10 import stream.SiteScore;
11 
12 /**
13  * Based on MSA9ts, with transform scores tweaked for PacBio. */
14 public final class MultiStateAligner9PacBioAdapter3 {
15 
16 
MultiStateAligner9PacBioAdapter3(int maxRows_, int maxColumns_)17 	public MultiStateAligner9PacBioAdapter3(int maxRows_, int maxColumns_){
18 //		assert(maxColumns_>=200);
19 //		assert(maxRows_>=200);
20 		maxRows=maxRows_;
21 		maxColumns=maxColumns_;
22 		packed=new int[3][maxRows+1][];
23 
24 		for(int i=0; i<3; i++){
25 			packed[i][0]=new int[maxColumns+1];
26 			packed[i][1]=new int[maxColumns+1];
27 			packed[i][2]=new int[maxColumns+1];
28 			for(int j=3; j<maxRows+1; j++){
29 				packed[i][j]=packed[i][j-2];
30 			}
31 		}
32 
33 		insScoreArray=new int[maxRows+1];
34 		delScoreArray=new int[maxColumns+1];
35 		matchScoreArray=new int[maxRows+1];
36 		subScoreArray=new int[maxRows+1];
37 		for(int i=0; i<insScoreArray.length; i++){
38 			insScoreArray[i]=(i==0 ? POINTSoff_INS :
39 				i<LIMIT_FOR_COST_3 ? POINTSoff_INS2 :
40 					i<LIMIT_FOR_COST_4 ? POINTSoff_INS3 : POINTSoff_INS4);
41 		}
42 		for(int i=0; i<delScoreArray.length; i++){
43 			delScoreArray[i]=(i==0 ? POINTSoff_DEL :
44 				i<LIMIT_FOR_COST_3 ? POINTSoff_DEL2 :
45 					i<LIMIT_FOR_COST_4 ? POINTSoff_DEL3 : POINTSoff_DEL4);
46 		}
47 		for(int i=0; i<matchScoreArray.length; i++){
48 			matchScoreArray[i]=(i==0 ? POINTSoff_MATCH2 : POINTSoff_MATCH);
49 		}
50 		for(int i=0; i<subScoreArray.length; i++){
51 			subScoreArray[i]=(i==0 ? POINTSoff_SUB : i<LIMIT_FOR_COST_3 ? POINTSoff_SUB2 : POINTSoff_SUB3);
52 		}
53 
54 		vertLimit=new int[maxRows+1];
55 		horizLimit=new int[maxColumns+1];
56 		Arrays.fill(vertLimit, BADoff);
57 		Arrays.fill(horizLimit, BADoff);
58 
59 		for(int matrix=0; matrix<packed.length; matrix++){
60 			for(int i=1; i<=3; i++){
61 				for(int j=0; j<packed[matrix][i].length; j++){
62 					packed[matrix][i][j]|=BADoff;
63 				}
64 			}
65 
66 			//Initializes the rows; not needed.
67 			for(int i=1; i<=2; i++){
68 
69 				int score=calcInsScoreOffset(i);
70 				packed[matrix][i][0]=score;
71 			}
72 		}
73 	}
74 
75 
76 	/** return new int[] {rows, maxC, maxS, max};
77 	 * Will not fill areas that cannot match minScore */
fillLimited(byte[] read, byte[] ref, int refStartLoc, int refEndLoc, int minScore)78 	public final int[] fillLimited(byte[] read, byte[] ref, int refStartLoc, int refEndLoc, int minScore){
79 		return fillLimitedX(read, ref, refStartLoc, refEndLoc, minScore);
80 	}
81 
82 
83 	/** return new int[] {rows, maxC, maxS, max};
84 	 * Will not fill areas that cannot match minScore */
fillLimitedX(byte[] read, byte[] ref, int refStartLoc, int refEndLoc, int minScore)85 	private final int[] fillLimitedX(byte[] read, byte[] ref, int refStartLoc, int refEndLoc, int minScore){
86 //		minScore=0;
87 //		assert(minScore>0);
88 		rows=read.length;
89 		columns=refEndLoc-refStartLoc+1;
90 
91 		if(/*read.length<40 || */false || minScore<=0 || columns>read.length+Tools.min(100, read.length)){
92 //			assert(false) : minScore;
93 			return fillUnlimited(read, ref, refStartLoc, refEndLoc);
94 		}
95 
96 		minScore-=100; //Increases quality trivially
97 
98 		assert(rows<=maxRows) : "Check that values are in-bounds before calling this function: "+rows+", "+maxRows+"\n"+
99 			refStartLoc+", "+refEndLoc+", "+rows+", "+maxRows+", "+columns+", "+maxColumns+"\n"+new String(read)+"\n";
100 		assert(columns<=maxColumns) : "Check that values are in-bounds before calling this function: "+columns+", "+maxColumns+"\n"+
101 			refStartLoc+", "+refEndLoc+", "+rows+", "+maxRows+", "+columns+", "+maxColumns+"\n"+new String(read)+"\n";
102 
103 		assert(refStartLoc>=0) : "Check that values are in-bounds before calling this function: "+refStartLoc+"\n"+
104 			refStartLoc+", "+refEndLoc+", "+rows+", "+maxRows+", "+columns+", "+maxColumns+"\n"+new String(read)+"\n";
105 		assert(refEndLoc<ref.length) : "Check that values are in-bounds before calling this function: "+refEndLoc+", "+ref.length+"\n"+
106 			refStartLoc+", "+refEndLoc+", "+rows+", "+maxRows+", "+columns+", "+maxColumns+"\n"+new String(read)+"\n";
107 
108 //		for(int x=0; x<packed.length; x++){
109 //			for(int y=1; y<rows+1; y++){
110 //				Arrays.fill(packed[x][y], 1, columns+1, BADoff);
111 //			}
112 //		}
113 		for(int x=0; x<packed.length; x++){
114 
115 //			Arrays.fill(packed[x][1], 1, columns+1, BADoff);
116 			Arrays.fill(packed[x][rows], 1, columns+1, BADoff);
117 //			for(int y=1; y<rows+1; y++){
118 //				Arrays.fill(packed[x][y], 1, columns+1, BADoff);
119 //			}
120 		}
121 
122 		int minGoodCol=1;
123 		int maxGoodCol=columns;
124 
125 		final int minScore_off=(minScore<<SCOREOFFSET);
126 		final int maxGain=(read.length-1)*POINTSoff_MATCH2+POINTSoff_MATCH;
127 		final int floor=minScore_off-maxGain;
128 //		final int subfloor=Tools.max(BADoff, floor-200*POINTSoff_MATCH2);
129 		final int subfloor=floor-5*POINTSoff_MATCH2;
130 		assert(subfloor>BADoff); //TODO: Actually, it needs to be substantially more.
131 		assert(subfloor<minScore_off) : minScore_off+", "+floor+", "+BADoff+", "+subfloor;
132 
133 		if(verbose2){
134 			System.out.println();
135 			System.out.println("minScore="+minScore+"\t"+minScore_off);
136 			System.out.println("maxGain="+(maxGain>>SCOREOFFSET)+"\t"+(maxGain));
137 			System.out.println("floor="+(floor>>SCOREOFFSET)+"\t"+(floor));
138 			System.out.println("subfloor="+(subfloor>>SCOREOFFSET)+"\t"+(subfloor));
139 			System.out.println("BADoff="+(BADoff>>SCOREOFFSET)+"\t"+(BADoff));
140 			System.out.println("maxGain="+(maxGain>>SCOREOFFSET)+"\t"+(maxGain));
141 			System.out.println();
142 		}
143 
144 		vertLimit[rows]=minScore_off;
145 		for(int i=rows-1; i>=0; i--){
146 			vertLimit[i]=Tools.max(vertLimit[i+1]-POINTSoff_MATCH2, floor);
147 		}
148 
149 		horizLimit[columns]=minScore_off;
150 		for(int i=columns-1; i>=0; i--){
151 			horizLimit[i]=Tools.max(horizLimit[i+1]-POINTSoff_MATCH2, floor);
152 		}
153 
154 		for(int row=1; row<=rows; row++){
155 
156 			{
157 				int score=calcInsScoreOffset(row);
158 				packed[0][row][0]=score;
159 				packed[1][row][0]=score;
160 				packed[2][row][0]=score;
161 			}
162 
163 			int colStart=minGoodCol;
164 			int colStop=maxGoodCol;
165 
166 			minGoodCol=-1;
167 			maxGoodCol=-2;
168 
169 			final int vlimit=vertLimit[row];
170 
171 			if(verbose2){
172 				System.out.println();
173 				System.out.println("row="+row);
174 				System.out.println("colStart="+colStart);
175 				System.out.println("colStop="+colStop);
176 				System.out.println("vlimit="+(vlimit>>SCOREOFFSET)+"\t"+(vlimit));
177 			}
178 
179 			if(colStart<0 || colStop<colStart){break;}
180 
181 
182 			if(colStart>1){
183 				assert(row>0);
184 				packed[MODE_MS][row][colStart-1]=subfloor;
185 				packed[MODE_INS][row][colStart-1]=subfloor;
186 				packed[MODE_DEL][row][colStart-1]=subfloor;
187 			}
188 
189 
190 			for(int col=colStart; col<=columns; col++){
191 
192 
193 				if(verbose2){
194 					System.out.println("\ncol "+col);
195 				}
196 
197 				final byte call0=(row<2 ? (byte)'?' : read[row-2]);
198 				final byte call1=read[row-1];
199 				final byte ref0=(col<2 ? (byte)'!' : ref[refStartLoc+col-2]);
200 				final byte ref1=ref[refStartLoc+col-1];
201 
202 //				final boolean match=(read[row-1]==ref[refStartLoc+col-1]);
203 //				final boolean prevMatch=(row<2 || col<2 ? false : read[row-2]==ref[refStartLoc+col-2]);
204 				final boolean match=(call1==ref1);
205 				final boolean prevMatch=(call0==ref0);
206 
207 //				System.err.println("")
208 
209 				iterationsLimited++;
210 				final int limit=Tools.max(vlimit, horizLimit[col]);
211 				final int limit3=Tools.max(floor, (match ? limit-POINTSoff_MATCH2 : limit-POINTSoff_SUB3));
212 
213 				final int delNeeded=Tools.max(0, row-col-1);
214 				final int insNeeded=Tools.max(0, (rows-row)-(columns-col)-1);
215 
216 				final int delPenalty=calcDelScoreOffset(delNeeded);
217 				final int insPenalty=calcInsScoreOffset(insNeeded);
218 
219 
220 				final int scoreFromDiag_MS=packed[MODE_MS][row-1][col-1]&SCOREMASK;
221 				final int scoreFromDel_MS=packed[MODE_DEL][row-1][col-1]&SCOREMASK;
222 				final int scoreFromIns_MS=packed[MODE_INS][row-1][col-1]&SCOREMASK;
223 
224 				final int scoreFromDiag_DEL=packed[MODE_MS][row][col-1]&SCOREMASK;
225 				final int scoreFromDel_DEL=packed[MODE_DEL][row][col-1]&SCOREMASK;
226 
227 				final int scoreFromDiag_INS=packed[MODE_MS][row-1][col]&SCOREMASK;
228 				final int scoreFromIns_INS=packed[MODE_INS][row-1][col]&SCOREMASK;
229 
230 //				if(scoreFromDiag_MS<limit3 && scoreFromDel_MS<limit3 && scoreFromIns_MS<limit3
231 //						&& scoreFromDiag_DEL<limit && scoreFromDel_DEL<limit && scoreFromDiag_INS<limit && scoreFromIns_INS<limit){
232 //					iterationsLimited--; //A "fast" iteration
233 //				}
234 
235 				if((scoreFromDiag_MS<=limit3 && scoreFromDel_MS<=limit3 && scoreFromIns_MS<=limit3)){
236 					packed[MODE_MS][row][col]=subfloor;
237 				}else{//Calculate match and sub scores
238 					final int streak=(packed[MODE_MS][row-1][col-1]&TIMEMASK);
239 
240 					{//Calculate match/sub score
241 
242 						int score;
243 						int time;
244 						byte prevState;
245 
246 						if(match){
247 
248 							int scoreMS=scoreFromDiag_MS+(prevMatch ? POINTSoff_MATCH2 : POINTSoff_MATCH);
249 							int scoreD=scoreFromDel_MS+POINTSoff_MATCH;
250 							int scoreI=scoreFromIns_MS+POINTSoff_MATCH;
251 
252 //							byte prevState;
253 							if(scoreMS>=scoreD && scoreMS>=scoreI){
254 								score=scoreMS;
255 								time=(prevMatch ? streak+1 : 1);
256 								prevState=MODE_MS;
257 							}else if(scoreD>=scoreI){
258 								score=scoreD;
259 								time=1;
260 								prevState=MODE_DEL;
261 							}else{
262 								score=scoreI;
263 								time=1;
264 								prevState=MODE_INS;
265 							}
266 
267 						}else{
268 
269 							int scoreMS;
270 							if(ref1!='N' && call1!='N'){
271 								scoreMS=scoreFromDiag_MS+(prevMatch ? (streak<=1 ? POINTSoff_SUBR : POINTSoff_SUB) : subScoreArray[streak]);
272 							}else{
273 								scoreMS=scoreFromDiag_MS+POINTSoff_NOCALL;
274 							}
275 
276 							int scoreD=scoreFromDel_MS+POINTSoff_SUB; //+2 to move it as close as possible to the deletion / insertion
277 							int scoreI=scoreFromIns_MS+POINTSoff_SUB;
278 
279 							if(scoreMS>=scoreD && scoreMS>=scoreI){
280 								score=scoreMS;
281 								time=(prevMatch ? 1 : streak+1);
282 //								time=(prevMatch ? (streak==1 ? 3 : 1) : streak+1);
283 								prevState=MODE_MS;
284 							}else if(scoreD>=scoreI){
285 								score=scoreD;
286 								time=1;
287 								prevState=MODE_DEL;
288 							}else{
289 								score=scoreI;
290 								time=1;
291 								prevState=MODE_INS;
292 							}
293 						}
294 
295 						final int limit2;
296 						if(delNeeded>0){
297 							limit2=limit-delPenalty;
298 						}else if(insNeeded>0){
299 							limit2=limit-insPenalty;
300 						}else{
301 							limit2=limit;
302 						}
303 						assert(limit2>=limit);
304 
305 						if(verbose2){System.err.println("MS: \tlimit2="+(limit2>>SCOREOFFSET)+"\t, score="+(score>>SCOREOFFSET));}
306 
307 						if(score>=limit2){
308 							maxGoodCol=col;
309 							if(minGoodCol<0){minGoodCol=col;}
310 						}else{
311 							score=subfloor;
312 						}
313 
314 						assert(time<=MAX_TIME);//if(time>MAX_TIME){time=MAX_TIME-3;}
315 						assert(score>=MINoff_SCORE || score==BADoff) : "Score overflow - use MSA2 instead";
316 						assert(score<=MAXoff_SCORE) : "Score overflow - use MSA2 instead";
317 //						packed[MODE_MS][row][col]=(score|prevState|time);
318 						packed[MODE_MS][row][col]=(score|time);
319 						assert((score&SCOREMASK)==score);
320 //						assert((prevState&MODEMASK)==prevState);
321 						assert((time&TIMEMASK)==time);
322 					}
323 				}
324 
325 				if((scoreFromDiag_DEL<=limit && scoreFromDel_DEL<=limit)){
326 //					assert((scoreFromDiag_DEL<=limit && scoreFromDel_DEL<=limit)) : scoreFromDiag_DEL+", "+row;
327 					packed[MODE_DEL][row][col]=subfloor;
328 				}else{//Calculate DEL score
329 
330 					final int streak=packed[MODE_DEL][row][col-1]&TIMEMASK;
331 
332 					int scoreMS=scoreFromDiag_DEL+POINTSoff_DEL;
333 					int scoreD=scoreFromDel_DEL+delScoreArray[streak];
334 //					int scoreI=scoreFromIns+POINTSoff_DEL;
335 
336 
337 					if(ref1=='N'){
338 						scoreMS+=POINTSoff_DEL_REF_N;
339 						scoreD+=POINTSoff_DEL_REF_N;
340 					}
341 
342 					//if(match){scoreMS=subfloor;}
343 
344 					int score;
345 					int time;
346 					byte prevState;
347 					if(scoreMS>=scoreD){
348 						score=scoreMS;
349 						time=1;
350 						prevState=MODE_MS;
351 					}else{
352 						score=scoreD;
353 						time=streak+1;
354 						prevState=MODE_DEL;
355 					}
356 
357 					final int limit2;
358 					if(insNeeded>0){
359 						limit2=limit-insPenalty;
360 					}else if(delNeeded>0){
361 						limit2=limit-calcDelScoreOffset(time+delNeeded)+calcDelScoreOffset(time);
362 					}else{
363 						limit2=limit;
364 					}
365 					assert(limit2>=limit);
366 					if(verbose2){System.err.println("DEL: \tlimit2="+(limit2>>SCOREOFFSET)+"\t, score="+(score>>SCOREOFFSET));}
367 
368 					if(score>=limit2){
369 						maxGoodCol=col;
370 						if(minGoodCol<0){minGoodCol=col;}
371 					}else{
372 						score=subfloor;
373 					}
374 
375 					assert(time<=MAX_TIME);//if(time>MAX_TIME){time=MAX_TIME-3;}
376 					assert(score>=MINoff_SCORE || score==BADoff) : "Score overflow - use MSA2 instead";
377 					assert(score<=MAXoff_SCORE) : "Score overflow - use MSA2 instead";
378 //					packed[MODE_DEL][row][col]=(score|prevState|time);
379 					packed[MODE_DEL][row][col]=(score|time);
380 					assert((score&SCOREMASK)==score);
381 //					assert((prevState&MODEMASK)==prevState);
382 					assert((time&TIMEMASK)==time);
383 				}
384 
385 				if(scoreFromDiag_INS<=limit && scoreFromIns_INS<=limit){
386 					packed[MODE_INS][row][col]=subfloor;
387 				}else{//Calculate INS score
388 
389 					final int streak=packed[MODE_INS][row-1][col]&TIMEMASK;
390 
391 					int scoreMS=scoreFromDiag_INS+POINTSoff_INS;
392 //					int scoreD=scoreFromDel+POINTSoff_INS;
393 					int scoreI=scoreFromIns_INS+insScoreArray[streak];
394 
395 
396 //					System.err.println("("+row+","+col+")\t"+scoreFromDiag+"+"+POINTSoff_INS+"="+scoreM+", "+
397 //							scoreFromSub+"+"+POINTSoff_INS+"="+scoreS+", "
398 //							+scoreD+", "+scoreFromIns+"+"+
399 //							(streak==0 ? POINTSoff_INS : streak<LIMIT_FOR_COST_3 ? POINTSoff_INS2 : POINTSoff_INS3)+"="+scoreI);
400 
401 					//if(match){scoreMS=subfloor;}
402 
403 					int score;
404 					int time;
405 					byte prevState;
406 					if(scoreMS>=scoreI){
407 						score=scoreMS;
408 						time=1;
409 						prevState=MODE_MS;
410 					}else{
411 						score=scoreI;
412 						time=streak+1;
413 						prevState=MODE_INS;
414 					}
415 
416 					final int limit2;
417 					if(delNeeded>0){
418 						limit2=limit-delPenalty;
419 					}else if(insNeeded>0){
420 						limit2=limit-calcInsScoreOffset(time+insNeeded)+calcInsScoreOffset(time);
421 					}else{
422 						limit2=limit;
423 					}
424 					assert(limit2>=limit);
425 
426 					if(verbose2){System.err.println("INS: \tlimit2="+(limit2>>SCOREOFFSET)+"\t, score="+(score>>SCOREOFFSET));}
427 					if(score>=limit2){
428 						maxGoodCol=col;
429 						if(minGoodCol<0){minGoodCol=col;}
430 					}else{
431 						score=subfloor;
432 					}
433 
434 					assert(time<=MAX_TIME);//if(time>MAX_TIME){time=MAX_TIME-3;}
435 					assert(score>=MINoff_SCORE || score==BADoff) : "Score overflow - use MSA2 instead";
436 					assert(score<=MAXoff_SCORE) : "Score overflow - use MSA2 instead";
437 //					packed[MODE_INS][row][col]=(score|prevState|time);
438 					packed[MODE_INS][row][col]=(score|time);
439 					assert((score&SCOREMASK)==score);
440 //					assert((prevState&MODEMASK)==prevState);
441 					assert((time&TIMEMASK)==time);
442 				}
443 
444 
445 				if(col>=colStop){
446 					if(col>colStop && maxGoodCol<col){break;}
447 					if(row>1){
448 						packed[MODE_MS][row-1][col+1]=subfloor;
449 						packed[MODE_INS][row-1][col+1]=subfloor;
450 						packed[MODE_DEL][row-1][col+1]=subfloor;
451 					}
452 				}
453 			}
454 		}
455 
456 
457 		int maxCol=-1;
458 		int maxState=-1;
459 		int maxScore=Integer.MIN_VALUE;
460 
461 		for(int state=0; state<packed.length; state++){
462 			for(int col=1; col<=columns; col++){
463 				int x=packed[state][rows][col]&SCOREMASK;
464 				if(x>maxScore){
465 					maxScore=x;
466 					maxCol=col;
467 					maxState=state;
468 				}
469 			}
470 		}
471 
472 		assert(maxScore>=BADoff);
473 //		if(maxScore==BADoff){
474 //			return null;
475 //		}
476 //		if(maxScore<floor){
477 //			return null;
478 //		}
479 		if(maxScore<minScore_off){
480 			return null;
481 		}
482 
483 		maxScore>>=SCOREOFFSET;
484 
485 //		System.err.println("Returning "+rows+", "+maxCol+", "+maxState+", "+maxScore);
486 		return new int[] {rows, maxCol, maxState, maxScore};
487 	}
488 
489 
490 	/** return new int[] {rows, maxC, maxS, max};
491 	 * Does not require a min score (ie, same as old method) */
fillUnlimited(byte[] read, byte[] ref, int refStartLoc, int refEndLoc)492 	private final int[] fillUnlimited(byte[] read, byte[] ref, int refStartLoc, int refEndLoc){
493 		rows=read.length;
494 		columns=refEndLoc-refStartLoc+1;
495 
496 		final int maxGain=(read.length-1)*POINTSoff_MATCH2+POINTSoff_MATCH;
497 		final int subfloor=0-2*maxGain;
498 		assert(subfloor>BADoff && subfloor*2>BADoff); //TODO: Actually, it needs to be substantially more.
499 
500 		//temporary, for finding a bug
501 		if(rows>maxRows || columns>maxColumns){
502 			throw new RuntimeException("rows="+rows+", maxRows="+maxRows+", cols="+columns+", maxCols="+maxColumns+"\n"+new String(read)+"\n");
503 		}
504 
505 		assert(rows<=maxRows) : "Check that values are in-bounds before calling this function: "+rows+", "+maxRows;
506 		assert(columns<=maxColumns) : "Check that values are in-bounds before calling this function: "+columns+", "+maxColumns;
507 
508 		assert(refStartLoc>=0) : "Check that values are in-bounds before calling this function: "+refStartLoc;
509 		assert(refEndLoc<ref.length) : "Check that values are in-bounds before calling this function: "+refEndLoc+", "+ref.length;
510 
511 		for(int row=1; row<=rows; row++){
512 
513 			{
514 				int score=calcInsScoreOffset(row);
515 				packed[0][row][0]=score;
516 				packed[1][row][0]=score;
517 				packed[2][row][0]=score;
518 			}
519 
520 //			int minc=max(1, row-20);
521 //			int maxc=min(columns, row+20);
522 
523 			for(int col=1; col<=columns; col++){
524 				iterationsUnlimited++;
525 
526 //				final boolean match=(read[row-1]==ref[refStartLoc+col-1]);
527 //				final boolean prevMatch=(row<2 || col<2 ? false : read[row-2]==ref[refStartLoc+col-2]);
528 
529 				final byte call0=(row<2 ? (byte)'?' : read[row-2]);
530 				final byte call1=read[row-1];
531 				final byte ref0=(col<2 ? (byte)'!' : ref[refStartLoc+col-2]);
532 				final byte ref1=ref[refStartLoc+col-1];
533 
534 				final boolean match=(call1==ref1);
535 				final boolean prevMatch=(call0==ref0);
536 
537 				{//Calculate match and sub scores
538 
539 					final int scoreFromDiag=packed[MODE_MS][row-1][col-1]&SCOREMASK;
540 					final int scoreFromDel=packed[MODE_DEL][row-1][col-1]&SCOREMASK;
541 					final int scoreFromIns=packed[MODE_INS][row-1][col-1]&SCOREMASK;
542 					final int streak=(packed[MODE_MS][row-1][col-1]&TIMEMASK);
543 
544 					{//Calculate match/sub score
545 
546 						if(match){
547 
548 							int scoreMS=scoreFromDiag+(prevMatch ? POINTSoff_MATCH2 : POINTSoff_MATCH);
549 							int scoreD=scoreFromDel+POINTSoff_MATCH;
550 							int scoreI=scoreFromIns+POINTSoff_MATCH;
551 
552 							int score;
553 							int time;
554 //							byte prevState;
555 							if(scoreMS>=scoreD && scoreMS>=scoreI){
556 								score=scoreMS;
557 								time=(prevMatch ? streak+1 : 1);
558 //								prevState=MODE_MS;
559 							}else if(scoreD>=scoreI){
560 								score=scoreD;
561 								time=1;
562 //								prevState=MODE_DEL;
563 							}else{
564 								score=scoreI;
565 								time=1;
566 //								prevState=MODE_INS;
567 							}
568 
569 							assert(time<=MAX_TIME);//if(time>MAX_TIME){time=MAX_TIME-3;}
570 							assert(score>=MINoff_SCORE) : "Score overflow - use MSA2 instead";
571 							assert(score<=MAXoff_SCORE) : "Score overflow - use MSA2 instead";
572 //							packed[MODE_MS][row][col]=(score|prevState|time);
573 							packed[MODE_MS][row][col]=(score|time);
574 							assert((score&SCOREMASK)==score);
575 //							assert((prevState&MODEMASK)==prevState);
576 							assert((time&TIMEMASK)==time);
577 
578 						}else{
579 
580 							int scoreMS;
581 							if(ref1!='N' && call1!='N'){
582 								scoreMS=scoreFromDiag+(prevMatch ? (streak<=1 ? POINTSoff_SUBR : POINTSoff_SUB) : subScoreArray[streak]);
583 							}else{
584 								scoreMS=scoreFromDiag+POINTSoff_NOCALL;
585 							}
586 
587 							int scoreD=scoreFromDel+POINTSoff_SUB; //+2 to move it as close as possible to the deletion / insertion
588 							int scoreI=scoreFromIns+POINTSoff_SUB;
589 
590 							int score;
591 							int time;
592 							byte prevState;
593 							if(scoreMS>=scoreD && scoreMS>=scoreI){
594 								score=scoreMS;
595 								time=(prevMatch ? 1 : streak+1);
596 //								time=(prevMatch ? (streak==1 ? 3 : 1) : streak+1);
597 								prevState=MODE_MS;
598 							}else if(scoreD>=scoreI){
599 								score=scoreD;
600 								time=1;
601 								prevState=MODE_DEL;
602 							}else{
603 								score=scoreI;
604 								time=1;
605 								prevState=MODE_INS;
606 							}
607 
608 							assert(time<=MAX_TIME);//if(time>MAX_TIME){time=MAX_TIME-3;}
609 							assert(score>=MINoff_SCORE) : "Score overflow - use MSA2 instead";
610 							assert(score<=MAXoff_SCORE) : "Score overflow - use MSA2 instead";
611 //							packed[MODE_MS][row][col]=(score|prevState|time);
612 							packed[MODE_MS][row][col]=(score|time);
613 							assert((score&SCOREMASK)==score);
614 //							assert((prevState&MODEMASK)==prevState);
615 							assert((time&TIMEMASK)==time);
616 						}
617 					}
618 				}
619 
620 				{//Calculate DEL score
621 
622 					final int streak=packed[MODE_DEL][row][col-1]&TIMEMASK;
623 
624 					final int scoreFromDiag=packed[MODE_MS][row][col-1]&SCOREMASK;
625 					final int scoreFromDel=packed[MODE_DEL][row][col-1]&SCOREMASK;
626 
627 					int scoreMS=scoreFromDiag+POINTSoff_DEL;
628 					int scoreD=scoreFromDel+delScoreArray[streak];
629 //					int scoreI=scoreFromIns+POINTSoff_DEL;
630 
631 					if(ref1=='N'){
632 						scoreMS+=POINTSoff_DEL_REF_N;
633 						scoreD+=POINTSoff_DEL_REF_N;
634 					}
635 
636 					//if(match){scoreMS=subfloor;}
637 
638 					int score;
639 					int time;
640 					byte prevState;
641 					if(scoreMS>=scoreD){
642 						score=scoreMS;
643 						time=1;
644 						prevState=MODE_MS;
645 					}else{
646 						score=scoreD;
647 						time=streak+1;
648 						prevState=MODE_DEL;
649 					}
650 
651 					assert(time<=MAX_TIME);//if(time>MAX_TIME){time=MAX_TIME-3;}
652 					assert(score>=MINoff_SCORE) : "Score overflow - use MSA2 instead";
653 					assert(score<=MAXoff_SCORE) : "Score overflow - use MSA2 instead";
654 //					packed[MODE_DEL][row][col]=(score|prevState|time);
655 					packed[MODE_DEL][row][col]=(score|time);
656 					assert((score&SCOREMASK)==score);
657 //					assert((prevState&MODEMASK)==prevState);
658 					assert((time&TIMEMASK)==time);
659 				}
660 
661 				{//Calculate INS score
662 
663 					final int streak=packed[MODE_INS][row-1][col]&TIMEMASK;
664 
665 					final int scoreFromDiag=packed[MODE_MS][row-1][col]&SCOREMASK;
666 					final int scoreFromIns=packed[MODE_INS][row-1][col]&SCOREMASK;
667 
668 					int scoreMS=scoreFromDiag+POINTSoff_INS;
669 //					int scoreD=scoreFromDel+POINTSoff_INS;
670 					int scoreI=scoreFromIns+insScoreArray[streak];
671 
672 //					System.err.println("("+row+","+col+")\t"+scoreFromDiag+"+"+POINTSoff_INS+"="+scoreM+", "+
673 //							scoreFromSub+"+"+POINTSoff_INS+"="+scoreS+", "
674 //							+scoreD+", "+scoreFromIns+"+"+
675 //							(streak==0 ? POINTSoff_INS : streak<LIMIT_FOR_COST_3 ? POINTSoff_INS2 : POINTSoff_INS3)+"="+scoreI);
676 
677 					//if(match){scoreMS=subfloor;}
678 
679 					int score;
680 					int time;
681 					byte prevState;
682 					if(scoreMS>=scoreI){
683 						score=scoreMS;
684 						time=1;
685 						prevState=MODE_MS;
686 					}else{
687 						score=scoreI;
688 						time=streak+1;
689 						prevState=MODE_INS;
690 					}
691 
692 					assert(time<=MAX_TIME);//if(time>MAX_TIME){time=MAX_TIME-3;}
693 					assert(score>=MINoff_SCORE) : "Score overflow - use MSA2 instead";
694 					assert(score<=MAXoff_SCORE) : "Score overflow - use MSA2 instead";
695 //					packed[MODE_INS][row][col]=(score|prevState|time);
696 					packed[MODE_INS][row][col]=(score|time);
697 					assert((score&SCOREMASK)==score);
698 //					assert((prevState&MODEMASK)==prevState);
699 					assert((time&TIMEMASK)==time);
700 				}
701 			}
702 		}
703 
704 
705 		int maxCol=-1;
706 		int maxState=-1;
707 		int maxScore=Integer.MIN_VALUE;
708 
709 		for(int state=0; state<packed.length; state++){
710 			for(int col=1; col<=columns; col++){
711 				int x=packed[state][rows][col]&SCOREMASK;
712 				if(x>maxScore){
713 					maxScore=x;
714 					maxCol=col;
715 					maxState=state;
716 				}
717 			}
718 		}
719 		maxScore>>=SCOREOFFSET;
720 
721 //		System.err.println("Returning "+rows+", "+maxCol+", "+maxState+", "+maxScore);
722 		return new int[] {rows, maxCol, maxState, maxScore};
723 	}
724 
725 
726 	/** Generates the match string */
727 	public final byte[] traceback(byte[] read, byte[] ref, int refStartLoc, int refEndLoc, int row, int col, int state){
728 //		assert(false);
729 		assert(refStartLoc<=refEndLoc) : refStartLoc+", "+refEndLoc;
730 		assert(row==rows);
731 
732 		byte[] out=new byte[row+col-1]; //TODO if an out of bound crash occurs, try removing the "-1".
733 		int outPos=0;
734 
735 		if(state==MODE_INS){
736 			//TODO ? Maybe not needed.
737 		}
738 
739 		while(row>0 && col>0){
740 
741 //			byte prev0=(byte)(packed[state][row][col]&MODEMASK);
742 
743 			final int time=packed[state][row][col]&TIMEMASK;
744 			final byte prev;
745 
746 //			System.err.println("state="+state+", prev="+prev+", row="+row+", col="+col+", score="+scores[state][row][col]);
747 
748 			if(state==MODE_MS){
749 				if(time>1){prev=(byte)state;}
750 				else{
751 					final int scoreFromDiag=packed[MODE_MS][row-1][col-1]&SCOREMASK;
752 					final int scoreFromDel=packed[MODE_DEL][row-1][col-1]&SCOREMASK;
753 					final int scoreFromIns=packed[MODE_INS][row-1][col-1]&SCOREMASK;
754 					if(scoreFromDiag>=scoreFromDel && scoreFromDiag>=scoreFromIns){prev=MODE_MS;}
755 					else if(scoreFromDel>=scoreFromIns){prev=MODE_DEL;}
756 					else{prev=MODE_INS;}
757 				}
758 
759 				byte c=read[row-1];
760 				byte r=ref[refStartLoc+col-1];
761 				if(c==r){
762 					out[outPos]='m';
763 				}else{
764 					if(!AminoAcid.isFullyDefined(c)){
765 						out[outPos]='N';
766 					}else if(!AminoAcid.isFullyDefined(r)){
767 //						out[outPos]='X';
768 						out[outPos]='N';
769 					}else{
770 						out[outPos]='S';
771 					}
772 				}
773 
774 				row--;
775 				col--;
776 			}else if(state==MODE_DEL){
777 				if(time>1){prev=(byte)state;}
778 				else{
779 					final int scoreFromDiag=packed[MODE_MS][row][col-1]&SCOREMASK;
780 					final int scoreFromDel=packed[MODE_DEL][row][col-1]&SCOREMASK;
781 					if(scoreFromDiag>=scoreFromDel){prev=MODE_MS;}
782 					else{prev=MODE_DEL;}
783 				}
784 
785 				byte r=ref[refStartLoc+col-1];
786 				out[outPos]='D';
787 
788 				col--;
789 			}else{
790 				if(time>1){prev=(byte)state;}
791 				else{
792 					final int scoreFromDiag=packed[MODE_MS][row-1][col]&SCOREMASK;
793 					final int scoreFromIns=packed[MODE_INS][row-1][col]&SCOREMASK;
794 					if(scoreFromDiag>=scoreFromIns){prev=MODE_MS;}
795 					else{prev=MODE_INS;}
796 				}
797 
798 				assert(state==MODE_INS) : state;
799 				if(col==0){
800 					out[outPos]='X';
801 				}else if(col>=columns){
802 					out[outPos]='Y';
803 				}else{
804 					out[outPos]='I';
805 				}
806 				row--;
807 			}
808 
809 //			assert(prev==prev0);
810 			state=prev;
811 			outPos++;
812 		}
813 
814 		assert(row==0 || col==0);
815 		if(col!=row){
816 			while(row>0){
817 				out[outPos]='X';
818 				outPos++;
819 				row--;
820 				col--;
821 			}
822 			if(col>0){
823 				//do nothing
824 			}
825 		}
826 
827 
828 		//Shrink and reverse the string
829 		byte[] out2=new byte[outPos];
830 		for(int i=0; i<outPos; i++){
831 			out2[i]=out[outPos-i-1];
832 		}
833 		out=null;
834 
835 		return out2;
836 	}
837 
838 	/** @return {score, bestRefStart, bestRefStop} */
score(final byte[] read, final byte[] ref, final int refStartLoc, final int refEndLoc, final int maxRow, final int maxCol, final int maxState)839 	public final int[] score(final byte[] read, final byte[] ref, final int refStartLoc, final int refEndLoc,
840 			final int maxRow, final int maxCol, final int maxState){
841 
842 		int row=maxRow;
843 		int col=maxCol;
844 		int state=maxState;
845 
846 		assert(maxState>=0 && maxState<packed.length) :
847 			maxState+", "+maxRow+", "+maxCol+"\n"+new String(read)+"\n"+toString(ref, refStartLoc, refEndLoc);
848 		assert(maxRow>=0 && maxRow<packed[0].length) :
849 			maxState+", "+maxRow+", "+maxCol+"\n"+new String(read)+"\n"+toString(ref, refStartLoc, refEndLoc);
850 		assert(maxCol>=0 && maxCol<packed[0][0].length) :
851 			maxState+", "+maxRow+", "+maxCol+"\n"+new String(read)+"\n"+toString(ref, refStartLoc, refEndLoc);
852 
853 		int score=packed[maxState][maxRow][maxCol]&SCOREMASK; //Or zero, if it is to be recalculated
854 
855 		if(row<rows){
856 			int difR=rows-row;
857 			int difC=columns-col;
858 
859 			while(difR>difC){
860 				score+=POINTSoff_NOREF;
861 				difR--;
862 			}
863 
864 			row+=difR;
865 			col+=difR;
866 
867 		}
868 
869 		assert(refStartLoc<=refEndLoc);
870 		assert(row==rows);
871 
872 
873 		final int bestRefStop=refStartLoc+col-1;
874 
875 		while(row>0 && col>0){
876 //			System.err.println("state="+state+", row="+row+", col="+col);
877 
878 
879 
880 //			byte prev0=(byte)(packed[state][row][col]&MODEMASK);
881 
882 			final int time=packed[state][row][col]&TIMEMASK;
883 			final byte prev;
884 
885 			if(state==MODE_MS){
886 				if(time>1){prev=(byte)state;}
887 				else{
888 					final int scoreFromDiag=packed[MODE_MS][row-1][col-1]&SCOREMASK;
889 					final int scoreFromDel=packed[MODE_DEL][row-1][col-1]&SCOREMASK;
890 					final int scoreFromIns=packed[MODE_INS][row-1][col-1]&SCOREMASK;
891 					if(scoreFromDiag>=scoreFromDel && scoreFromDiag>=scoreFromIns){prev=MODE_MS;}
892 					else if(scoreFromDel>=scoreFromIns){prev=MODE_DEL;}
893 					else{prev=MODE_INS;}
894 				}
895 				row--;
896 				col--;
897 			}else if(state==MODE_DEL){
898 				if(time>1){prev=(byte)state;}
899 				else{
900 					final int scoreFromDiag=packed[MODE_MS][row][col-1]&SCOREMASK;
901 					final int scoreFromDel=packed[MODE_DEL][row][col-1]&SCOREMASK;
902 					if(scoreFromDiag>=scoreFromDel){prev=MODE_MS;}
903 					else{prev=MODE_DEL;}
904 				}
905 				col--;
906 			}else{
907 				assert(state==MODE_INS);
908 				if(time>1){prev=(byte)state;}
909 				else{
910 					final int scoreFromDiag=packed[MODE_MS][row-1][col]&SCOREMASK;
911 					final int scoreFromIns=packed[MODE_INS][row-1][col]&SCOREMASK;
912 					if(scoreFromDiag>=scoreFromIns){prev=MODE_MS;}
913 					else{prev=MODE_INS;}
914 				}
915 				row--;
916 			}
917 
918 			if(col<0){
919 				System.err.println(row);
920 				break; //prevents an out of bounds access
921 
922 			}
923 
924 //			assert(prev==prev0);
925 			state=prev;
926 
927 //			System.err.println("state2="+state+", row2="+row+", col2="+col+"\n");
928 		}
929 //		assert(false) : row+", "+col;
930 		if(row>col){
931 			col-=row;
932 		}
933 
934 		final int bestRefStart=refStartLoc+col;
935 
936 		score>>=SCOREOFFSET;
937 		int[] rvec;
938 		if(bestRefStart<refStartLoc || bestRefStop>refEndLoc){ //Suggest extra padding in cases of overflow
939 			int padLeft=Tools.max(0, refStartLoc-bestRefStart);
940 			int padRight=Tools.max(0, bestRefStop-refEndLoc);
941 			rvec=new int[] {score, bestRefStart, bestRefStop, padLeft, padRight};
942 		}else{
943 			rvec=new int[] {score, bestRefStart, bestRefStop};
944 		}
945 		return rvec;
946 	}
947 
948 
949 	/** Will not fill areas that cannot match minScore.
950 	 * @return {score, bestRefStart, bestRefStop}  */
fillAndScoreLimited(byte[] read, byte[] ref, int refStartLoc, int refEndLoc, int minScore)951 	public final int[] fillAndScoreLimited(byte[] read, byte[] ref, int refStartLoc, int refEndLoc, int minScore){
952 		int a=Tools.max(0, refStartLoc);
953 		int b=Tools.min(ref.length-1, refEndLoc);
954 		assert(b>=a);
955 
956 		int[] score;
957 
958 		if(b-a>=maxColumns){
959 			System.err.println("Warning: Max alignment columns exceeded; restricting range. "+(b-a+1)+" > "+maxColumns);
960 			assert(false) : refStartLoc+", "+refEndLoc;
961 			b=Tools.min(ref.length-1, a+maxColumns-1);
962 		}
963 		int[] max=fillLimited(read, ref, a, b, minScore);
964 		score=(max==null ? null : score(read, ref, a, b, max[0], max[1], max[2]));
965 
966 		return score;
967 	}
968 
969 
970 
scoreNoIndels(byte[] read, SiteScore ss)971 	public final static int scoreNoIndels(byte[] read, SiteScore ss){
972 		ChromosomeArray cha=Data.getChromosome(ss.chrom);
973 		return scoreNoIndels(read, cha.array, ss.start, ss);
974 	}
975 
scoreNoIndels(byte[] read, final int chrom, final int refStart)976 	public final static int scoreNoIndels(byte[] read, final int chrom, final int refStart){
977 		ChromosomeArray cha=Data.getChromosome(chrom);
978 		return scoreNoIndels(read, cha.array, refStart, null);
979 	}
980 
scoreNoIndels(byte[] read, SiteScore ss, byte[] baseScores)981 	public final static int scoreNoIndels(byte[] read, SiteScore ss, byte[] baseScores){
982 		ChromosomeArray cha=Data.getChromosome(ss.chrom);
983 		return scoreNoIndels(read, cha.array, baseScores, ss.start, ss);
984 	}
985 
scoreNoIndels(byte[] read, final int chrom, final int refStart, byte[] baseScores)986 	public final static int scoreNoIndels(byte[] read, final int chrom, final int refStart, byte[] baseScores){
987 		ChromosomeArray cha=Data.getChromosome(chrom);
988 		return scoreNoIndels(read, cha.array, baseScores, refStart, null);
989 	}
990 
scoreNoIndels(byte[] read, byte[] ref, final int refStart, final SiteScore ss)991 	public final static int scoreNoIndels(byte[] read, byte[] ref, final int refStart, final SiteScore ss){
992 
993 		int score=0;
994 		int mode=-1;
995 		int timeInMode=0;
996 
997 		//This block handles cases where the read runs outside the reference
998 		//Of course, padding the reference with 'N' would be better, but...
999 		int readStart=0;
1000 		int readStop=read.length;
1001 		final int refStop=refStart+read.length;
1002 		boolean semiperfect=true;
1003 		int norefs=0;
1004 
1005 		if(refStart<0){
1006 			readStart=0-refStart;
1007 			score+=POINTS_NOREF*readStart;
1008 			norefs+=readStart;
1009 		}
1010 		if(refStop>ref.length){
1011 			int dif=(refStop-ref.length);
1012 			readStop-=dif;
1013 			score+=POINTS_NOREF*dif;
1014 			norefs+=dif;
1015 		}
1016 
1017 //		if(refStart<0 || refStart+read.length>ref.length){return -99999;} //No longer needed.
1018 
1019 		for(int i=readStart; i<readStop; i++){
1020 			byte c=read[i];
1021 			byte r=ref[refStart+i];
1022 
1023 			if(c==r && c!='N'){
1024 				if(mode==MODE_MS){
1025 					timeInMode++;
1026 					score+=POINTS_MATCH2;
1027 				}else{
1028 					timeInMode=0;
1029 					score+=POINTS_MATCH;
1030 				}
1031 				mode=MODE_MS;
1032 			}else if(c<0 || c=='N'){
1033 				score+=POINTS_NOCALL;
1034 				semiperfect=false;
1035 			}else if(r<0 || r=='N'){
1036 				score+=POINTS_NOREF;
1037 				norefs++;
1038 			}else{
1039 				if(mode==MODE_SUB){timeInMode++;}
1040 				else{timeInMode=0;}
1041 
1042 				if(timeInMode==0){score+=POINTS_SUB;}
1043 				else if(timeInMode<LIMIT_FOR_COST_3){score+=POINTS_SUB2;}
1044 				else{score+=POINTS_SUB3;}
1045 				mode=MODE_SUB;
1046 				semiperfect=false;
1047 			}
1048 		}
1049 
1050 		if(semiperfect && ss!=null){ss.semiperfect=((ss.stop==ss.start+read.length-1) && (norefs<=read.length/2));}
1051 
1052 		return score;
1053 	}
1054 
1055 
scoreNoIndels(byte[] read, byte[] ref, byte[] baseScores, final int refStart, SiteScore ss)1056 	public final static int scoreNoIndels(byte[] read, byte[] ref, byte[] baseScores, final int refStart, SiteScore ss){
1057 
1058 		int score=0;
1059 		int mode=-1;
1060 		int timeInMode=0;
1061 		int norefs=0;
1062 
1063 		//This block handles cases where the read runs outside the reference
1064 		//Of course, padding the reference with 'N' would be better, but...
1065 		int readStart=0;
1066 		int readStop=read.length;
1067 		final int refStop=refStart+read.length;
1068 		boolean semiperfect=true;
1069 
1070 		if(refStart<0){
1071 			readStart=0-refStart;
1072 			score+=POINTS_NOREF*readStart;
1073 			norefs+=readStart;
1074 		}
1075 		if(refStop>ref.length){
1076 			int dif=(refStop-ref.length);
1077 			readStop-=dif;
1078 			score+=POINTS_NOREF*dif;
1079 			norefs+=dif;
1080 		}
1081 
1082 //		if(refStart<0 || refStart+read.length>ref.length){return -99999;} //No longer needed.
1083 
1084 		for(int i=readStart; i<readStop; i++){
1085 			byte c=read[i];
1086 			byte r=ref[refStart+i];
1087 
1088 			if(c==r && c!='N'){
1089 				if(mode==MODE_MS){
1090 					timeInMode++;
1091 					score+=POINTS_MATCH2;
1092 				}else{
1093 					timeInMode=0;
1094 					score+=POINTS_MATCH;
1095 				}
1096 				score+=baseScores[i];
1097 				mode=MODE_MS;
1098 			}else if(c<0 || c=='N'){
1099 				score+=POINTS_NOCALL;
1100 				semiperfect=false;
1101 			}else if(r<0 || r=='N'){
1102 				score+=POINTS_NOREF;
1103 				norefs++;
1104 			}else{
1105 				if(mode==MODE_SUB){timeInMode++;}
1106 				else{timeInMode=0;}
1107 
1108 				if(timeInMode==0){score+=POINTS_SUB;}
1109 				else if(timeInMode<LIMIT_FOR_COST_3){score+=POINTS_SUB2;}
1110 				else{score+=POINTS_SUB3;}
1111 				mode=MODE_SUB;
1112 				semiperfect=false;
1113 			}
1114 		}
1115 
1116 		if(semiperfect && ss!=null){ss.semiperfect=((ss.stop==ss.start+read.length-1) && (norefs<=read.length/2));}
1117 		assert(Read.CHECKSITE(ss, read, -1));
1118 
1119 		return score;
1120 	}
1121 
1122 
scoreNoIndelsAndMakeMatchString(byte[] read, byte[] ref, byte[] baseScores, final int refStart, byte[][] matchReturn)1123 	public final static int scoreNoIndelsAndMakeMatchString(byte[] read, byte[] ref, byte[] baseScores, final int refStart, byte[][] matchReturn){
1124 		int score=0;
1125 		int mode=-1;
1126 		int timeInMode=0;
1127 
1128 		assert(refStart<=ref.length) : refStart+", "+ref.length;
1129 
1130 		//This block handles cases where the read runs outside the reference
1131 		//Of course, padding the reference with 'N' would be better, but...
1132 		int readStart=0;
1133 		int readStop=read.length;
1134 		final int refStop=refStart+read.length;
1135 		if(refStart<0){
1136 			readStart=0-refStart;
1137 			score+=POINTS_NOREF*readStart;
1138 		}
1139 		if(refStop>ref.length){
1140 			int dif=(refStop-ref.length);
1141 			System.err.println("dif="+dif+", ref.length="+ref.length+", refStop="+refStop);
1142 			readStop-=dif;
1143 			score+=POINTS_NOREF*dif;
1144 		}
1145 		assert(refStart+readStop<=ref.length) : "readStart="+readStart+", readStop="+readStop+
1146 		", refStart="+refStart+", refStop="+refStop+", ref.length="+ref.length+", read.length="+read.length;
1147 
1148 		assert(matchReturn!=null);
1149 		assert(matchReturn.length==1);
1150 		if(matchReturn[0]==null || matchReturn[0].length!=read.length){
1151 			assert(matchReturn[0]==null || matchReturn[0].length<read.length) : matchReturn[0].length+"!="+read.length;
1152 			matchReturn[0]=new byte[read.length];
1153 		}
1154 		final byte[] match=matchReturn[0];
1155 
1156 //		if(refStart<0 || refStart+read.length>ref.length){return -99999;} //No longer needed.
1157 
1158 		for(int i=readStart; i<readStop; i++){
1159 			byte c=read[i];
1160 			byte r=ref[refStart+i];
1161 
1162 			assert(r!='.' && c!='.');
1163 
1164 			if(c==r && c!='N'){
1165 				if(mode==MODE_MS){
1166 					timeInMode++;
1167 					score+=POINTS_MATCH2;
1168 				}else{
1169 					timeInMode=0;
1170 					score+=POINTS_MATCH;
1171 				}
1172 				score+=baseScores[i];
1173 				match[i]='m';
1174 				mode=MODE_MS;
1175 			}else if(c<0 || c=='N'){
1176 				score+=POINTS_NOCALL;
1177 				match[i]='N';
1178 			}else if(r<0 || r=='N'){
1179 				score+=POINTS_NOREF;
1180 //				match[i]='m';
1181 				match[i]='N';
1182 			}else{
1183 				match[i]='S';
1184 				if(mode==MODE_SUB){timeInMode++;}
1185 				else{timeInMode=0;}
1186 
1187 				if(timeInMode==0){score+=POINTS_SUB;}
1188 				else if(timeInMode<LIMIT_FOR_COST_3){score+=POINTS_SUB2;}
1189 				else{score+=POINTS_SUB3;}
1190 				mode=MODE_SUB;
1191 			}
1192 		}
1193 
1194 		return score;
1195 	}
1196 
1197 
scoreNoIndelsAndMakeMatchString(byte[] read, byte[] ref, final int refStart, byte[][] matchReturn)1198 	public final static int scoreNoIndelsAndMakeMatchString(byte[] read, byte[] ref, final int refStart, byte[][] matchReturn){
1199 		int score=0;
1200 		int mode=-1;
1201 		int timeInMode=0;
1202 
1203 		assert(refStart<=ref.length) : refStart+", "+ref.length;
1204 
1205 		//This block handles cases where the read runs outside the reference
1206 		//Of course, padding the reference with 'N' would be better, but...
1207 		int readStart=0;
1208 		int readStop=read.length;
1209 		final int refStop=refStart+read.length;
1210 		if(refStart<0){
1211 			readStart=0-refStart;
1212 			score+=POINTS_NOREF*readStart;
1213 		}
1214 		if(refStop>ref.length){
1215 			int dif=(refStop-ref.length);
1216 			System.err.println("dif="+dif+", ref.length="+ref.length+", refStop="+refStop);
1217 			readStop-=dif;
1218 			score+=POINTS_NOREF*dif;
1219 		}
1220 		assert(refStart+readStop<=ref.length) : "readStart="+readStart+", readStop="+readStop+
1221 		", refStart="+refStart+", refStop="+refStop+", ref.length="+ref.length+", read.length="+read.length;
1222 
1223 		assert(matchReturn!=null);
1224 		assert(matchReturn.length==1);
1225 		if(matchReturn[0]==null || matchReturn[0].length!=read.length){
1226 			assert(matchReturn[0]==null || matchReturn[0].length<read.length) : matchReturn[0].length+"!="+read.length;
1227 			matchReturn[0]=new byte[read.length];
1228 		}
1229 		final byte[] match=matchReturn[0];
1230 
1231 //		if(refStart<0 || refStart+read.length>ref.length){return -99999;} //No longer needed.
1232 
1233 		for(int i=readStart; i<readStop; i++){
1234 			byte c=read[i];
1235 			byte r=ref[refStart+i];
1236 
1237 			assert(r!='.' && c!='.');
1238 
1239 			if(c==r && c!='N'){
1240 				if(mode==MODE_MS){
1241 					timeInMode++;
1242 					score+=POINTS_MATCH2;
1243 				}else{
1244 					timeInMode=0;
1245 					score+=POINTS_MATCH;
1246 				}
1247 				match[i]='m';
1248 				mode=MODE_MS;
1249 			}else if(c<0 || c=='N'){
1250 				score+=POINTS_NOCALL;
1251 				match[i]='N';
1252 			}else if(r<0 || r=='N'){
1253 				score+=POINTS_NOREF;
1254 //				match[i]='m';
1255 				match[i]='N';
1256 			}else{
1257 				match[i]='S';
1258 				if(mode==MODE_SUB){timeInMode++;}
1259 				else{timeInMode=0;}
1260 
1261 				if(timeInMode==0){score+=POINTS_SUB;}
1262 				else if(timeInMode<LIMIT_FOR_COST_3){score+=POINTS_SUB2;}
1263 				else{score+=POINTS_SUB3;}
1264 				mode=MODE_SUB;
1265 			}
1266 		}
1267 
1268 		return score;
1269 	}
1270 
maxQuality(int numBases)1271 	public static final int maxQuality(int numBases){
1272 		return POINTS_MATCH+(numBases-1)*(POINTS_MATCH2);
1273 	}
1274 
maxQuality(byte[] baseScores)1275 	public static final int maxQuality(byte[] baseScores){
1276 		return POINTS_MATCH+(baseScores.length-1)*(POINTS_MATCH2)+Tools.sumInt(baseScores);
1277 	}
1278 
maxImperfectScore(int numBases)1279 	public static final int maxImperfectScore(int numBases){
1280 //		int maxQ=maxQuality(numBases);
1281 ////		maxImperfectSwScore=maxQ-(POINTS_MATCH2+POINTS_MATCH2)+(POINTS_MATCH+POINTS_SUB);
1282 //		int maxI=maxQ+POINTS_DEL;
1283 //		maxI=Tools.max(maxI, maxQ+POINTS_INS-POINTS_MATCH2);
1284 //		maxI=Tools.min(maxI, maxQ-(POINTS_MATCH2+POINTS_MATCH2)+(POINTS_MATCH+POINTS_SUB));
1285 
1286 		int maxQ=maxQuality(numBases);
1287 		int maxI=maxQ+Tools.min(POINTS_DEL, POINTS_INS-POINTS_MATCH2);
1288 		assert(maxI<(maxQ-(POINTS_MATCH2+POINTS_MATCH2)+(POINTS_MATCH+POINTS_SUB)));
1289 		return maxI;
1290 	}
1291 
1292 	public static final int maxImperfectScore(byte[] baseScores){
1293 //		int maxQ=maxQuality(numBases);
1294 ////		maxImperfectSwScore=maxQ-(POINTS_MATCH2+POINTS_MATCH2)+(POINTS_MATCH+POINTS_SUB);
1295 //		int maxI=maxQ+POINTS_DEL;
1296 //		maxI=Tools.max(maxI, maxQ+POINTS_INS-POINTS_MATCH2);
1297 //		maxI=Tools.min(maxI, maxQ-(POINTS_MATCH2+POINTS_MATCH2)+(POINTS_MATCH+POINTS_SUB));
1298 
1299 		int maxQ=maxQuality(baseScores);
1300 		int maxI=maxQ+Tools.min(POINTS_DEL, POINTS_INS-POINTS_MATCH2);
1301 		assert(maxI<(maxQ-(POINTS_MATCH2+POINTS_MATCH2)+(POINTS_MATCH+POINTS_SUB)));
1302 		return maxI;
1303 	}
1304 
1305 	public static final String toString(int[] a){
1306 
1307 		int width=7;
1308 
1309 		StringBuilder sb=new StringBuilder((a.length+1)*width+2);
1310 		for(int num : a){
1311 			String s=" "+num;
1312 			int spaces=width-s.length();
1313 			assert(spaces>=0) : width+", "+s.length()+", "+s+", "+num+", "+spaces;
1314 			for(int i=0; i<spaces; i++){sb.append(' ');}
1315 			sb.append(s);
1316 		}
1317 
1318 		return sb.toString();
1319 	}
1320 
1321 	public static final String toTimePacked(int[] a){
1322 		int width=7;
1323 
1324 		StringBuilder sb=new StringBuilder((a.length+1)*width+2);
1325 		for(int num_ : a){
1326 			int num=num_&TIMEMASK;
1327 			String s=" "+num;
1328 			int spaces=width-s.length();
1329 			assert(spaces>=0) : width+", "+s.length()+", "+s+", "+num+", "+spaces;
1330 			for(int i=0; i<spaces; i++){sb.append(' ');}
1331 			sb.append(s);
1332 		}
1333 
1334 		return sb.toString();
1335 	}
1336 
1337 	public static final String toScorePacked(int[] a){
1338 		int width=7;
1339 
1340 		String minString=" -";
1341 		String maxString="  ";
1342 		while(minString.length()<width){minString+='9';}
1343 		while(maxString.length()<width){maxString+='9';}
1344 
1345 		StringBuilder sb=new StringBuilder((a.length+1)*width+2);
1346 		for(int num_ : a){
1347 			int num=num_>>SCOREOFFSET;
1348 			String s=" "+num;
1349 			if(s.length()>width){s=num>0 ? maxString : minString;}
1350 			int spaces=width-s.length();
1351 			assert(spaces>=0) : width+", "+s.length()+", "+s+", "+num+", "+spaces;
1352 			for(int i=0; i<spaces; i++){sb.append(' ');}
1353 			sb.append(s);
1354 		}
1355 
1356 		return sb.toString();
1357 	}
1358 
1359 	public static final String toString(byte[] a){
1360 
1361 		int width=6;
1362 
1363 		StringBuilder sb=new StringBuilder((a.length+1)*width+2);
1364 		for(int num : a){
1365 			String s=" "+num;
1366 			int spaces=width-s.length();
1367 			assert(spaces>=0);
1368 			for(int i=0; i<spaces; i++){sb.append(' ');}
1369 			sb.append(s);
1370 		}
1371 
1372 		return sb.toString();
1373 	}
1374 
1375 	public static final String toString(byte[] ref, int startLoc, int stopLoc){
1376 		StringBuilder sb=new StringBuilder(stopLoc-startLoc+1);
1377 		for(int i=startLoc; i<=stopLoc; i++){sb.append((char)ref[i]);}
1378 		return sb.toString();
1379 	}
1380 
1381 	public static int calcDelScore(int len){
1382 		if(len<=0){return 0;}
1383 		int score=POINTS_DEL;
1384 
1385 		if(len>LIMIT_FOR_COST_4){
1386 			score+=(len-LIMIT_FOR_COST_4)*POINTS_DEL4;
1387 			len=LIMIT_FOR_COST_4;
1388 		}
1389 		if(len>LIMIT_FOR_COST_3){
1390 			score+=(len-LIMIT_FOR_COST_3)*POINTS_DEL3;
1391 			len=LIMIT_FOR_COST_3;
1392 		}
1393 		if(len>1){
1394 			score+=(len-1)*POINTS_DEL2;
1395 		}
1396 		return score;
1397 	}
1398 
1399 	private static int calcDelScoreOffset(int len){
1400 		if(len<=0){return 0;}
1401 		int score=POINTSoff_DEL;
1402 
1403 		if(len>LIMIT_FOR_COST_4){
1404 			score+=(len-LIMIT_FOR_COST_4)*POINTSoff_DEL4;
1405 			len=LIMIT_FOR_COST_4;
1406 		}
1407 		if(len>LIMIT_FOR_COST_3){
1408 			score+=(len-LIMIT_FOR_COST_3)*POINTSoff_DEL3;
1409 			len=LIMIT_FOR_COST_3;
1410 		}
1411 		if(len>1){
1412 			score+=(len-1)*POINTSoff_DEL2;
1413 		}
1414 		return score;
1415 	}
1416 
1417 	private static int calcMatchScoreOffset(int len){
1418 		if(len<=0){return 0;}
1419 		int score=POINTSoff_MATCH;
1420 
1421 		if(len>1){
1422 			score+=(len-1)*POINTSoff_MATCH2;
1423 		}
1424 		return score;
1425 	}
1426 
1427 	private static int calcSubScoreOffset(int len){
1428 		if(len<=0){return 0;}
1429 		int score=POINTSoff_SUB;
1430 
1431 		if(len>LIMIT_FOR_COST_3){
1432 			score+=(len-LIMIT_FOR_COST_3)*POINTSoff_SUB3;
1433 			len=LIMIT_FOR_COST_3;
1434 		}
1435 		if(len>1){
1436 			score+=(len-1)*POINTSoff_SUB2;
1437 		}
1438 		return score;
1439 	}
1440 
1441 	public static int calcInsScore(int len){
1442 		if(len<=0){return 0;}
1443 		int score=POINTS_INS;
1444 
1445 		if(len>LIMIT_FOR_COST_4){
1446 			score+=(len-LIMIT_FOR_COST_4)*POINTS_INS4;
1447 			len=LIMIT_FOR_COST_4;
1448 		}
1449 		if(len>LIMIT_FOR_COST_3){
1450 			score+=(len-LIMIT_FOR_COST_3)*POINTS_INS3;
1451 			len=LIMIT_FOR_COST_3;
1452 		}
1453 		if(len>1){
1454 			score+=(len-1)*POINTS_INS2;
1455 		}
1456 		return score;
1457 	}
1458 
1459 	private static int calcInsScoreOffset(int len){
1460 		if(len<=0){return 0;}
1461 		int score=POINTSoff_INS;
1462 		if(len>LIMIT_FOR_COST_4){
1463 			score+=(len-LIMIT_FOR_COST_4)*POINTSoff_INS4;
1464 			len=LIMIT_FOR_COST_4;
1465 		}
1466 		if(len>LIMIT_FOR_COST_3){
1467 			score+=(len-LIMIT_FOR_COST_3)*POINTSoff_INS3;
1468 			len=LIMIT_FOR_COST_3;
1469 		}
1470 		if(len>1){
1471 			score+=(len-1)*POINTSoff_INS2;
1472 		}
1473 		return score;
1474 	}
1475 
1476 
1477 	private final int maxRows;
1478 	private final int maxColumns;
1479 
1480 	private final int[][][] packed;
1481 
1482 	private final int[] vertLimit;
1483 	private final int[] horizLimit;
1484 
1485 	private final int[] insScoreArray;
1486 	private final int[] delScoreArray;
1487 	private final int[] matchScoreArray;
1488 	private final int[] subScoreArray;
1489 
1490 	CharSequence showVertLimit(){
1491 		StringBuilder sb=new StringBuilder();
1492 		for(int i=0; i<=rows; i++){sb.append(vertLimit[i]>>SCOREOFFSET).append(",");}
1493 		return sb;
1494 	}
1495 	CharSequence showHorizLimit(){
1496 		StringBuilder sb=new StringBuilder();
1497 		for(int i=0; i<=columns; i++){sb.append(horizLimit[i]>>SCOREOFFSET).append(",");}
1498 		return sb;
1499 	}
1500 
1501 //	public static final int MODEBITS=2;
1502 	public static final int TIMEBITS=12;
1503 	public static final int SCOREBITS=32-TIMEBITS;
1504 	public static final int MAX_TIME=((1<<TIMEBITS)-1);
1505 	public static final int MAX_SCORE=((1<<(SCOREBITS-1))-1)-2000;
1506 	public static final int MIN_SCORE=0-MAX_SCORE; //Keeps it 1 point above "BAD".
1507 
1508 //	public static final int MODEOFFSET=0; //Always zero.
1509 //	public static final int TIMEOFFSET=0;
1510 	public static final int SCOREOFFSET=TIMEBITS;
1511 
1512 //	public static final int MODEMASK=~((-1)<<MODEBITS);
1513 //	public static final int TIMEMASK=(~((-1)<<TIMEBITS))<<TIMEOFFSET;
1514 	public static final int TIMEMASK=~((-1)<<TIMEBITS);
1515 	public static final int SCOREMASK=(~((-1)<<SCOREBITS))<<SCOREOFFSET;
1516 
1517 	private static final byte MODE_MS=0;
1518 	private static final byte MODE_DEL=1;
1519 	private static final byte MODE_INS=2;
1520 	private static final byte MODE_SUB=3;
1521 
1522 	public static final int POINTS_NOREF=-10;
1523 	public static final int POINTS_NOCALL=-10;
1524 	public static final int POINTS_MATCH=90;
1525 	public static final int POINTS_MATCH2=100; //Note:  Changing to 90 substantially reduces false positives
1526 	public static final int POINTS_COMPATIBLE=50;
1527 	public static final int POINTS_SUB=-143;
1528 	public static final int POINTS_SUBR=-161; //increased penalty if prior match streak was at most 1
1529 	public static final int POINTS_SUB2=-54;
1530 	public static final int POINTS_SUB3=-35;
1531 	public static final int POINTS_MATCHSUB=-10;
1532 	public static final int POINTS_INS=-207;
1533 	public static final int POINTS_INS2=-51;
1534 	public static final int POINTS_INS3=-37;
1535 	public static final int POINTS_INS4=-15;
1536 	public static final int POINTS_DEL=-273;
1537 	public static final int POINTS_DEL2=-38;
1538 	public static final int POINTS_DEL3=-27;
1539 	public static final int POINTS_DEL4=-15;
1540 	public static final int POINTS_DEL_REF_N=-10;
1541 
1542 
1543 	public static final int LIMIT_FOR_COST_3=5;
1544 	public static final int LIMIT_FOR_COST_4=30;
1545 
1546 	public static final int BAD=MIN_SCORE-1;
1547 
1548 
1549 	public static final int POINTSoff_NOREF=(POINTS_NOREF<<SCOREOFFSET);
1550 	public static final int POINTSoff_NOCALL=(POINTS_NOCALL<<SCOREOFFSET);
1551 	public static final int POINTSoff_MATCH=(POINTS_MATCH<<SCOREOFFSET);
1552 	public static final int POINTSoff_MATCH2=(POINTS_MATCH2<<SCOREOFFSET);
1553 	public static final int POINTSoff_COMPATIBLE=(POINTS_COMPATIBLE<<SCOREOFFSET);
1554 	public static final int POINTSoff_SUB=(POINTS_SUB<<SCOREOFFSET);
1555 	public static final int POINTSoff_SUBR=(POINTS_SUBR<<SCOREOFFSET);
1556 	public static final int POINTSoff_SUB2=(POINTS_SUB2<<SCOREOFFSET);
1557 	public static final int POINTSoff_SUB3=(POINTS_SUB3<<SCOREOFFSET);
1558 	public static final int POINTSoff_MATCHSUB=(POINTS_MATCHSUB<<SCOREOFFSET);
1559 	public static final int POINTSoff_INS=(POINTS_INS<<SCOREOFFSET);
1560 	public static final int POINTSoff_INS2=(POINTS_INS2<<SCOREOFFSET);
1561 	public static final int POINTSoff_INS3=(POINTS_INS3<<SCOREOFFSET);
1562 	public static final int POINTSoff_INS4=(POINTS_INS4<<SCOREOFFSET);
1563 	public static final int POINTSoff_DEL=(POINTS_DEL<<SCOREOFFSET);
1564 	public static final int POINTSoff_DEL2=(POINTS_DEL2<<SCOREOFFSET);
1565 	public static final int POINTSoff_DEL3=(POINTS_DEL3<<SCOREOFFSET);
1566 	public static final int POINTSoff_DEL4=(POINTS_DEL4<<SCOREOFFSET);
1567 	public static final int POINTSoff_DEL_REF_N=(POINTS_DEL_REF_N<<SCOREOFFSET);
1568 	public static final int BADoff=(BAD<<SCOREOFFSET);
1569 	public static final int MAXoff_SCORE=MAX_SCORE<<SCOREOFFSET;
1570 	public static final int MINoff_SCORE=MIN_SCORE<<SCOREOFFSET;
1571 
1572 	private int rows;
1573 	private int columns;
1574 
1575 	public long iterationsLimited=0;
1576 	public long iterationsUnlimited=0;
1577 
1578 	public boolean verbose=false;
1579 	public boolean verbose2=false;
1580 
1581 }
1582