1 package shared;
2 
3 import java.io.Serializable;
4 import java.util.ArrayList;
5 import java.util.Arrays;
6 
7 import align2.QualityTools;
8 import dna.AminoAcid;
9 import stream.Read;
10 import stream.SamLine;
11 import stream.SiteScore;
12 import structures.ByteBuilder;
13 
14 /**
15  * Helper class for processes that do inline quality trimming.
16  * @author Brian Bushnell
17  * @date Mar 15, 2013
18  *
19  */
20 public final class TrimRead implements Serializable {
21 
22 	/**
23 	 *
24 	 */
25 	private static final long serialVersionUID = 8791743639124592480L;
26 
main(String[] args)27 	public static void main(String[] args){
28 		byte[] bases=args[0].getBytes();
29 		byte[] quals=(args.length<2 ? null : args[1].getBytes());
30 		if(quals!=null){
31 			for(int i=0; i<quals.length; i++){quals[i]-=32;}
32 		}
33 		byte[] match=(args.length<3 ? null : args[2].getBytes());
34 		float minq=(args.length<4 ? 5 : Float.parseFloat(args[3]));
35 		float minE=(float)QualityTools.phredToProbError(minq);
36 		Read r=new Read(bases, quals, 1);
37 		r.match=match;
38 		System.out.println("Before trim:\n"+r.toFastq()+(r.match==null ? "" : "\n"+new String(r.match)));
39 		System.out.println(Arrays.toString(r.quality));
40 		TrimRead tr=trim(r, true, true, minq, minE, 1);
41 		System.out.println("\nAfter trim:\n"+r.toFastq()+(r.match==null ? "" : "\n"+new String(r.match)));
42 		if(r.match==null){
43 			r.match=new byte[r.length()];
44 			for(int i=0; i<r.length(); i++){r.match[i]='m';}
45 		}
46 		tr.untrim();
47 		System.out.println("\nAfter untrim:\n"+r.toFastq()+(r.match==null ? "" : "\n"+new String(r.match)));
48 	}
49 
50 //	public static TrimRead trim(Read r, boolean trimLeft, boolean trimRight, float trimq, int minlen){
51 //		if(r==null || r.bases==null){return null;}
52 //
53 //		final int a, b;
54 //		if(optimalMode){
55 //			long packed=testOptimal(r.bases, r.quality, QualityTools.PROB_ERROR[trimq]);
56 //			a=trimLeft ? (int)((packed>>32)&0xFFFFFFFFL) : 0;
57 //			b=trimRight ? (int)((packed)&0xFFFFFFFFL) : 0;
58 //		}else if(windowMode){
59 //			a=0;
60 //			b=(trimRight ? testRightWindow(r.bases, r.quality, (byte)trimq, windowLength) : 0);
61 //		}else{
62 //			a=(trimLeft ? testLeft(r.bases, r.quality, (byte)trimq) : 0);
63 //			b=(trimRight ? testRight(r.bases, r.quality, (byte)trimq) : 0);
64 //		}
65 //		return (a+b==0 ? null : new TrimRead(r, a, b, trimq, minlen));
66 //	}
67 
trim(Read r, boolean trimLeft, boolean trimRight, float trimq, float avgErrorRate, int minlen)68 	public static TrimRead trim(Read r, boolean trimLeft, boolean trimRight,
69 			float trimq, float avgErrorRate, int minlen){
70 		if(r==null || r.bases==null){return null;}
71 
72 		final int a, b;
73 		if(optimalMode){
74 			long packed=testOptimal(r.bases, r.quality, avgErrorRate);
75 			a=trimLeft ? (int)((packed>>32)&0xFFFFFFFFL) : 0;
76 			b=trimRight ? (int)((packed)&0xFFFFFFFFL) : 0;
77 		}else if(windowMode){
78 			a=0;
79 			b=(trimRight ? testRightWindow(r.bases, r.quality, (byte)trimq, windowLength) : 0);
80 		}else{
81 			a=(trimLeft ? testLeft(r.bases, r.quality, (byte)trimq) : 0);
82 			b=(trimRight ? testRight(r.bases, r.quality, (byte)trimq) : 0);
83 		}
84 		return (a+b==0 ? null : new TrimRead(r, a, b, trimq, minlen));
85 	}
86 
87 	/**
88 	 * @param r Read to trim
89 	 * @param trimLeft Trim left side
90 	 * @param trimRight Trim right side
91 	 * @param trimq Maximum quality to trim
92 	 * @param minResult Ensure trimmed read is at least this long
93 	 * @return Number of bases trimmed
94 	 */
trimFast(Read r, boolean trimLeft, boolean trimRight, float trimq, float avgErrorRate, int minResult)95 	public static int trimFast(Read r, boolean trimLeft, boolean trimRight,
96 			float trimq, float avgErrorRate, int minResult){
97 		return trimFast(r, trimLeft, trimRight, trimq, avgErrorRate, minResult, false);
98 	}
99 
100 	/**
101 	 * @param r Read to trim
102 	 * @param trimLeft Trim left side
103 	 * @param trimRight Trim right side
104 	 * @param trimq Maximum quality to trim
105 	 * @param minResult Ensure trimmed read is at least this long
106 	 * @return Number of bases trimmed
107 	 */
trimFast(Read r, boolean trimLeft, boolean trimRight, float trimq, float avgErrorRate, int minResult, boolean trimClip)108 	public static int trimFast(Read r, boolean trimLeft, boolean trimRight,
109 			float trimq, float avgErrorRate, int minResult, boolean trimClip){
110 		return trimFast(r, trimLeft, trimRight, trimq, avgErrorRate, minResult, 0, trimClip);
111 	}
112 
113 	/**
114 	 * This method allows a "discardUnder" parameter, so that qtrim=r will still discard
115 	 * reads that if left-trimmed, would be shorter than the desired minimum length.
116 	 * It is not presently used.
117 	 * @param r Read to trim
118 	 * @param trimLeft Trim left side
119 	 * @param trimRight Trim right side
120 	 * @param trimq Maximum quality to trim
121 	 * @param minResult Ensure trimmed read is at least this long
122 	 * @param discardUnder Resulting reads shorter than this are not wanted
123 	 * @return Number of bases trimmed
124 	 */
trimFast(Read r, boolean trimLeft, boolean trimRight, float trimq, float avgErrorRate, int minResult, int discardUnder, boolean trimClip)125 	public static int trimFast(Read r, boolean trimLeft, boolean trimRight,
126 			float trimq, float avgErrorRate, int minResult, int discardUnder, boolean trimClip){
127 //		assert(avgErrorRate==(float)QualityTools.phredToProbError(trimq)) : trimq+", "+avgErrorRate+", "+(float)QualityTools.phredToProbError(trimq);
128 		final byte[] bases=r.bases, qual=r.quality;
129 		if(bases==null || bases.length<1){return 0;}
130 
131 //		assert(false) : trimLeft+", "+trimRight+", "+trimq+", "+minResult+", "+discardUnder;
132 
133 		final int len=r.length();
134 		final int a0, b0;
135 		final int a, b;
136 		if(optimalMode){
137 			long packed=testOptimal(bases, qual, avgErrorRate);
138 			a0=(int)((packed>>32)&0xFFFFFFFFL);
139 			b0=(int)((packed)&0xFFFFFFFFL);
140 			if(trimLeft!=trimRight && discardUnder>0 && len-a0-b0<discardUnder){
141 				a=trimLeft ? len : 0;
142 				b=trimRight ? len : 0;
143 			}else{
144 				a=trimLeft ? a0 : 0;
145 				b=trimRight ? b0 : 0;
146 			}
147 //			assert(false) : a0+", "+b0+" -> "+a+", "+b;
148 		}else if(windowMode){
149 			a=0;
150 			b=(trimRight ? testRightWindow(bases, qual, (byte)trimq, windowLength) : 0);
151 		}else{
152 			a=(trimLeft ? testLeft(bases, qual, (byte)trimq) : 0);
153 			b=(trimRight ? testRight(bases, qual, (byte)trimq) : 0);
154 		}
155 		return trimByAmount(r, a, b, minResult, trimClip);
156 	}
157 
untrim(Read r)158 	public static boolean untrim(Read r){
159 		if(r==null || r.obj==null){return false;}
160 		if(r.obj.getClass()!=TrimRead.class){return false;}
161 		TrimRead tr=(TrimRead)r.obj;
162 		return tr.untrim();
163 	}
164 
165 //	public TrimRead(Read r_, boolean trimLeft, boolean trimRight, float trimq_, int minlen_){
166 //		this(r_, (trimLeft ? testLeft(r_.bases, r_.quality, (byte)trimq_) : 0), (trimRight ? testRight(r_.bases, r_.quality, (byte)trimq_) : 0), trimq_, minlen_);
167 //	}
168 
TrimRead(Read r_, int trimLeft, int trimRight, float trimq_, int minlen_)169 	public TrimRead(Read r_, int trimLeft, int trimRight, float trimq_, int minlen_){
170 		minlen_=Tools.max(minlen_, 0);
171 		r=r_;
172 		bases1=r.bases;
173 		qual1=r.quality;
174 		trimq=(byte)trimq_;
175 		assert(bases1!=null || qual1==null) : "\n"+new String(bases1)+"\n"+new String(qual1)+"\n";
176 		assert(bases1==null || qual1==null || bases1.length==qual1.length) : "\n"+new String(bases1)+"\n"+new String(qual1)+"\n";
177 		int trimmed=trim(trimLeft, trimRight, minlen_);
178 		if(trimmed>0){
179 			assert(bases2==null || bases2.length>=minlen_ || bases1.length<minlen_) : bases1.length+", "+bases2.length+", "+minlen_+", "+trimLeft+", "+trimRight;
180 			r.bases=bases2;
181 			r.quality=qual2;
182 			r.obj=this;
183 			trimMatch(r);
184 		}
185 	}
186 
187 //	/** Trim the left end of the read, from left to right */
188 //	private int trim(final boolean trimLeft, final boolean trimRight, final int minlen){
189 //		final int a, b;
190 //		if(optimalMode){
191 //			long packed=testOptimal(bases1, qual1, QualityTools.PROB_ERROR[trimq]);
192 //			a=trimLeft ? (int)((packed>>32)&0xFFFFFFFFL) : 0;
193 //			b=trimRight ? (int)((packed)&0xFFFFFFFFL) : 0;
194 //		}else{
195 //			a=(trimLeft ? testLeft(bases1, qual1, (byte)trimq) : 0);
196 //			b=(trimRight ? testRight(bases1, qual1, (byte)trimq) : 0);
197 //		}
198 //		return trim(a, b, minlen);
199 //	}
200 
201 	/** Trim the left end of the read, from left to right */
trim(int trimLeft, int trimRight, final int minlen)202 	private int trim(int trimLeft, int trimRight, final int minlen){
203 		assert(trimLeft>=0 && trimRight>=0) : "trimLeft="+trimLeft+", trimRight="+trimRight+", minlen="+minlen+", len="+bases1.length;
204 		assert(trimLeft>0 || trimRight>0) : "trimLeft="+trimLeft+", trimRight="+trimRight+", minlen="+minlen+", len="+bases1.length;
205 		final int maxTrim=Tools.min(bases1.length, bases1.length-minlen);
206 		if(trimLeft+trimRight>maxTrim){
207 			int excess=trimLeft+trimRight-maxTrim;
208 			if(trimLeft>0 && excess>0){
209 				trimLeft=Tools.max(0, trimLeft-excess);
210 				excess=trimLeft+trimRight-maxTrim;
211 			}
212 			if(trimRight>0 && excess>0){
213 				trimRight=Tools.max(0, trimRight-excess);
214 				excess=trimLeft+trimRight-maxTrim;
215 			}
216 
217 		}
218 
219 		leftTrimmed=trimLeft;
220 		rightTrimmed=trimRight;
221 		final int sum=leftTrimmed+rightTrimmed;
222 
223 		if(verbose){
224 			System.err.println("leftTrimmed="+leftTrimmed+", rightTrimmed="+rightTrimmed+", sum="+sum);
225 		}
226 
227 		if(sum==0){
228 			bases2=bases1;
229 			qual2=qual1;
230 		}else{
231 			bases2=KillSwitch.copyOfRange(bases1, trimLeft, bases1.length-trimRight);
232 			qual2=((qual1==null || (trimLeft+trimRight>=qual1.length)) ? null : KillSwitch.copyOfRange(qual1, trimLeft, qual1.length-trimRight));
233 		}
234 		return sum;
235 	}
236 
237 	/** Trim bases outside of leftLoc and rightLoc, excluding leftLoc and rightLoc */
trimToPosition(Read r, int leftLoc, int rightLoc, int minResultingLength)238 	public static int trimToPosition(Read r, int leftLoc, int rightLoc, int minResultingLength){
239 		final int len=r.length();
240 		return trimByAmount(r, leftLoc, len-rightLoc-1, minResultingLength, false);
241 	}
242 
243 	/** Remove non-genetic-code from reads */
trimBadSequence(Read r)244 	public static int trimBadSequence(Read r){
245 		final byte[] bases=r.bases, quals=r.quality;
246 		if(bases==null){return 0;}
247 		final int minGenetic=20;
248 		int lastNon=-1;
249 		for(int i=0; i<bases.length; i++){
250 			byte b=bases[i];
251 			if(!AminoAcid.isACGTN(b)){lastNon=i;}
252 			if(i-lastNon>minGenetic){break;}
253 		}
254 		if(lastNon>=0){
255 			r.bases=KillSwitch.copyOfRange(bases, lastNon+1, bases.length);
256 			if(quals!=null){
257 				r.quality=KillSwitch.copyOfRange(quals, lastNon+1, quals.length);
258 			}
259 		}
260 		return lastNon+1;
261 	}
262 
263 	/** Trim this many bases from each end */
trimByAmount(Read r, int leftTrimAmount, int rightTrimAmount, int minResultingLength)264 	public static int trimByAmount(Read r, int leftTrimAmount, int rightTrimAmount, int minResultingLength){
265 		return trimByAmount(r, leftTrimAmount, rightTrimAmount, minResultingLength, false);
266 	}
267 
268 	/** Trim this many bases from each end */
trimByAmount(Read r, int leftTrimAmount, int rightTrimAmount, int minResultingLength, boolean trimClip)269 	public static int trimByAmount(Read r, int leftTrimAmount, int rightTrimAmount, int minResultingLength, boolean trimClip){
270 
271 		leftTrimAmount=Tools.max(leftTrimAmount, 0);
272 		rightTrimAmount=Tools.max(rightTrimAmount, 0);
273 
274 		//These assertions are unnecessary if the mapping information will never be used or output.
275 //		assert(r.match==null) : "TODO: Handle trimming of reads with match strings.";
276 		assert(r.sites==null) : "TODO: Handle trimming of reads with SiteScores.";
277 
278 		if(r.match!=null){
279 			return trimReadWithMatch(r, r.samline, leftTrimAmount, rightTrimAmount, minResultingLength, Integer.MAX_VALUE, trimClip);
280 		}
281 
282 		final byte[] bases=r.bases, qual=r.quality;
283 		final int len=(bases==null ? 0 : bases.length), qlen=(qual==null ? 0 : qual.length);
284 		if(len<1){return 0;}
285 		minResultingLength=Tools.min(len, Tools.max(minResultingLength, 0));
286 		if(leftTrimAmount+rightTrimAmount+minResultingLength>len){
287 			rightTrimAmount=Tools.max(1, len-minResultingLength);
288 			leftTrimAmount=0;
289 		}
290 
291 		final int total=leftTrimAmount+rightTrimAmount;
292 		if(total>0){
293 			r.bases=KillSwitch.copyOfRange(bases, leftTrimAmount, len-rightTrimAmount);
294 			r.quality=(leftTrimAmount+rightTrimAmount>=qlen ? null : KillSwitch.copyOfRange(qual, leftTrimAmount, qlen-rightTrimAmount));
295 			trimMatch(r);
296 			if(r.stop>r.start){ //TODO:  Fixing mapped coordinates needs more work.
297 				r.start+=leftTrimAmount;
298 				r.stop-=rightTrimAmount;
299 			}
300 		}
301 
302 		if(verbose){
303 			System.err.println("leftTrimmed="+leftTrimAmount+", rightTrimmed="+rightTrimAmount+
304 					", sum="+total+", final length="+r.length());
305 		}
306 		return total;
307 	}
308 
309 	/** Count number of bases that need trimming on each side, and pack into a long */
testOptimal(byte[] bases, byte[] qual, float avgErrorRate)310 	public static long testOptimal(byte[] bases, byte[] qual, float avgErrorRate){
311 		if(optimalBias>=0){avgErrorRate=optimalBias;}//Override
312 		assert(avgErrorRate>0 && avgErrorRate<=1) : "Average error rate ("+avgErrorRate+") must be between 0 (exclusive) and 1 (inclusive)";
313 		if(bases==null || bases.length==0){return 0;}
314 		if(qual==null){return avgErrorRate>=1 ? 0 : ((((long)testLeftN(bases))<<32) | (((long)testRightN(bases))&0xFFFFFFFFL));}
315 
316 		float maxScore=0;
317 		float score=0;
318 		int maxLoc=-1;
319 		int maxCount=-1;
320 		int count=0;
321 
322 		final float nprob=Tools.max(Tools.min(avgErrorRate*1.1f, 1), NPROB);
323 
324 		for(int i=0; i<bases.length; i++){
325 			final byte b=bases[i];
326 			byte q=qual[i];
327 //			float probError=(b=='N' ? nprob : ADJUST_QUALITY ? CalcTrueQuality.estimateErrorProbAvg(qual, bases, i) : QualityTools.PROB_ERROR[q]);
328 //			float probError=(b=='N' ? nprob : ADJUST_QUALITY ? CalcTrueQuality.estimateErrorProbGeoAvg(qual, bases, i) : QualityTools.PROB_ERROR[q]);
329 //			float probError=(b=='N' ? nprob : ADJUST_QUALITY ? CalcTrueQuality.estimateErrorProb2(qual, bases, i) : QualityTools.PROB_ERROR[q]);
330 
331 //			float probError=(b=='N' ? nprob : q==1 ? PROB1 : QualityTools.PROB_ERROR[q]);
332 //			float probError=(b=='N' ? nprob : QualityTools.PROB_ERROR[q]);
333 			float probError=((b=='N' || q<1) ? nprob : QualityTools.PROB_ERROR[q]);
334 
335 //			assert(q>0 || b=='N') : "index "+i+": q="+q+", b="+(char)b+"\n"+new String(bases)+"\n"+Arrays.toString(qual)+"\n";
336 
337 			float delta=avgErrorRate-probError;
338 			score=score+delta;
339 			if(score>0){
340 				count++;
341 				if(score>maxScore || (score==maxScore && count>maxCount)){
342 					maxScore=score;
343 					maxCount=count;
344 					maxLoc=i;
345 				}
346 			}else{
347 				score=0;
348 				count=0;
349 			}
350 		}
351 
352 		final int left, right;
353 		if(maxScore>0){
354 			assert(maxLoc>=0);
355 			assert(maxCount>0);
356 			left=maxLoc-maxCount+1;
357 			assert(left>=0 && left<=bases.length);
358 			right=bases.length-maxLoc-1;
359 		}else{
360 			left=0;
361 			right=bases.length;
362 		}
363 		final long packed=((((long)left)<<32) | (((long)right)&0xFFFFFFFFL));
364 
365 		if(verbose){
366 			System.err.println(Arrays.toString(qual));
367 			System.err.println("After testLocal: maxScore="+maxScore+", maxLoc="+maxLoc+", maxCount="+maxCount+
368 					", left="+left+", right="+right+", returning "+Long.toHexString(packed));
369 		}
370 		return packed;
371 	}
372 
373 	/** Count number of bases that need trimming on left side */
testLeft(byte[] bases, byte[] qual, final byte trimq)374 	private static int testLeft(byte[] bases, byte[] qual, final byte trimq){
375 		if(bases==null || bases.length==0){return 0;}
376 		if(qual==null){return trimq<0 ? 0 : testLeftN(bases);}
377 		int good=0;
378 		int lastBad=-1;
379 		int i=0;
380 		for(; i<bases.length && good<minGoodInterval; i++){
381 			final byte q=qual[i];
382 			final byte b=bases[i];
383 			assert(q>0 || b=='N') : "index "+i+": q="+q+", b="+(char)b+"\n"+new String(bases)+"\n"+Arrays.toString(qual)+"\n";
384 			if(q>trimq){good++;}
385 			else{good=0; lastBad=i;}
386 		}
387 		if(verbose){
388 //			System.err.println(Arrays.toString(qual));
389 			System.err.println("After testLeft: good="+good+", lastBad="+lastBad+", i="+i+", returning "+(lastBad+1));
390 //			assert(false);
391 		}
392 		return lastBad+1;
393 	}
394 
395 	/** Count number of bases that need trimming on right side using a sliding window */
testRightWindow(byte[] bases, byte[] qual, final byte trimq, final int window)396 	private static int testRightWindow(byte[] bases, byte[] qual, final byte trimq, final int window){
397 		if(bases==null || bases.length==0){return 0;}
398 		if(qual==null || qual.length<window){return trimq>0 ? 0 : testRightN(bases);}
399 		final int thresh=Tools.max(window*trimq, 1);
400 		int sum=0;
401 		for(int i=0, j=-window; i<qual.length; i++, j++){
402 			final byte q=qual[i];
403 			sum+=q;
404 			if(j>=-1){
405 				if(j>=0){sum-=qual[j];}
406 				if(sum<thresh){
407 					return qual.length-j-1;
408 				}
409 			}
410 		}
411 		return 0;
412 	}
413 
414 	/** Count number of bases that need trimming on right side */
testRight(byte[] bases, byte[] qual, final byte trimq)415 	private static int testRight(byte[] bases, byte[] qual, final byte trimq){
416 		if(bases==null || bases.length==0){return 0;}
417 		if(qual==null){return trimq<0 ? 0 : testRightN(bases);}
418 		int good=0;
419 		int lastBad=bases.length;
420 		int i=bases.length-1;
421 		for(; i>=0 && good<minGoodInterval; i--){
422 			final byte q=qual[i];
423 			final byte b=bases[i];
424 			assert(q>0 || b=='N') : "index "+i+": q="+q+", b="+(char)b+"\n"+new String(bases)+"\n"+Arrays.toString(qual)+"\n";
425 			if(q>trimq){good++;}
426 			else{good=0; lastBad=i;}
427 		}
428 		if(verbose){
429 			System.err.println("After trimLeft: good="+good+", lastBad="+lastBad+", i="+i+", returning "+(bases.length-lastBad));
430 		}
431 		return bases.length-lastBad;
432 	}
433 
434 	/** Count number of bases that need trimming on left side, considering only N as bad */
testLeftN(byte[] bases)435 	public static int testLeftN(byte[] bases){
436 		if(bases==null || bases.length==0){return 0;}
437 		int good=0;
438 		int lastBad=-1;
439 		for(int i=0; i<bases.length && good<minGoodInterval; i++){
440 			final byte b=bases[i];
441 			//if(dna.AminoAcid.isFullyDefined(b)){good++;}
442 			if(b!=((byte)'N')){good++;}
443 			else{good=0; lastBad=i;}
444 		}
445 		return lastBad+1;
446 	}
447 
448 	/** Count number of bases that need trimming on right side, considering only N as bad */
testRightN(byte[] bases)449 	public static int testRightN(byte[] bases){
450 		if(bases==null || bases.length==0){return 0;}
451 		int good=0;
452 		int lastBad=bases.length;
453 		for(int i=bases.length-1; i>=0 && good<minGoodInterval; i--){
454 			final byte b=bases[i];
455 			//if(dna.AminoAcid.isFullyDefined(b)){good++;}
456 			if(b!=((byte)'N')){good++;}
457 			else{good=0; lastBad=i;}
458 		}
459 		return bases.length-lastBad;
460 	}
461 
untrim()462 	public boolean untrim(){
463 		if(leftTrimmed==0 && rightTrimmed==0){return false;}
464 		r.setPerfect(false);
465 
466 		final int lt, rt;
467 		if(r.strand()==Shared.PLUS){
468 			lt=leftTrimmed;
469 			rt=rightTrimmed;
470 		}else{
471 			lt=rightTrimmed;
472 			rt=leftTrimmed;
473 		}
474 
475 		boolean returnToShort=false;
476 		if(verbose){System.err.println("Untrimming");}
477 		if(r.match!=null){
478 			if(r.shortmatch()){
479 				r.toLongMatchString(false);
480 				returnToShort=true;
481 			}
482 			byte[] match2=new byte[r.match.length+lt+rt];
483 			int i=0;
484 			for(; i<lt; i++){
485 				match2[i]='C';
486 			}
487 			for(int j=0; j<r.match.length; i++, j++){
488 				match2[i]=r.match[j];
489 			}
490 			for(; i<match2.length; i++){
491 				match2[i]='C';
492 			}
493 			r.match=match2;
494 		}
495 		r.bases=bases1;
496 		r.quality=qual1;
497 		r.start-=lt;
498 		r.stop+=rt;
499 		if(returnToShort){
500 			r.toShortMatchString(true);
501 		}
502 
503 		if(r.sites!=null){
504 			for(SiteScore ss : r.sites){untrim(ss);}
505 		}
506 
507 		return true;
508 	}
509 
untrim(SiteScore ss)510 	private boolean untrim(SiteScore ss){
511 		if(ss==null){return false;}
512 		if(leftTrimmed==0 && rightTrimmed==0){return false;}
513 		ss.perfect=ss.semiperfect=false;
514 
515 		final int lt, rt;
516 		if(ss.strand==Shared.PLUS){
517 			lt=leftTrimmed;
518 			rt=rightTrimmed;
519 		}else{
520 			lt=rightTrimmed;
521 			rt=leftTrimmed;
522 		}
523 
524 		boolean returnToShort=false;
525 		if(verbose){System.err.println("Untrimming ss "+ss);}
526 		if(ss.match!=null){
527 
528 			boolean shortmatch=false;
529 			for(byte b : ss.match){
530 				if(Tools.isDigit(b)){shortmatch=true; break;}
531 			}
532 
533 			if(shortmatch){
534 				ss.match=Read.toLongMatchString(ss.match);
535 				returnToShort=true;
536 			}
537 			byte[] match2=new byte[ss.match.length+lt+rt];
538 			int i=0;
539 			for(; i<lt; i++){
540 				match2[i]='C';
541 			}
542 			for(int j=0; j<ss.match.length; i++, j++){
543 				match2[i]=ss.match[j];
544 			}
545 			for(; i<match2.length; i++){
546 				match2[i]='C';
547 			}
548 			ss.match=match2;
549 		}
550 		ss.setLimits(ss.start-lt, ss.stop+rt);
551 		if(returnToShort){ss.match=Read.toShortMatchString(ss.match);}
552 		return true;
553 	}
554 
trimMatch(Read r)555 	private static boolean trimMatch(Read r){
556 		if(r.match==null && r.sites==null){return false;}
557 
558 		//Easy mode!
559 		r.match=null;
560 		if(r.sites!=null){
561 			for(SiteScore ss : r.sites){
562 				if(ss!=null){ss.match=null;}
563 			}
564 		}
565 		return true;
566 
567 		//TODO - need to adjust read start and stop based on this information.  Also check strand!
568 //		byte[] match=r.match;
569 //		if(r.shortmatch()){match=Read.toLongMatchString(match);}
570 //		byte[] match2=new byte[match.length-leftTrimmed-rightTrimmed];
571 //		for(int mpos=0, bpos=0; bpos<leftTrimmed; mpos++){
572 //			byte m=match[mpos];
573 //			if(m=='D'){
574 //				//do nothing
575 //			}else{
576 //				bpos++;
577 //			}
578 //		}
579 	}
580 
581 	/** Special case of 100% match */
trimReadWithMatchFast(final Read r, final SamLine sl, final int leftTrimAmount, final int rightTrimAmount, final int minFinalLength)582 	public static int trimReadWithMatchFast(final Read r, final SamLine sl, final int leftTrimAmount, final int rightTrimAmount, final int minFinalLength){
583 		assert(r.match!=null);
584 		if(r.bases==null){return 0;}
585 		if(leftTrimAmount<1 && rightTrimAmount<1){return 0;}
586 		if(leftTrimAmount+rightTrimAmount>=r.length()){return -leftTrimAmount-rightTrimAmount;}
587 
588 		final boolean shortmatch=r.shortmatch();
589 		final byte[] old=r.match;
590 		r.match=null;
591 		final int trimmed;
592 		if(sl.strand()==Shared.MINUS){
593 			trimmed=trimByAmount(r, rightTrimAmount, leftTrimAmount, minFinalLength, false);
594 		}else{
595 			trimmed=trimByAmount(r, leftTrimAmount, rightTrimAmount, minFinalLength, false);
596 		}
597 		if(trimmed<1){
598 			r.match=old;
599 			return 0;
600 		}
601 
602 		ByteBuilder bb=new ByteBuilder();
603 		final int len=r.length();
604 		if(shortmatch){
605 			bb.append((byte)'m');
606 			if(len>1){bb.append(len);}
607 		}else{
608 			for(int i=0; i<len; i++){bb.append((byte)'m');}
609 		}
610 		r.match=bb.toBytes();
611 		bb.clear();
612 
613 		if(sl!=null){
614 			sl.pos+=leftTrimAmount;
615 			if(sl.cigar!=null){
616 				bb.append(len);
617 				bb.append(SamLine.VERSION>1.3 ? '=' : 'm');
618 				sl.cigar=bb.toString();
619 			}
620 			sl.seq=r.bases;
621 			sl.qual=r.quality;
622 			if(trimmed>0 && sl.optional!=null && sl.optional.size()>0){
623 				ArrayList<String> list=new ArrayList<String>(2);
624 				for(int i=0; i<sl.optional.size(); i++){
625 					String s=sl.optional.get(i);
626 					if(s.startsWith("PG:") || s.startsWith("RG:") || s.startsWith("X") || s.startsWith("Y") || s.startsWith("Z")){list.add(s);} //Only keep safe flags.
627 				}
628 				sl.optional.clear();
629 				sl.optional.addAll(list);
630 			}
631 		}
632 		return trimmed;
633 	}
634 
635 	//TODO: This is slow
636 	//TODO: Note, returns a negative number if the whole read is supposed to be trimmed
trimReadWithMatch(final Read r, final SamLine sl, int leftTrimAmount, int rightTrimAmount, int minFinalLength, int scafLen, boolean trimClip)637 	public static int trimReadWithMatch(final Read r, final SamLine sl,
638 			int leftTrimAmount, int rightTrimAmount, int minFinalLength, int scafLen, boolean trimClip){
639 		if(r.bases==null || (leftTrimAmount<1 && rightTrimAmount<1 && !trimClip)){return 0;}
640 		if(!r.containsNonM() && !trimClip){
641 			return trimReadWithMatchFast(r, sl, leftTrimAmount, rightTrimAmount, minFinalLength);
642 		}
643 
644 //		if(r.match==null){
645 //			assert(sl.cigar==null);
646 //			int trimmed=trimByAmount(r, leftTrimAmount, rightTrimAmount, minFinalLength, false);
647 //			if(sl!=null){
648 //				assert(sl.cigar==null);
649 //				int start=sl.pos-1;
650 //				int stop=start+sl.seq.length;
651 //				sl.seq=r.bases;
652 //				sl.qual=r.quality;
653 //				if(trimmed>0 && sl.optional!=null && sl.optional.size()>0){
654 //					ArrayList<String> list=new ArrayList<String>(2);
655 //					for(int i=0; i<sl.optional.size(); i++){
656 //						String s=sl.optional.get(i);
657 //						if(s.startsWith("PG:") || s.startsWith("RG:") || s.startsWith("X") || s.startsWith("Y") || s.startsWith("Z")){list.add(s);} //Only keep safe flags.
658 //					}
659 //					sl.optional.clear();
660 //					sl.optional.addAll(list);
661 //				}
662 //			}
663 //		}
664 
665 		if(trimClip){
666 			r.toLongMatchString(false);
667 			byte[] match=r.match;
668 			int leftClip=0, rightClip=0;
669 			for(int i=0; i<match.length; i++){
670 				if(match[i]=='C'){leftClip++;}
671 				else{break;}
672 			}
673 			for(int i=match.length-1; i>=0; i--){
674 				if(match[i]=='C'){rightClip++;}
675 				else{break;}
676 			}
677 			leftTrimAmount=Tools.max(leftTrimAmount, leftClip);
678 			rightTrimAmount=Tools.max(rightTrimAmount, rightClip);
679 		}
680 
681 		if(leftTrimAmount<1 && rightTrimAmount<1){return 0;}
682 		if(leftTrimAmount+rightTrimAmount>=r.length()){return -leftTrimAmount-rightTrimAmount;}
683 
684 		final int oldPos=(sl==null ? 1 : sl.pos);
685 
686 		r.toLongMatchString(false);
687 		byte[] match=r.match;
688 
689 		assert(!r.shortmatch());
690 
691 //		System.err.println("Q: "+sl);
692 //		System.err.println(new String(match));
693 
694 		ByteBuilder bb=new ByteBuilder(match.length);
695 //		System.err.println("leftTrimAmount="+leftTrimAmount+", rightTrimAmount="+rightTrimAmount);
696 
697 		int leftTrim=leftTrimAmount>0 ? r.length() : 0, rightTrim=rightTrimAmount>0 ? r.length() : 0;
698 //		System.err.println("leftTrim="+leftTrim+", rightTrim="+rightTrim);
699 		{
700 			final int mlen=match.length;
701 			boolean keep=leftTrimAmount<1;
702 			int rpos=(sl==null ? 1 : sl.pos);
703 			for(int mpos=0, cpos=0; mpos<mlen; mpos++){
704 				final byte m=match[mpos];
705 				if(m=='m' || m=='S' || m=='V' || m=='N'){
706 					cpos++;
707 					rpos++;
708 				}else if(m=='D'){
709 					rpos++;
710 				}else if(m=='I' || m=='C'){
711 					cpos++;
712 				}else if(m=='d'){
713 					rpos++;
714 				}else if(m=='i'){
715 					cpos++;
716 				}else{
717 					assert(false) : "Unknown symbol "+(char)m;
718 				}
719 
720 				if(keep){
721 					bb.append(m);
722 				}else if(cpos>=leftTrimAmount){
723 					byte next=(mpos<mlen-1 ? match[mpos+1] : (byte)'m');
724 					if(m=='m' || m=='S' ||  m=='V' ||  m=='N' || next=='m' || next=='S' || next=='V' || next=='N'){
725 						keep=true;
726 						if(sl!=null){sl.pos=rpos;}
727 						leftTrim=cpos;
728 					}
729 				}
730 			}
731 		}
732 //		System.err.println("R: "+sl);
733 		match=bb.toBytes();
734 		Tools.reverseInPlace(match);
735 		bb.clear();
736 		{
737 			final int mlen=match.length;
738 			boolean keep=rightTrimAmount<1;
739 			for(int mpos=0, cpos=0; mpos<mlen; mpos++){
740 				final byte m=match[mpos];
741 				if(m=='m' || m=='S' || m=='V' || m=='N' || m=='I' || m=='C'){
742 					cpos++;
743 				}else if(m=='D'){
744 				}else if(m=='d'){
745 				}else if(m=='i'){
746 					cpos++;
747 				}else{
748 					assert(false) : "Unknown symbol "+(char)m;
749 				}
750 
751 				if(keep){
752 					bb.append(m);
753 				}else if(cpos>=rightTrimAmount){
754 					byte next=(mpos<mlen-1 ? match[mpos+1] : (byte)'m');
755 //					if(m=='m' || m=='S' ||  m=='V' ||  m=='N' || next=='m' || next=='S' || next=='V' || next=='N'){
756 					if(next!='I' && next!='D' && next!='i' && next!='d'){
757 						keep=true;
758 						rightTrim=cpos;
759 					}
760 //					}
761 				}
762 			}
763 		}
764 //		System.err.println("S: "+sl);
765 		match=bb.toBytes();
766 		Tools.reverseInPlace(match);
767 
768 //		System.err.println("leftTrim="+leftTrim+", rightTrim="+rightTrim);
769 
770 		if(leftTrim+rightTrim>=r.length()){
771 			if(sl!=null){sl.pos=oldPos;}
772 			return -leftTrim-rightTrim;
773 		}
774 
775 //		System.err.println("T: "+sl);
776 		r.match=null;
777 		if(sl!=null && sl.strand()==Shared.MINUS){
778 			int temp=leftTrim;
779 			leftTrim=rightTrim;
780 			rightTrim=temp;
781 		}
782 		final int trimmed=trimByAmount(r, leftTrim, rightTrim, minFinalLength, false);
783 //		System.err.println("tba: "+leftTrim+", "+rightTrim+", "+minFinalLength);
784 		r.match=match;
785 
786 		if(sl!=null){
787 			int start=sl.pos-1;
788 			int stop=start+Read.calcMatchLength(match)-1;
789 			if(SamLine.VERSION>1.3){
790 				sl.cigar=SamLine.toCigar14(match, start, stop, scafLen, r.bases);
791 			}else{
792 				sl.cigar=SamLine.toCigar13(match, start, stop, scafLen, r.bases);
793 			}
794 			sl.seq=r.bases;
795 			sl.qual=r.quality;
796 			if(trimmed>0 && sl.optional!=null && sl.optional.size()>0){
797 				ArrayList<String> list=new ArrayList<String>(2);
798 				for(int i=0; i<sl.optional.size(); i++){
799 					String s=sl.optional.get(i);
800 					if(s.startsWith("PG:") || s.startsWith("RG:") || s.startsWith("X") || s.startsWith("Y") || s.startsWith("Z")){list.add(s);} //Only keep safe flags.
801 				}
802 				sl.optional.clear();
803 				sl.optional.addAll(list);
804 			}
805 		}
806 		return trimmed;
807 	}
808 
trimmed()809 	public int trimmed() {
810 		return leftTrimmed+rightTrimmed;
811 	}
812 
813 	public final Read r;
814 
815 	/** untrimmed bases */
816 	public final byte[] bases1;
817 	/** untrimmed qualities */
818 	public final byte[] qual1;
819 	/** trimmed bases */
820 	public byte[] bases2;
821 	/** trimmed qualities */
822 	public byte[] qual2;
823 
824 
825 	public final float trimq;
826 	public int leftTrimmed;
827 	public int rightTrimmed;
828 
829 	/** Require this many consecutive good bases to stop trimming.  Minimum is 1.
830 	 * This is for the old trimming mode and not really used anymore */
831 	public static int minGoodInterval=2;
832 
833 	public static boolean verbose=false;
834 	public static boolean optimalMode=true;
835 	public static boolean windowMode=false;
836 	public static float optimalBias=-1f;
837 
838 	public static int windowLength=4;
839 
840 	private static final float NPROB=0.75f;
841 //	public static float PROB1=QualityTools.PROB_ERROR[1];
842 
843 
844 }
845