1 package var2;
2 
3 import align2.MSA;
4 import dna.AminoAcid;
5 import shared.Shared;
6 import shared.Tools;
7 import stream.Read;
8 import stream.SamLine;
9 import stream.SiteScore;
10 
11 public class Realigner {
12 
Realigner()13 	public Realigner(){
14 		this(defaultMaxrows, defaultColumns, defaultPadding, defaultMsaType);
15 	}
16 
Realigner(int maxrows_, int columns_, int padding_, String msaType_)17 	public Realigner(int maxrows_, int columns_, int padding_, String msaType_){
18 		maxrows=maxrows_;
19 		columns=columns_;
20 		padding=padding_;
21 		msaType=msaType_;
22 		msa=MSA.makeMSA(maxrows, columns+2, msaType);
23 	}
24 
realign(Read r, SamLine sl, final boolean unclip)25 	public boolean realign(Read r, SamLine sl, final boolean unclip){
26 		if(!r.mapped() || sl.supplementary() || !sl.primary()){return false;}
27 		Scaffold scaf=map.getScaffold(sl.rnameS());
28 		assert(scaf!=null) : sl.rnameS();
29 		return realign(r, sl, scaf, unclip);
30 	}
31 
realign(final Read r, final SamLine sl, final Scaffold scaf, final boolean unclip)32 	public boolean realign(final Read r, final SamLine sl, final Scaffold scaf, final boolean unclip){
33 		return realign(r, sl, scaf.bases, unclip);
34 	}
35 
realign(final Read r, final SamLine sl, final byte[] ref, final boolean unclip)36 	public boolean realign(final Read r, final SamLine sl, final byte[] ref, final boolean unclip){
37 		if(!r.mapped() || sl.supplementary() || !sl.primary()){return false;}
38 		int[] mSCNID=r.countMatchSymbols();
39 		int sumBad=mSCNID[1]+mSCNID[4]+mSCNID[5], sumIndel=mSCNID[4]+mSCNID[5];
40 		if(mSCNID[2]>0){//continue
41 		}else if(sumBad>3){//continue
42 		}else if(sumIndel>1 || (sumIndel>0 && mSCNID[1]>1)){//continue
43 		}else{return false;}
44 
45 		if(mSCNID[1]<3 && mSCNID[2]==0 && mSCNID[4]<2 && mSCNID[5]<2 && sumBad<3 && sumIndel<2){return false;}
46 
47 		if(r.length()+2>msa.maxColumns+2*padding){
48 			msa=MSA.makeMSA(msa.maxRows, r.length()+2+r.length()/4+2*padding, msaType);
49 		}
50 		if(r.length()+2>msa.maxRows){
51 			msa=MSA.makeMSA(r.length()+2+r.length()/4+2*padding, msa.maxColumns, msaType);
52 		}
53 
54 		final int leadingClip=SamLine.countLeadingClip(sl.cigar, true, false);
55 		final int trailingClip=SamLine.countTrailingClip(sl.cigar, true, false);
56 		final int totalClip=leadingClip+trailingClip;
57 		final boolean clipped=totalClip>0;
58 
59 		final int start=sl.start(true, false);
60 		final int stop=sl.stop(start, true, false);
61 		final int paddedStart=start-padding, paddedStop=stop+padding;
62 		final int len0=paddedStop-paddedStart+1;
63 		final int a=0, b=len0-1;
64 		if(len0>=columns){return false;}
65 
66 		realignmentsAttempted++;
67 
68 		SiteScore ss=null;
69 		final byte[] rbases=makeRbases(ref, start, stop, padding);
70 		byte[] qbases=r.bases;
71 		if(sl.strand()==Shared.MINUS){
72 			qbases=AminoAcid.reverseComplementBases(qbases);
73 		}
74 
75 //		SiteScore oldSS=r.toSite(); //123 Slow
76 //		oldSS.match=r.match.clone(); //123 Slow
77 //		String oldSL=sl.toString(); //123 Slow
78 //		assert(oldSS.lengthsAgree()) : oldSS.start+", "+oldSS.stop+", "+oldSS.matchLength()+", "+oldSS.mappedLength()+", "+scaf.length+"\n"+sl+"\n"+oldSS+"\n";
79 //		assert(!oldSS.matchContainsXY()) : oldSS.start+", "+oldSS.stop+", "+oldSS.matchLength()+", "+oldSS.mappedLength()+", "+scaf.length+"\n"+sl+"\n"+oldSS+"\n";
80 
81 		assert(!r.shortmatch()); //Otherwise convert it or change the score function.
82 		final int score0=msa.score(r.match);
83 		final int minScore=clipped ? 1 : Tools.max(1, score0-1);
84 
85 		final int[] max;
86 		max=msa.fillLimited(qbases, rbases, a, b, minScore, null);
87 		if(max==null){return false;}
88 
89 		final int[] score=msa.score(qbases, rbases, a, b, max[0], max[1], max[2], false);
90 		if(score==null){return false;}
91 		realignmentsSucceeded++;
92 
93 		if(score[0]<=minScore || (score[0]<=score0 && !clipped)){return false;}
94 		realignmentsImproved++;
95 //		assert(false) : score[0]+", "+minScore+", "+score0;
96 
97 		ss=new SiteScore(1, r.strand(), score[1], score[2], 1, score[0]);
98 		ss.setSlowScore(ss.quickScore);
99 		ss.score=ss.quickScore;
100 		ss.match=msa.traceback(qbases, rbases, a, b, score[3], score[4], score[5], false);
101 		assert(ss.match!=null);
102 
103 		SiteScore oldSS2=ss.clone(); //123 Slow
104 		oldSS2.match=ss.match.clone(); //123 Slow
105 //		assert(ss.lengthsAgree()) : ss.start+", "+ss.stop+", "+ss.matchLength()+", "+ss.mappedLength()+", "+scaf.length+"\n"+sl+"\n"+ss+"\n"
106 //			+oldSS.start+", "+oldSS.stop+", "+oldSS.matchLength()+", "+oldSS.mappedLength()+", "+scaf.length+"\n"+sl+"\n"+oldSS+"\n"
107 //			;
108 //		assert(!oldSS.matchContainsXY()) : ss.start+", "+ss.stop+", "+ss.matchLength()+", "+ss.mappedLength()+", "+scaf.length+"\n"+sl+"\n"+ss+"\n"
109 //			+oldSS.start+", "+oldSS.stop+", "+oldSS.matchLength()+", "+oldSS.mappedLength()+", "+scaf.length+"\n"+sl+"\n"+oldSS+"\n"
110 //			;
111 
112 		//These pass
113 //		assert(sl.strand()==ss.strand()) : sl.strand()+", "+ss.strand()+", "+r.strand()+", "+oldSS.strand()+", "+oldSS2.strand();
114 //		assert(sl.strand()==r.strand());
115 //		assert(sl.strand()==oldSS.strand());
116 //		assert(sl.strand()==oldSS2.strand());
117 
118 //		SiteScore old=r.toSite();
119 
120 
121 		//Correct for adjusted ref
122 		ss.start=ss.start+paddedStart;
123 		ss.stop=ss.stop+paddedStart;
124 
125 		{
126 			int clipped2=ss.clipTipIndels(ref.length);
127 			if(unclip && clipped2>0 && ref!=null && oldSS2.match[oldSS2.match.length-1]=='Y'){
128 				ss.unclip(qbases, ref);
129 //				System.err.println(sl.strand()+"\n"+new String(new String(oldSS2.match)+"\n"+new String(ss.match)));
130 			}
131 		}
132 
133 		if(ss.matchContainsXY()){return false;}
134 		if(ss.matchContainsAB()){return false;}
135 
136 		realignmentsRetained++;
137 
138 		r.start=ss.start;
139 		r.stop=ss.stop;
140 
141 //		System.err.println("old:\t"+old+"\nnew:\t"+ss+"\nold score:"+score0+"\n");
142 //		assert(false);
143 
144 		r.match=ss.match;
145 		sl.pos=Tools.max(0, r.start)+1;
146 		sl.cigar=SamLine.toCigar14(r.match, r.start, r.stop, ref.length, qbases);
147 		sl.optional=null;
148 
149 //		assert((ss.start>=0) == (SamLine.countLeadingClip(sl.cigar, true, true)==0)) : ss.start+", "+ss.stop+", "+scaf.length+"\n"+sl+"\n"+ss+"\n"
150 //			+oldSL+"\n"+oldSS+"\n"+oldSS2+"\n"
151 //		;
152 //		assert((ss.stop<scaf.length) == (SamLine.countTrailingClip(sl.cigar, true, true)==0)) : ss.start+", "+ss.stop+", "+scaf.length+"\n"+sl+"\n"
153 //			+oldSL+"\n"+oldSS+"\n"+oldSS2+"\n"
154 //		;
155 
156 //		assert((ss.start>=0) || (SamLine.countLeadingClip(sl.cigar, true, true)>0)) : ss.start+", "+ss.stop+", "+scaf.length+"\n"+sl+"\n"+ss+"\n"
157 //			+oldSL+"\n"+oldSS+"\n"+oldSS2+"\n"
158 //		;
159 //		assert((ss.stop<scaf.length) || (SamLine.countTrailingClip(sl.cigar, true, true)>0)) : ss.start+", "+ss.stop+", "+scaf.length+"\n"+sl+"\n"
160 //			+oldSL+"\n"+oldSS+"\n"+oldSS2+"\n"
161 //		;
162 
163 		//Perform clipping
164 		r.match=SamLine.cigarToShortMatch_old(sl.cigar, true);
165 		r.setShortMatch(true);
166 		r.toLongMatchString(true);
167 
168 		return true;
169 	}
170 
makeRbases(byte[] bases, int start, int stop, int padding)171 	private static byte[] makeRbases(byte[] bases, int start, int stop, int padding) {
172 		final int start2=start-padding, stop2=stop+padding;
173 		byte[] out=new byte[stop2-start2+1];
174 		for(int opos=0, bpos=start2; opos<out.length; opos++, bpos++){
175 			byte b=(bpos<0 || bpos>=bases.length ? (byte)'N' : bases[bpos]);
176 			out[opos]=b;
177 		}
178 		return out;
179 	}
180 
msa()181 	public MSA msa(){return msa;}
182 
183 	long realignmentsAttempted=0;
184 	long realignmentsSucceeded=0;
185 	long realignmentsRetained=0;
186 	long realignmentsImproved=0;
187 
188 	private int maxrows=602;
189 	private int columns=2000;
190 	private int padding=100;
191 	private String msaType;
192 	private MSA msa;
193 
194 	public static int defaultMaxrows=603;
195 	public static int defaultColumns=2000;
196 	public static int defaultPadding=200;
197 	public static String defaultMsaType="MultiStateAligner11ts";
198 	public static ScafMap map=null;
199 
200 }
201