1 package var2; 2 3 import align2.MSA; 4 import dna.AminoAcid; 5 import shared.Shared; 6 import shared.Tools; 7 import stream.Read; 8 import stream.SamLine; 9 import stream.SiteScore; 10 11 public class Realigner { 12 Realigner()13 public Realigner(){ 14 this(defaultMaxrows, defaultColumns, defaultPadding, defaultMsaType); 15 } 16 Realigner(int maxrows_, int columns_, int padding_, String msaType_)17 public Realigner(int maxrows_, int columns_, int padding_, String msaType_){ 18 maxrows=maxrows_; 19 columns=columns_; 20 padding=padding_; 21 msaType=msaType_; 22 msa=MSA.makeMSA(maxrows, columns+2, msaType); 23 } 24 realign(Read r, SamLine sl, final boolean unclip)25 public boolean realign(Read r, SamLine sl, final boolean unclip){ 26 if(!r.mapped() || sl.supplementary() || !sl.primary()){return false;} 27 Scaffold scaf=map.getScaffold(sl.rnameS()); 28 assert(scaf!=null) : sl.rnameS(); 29 return realign(r, sl, scaf, unclip); 30 } 31 realign(final Read r, final SamLine sl, final Scaffold scaf, final boolean unclip)32 public boolean realign(final Read r, final SamLine sl, final Scaffold scaf, final boolean unclip){ 33 return realign(r, sl, scaf.bases, unclip); 34 } 35 realign(final Read r, final SamLine sl, final byte[] ref, final boolean unclip)36 public boolean realign(final Read r, final SamLine sl, final byte[] ref, final boolean unclip){ 37 if(!r.mapped() || sl.supplementary() || !sl.primary()){return false;} 38 int[] mSCNID=r.countMatchSymbols(); 39 int sumBad=mSCNID[1]+mSCNID[4]+mSCNID[5], sumIndel=mSCNID[4]+mSCNID[5]; 40 if(mSCNID[2]>0){//continue 41 }else if(sumBad>3){//continue 42 }else if(sumIndel>1 || (sumIndel>0 && mSCNID[1]>1)){//continue 43 }else{return false;} 44 45 if(mSCNID[1]<3 && mSCNID[2]==0 && mSCNID[4]<2 && mSCNID[5]<2 && sumBad<3 && sumIndel<2){return false;} 46 47 if(r.length()+2>msa.maxColumns+2*padding){ 48 msa=MSA.makeMSA(msa.maxRows, r.length()+2+r.length()/4+2*padding, msaType); 49 } 50 if(r.length()+2>msa.maxRows){ 51 msa=MSA.makeMSA(r.length()+2+r.length()/4+2*padding, msa.maxColumns, msaType); 52 } 53 54 final int leadingClip=SamLine.countLeadingClip(sl.cigar, true, false); 55 final int trailingClip=SamLine.countTrailingClip(sl.cigar, true, false); 56 final int totalClip=leadingClip+trailingClip; 57 final boolean clipped=totalClip>0; 58 59 final int start=sl.start(true, false); 60 final int stop=sl.stop(start, true, false); 61 final int paddedStart=start-padding, paddedStop=stop+padding; 62 final int len0=paddedStop-paddedStart+1; 63 final int a=0, b=len0-1; 64 if(len0>=columns){return false;} 65 66 realignmentsAttempted++; 67 68 SiteScore ss=null; 69 final byte[] rbases=makeRbases(ref, start, stop, padding); 70 byte[] qbases=r.bases; 71 if(sl.strand()==Shared.MINUS){ 72 qbases=AminoAcid.reverseComplementBases(qbases); 73 } 74 75 // SiteScore oldSS=r.toSite(); //123 Slow 76 // oldSS.match=r.match.clone(); //123 Slow 77 // String oldSL=sl.toString(); //123 Slow 78 // assert(oldSS.lengthsAgree()) : oldSS.start+", "+oldSS.stop+", "+oldSS.matchLength()+", "+oldSS.mappedLength()+", "+scaf.length+"\n"+sl+"\n"+oldSS+"\n"; 79 // assert(!oldSS.matchContainsXY()) : oldSS.start+", "+oldSS.stop+", "+oldSS.matchLength()+", "+oldSS.mappedLength()+", "+scaf.length+"\n"+sl+"\n"+oldSS+"\n"; 80 81 assert(!r.shortmatch()); //Otherwise convert it or change the score function. 82 final int score0=msa.score(r.match); 83 final int minScore=clipped ? 1 : Tools.max(1, score0-1); 84 85 final int[] max; 86 max=msa.fillLimited(qbases, rbases, a, b, minScore, null); 87 if(max==null){return false;} 88 89 final int[] score=msa.score(qbases, rbases, a, b, max[0], max[1], max[2], false); 90 if(score==null){return false;} 91 realignmentsSucceeded++; 92 93 if(score[0]<=minScore || (score[0]<=score0 && !clipped)){return false;} 94 realignmentsImproved++; 95 // assert(false) : score[0]+", "+minScore+", "+score0; 96 97 ss=new SiteScore(1, r.strand(), score[1], score[2], 1, score[0]); 98 ss.setSlowScore(ss.quickScore); 99 ss.score=ss.quickScore; 100 ss.match=msa.traceback(qbases, rbases, a, b, score[3], score[4], score[5], false); 101 assert(ss.match!=null); 102 103 SiteScore oldSS2=ss.clone(); //123 Slow 104 oldSS2.match=ss.match.clone(); //123 Slow 105 // assert(ss.lengthsAgree()) : ss.start+", "+ss.stop+", "+ss.matchLength()+", "+ss.mappedLength()+", "+scaf.length+"\n"+sl+"\n"+ss+"\n" 106 // +oldSS.start+", "+oldSS.stop+", "+oldSS.matchLength()+", "+oldSS.mappedLength()+", "+scaf.length+"\n"+sl+"\n"+oldSS+"\n" 107 // ; 108 // assert(!oldSS.matchContainsXY()) : ss.start+", "+ss.stop+", "+ss.matchLength()+", "+ss.mappedLength()+", "+scaf.length+"\n"+sl+"\n"+ss+"\n" 109 // +oldSS.start+", "+oldSS.stop+", "+oldSS.matchLength()+", "+oldSS.mappedLength()+", "+scaf.length+"\n"+sl+"\n"+oldSS+"\n" 110 // ; 111 112 //These pass 113 // assert(sl.strand()==ss.strand()) : sl.strand()+", "+ss.strand()+", "+r.strand()+", "+oldSS.strand()+", "+oldSS2.strand(); 114 // assert(sl.strand()==r.strand()); 115 // assert(sl.strand()==oldSS.strand()); 116 // assert(sl.strand()==oldSS2.strand()); 117 118 // SiteScore old=r.toSite(); 119 120 121 //Correct for adjusted ref 122 ss.start=ss.start+paddedStart; 123 ss.stop=ss.stop+paddedStart; 124 125 { 126 int clipped2=ss.clipTipIndels(ref.length); 127 if(unclip && clipped2>0 && ref!=null && oldSS2.match[oldSS2.match.length-1]=='Y'){ 128 ss.unclip(qbases, ref); 129 // System.err.println(sl.strand()+"\n"+new String(new String(oldSS2.match)+"\n"+new String(ss.match))); 130 } 131 } 132 133 if(ss.matchContainsXY()){return false;} 134 if(ss.matchContainsAB()){return false;} 135 136 realignmentsRetained++; 137 138 r.start=ss.start; 139 r.stop=ss.stop; 140 141 // System.err.println("old:\t"+old+"\nnew:\t"+ss+"\nold score:"+score0+"\n"); 142 // assert(false); 143 144 r.match=ss.match; 145 sl.pos=Tools.max(0, r.start)+1; 146 sl.cigar=SamLine.toCigar14(r.match, r.start, r.stop, ref.length, qbases); 147 sl.optional=null; 148 149 // assert((ss.start>=0) == (SamLine.countLeadingClip(sl.cigar, true, true)==0)) : ss.start+", "+ss.stop+", "+scaf.length+"\n"+sl+"\n"+ss+"\n" 150 // +oldSL+"\n"+oldSS+"\n"+oldSS2+"\n" 151 // ; 152 // assert((ss.stop<scaf.length) == (SamLine.countTrailingClip(sl.cigar, true, true)==0)) : ss.start+", "+ss.stop+", "+scaf.length+"\n"+sl+"\n" 153 // +oldSL+"\n"+oldSS+"\n"+oldSS2+"\n" 154 // ; 155 156 // assert((ss.start>=0) || (SamLine.countLeadingClip(sl.cigar, true, true)>0)) : ss.start+", "+ss.stop+", "+scaf.length+"\n"+sl+"\n"+ss+"\n" 157 // +oldSL+"\n"+oldSS+"\n"+oldSS2+"\n" 158 // ; 159 // assert((ss.stop<scaf.length) || (SamLine.countTrailingClip(sl.cigar, true, true)>0)) : ss.start+", "+ss.stop+", "+scaf.length+"\n"+sl+"\n" 160 // +oldSL+"\n"+oldSS+"\n"+oldSS2+"\n" 161 // ; 162 163 //Perform clipping 164 r.match=SamLine.cigarToShortMatch_old(sl.cigar, true); 165 r.setShortMatch(true); 166 r.toLongMatchString(true); 167 168 return true; 169 } 170 makeRbases(byte[] bases, int start, int stop, int padding)171 private static byte[] makeRbases(byte[] bases, int start, int stop, int padding) { 172 final int start2=start-padding, stop2=stop+padding; 173 byte[] out=new byte[stop2-start2+1]; 174 for(int opos=0, bpos=start2; opos<out.length; opos++, bpos++){ 175 byte b=(bpos<0 || bpos>=bases.length ? (byte)'N' : bases[bpos]); 176 out[opos]=b; 177 } 178 return out; 179 } 180 msa()181 public MSA msa(){return msa;} 182 183 long realignmentsAttempted=0; 184 long realignmentsSucceeded=0; 185 long realignmentsRetained=0; 186 long realignmentsImproved=0; 187 188 private int maxrows=602; 189 private int columns=2000; 190 private int padding=100; 191 private String msaType; 192 private MSA msa; 193 194 public static int defaultMaxrows=603; 195 public static int defaultColumns=2000; 196 public static int defaultPadding=200; 197 public static String defaultMsaType="MultiStateAligner11ts"; 198 public static ScafMap map=null; 199 200 } 201