package icecream;

import java.util.ArrayList;

import shared.Tools;
import stream.Read;
import stream.SamLine;
import structures.IntList;

/**
 * Container for the list of reads from a single
 * PacBio ZMW.
 * @author Brian Bushnell
 * @date June 5, 2020
 */
public class ZMW extends ArrayList<Read> {

    /**
     * For serialization.
     */
    private static final long serialVersionUID = -2580124131008824113L;

    /** Creates an empty ZMW. */
    public ZMW(){super();}

    /**
     * Creates an empty ZMW with the given initial capacity.
     * @param initialSize Initial capacity of the backing list.
     */
    public ZMW(int initialSize){super(initialSize);}

    /**
     * Sums the lengths of all reads in this ZMW, discarded or not.
     * @return Total number of bases.
     */
    public long countBases(){
        long x=0;
        for(Read r : this){
            x+=r.length();
        }
        return x;
    }

    /**
     * Finds the median length of the interior reads, meaning indices
     * 1 through size()-2; the first and last reads are skipped.
     * For an even number of eligible reads the upper middle value is used.
     * @param includeDiscarded If false, reads flagged as discarded are ignored.
     * @return The median length, or -1 if this ZMW has fewer than 3 reads.
     */
    public int medianLength(boolean includeDiscarded){
        if(size()<3){return -1;}
        IntList lengths=new IntList(size()-2);

        for(int i=1; i<size()-1; i++){
            Read r=get(i);
            if(includeDiscarded || !r.discarded()){
                lengths.add(r.length());
            }
        }
        lengths.sort();
        // NOTE(review): if every interior read is discarded and includeDiscarded
        // is false, lengths is empty here; the result then depends on how
        // IntList.get handles an out-of-range index - confirm.
        int median=lengths.get(lengths.size/2);
        return median;
    }

    /**
     * Finds the length of the longest read, considering every position
     * including the first and last.
     * @param includeDiscarded If false, reads flagged as discarded are ignored.
     * @return The maximum length, or 0 if no read is eligible.
     */
    public int longestLength(boolean includeDiscarded){
        int max=0;
        for(Read r : this){
            if(includeDiscarded || !r.discarded()){
                max=Tools.max(max, r.length());
            }
        }
        return max;
    }

    /**
     * Returns the first interior read whose length equals the median length.
     * When there are fewer than 3 reads (no median), falls back to the longest read.
     * @param includeDiscarded If false, reads flagged as discarded are ignored.
     * @return The median read, or null if no eligible read matches.
     */
    public Read medianRead(boolean includeDiscarded){
        int len=medianLength(includeDiscarded);
        if(len<0){return longestRead(includeDiscarded);}
        for(int i=1; i<size()-1; i++){
            Read r=get(i);
            if((includeDiscarded || !r.discarded()) && r.length()==len){
                return r;
            }
        }
        return null;
    }

    /**
     * Returns the longest read; on ties the earliest one in the list wins.
     * @param includeDiscarded If false, reads flagged as discarded are ignored.
     * @return The longest eligible read, or null if there is none.
     */
    public Read longestRead(boolean includeDiscarded){
        Read max=null;
        for(Read r : this){
            if((includeDiscarded || !r.discarded()) && (max==null || r.length()>max.length())){max=r;}
        }
        return max;
    }

    /**
     * Returns the PacBio ZMW ID, parsing it from the first read's header
     * on first use and caching the result.
     * @return The ZMW ID, or -1 if this ZMW is empty.
     */
    public int zid(){
        // Store the parsed value; previously the result was discarded,
        // so this method always returned -1.
        if(zid==-1){zid=parseZID();}
        return zid;
    }

    /**
     * Parses the ZMW ID from the header of the first read.
     * @return The value from PBHeader.parseZMW, or -1 if this ZMW is empty.
     */
    private int parseZID(){
        return (size()<1 ? -1 : PBHeader.parseZMW(get(0).id));
    }

    /**
     * Rewrites the coordinates at the end of a read header after trimming.
     * The header is expected to end with "/left_right", optionally followed by
     * a tab or space and a comment. The left coordinate is increased by leftTrim,
     * the right is decreased by rightTrim, and the right coordinate is then
     * adjusted so that right-left equals the current read length.
     * If the read has an attached SamLine, its qname and any "qs:i:" / "qe:i:"
     * optional tags are updated to match. Headers that do not fit the expected
     * layout are left untouched.
     * @param r Read whose header should be updated; its bases are assumed to be already trimmed.
     * @param leftTrim Number of bases removed from the left; negative values are treated as 0.
     * @param rightTrim Number of bases removed from the right; negative values are treated as 0.
     */
    public static void fixReadHeader(Read r, int leftTrim, int rightTrim){
        leftTrim=Tools.max(0, leftTrim);
        rightTrim=Tools.max(0, rightTrim);
        if(leftTrim<1 && rightTrim<1){return;}
        final int idx=r.id.lastIndexOf('/');
        if(idx>0 && idx<r.id.length()-3){
            String prefix=r.id.substring(0, idx+1);
            String suffix=r.id.substring(idx+1);
            if(suffix.indexOf('_')>0){
                String coords=suffix, comment="";
                int tab=suffix.indexOf('\t');
                if(tab<0){tab=suffix.indexOf(' ');}
                if(tab>0){
                    // Take the comment from the untruncated suffix; slicing the
                    // already-shortened coords always produced an empty string.
                    comment=suffix.substring(tab);
                    coords=suffix.substring(0, tab);
                }
                String[] split=Tools.underscorePattern.split(coords);
                int left=Integer.parseInt(split[0]);
                int right=Integer.parseInt(split[1]);
                left+=leftTrim;
                right-=rightTrim;
                if(left>right){left=right;}

                // The actual read length is authoritative for the span.
                if(right-left!=r.length()){right=left+r.length();}

                r.id=prefix+left+"_"+right+comment;
                final SamLine sl=r.samline;
                if(sl!=null){
                    sl.qname=r.id;
                    if(sl.optional!=null){
                        for(int i=0; i<sl.optional.size(); i++){
                            String s=sl.optional.get(i);
                            if(s.startsWith("qe:i:")){
                                s="qe:i:"+right;
                                sl.optional.set(i, s);
                            }else if(s.startsWith("qs:i:")){
                                s="qs:i:"+left;
                                sl.optional.set(i, s);
                            }
                        }
                    }
                }
            }
        }
    }

    /**
     * Sets the discarded flag of every read in this ZMW.
     * @param b New value of the flag.
     */
    public void setDiscarded(boolean b){
        for(Read r : this){
            r.setDiscarded(b);
        }
    }

    /**
     * Returns the length of each read, in list order.
     * @return A new array with one entry per read; null reads are reported as -1.
     */
    public int[] lengths() {
        final int size=size();
        int[] array=new int[size];
        for(int i=0; i<size; i++){
            Read r=get(i);
            array[i]=r==null ? -1 : r.length();
        }
        return array;
    }

    /**
     * Estimates the number of passes represented by this ZMW.
     * Zero, one and two reads yield 0, 0.25 and 0.5 respectively. Otherwise each
     * interior read counts as one pass, and the first and last reads contribute
     * a fraction based on their length relative to the median interior length
     * (discarded reads included).
     * @return Estimated number of passes.
     */
    public float estimatePasses(){
        final int size=size();
        if(size<1){return 0;}
        else if(size==1){return 0.25f;}
        else if(size==2){return 0.5f;}

        int median=medianLength(true);
        int first=first().length();
        int last=last().length();

        return size-2+estimatePasses(first, median)+estimatePasses(last, median);
    }

    /**
     * Estimates the fraction of a pass covered by a read of the given length.
     * Computes ratio/(1+0.05*ratio), where ratio=len/median, capped at 0.99.
     * @param len Length of the read.
     * @param median Median read length used as the full-pass reference.
     * @return Estimated fraction of a pass, at most 0.99.
     */
    private float estimatePasses(int len, int median){
        float ratio=len/(float)median;
        //TODO: I want this to be more asymptotic
        return Tools.min(0.99f, ratio/(1+0.05f*ratio));
    }

    /**
     * Reports whether every read in this ZMW is flagged as discarded.
     * @return True if no read is undiscarded, which includes the empty case.
     */
    public boolean discarded() {
        for(Read r : this){
            if(!r.discarded()){return false;}
        }
        return true;
    }

    /**
     * Identifier assigned by streamer, not by PacBio.
     * First identifier is 0, then 1, etc.
     */
    public long id;

    /**
     * ZMW ID assigned by PacBio; -1 until parsed by zid().
     */
    private int zid=-1;

    /** @return The first read; throws IndexOutOfBoundsException if this ZMW is empty. */
    public Read first(){return get(0);}

    /** @return The last read; throws IndexOutOfBoundsException if this ZMW is empty. */
    public Read last(){return get(size()-1);}

}