1 package icecream;
2 
3 import java.util.ArrayList;
4 
5 import shared.Tools;
6 import stream.Read;
7 import stream.SamLine;
8 import structures.IntList;
9 
10 /**
11  * Container for the list of reads from a single
12  * PacBio ZMW.
13  * @author Brian Bushnell
14  * @date June 5, 2020
15  */
16 public class ZMW extends ArrayList<Read> {
17 
18 	/**
19 	 * For serialization.
20 	 */
21 	private static final long serialVersionUID = -2580124131008824113L;
22 
ZMW()23 	public ZMW(){super();}
24 
ZMW(int initialSize)25 	public ZMW(int initialSize){super(initialSize);}
26 
countBases()27 	public long countBases(){
28 		long x=0;
29 		for(Read r : this){
30 			x+=r.length();
31 		}
32 		return x;
33 	}
34 
medianLength(boolean includeDiscarded)35 	public int medianLength(boolean includeDiscarded){
36 		if(size()<3){return -1;}
37 		IntList lengths=new IntList(size()-2);
38 
39 		for(int i=1; i<size()-1; i++){
40 			Read r=get(i);
41 			if(includeDiscarded || !r.discarded()){
42 				lengths.add(get(i).length());
43 			}
44 		}
45 		lengths.sort();
46 		int median=lengths.get(lengths.size/2);
47 		return median;
48 	}
49 
longestLength(boolean includeDiscarded)50 	public int longestLength(boolean includeDiscarded){
51 		int max=0;
52 		for(Read r : this){
53 			if(includeDiscarded || !r.discarded()){
54 				max=Tools.max(max, r.length());
55 			}
56 		}
57 		return max;
58 	}
59 
medianRead(boolean includeDiscarded)60 	public Read medianRead(boolean includeDiscarded){
61 		int len=medianLength(includeDiscarded);
62 		if(len<0){return longestRead(includeDiscarded);}
63 		for(int i=1; i<size()-1; i++){
64 			Read r=get(i);
65 			if((includeDiscarded || !r.discarded()) && r.length()==len){
66 				return r;
67 			}
68 		}
69 		return null;
70 	}
71 
longestRead(boolean includeDiscarded)72 	public Read longestRead(boolean includeDiscarded){
73 		Read max=null;
74 		for(Read r : this){
75 			if((includeDiscarded || !r.discarded()) && (max==null || r.length()>max.length())){max=r;}
76 		}
77 		return max;
78 	}
79 
zid()80 	public int zid(){
81 		if(zid==-1){parseZID();}
82 		return zid;
83 	}
84 
parseZID()85 	private int parseZID(){
86 		return (size()<1 ? -1 : PBHeader.parseZMW(get(0).id));
87 	}
88 
fixReadHeader(Read r, int leftTrim, int rightTrim)89 	public static void fixReadHeader(Read r, int leftTrim, int rightTrim){
90 		leftTrim=Tools.max(0, leftTrim);
91 		rightTrim=Tools.max(0, rightTrim);
92 		if(leftTrim<1 && rightTrim<1){return;}
93 		final int idx=r.id.lastIndexOf('/');
94 		if(idx>0 && idx<r.id.length()-3){
95 			String prefix=r.id.substring(0, idx+1);
96 			String suffix=r.id.substring(idx+1);
97 			if(suffix.indexOf('_')>0){
98 				String coords=suffix, comment="";
99 				int tab=suffix.indexOf('\t');
100 				if(tab<0){tab=suffix.indexOf(' ');}
101 				if(tab>0){
102 					coords=coords.substring(0, tab);
103 					comment=coords.substring(tab);
104 				}
105 				String[] split=Tools.underscorePattern.split(coords);
106 				int left=Integer.parseInt(split[0]);
107 				int right=Integer.parseInt(split[1]);
108 				left+=leftTrim;
109 				right-=rightTrim;
110 				if(left>right){left=right;}
111 
112 				if(right-left!=r.length()){right=left+r.length();}
113 //				System.err.println(r.length()+", "+(right-left));
114 
115 				r.id=prefix+left+"_"+right+comment;
116 				final SamLine sl=r.samline;
117 				if(sl!=null){
118 					sl.qname=r.id;
119 					if(sl.optional!=null){
120 						for(int i=0; i<sl.optional.size(); i++){
121 							String s=sl.optional.get(i);
122 							if(s.startsWith("qe:i:")){
123 								s="qe:i:"+right;
124 								sl.optional.set(i, s);
125 							}else if(s.startsWith("qs:i:")){
126 								s="qs:i:"+left;
127 								sl.optional.set(i, s);
128 							}
129 						}
130 					}
131 				}
132 			}
133 		}
134 	}
135 
setDiscarded(boolean b)136 	public void setDiscarded(boolean b){
137 		for(Read r : this){
138 			r.setDiscarded(b);
139 		}
140 	}
141 
lengths()142 	public int[] lengths() {
143 		final int size=size();
144 		int[] array=new int[size];
145 		for(int i=0; i<size; i++){
146 			Read r=get(i);
147 			array[i]=r==null ? -1 : r.length();
148 		}
149 		return array;
150 	}
151 
estimatePasses()152 	public float estimatePasses(){
153 		final int size=size();
154 		if(size<1){return 0;}
155 		else if(size==1){return 0.25f;}
156 		else if(size==2){return 0.5f;}
157 
158 		int median=medianLength(true);
159 		int first=first().length();
160 		int last=last().length();
161 
162 		return size-2+estimatePasses(first, median)+estimatePasses(last, median);
163 	}
164 
estimatePasses(int len, int median)165 	private float estimatePasses(int len, int median){
166 		float ratio=len/(float)median;
167 		//TODO: I want this to be more asymptotic
168 		return Tools.min(0.99f, ratio/(1+0.05f*ratio));
169 	}
170 
discarded()171 	public boolean discarded() {
172 		for(Read r : this){
173 			if(!r.discarded()){return false;}
174 		}
175 		return true;
176 	}
177 
178 	/**
179 	 * Identifier assigned by streamer, not by PacBio.
180 	 * First identifier is 0, then 1, etc.
181 	 */
182 	public long id;
183 
184 	/**
185 	 * ZMW ID assigned by PacBio.
186 	 */
187 	private int zid=-1;
188 
first()189 	public Read first(){return get(0);}
last()190 	public Read last(){return get(size()-1);}
191 
192 }
193