1 package var2;
2 
3 import shared.Parse;
4 import shared.Tools;
5 
6 public class VarFilter {
7 
parse(String a, String b, String arg)8 	public boolean parse(String a, String b, String arg){
9 		if(a.equals("minreads") || a.equals("minad") || a.equals("minalleledepth") || a.equals("mincount")){
10 			minAlleleDepth=Integer.parseInt(b);
11 		}else if(a.equals("maxreads") || a.equals("maxad") || a.equals("maxalleledepth")){
12 			maxAlleleDepth=Integer.parseInt(b);
13 		}else if(a.equals("mincov") || a.equals("mincoverage") || a.equals("mindepth")){
14 			minCov=Integer.parseInt(b);
15 		}else if(a.equals("maxcov") || a.equals("maxcoverage") || a.equals("maxdepth")){
16 			maxCov=Integer.parseInt(b);
17 		}else if(a.equals("minqualitymax") || a.equals("minmaxquality")){
18 			minMaxQuality=Integer.parseInt(b);
19 		}else if(a.equals("minedistmax") || a.equals("minmaxedist")){
20 			minMaxEdist=Integer.parseInt(b);
21 		}else if(a.equals("minmapqmax") || a.equals("minmaxmapq")){
22 			minMaxMapq=Integer.parseInt(b);
23 		}else if(a.equals("minidmax") || a.equals("minmaxid")){
24 			minMaxIdentity=Double.parseDouble(b);
25 			if(minMaxIdentity>1){minMaxIdentity/=100;}
26 		}
27 
28 		else if(a.equals("minpairingrate") || a.equals("minpairrate")){
29 			minPairingRate=Double.parseDouble(b);
30 		}else if(a.equals("minstrandratio")){
31 			minStrandRatio=Double.parseDouble(b);
32 		}else if(a.equals("minscore")){
33 			minScore=Double.parseDouble(b);
34 		}else if(a.equals("maxscore")){
35 			maxScore=Double.parseDouble(b);
36 		}else if(a.equals("minquality") || a.equals("minavgquality") || a.equals("maq")){
37 			minAvgQuality=Double.parseDouble(b);
38 		}else if(a.equals("maxquality") || a.equals("maxavgquality")){
39 			maxAvgQuality=Double.parseDouble(b);
40 		}else if(a.equals("minedist") || a.equals("minavgedist") || a.equals("mae")){
41 			minAvgEdist=Double.parseDouble(b);
42 		}else if(a.equals("minavgmapq")){
43 			minAvgMapq=Double.parseDouble(b);
44 		}else if(a.equals("maxavgmapq")){
45 			maxAvgMapq=Double.parseDouble(b);
46 		}else if(a.equals("minallelefraction") || a.equals("minallelefrequency") || a.equals("maf")){
47 			minAlleleFraction=Double.parseDouble(b);
48 		}else if(a.equals("maxallelefraction") || a.equals("maxallelefrequency")){
49 			maxAlleleFraction=Double.parseDouble(b);
50 		}else if(a.equals("minidentity") || a.equals("mid") || a.equals("minid")){
51 			minIdentity=Double.parseDouble(b);
52 			if(minIdentity>1){minIdentity/=100;}
53 		}else if(a.equals("maxidentity") || a.equals("maxid")){
54 			maxIdentity=Double.parseDouble(b);
55 			if(maxIdentity>1 && maxIdentity<=100){maxIdentity/=100;}
56 		}else if(a.equals("lowcoveragepenalty") || a.equals("lowcovpenalty") || a.equals("covpenalty")){
57 			Var.lowCoveragePenalty=Double.parseDouble(b);
58 			assert(Var.lowCoveragePenalty>=0) : "Low coverage penalty must be at least 0.";
59 		}
60 
61 		//For adjacent nearby 'rainbow' SNPs
62 		else if(a.equals("adjacentcount") || a.equals("nearbycount") ||
63 				a.equals("maxnearbycount") || a.equals("maxnearby")){
64 			maxNearbyCount=Parse.parseIntKMG(b);
65 			assert(maxNearbyCount>=1); //1 means at least 1 nearby var
66 			maxNearbyCount=Tools.max(maxNearbyCount, 1);
67 		}else if(a.equals("failadjacent") || a.equals("failnearby")){
68 			failNearby=Parse.parseBoolean(b);
69 		}else if(a.equals("flagadjacent") || a.equals("flagnearby")){
70 			flagNearby=Parse.parseBoolean(b);
71 		}else if(a.equals("penalizeadjacent") || a.equals("penalizenearby")){
72 			penalizeNearby=Parse.parseBoolean(b);
73 		}else if(a.equals("adjacentdist") || a.equals("nearbydist") ||
74 				a.equals("nearbyrange") || a.equals("maxnearbydist")){
75 			nearbyDist=Parse.parseIntKMG(b);
76 			assert(nearbyDist>0); //0 means itself
77 		}else if(a.equals("nearbygap") || a.equals("maxnearbygap") || a.equals("nearbyslop")){
78 			nearbyGap=Parse.parseIntKMG(b);
79 			assert(nearbyGap>=0); //0 means on top of each other
80 		}
81 
82 		else if(a.equals("rarity")){
83 			rarity=Double.parseDouble(b);
84 			assert(rarity>=0 && rarity<=1);
85 			minAlleleFraction=Tools.min(minAlleleFraction, rarity);
86 		}
87 
88 		else if(a.equals("clearfilters")){
89 			if(Parse.parseBoolean(b)){clear();}
90 		}else{
91 			return false;
92 		}
93 		return true;
94 	}
95 
clear()96 	public void clear(){
97 		minAlleleDepth=-1;
98 		maxAlleleDepth=Integer.MAX_VALUE;
99 		minCov=-1;
100 		maxCov=Integer.MAX_VALUE;
101 
102 		minMaxQuality=0;
103 		minMaxEdist=0;
104 		minMaxMapq=0;
105 		minMaxIdentity=0;
106 
107 		minPairingRate=0;
108 		minStrandRatio=0;
109 		minScore=0;
110 		minAvgQuality=0;
111 		minAvgEdist=0;
112 		minAvgMapq=0;
113 		minAlleleFraction=0;
114 		minIdentity=0;
115 
116 		maxScore=Integer.MAX_VALUE;
117 		maxAvgQuality=Integer.MAX_VALUE;
118 		maxAvgMapq=Integer.MAX_VALUE;
119 		maxAlleleFraction=Integer.MAX_VALUE;
120 		maxIdentity=Integer.MAX_VALUE;
121 
122 		maxNearbyCount=-1;
123 		failNearby=false;
124 	}
125 
126 
setFrom(VarFilter filter)127 	public void setFrom(VarFilter filter) {
128 		minAlleleDepth=filter.minAlleleDepth;
129 		maxAlleleDepth=filter.maxAlleleDepth;
130 		minCov=filter.minCov;
131 		maxCov=filter.maxCov;
132 
133 		minMaxQuality=filter.minMaxQuality;
134 		minMaxEdist=filter.minMaxEdist;
135 		minMaxMapq=filter.minMaxMapq;
136 		minMaxIdentity=filter.minMaxIdentity;
137 
138 		minPairingRate=filter.minPairingRate;
139 		minStrandRatio=filter.minStrandRatio;
140 		minScore=filter.minScore;
141 		minAvgQuality=filter.minAvgQuality;
142 		minAvgEdist=filter.minAvgEdist;
143 		minAvgMapq=filter.minAvgMapq;
144 		minAlleleFraction=filter.minAlleleFraction;
145 		minIdentity=filter.minIdentity;
146 
147 		maxScore=filter.maxScore;
148 		maxAvgQuality=filter.maxAvgQuality;
149 		maxAvgMapq=filter.maxAvgMapq;
150 		maxAlleleFraction=filter.maxAlleleFraction;
151 		maxIdentity=filter.maxIdentity;
152 	}
153 
passesFast(Var v)154 	public boolean passesFast(Var v){
155 		if(v.forced()){return true;}
156 		final int count=v.alleleCount();
157 		if(count<minAlleleDepth || count>maxAlleleDepth){return false;}
158 		if(v.baseQMax<minMaxQuality){return false;}
159 		if(v.endDistMax<minMaxEdist){return false;}
160 		if(v.mapQMax<minMaxMapq){return false;}
161 //		final int cov=v.coverage();
162 //		if(cov<minCov || cov>maxCov){return false;}
163 		return true;
164 	}
165 
passesFilter(Var v, double pairingRate, double totalQualityAvg, double totalMapqAvg, double readLengthAvg, int ploidy, ScafMap map, boolean considerNearby)166 	public boolean passesFilter(Var v, double pairingRate, double totalQualityAvg,
167 			double totalMapqAvg, double readLengthAvg, int ploidy, ScafMap map, boolean considerNearby){
168 		final int count=v.alleleCount();
169 		//System.err.println("a0: "+count+", "+v.r1minus+", "+ v.r1plus+", "+ v.r2minus+", "+ v.r2plus);
170 //		assert(false) : v.r1minus+", "+ v.r1plus+", "+ v.r2minus+", "+ v.r2plus;
171 		if(count<minAlleleDepth || count>maxAlleleDepth){return false;}
172 		final int cov=v.coverage();
173 		if(cov<minCov || cov>maxCov){return false;}
174 		if(v.baseQMax<minMaxQuality){return false;}
175 		if(v.endDistMax<minMaxEdist){return false;}
176 		if(v.mapQMax<minMaxMapq){return false;}
177 		if(considerNearby && failNearby){
178 			assert(v.nearbyVarCount>=0) : "Nearby vars were not counted.";
179 			if(v.nearbyVarCount>maxNearbyCount){return false;}
180 		}
181 
182 		//System.err.println("d");
183 		if(v.idMax*0.001f<minMaxIdentity){return false;}
184 		//System.err.println("e");
185 
186 		//Slower, uses division.
187 //		if(pairingRate>0 && minPairingRate>0 && v.pairingRate()<minPairingRate){return false;}
188 //		if(minStrandRatio>0 && v.strandRatio()<minStrandRatio){return false;}
189 //		if(minAvgQuality>0 && v.baseQAvg()<minAvgQuality){return false;}
190 //		if(minAvgEdist>0 && v.edistAvg()<minAvgEdist){return false;}
191 //		if(minAvgMapq>0 && v.mapQAvg()<minAvgMapq){return false;}
192 
193 		if(pairingRate>0 && minPairingRate>0 && count*minPairingRate>v.properPairCount){return false;}
194 		//System.err.println("f");
195 		if(minAvgQuality>0 && count*minAvgQuality>v.baseQSum){return false;}
196 		//System.err.println("g");
197 		if(minAvgEdist>0 && count*minAvgEdist>v.endDistSum){return false;}
198 		//System.err.println("h");
199 		if(minAvgMapq>0 && count*minAvgMapq>v.mapQSum){return false;}
200 		//System.err.println("i");
201 		if(minIdentity>0 && count*minIdentity*1000>v.idSum){return false;}
202 		//System.err.println("j");
203 
204 		if(maxAvgQuality<Integer.MAX_VALUE && count*maxAvgQuality<v.baseQSum){return false;}
205 		//System.err.println("k");
206 		if(maxAvgMapq<Integer.MAX_VALUE && count*maxAvgMapq<v.mapQSum){return false;}
207 		//System.err.println("l");
208 		if(maxIdentity<Integer.MAX_VALUE && count*maxIdentity*1000<v.idSum){return false;}
209 		//System.err.println("m");
210 
211 		if(minStrandRatio>0 && v.strandRatio()<minStrandRatio){return false;}
212 		//System.err.println("n");
213 
214 		if(minAlleleFraction>0 && v.coverage()>0){
215 			final double af=v.revisedAlleleFraction==-1 ? v.alleleFraction() : v.revisedAlleleFraction;
216 			if(af<minAlleleFraction){return false;}
217 		}
218 		//System.err.println("o");
219 		if(maxAlleleFraction<Integer.MAX_VALUE && v.coverage()>0){
220 			final double af=v.revisedAlleleFraction==-1 ? v.alleleFraction() : v.revisedAlleleFraction;
221 			if(af>maxAlleleFraction){return false;}
222 		}
223 		//System.err.println("p");
224 
225 		if(minScore>0 || maxScore<Integer.MAX_VALUE){
226 			double phredScore=v.phredScore(pairingRate, totalQualityAvg, totalMapqAvg, readLengthAvg, rarity, ploidy, map);
227 //			assert(false) : minScore+", "+maxScore+", "+phredScore+"\n"
228 //					+pairingRate+", "+totalQualityAvg+", "+totalMapqAvg+", "+readLengthAvg+", "+rarity+", "+ploidy+"\n"+
229 //					v.toBasicHeader()+"\n"+v.toString()+"\n"+v.count();
230 			if(phredScore<minScore || phredScore>maxScore){return false;}
231 		}
232 		//System.err.println("q");
233 
234 		return true;
235 	}
236 
toString(double pairingRate, int ploidy)237 	public String toString(double pairingRate, int ploidy){
238 		StringBuilder sb=new StringBuilder();
239 
240 		sb.append("pairingRate=").append(pairingRate).append("\n");
241 		sb.append("ploidy=").append(ploidy).append("\n");
242 
243 		sb.append("minReads=").append(minAlleleDepth).append("\n");
244 		sb.append("maxReads=").append(maxAlleleDepth).append("\n");
245 		sb.append("minCov=").append(minCov).append("\n");
246 		sb.append("maxCov=").append(maxCov).append("\n");
247 		sb.append("minMaxQuality=").append(minMaxQuality).append("\n");
248 		sb.append("minMaxEdist=").append(minMaxEdist).append("\n");
249 		sb.append("minMaxMapq=").append(minMaxMapq).append("\n");
250 		sb.append("minMaxIdentity=").append(minMaxIdentity).append("\n");
251 
252 		sb.append("minPairingRate=").append(minPairingRate).append("\n");
253 		sb.append("minStrandRatio=").append(minStrandRatio).append("\n");
254 		sb.append("minScore=").append(minScore).append("\n");
255 		sb.append("minAvgQuality=").append(minAvgQuality).append("\n");
256 		sb.append("minAvgEdist=").append(minAvgEdist).append("\n");
257 		sb.append("minAvgMapq=").append(minAvgMapq).append("\n");
258 		sb.append("minAlleleFraction=").append(minAlleleFraction);
259 		sb.append("minIdentity=").append(minIdentity);
260 
261 		return sb.toString();
262 	}
263 
264 	public int minAlleleDepth=2;
265 	public int maxAlleleDepth=Integer.MAX_VALUE;
266 	public int minCov=-1;
267 	public int maxCov=Integer.MAX_VALUE;
268 
269 	public int minMaxQuality=15;
270 	public int minMaxEdist=20;
271 	public int minMaxMapq=0;
272 	public double minMaxIdentity=0;
273 
274 	public double minPairingRate=0.1;
275 	public double minStrandRatio=0.1;
276 	public double minScore=20;
277 	public double maxScore=Integer.MAX_VALUE;
278 	public double minAvgQuality=12;
279 	public double maxAvgQuality=Integer.MAX_VALUE;
280 	public double minAvgEdist=10;
281 	public double minAvgMapq=0;
282 	public double maxAvgMapq=Integer.MAX_VALUE;
283 	public double minAlleleFraction=0.1;
284 	public double maxAlleleFraction=Integer.MAX_VALUE;
285 	public double minIdentity=0;
286 	public double maxIdentity=Integer.MAX_VALUE;
287 	public double rarity=1;
288 
289 	public int maxNearbyCount=1;//Max nearby allowed before being flagged or failed
290 	public int nearbyDist=20;
291 	public int nearbyGap=2;
292 
293 	public boolean flagNearby=false;
294 	public boolean failNearby=false;
295 	public boolean penalizeNearby=false;
296 	public boolean countNearbyVars=true;
297 
298 }
299