1 package var2; 2 3 import shared.Parse; 4 import shared.Tools; 5 6 public class VarFilter { 7 parse(String a, String b, String arg)8 public boolean parse(String a, String b, String arg){ 9 if(a.equals("minreads") || a.equals("minad") || a.equals("minalleledepth") || a.equals("mincount")){ 10 minAlleleDepth=Integer.parseInt(b); 11 }else if(a.equals("maxreads") || a.equals("maxad") || a.equals("maxalleledepth")){ 12 maxAlleleDepth=Integer.parseInt(b); 13 }else if(a.equals("mincov") || a.equals("mincoverage") || a.equals("mindepth")){ 14 minCov=Integer.parseInt(b); 15 }else if(a.equals("maxcov") || a.equals("maxcoverage") || a.equals("maxdepth")){ 16 maxCov=Integer.parseInt(b); 17 }else if(a.equals("minqualitymax") || a.equals("minmaxquality")){ 18 minMaxQuality=Integer.parseInt(b); 19 }else if(a.equals("minedistmax") || a.equals("minmaxedist")){ 20 minMaxEdist=Integer.parseInt(b); 21 }else if(a.equals("minmapqmax") || a.equals("minmaxmapq")){ 22 minMaxMapq=Integer.parseInt(b); 23 }else if(a.equals("minidmax") || a.equals("minmaxid")){ 24 minMaxIdentity=Double.parseDouble(b); 25 if(minMaxIdentity>1){minMaxIdentity/=100;} 26 } 27 28 else if(a.equals("minpairingrate") || a.equals("minpairrate")){ 29 minPairingRate=Double.parseDouble(b); 30 }else if(a.equals("minstrandratio")){ 31 minStrandRatio=Double.parseDouble(b); 32 }else if(a.equals("minscore")){ 33 minScore=Double.parseDouble(b); 34 }else if(a.equals("maxscore")){ 35 maxScore=Double.parseDouble(b); 36 }else if(a.equals("minquality") || a.equals("minavgquality") || a.equals("maq")){ 37 minAvgQuality=Double.parseDouble(b); 38 }else if(a.equals("maxquality") || a.equals("maxavgquality")){ 39 maxAvgQuality=Double.parseDouble(b); 40 }else if(a.equals("minedist") || a.equals("minavgedist") || a.equals("mae")){ 41 minAvgEdist=Double.parseDouble(b); 42 }else if(a.equals("minavgmapq")){ 43 minAvgMapq=Double.parseDouble(b); 44 }else if(a.equals("maxavgmapq")){ 45 maxAvgMapq=Double.parseDouble(b); 46 }else if(a.equals("minallelefraction") || a.equals("minallelefrequency") || a.equals("maf")){ 47 minAlleleFraction=Double.parseDouble(b); 48 }else if(a.equals("maxallelefraction") || a.equals("maxallelefrequency")){ 49 maxAlleleFraction=Double.parseDouble(b); 50 }else if(a.equals("minidentity") || a.equals("mid") || a.equals("minid")){ 51 minIdentity=Double.parseDouble(b); 52 if(minIdentity>1){minIdentity/=100;} 53 }else if(a.equals("maxidentity") || a.equals("maxid")){ 54 maxIdentity=Double.parseDouble(b); 55 if(maxIdentity>1 && maxIdentity<=100){maxIdentity/=100;} 56 }else if(a.equals("lowcoveragepenalty") || a.equals("lowcovpenalty") || a.equals("covpenalty")){ 57 Var.lowCoveragePenalty=Double.parseDouble(b); 58 assert(Var.lowCoveragePenalty>=0) : "Low coverage penalty must be at least 0."; 59 } 60 61 //For adjacent nearby 'rainbow' SNPs 62 else if(a.equals("adjacentcount") || a.equals("nearbycount") || 63 a.equals("maxnearbycount") || a.equals("maxnearby")){ 64 maxNearbyCount=Parse.parseIntKMG(b); 65 assert(maxNearbyCount>=1); //1 means at least 1 nearby var 66 maxNearbyCount=Tools.max(maxNearbyCount, 1); 67 }else if(a.equals("failadjacent") || a.equals("failnearby")){ 68 failNearby=Parse.parseBoolean(b); 69 }else if(a.equals("flagadjacent") || a.equals("flagnearby")){ 70 flagNearby=Parse.parseBoolean(b); 71 }else if(a.equals("penalizeadjacent") || a.equals("penalizenearby")){ 72 penalizeNearby=Parse.parseBoolean(b); 73 }else if(a.equals("adjacentdist") || a.equals("nearbydist") || 74 a.equals("nearbyrange") || a.equals("maxnearbydist")){ 75 nearbyDist=Parse.parseIntKMG(b); 76 assert(nearbyDist>0); //0 means itself 77 }else if(a.equals("nearbygap") || a.equals("maxnearbygap") || a.equals("nearbyslop")){ 78 nearbyGap=Parse.parseIntKMG(b); 79 assert(nearbyGap>=0); //0 means on top of each other 80 } 81 82 else if(a.equals("rarity")){ 83 rarity=Double.parseDouble(b); 84 assert(rarity>=0 && rarity<=1); 85 minAlleleFraction=Tools.min(minAlleleFraction, rarity); 86 } 87 88 else if(a.equals("clearfilters")){ 89 if(Parse.parseBoolean(b)){clear();} 90 }else{ 91 return false; 92 } 93 return true; 94 } 95 clear()96 public void clear(){ 97 minAlleleDepth=-1; 98 maxAlleleDepth=Integer.MAX_VALUE; 99 minCov=-1; 100 maxCov=Integer.MAX_VALUE; 101 102 minMaxQuality=0; 103 minMaxEdist=0; 104 minMaxMapq=0; 105 minMaxIdentity=0; 106 107 minPairingRate=0; 108 minStrandRatio=0; 109 minScore=0; 110 minAvgQuality=0; 111 minAvgEdist=0; 112 minAvgMapq=0; 113 minAlleleFraction=0; 114 minIdentity=0; 115 116 maxScore=Integer.MAX_VALUE; 117 maxAvgQuality=Integer.MAX_VALUE; 118 maxAvgMapq=Integer.MAX_VALUE; 119 maxAlleleFraction=Integer.MAX_VALUE; 120 maxIdentity=Integer.MAX_VALUE; 121 122 maxNearbyCount=-1; 123 failNearby=false; 124 } 125 126 setFrom(VarFilter filter)127 public void setFrom(VarFilter filter) { 128 minAlleleDepth=filter.minAlleleDepth; 129 maxAlleleDepth=filter.maxAlleleDepth; 130 minCov=filter.minCov; 131 maxCov=filter.maxCov; 132 133 minMaxQuality=filter.minMaxQuality; 134 minMaxEdist=filter.minMaxEdist; 135 minMaxMapq=filter.minMaxMapq; 136 minMaxIdentity=filter.minMaxIdentity; 137 138 minPairingRate=filter.minPairingRate; 139 minStrandRatio=filter.minStrandRatio; 140 minScore=filter.minScore; 141 minAvgQuality=filter.minAvgQuality; 142 minAvgEdist=filter.minAvgEdist; 143 minAvgMapq=filter.minAvgMapq; 144 minAlleleFraction=filter.minAlleleFraction; 145 minIdentity=filter.minIdentity; 146 147 maxScore=filter.maxScore; 148 maxAvgQuality=filter.maxAvgQuality; 149 maxAvgMapq=filter.maxAvgMapq; 150 maxAlleleFraction=filter.maxAlleleFraction; 151 maxIdentity=filter.maxIdentity; 152 } 153 passesFast(Var v)154 public boolean passesFast(Var v){ 155 if(v.forced()){return true;} 156 final int count=v.alleleCount(); 157 if(count<minAlleleDepth || count>maxAlleleDepth){return false;} 158 if(v.baseQMax<minMaxQuality){return false;} 159 if(v.endDistMax<minMaxEdist){return false;} 160 if(v.mapQMax<minMaxMapq){return false;} 161 // final int cov=v.coverage(); 162 // if(cov<minCov || cov>maxCov){return false;} 163 return true; 164 } 165 passesFilter(Var v, double pairingRate, double totalQualityAvg, double totalMapqAvg, double readLengthAvg, int ploidy, ScafMap map, boolean considerNearby)166 public boolean passesFilter(Var v, double pairingRate, double totalQualityAvg, 167 double totalMapqAvg, double readLengthAvg, int ploidy, ScafMap map, boolean considerNearby){ 168 final int count=v.alleleCount(); 169 //System.err.println("a0: "+count+", "+v.r1minus+", "+ v.r1plus+", "+ v.r2minus+", "+ v.r2plus); 170 // assert(false) : v.r1minus+", "+ v.r1plus+", "+ v.r2minus+", "+ v.r2plus; 171 if(count<minAlleleDepth || count>maxAlleleDepth){return false;} 172 final int cov=v.coverage(); 173 if(cov<minCov || cov>maxCov){return false;} 174 if(v.baseQMax<minMaxQuality){return false;} 175 if(v.endDistMax<minMaxEdist){return false;} 176 if(v.mapQMax<minMaxMapq){return false;} 177 if(considerNearby && failNearby){ 178 assert(v.nearbyVarCount>=0) : "Nearby vars were not counted."; 179 if(v.nearbyVarCount>maxNearbyCount){return false;} 180 } 181 182 //System.err.println("d"); 183 if(v.idMax*0.001f<minMaxIdentity){return false;} 184 //System.err.println("e"); 185 186 //Slower, uses division. 187 // if(pairingRate>0 && minPairingRate>0 && v.pairingRate()<minPairingRate){return false;} 188 // if(minStrandRatio>0 && v.strandRatio()<minStrandRatio){return false;} 189 // if(minAvgQuality>0 && v.baseQAvg()<minAvgQuality){return false;} 190 // if(minAvgEdist>0 && v.edistAvg()<minAvgEdist){return false;} 191 // if(minAvgMapq>0 && v.mapQAvg()<minAvgMapq){return false;} 192 193 if(pairingRate>0 && minPairingRate>0 && count*minPairingRate>v.properPairCount){return false;} 194 //System.err.println("f"); 195 if(minAvgQuality>0 && count*minAvgQuality>v.baseQSum){return false;} 196 //System.err.println("g"); 197 if(minAvgEdist>0 && count*minAvgEdist>v.endDistSum){return false;} 198 //System.err.println("h"); 199 if(minAvgMapq>0 && count*minAvgMapq>v.mapQSum){return false;} 200 //System.err.println("i"); 201 if(minIdentity>0 && count*minIdentity*1000>v.idSum){return false;} 202 //System.err.println("j"); 203 204 if(maxAvgQuality<Integer.MAX_VALUE && count*maxAvgQuality<v.baseQSum){return false;} 205 //System.err.println("k"); 206 if(maxAvgMapq<Integer.MAX_VALUE && count*maxAvgMapq<v.mapQSum){return false;} 207 //System.err.println("l"); 208 if(maxIdentity<Integer.MAX_VALUE && count*maxIdentity*1000<v.idSum){return false;} 209 //System.err.println("m"); 210 211 if(minStrandRatio>0 && v.strandRatio()<minStrandRatio){return false;} 212 //System.err.println("n"); 213 214 if(minAlleleFraction>0 && v.coverage()>0){ 215 final double af=v.revisedAlleleFraction==-1 ? v.alleleFraction() : v.revisedAlleleFraction; 216 if(af<minAlleleFraction){return false;} 217 } 218 //System.err.println("o"); 219 if(maxAlleleFraction<Integer.MAX_VALUE && v.coverage()>0){ 220 final double af=v.revisedAlleleFraction==-1 ? v.alleleFraction() : v.revisedAlleleFraction; 221 if(af>maxAlleleFraction){return false;} 222 } 223 //System.err.println("p"); 224 225 if(minScore>0 || maxScore<Integer.MAX_VALUE){ 226 double phredScore=v.phredScore(pairingRate, totalQualityAvg, totalMapqAvg, readLengthAvg, rarity, ploidy, map); 227 // assert(false) : minScore+", "+maxScore+", "+phredScore+"\n" 228 // +pairingRate+", "+totalQualityAvg+", "+totalMapqAvg+", "+readLengthAvg+", "+rarity+", "+ploidy+"\n"+ 229 // v.toBasicHeader()+"\n"+v.toString()+"\n"+v.count(); 230 if(phredScore<minScore || phredScore>maxScore){return false;} 231 } 232 //System.err.println("q"); 233 234 return true; 235 } 236 toString(double pairingRate, int ploidy)237 public String toString(double pairingRate, int ploidy){ 238 StringBuilder sb=new StringBuilder(); 239 240 sb.append("pairingRate=").append(pairingRate).append("\n"); 241 sb.append("ploidy=").append(ploidy).append("\n"); 242 243 sb.append("minReads=").append(minAlleleDepth).append("\n"); 244 sb.append("maxReads=").append(maxAlleleDepth).append("\n"); 245 sb.append("minCov=").append(minCov).append("\n"); 246 sb.append("maxCov=").append(maxCov).append("\n"); 247 sb.append("minMaxQuality=").append(minMaxQuality).append("\n"); 248 sb.append("minMaxEdist=").append(minMaxEdist).append("\n"); 249 sb.append("minMaxMapq=").append(minMaxMapq).append("\n"); 250 sb.append("minMaxIdentity=").append(minMaxIdentity).append("\n"); 251 252 sb.append("minPairingRate=").append(minPairingRate).append("\n"); 253 sb.append("minStrandRatio=").append(minStrandRatio).append("\n"); 254 sb.append("minScore=").append(minScore).append("\n"); 255 sb.append("minAvgQuality=").append(minAvgQuality).append("\n"); 256 sb.append("minAvgEdist=").append(minAvgEdist).append("\n"); 257 sb.append("minAvgMapq=").append(minAvgMapq).append("\n"); 258 sb.append("minAlleleFraction=").append(minAlleleFraction); 259 sb.append("minIdentity=").append(minIdentity); 260 261 return sb.toString(); 262 } 263 264 public int minAlleleDepth=2; 265 public int maxAlleleDepth=Integer.MAX_VALUE; 266 public int minCov=-1; 267 public int maxCov=Integer.MAX_VALUE; 268 269 public int minMaxQuality=15; 270 public int minMaxEdist=20; 271 public int minMaxMapq=0; 272 public double minMaxIdentity=0; 273 274 public double minPairingRate=0.1; 275 public double minStrandRatio=0.1; 276 public double minScore=20; 277 public double maxScore=Integer.MAX_VALUE; 278 public double minAvgQuality=12; 279 public double maxAvgQuality=Integer.MAX_VALUE; 280 public double minAvgEdist=10; 281 public double minAvgMapq=0; 282 public double maxAvgMapq=Integer.MAX_VALUE; 283 public double minAlleleFraction=0.1; 284 public double maxAlleleFraction=Integer.MAX_VALUE; 285 public double minIdentity=0; 286 public double maxIdentity=Integer.MAX_VALUE; 287 public double rarity=1; 288 289 public int maxNearbyCount=1;//Max nearby allowed before being flagged or failed 290 public int nearbyDist=20; 291 public int nearbyGap=2; 292 293 public boolean flagNearby=false; 294 public boolean failNearby=false; 295 public boolean penalizeNearby=false; 296 public boolean countNearbyVars=true; 297 298 } 299