1 package var; 2 3 import java.io.File; 4 import java.util.ArrayList; 5 import java.util.HashMap; 6 7 import dna.Data; 8 import fileIO.TextStreamWriter; 9 import shared.Parse; 10 import shared.PreParser; 11 import shared.Shared; 12 import shared.Timer; 13 import shared.Tools; 14 15 public class StackVariations2 { 16 main(String[] args)17 public static void main(String[] args){ 18 {//Preparse block for help, config files, and outstream 19 PreParser pp=new PreParser(args, new Object() { }.getClass().getEnclosingClass(), false); 20 args=pp.args; 21 //outstream=pp.outstream; 22 } 23 24 Timer t=new Timer(); 25 26 String inPattern=(args[0].equalsIgnoreCase("null") ? null : args[0]); 27 String outPattern=args[1]; 28 29 assert(!inPattern.equalsIgnoreCase(outPattern)); 30 31 int minChrom=-1; 32 int maxChrom=-1; 33 34 boolean filter=false; 35 Data.GENOME_BUILD=-1; 36 37 for(int i=2; i<args.length; i++){ 38 final String arg=args[i]; 39 final String[] split=arg.split("="); 40 String a=split[0].toLowerCase(); 41 String b=split.length>1 ? split[1] : null; 42 43 if(a.equalsIgnoreCase("filter")){ 44 filter=true; 45 }else if(a.startsWith("filter")){ 46 if(b.equals("1") || b.startsWith("t")){filter=true;} 47 else if(b.equals("0") || b.startsWith("f")){filter=false;} 48 else{throw new RuntimeException("Unknown parameter "+args[i]);} 49 }else if(a.equalsIgnoreCase("strict")){ 50 if(b==null){STRICT=true;} 51 else if(b.equals("1") || b.startsWith("t")){STRICT=true;} 52 else if(b.equals("0") || b.startsWith("f")){STRICT=false;} 53 else{throw new RuntimeException("Unknown parameter "+args[i]);} 54 }else if(a.equals("genome") || a.equals("build")){ 55 Data.setGenome(Integer.parseInt(b)); 56 if(minChrom==-1){minChrom=1;} 57 if(maxChrom==-1){maxChrom=Data.numChroms;} 58 }else if(a.equals("minchrom")){ 59 minChrom=Integer.parseInt(b); 60 }else if(a.equals("maxchrom")){ 61 maxChrom=Integer.parseInt(b); 62 }else if(a.equals("threads") || a.equals("t")){ 63 THREADS=Integer.parseInt(b); 64 }else if(a.equals("minreads")){ 65 MIN_READS_TO_KEEP=Integer.parseInt(b); 66 }else if(a.equals("blocksize")){ 67 GenerateVarlets2.BLOCKSIZE=(Integer.parseInt(b)); 68 }else if(a.equals("deletefiles") || a.startsWith("deletetemp") || a.startsWith("deleteinput") || a.equals("delete")){ 69 DELETE_INPUT=(Parse.parseBoolean(b)); 70 }else{ 71 throw new RuntimeException("Unknown parameter "+args[i]); 72 } 73 } 74 75 assert(minChrom>=0 && maxChrom>=minChrom) : "Please set minchrom and maxchrom."; 76 if(Data.GENOME_BUILD<0){throw new RuntimeException("Please set genome number.");} 77 THREADS=Tools.max(1, THREADS); 78 79 // for(byte i=minChrom; i<=maxChrom; i++){ 80 // String fname1=inPattern.replace("#", i+""); 81 // String fname2=outPattern.replace("#", i+""); 82 // assert(new File(fname1).exists()); 83 // assert(!new File(fname2).exists()); 84 // processFile(fname1, fname2, filter); 85 // } 86 87 runThreaded(inPattern, outPattern, minChrom, maxChrom, filter); 88 89 t.stop(); 90 System.out.println("Input Vars: \t"+(totalIn_global-totalInNR_global)); 91 System.out.println("Input No-ref: \t"+totalInNR_global); 92 System.out.println("Input Delta Length:\t"+deltaLenIn_global); 93 System.out.println(); 94 System.out.println("Kept Vars: \t"+(totalKept_global-totalKeptNR_global)); 95 System.out.println("Kept No-ref: \t"+totalKeptNR_global); 96 System.out.println("Kept Snp: \t"+snpKept_global); 97 System.out.println("Kept Del: \t"+delKept_global+"\t\tLength: \t"+delLenKept_global); 98 System.out.println("Kept Ins: \t"+insKept_global+"\t\tLength: \t"+insLenKept_global); 99 System.out.println("Kept Sub: \t"+subKept_global+"\t\tLength: \t"+subLenKept_global); 100 System.out.println("Kept Delta Length: \t"+deltaLenKept_global); 101 System.out.println("Kept Avg Score: \t"+(scoreKept_global/(Tools.max(1, totalKept_global)))); 102 System.out.println(); 103 System.out.println("Dropped Vars: \t"+(totalDropped_global-totalDroppedNR_global)); 104 System.out.println("Dropped No-ref: \t"+totalDroppedNR_global); 105 System.out.println("Dropped Avg Score: \t"+(scoreDropped_global/Tools.max(1, totalDropped_global))); 106 System.out.println(); 107 System.out.println("Time: \t"+t); 108 } 109 runThreaded(String inPattern, String outPattern, int minChrom, int maxChrom, boolean filter)110 public static final void runThreaded(String inPattern, String outPattern, int minChrom, int maxChrom, boolean filter){ 111 ArrayList<SVThread> svts=new ArrayList<SVThread>(); 112 for(int i=minChrom; i<=maxChrom; i++){ 113 assert(inPattern==null || !inPattern.equalsIgnoreCase(outPattern)); 114 String fname1=inPattern; 115 String fname2=outPattern.replace("#", i+""); 116 addThread(1); 117 SVThread svt=new SVThread(fname1, fname2, i, filter); 118 svts.add(svt); 119 new Thread(svt).start(); 120 } 121 while(addThread(0)>0){} 122 for(SVThread svt : svts){ 123 124 snpKept_global+=svt.snpKept; 125 delKept_global+=svt.delKept; 126 insKept_global+=svt.insKept; 127 subKept_global+=svt.subKept; 128 delLenKept_global+=svt.delLenKept; 129 insLenKept_global+=svt.insLenKept; 130 subLenKept_global+=svt.subLenKept; 131 deltaLenKept_global+=svt.deltaLenKept; 132 133 deltaLenIn_global+=svt.deltaLenIn; 134 totalIn_global+=svt.totalIn; 135 totalInNR_global+=svt.totalInNR; 136 totalKept_global+=svt.totalKept; 137 totalDropped_global+=svt.totalDropped; 138 totalKeptNR_global+=svt.totalKeptNR; 139 totalDroppedNR_global+=svt.totalDroppedNR; 140 scoreKept_global+=svt.scoreKept; 141 scoreDropped_global+=svt.scoreDropped; 142 } 143 } 144 145 passesFilterSNP(Varlet v)146 public static boolean passesFilterSNP(Varlet v){ 147 148 149 //Best so far: 150 151 if(STRICT){ 152 153 if(v.endDist<3){return false;} 154 if(v.tailDist<10){return false;} 155 156 //NOTE! Last thing I did was make this more strict by adding 1 to all the num reads/unique reads required. 157 if(v.minStrandReads()>=2){ 158 159 if(v.errors>2){return false;} 160 if(v.expectedErrors>1.5f){return false;} 161 // if(v.expectedErrors-v.errors>3f){return false;} 162 if(v.maxReadQuality()<18){return false;} 163 if(v.avgReadQuality()<13){return false;} 164 if(v.maxVarQuality()<26){return false;} 165 if(v.avgVarQuality()<18){return false;} 166 if(v.numReads<4){return false;} 167 if(v.numSemiUniqueReads<4){return false;} 168 if(v.numUniqueReads<2){return false;} 169 if(v.paired<3){return false;} 170 171 }else if(v.minStrandReads()>=1){ 172 173 if(v.errors>2){return false;} 174 if(v.expectedErrors>1.2f){return false;} 175 // if(v.expectedErrors-v.errors>3f){return false;} 176 if(v.maxReadQuality()<19){return false;} 177 if(v.avgReadQuality()<14){return false;} 178 if(v.maxVarQuality()<28){return false;} 179 if(v.avgVarQuality()<19){return false;} 180 if(v.numReads<3){return false;} 181 if(v.numSemiUniqueReads<3){return false;} 182 if(v.numUniqueReads<2){return false;} 183 if(v.paired<3){return false;} 184 185 }else{ 186 if(v.endDist<8){return false;} 187 if(v.tailDist<14){return false;} 188 189 if(v.errors>0){return false;} 190 if(v.expectedErrors>0.5f){return false;} 191 // if(v.expectedErrors-v.errors>2f){return false;} 192 if(v.maxReadQuality()<21){return false;} 193 if(v.avgReadQuality()<17){return false;} 194 if(v.maxVarQuality()<30){return false;} 195 if(v.avgVarQuality()<21){return false;} 196 if(v.numReads<6){return false;} 197 if(v.numSemiUniqueReads<5){return false;} 198 if(v.numUniqueReads<3){return false;} 199 if(v.paired<5){return false;} 200 if(v.score()<8100){return false;} 201 } 202 203 // else{ 204 // if(v.endDist<8){return false;} 205 // if(v.tailDist<14){return false;} 206 // 207 // if(v.errors>0){return false;} 208 // if(v.expectedErrors>0.5f){return false;} 209 //// if(v.expectedErrors-v.errors>2f){return false;} 210 // if(v.maxReadQuality()<21){return false;} 211 // if(v.avgReadQuality()<17){return false;} 212 // if(v.maxVarQuality()<30){return false;} 213 // if(v.avgVarQuality()<21){return false;} 214 // if(v.numReads<5){return false;} 215 // if(v.numSemiUniqueReads<4){return false;} 216 // if(v.numUniqueReads<2){return false;} 217 // if(v.paired<4){return false;} 218 // if(v.score()<8100){return false;} 219 // } 220 221 }else{ 222 223 assert(false) : "disabled"; 224 225 } 226 227 228 229 return true; 230 } 231 passesFilterOther(Varlet v)232 public static boolean passesFilterOther(Varlet v){ 233 234 235 236 if(v.endDist<3){return false;} 237 if(v.tailDist<10){return false;} 238 239 //NOTE! Last thing I did was make this more strict by adding 1 to all the num reads/unique reads required. 240 if(v.minStrandReads()>=2){ 241 242 if(v.errors>2){return false;} 243 if(v.expectedErrors>1.5f){return false;} 244 // if(v.expectedErrors-v.errors>3f){return false;} 245 if(v.maxReadQuality()<16){return false;} 246 if(v.avgReadQuality()<12){return false;} 247 if(v.maxVarQuality()<26){return false;} 248 if(v.avgVarQuality()<16){return false;} 249 if(v.numReads<4){return false;} 250 if(v.numSemiUniqueReads<4){return false;} 251 if(v.numUniqueReads<2){return false;} 252 if(v.paired<3){return false;} 253 254 }else if(v.minStrandReads()>=1){ 255 256 if(v.errors>2){return false;} 257 if(v.expectedErrors>1.2f){return false;} 258 // if(v.expectedErrors-v.errors>3f){return false;} 259 if(v.maxReadQuality()<17){return false;} 260 if(v.avgReadQuality()<13){return false;} 261 if(v.maxVarQuality()<28){return false;} 262 if(v.avgVarQuality()<17){return false;} 263 if(v.numReads<4){return false;} 264 if(v.numSemiUniqueReads<4){return false;} 265 if(v.numUniqueReads<2){return false;} 266 if(v.paired<3){return false;} 267 268 }else{ 269 if(v.endDist<8){return false;} 270 if(v.tailDist<14){return false;} 271 272 if(v.errors>0){return false;} 273 if(v.expectedErrors>0.5f){return false;} 274 // if(v.expectedErrors-v.errors>2f){return false;} 275 if(v.maxReadQuality()<20){return false;} 276 if(v.avgReadQuality()<16){return false;} 277 if(v.maxVarQuality()<30){return false;} 278 if(v.avgVarQuality()<20){return false;} 279 if(v.numReads<6){return false;} 280 if(v.numSemiUniqueReads<5){return false;} 281 if(v.numUniqueReads<3){return false;} 282 if(v.paired<5){return false;} 283 if(v.score()<6500){return false;} 284 } 285 286 287 288 289 290 return true; 291 } 292 293 mergeAll(ArrayList<Varlet> vars)294 public static ArrayList<Varlet> mergeAll(ArrayList<Varlet> vars){ 295 if(vars==null || vars.size()==0){return null;} 296 ArrayList<Varlet> out=new ArrayList<Varlet>(8+vars.size()/16); 297 Shared.sort(vars); 298 299 ArrayList<Varlet> temp=new ArrayList<Varlet>(64); 300 for(int i=0; i<vars.size(); i++){ 301 // while(vars.get(i).beginLoc<3746582){i++;} 302 Varlet v=vars.get(i); 303 // System.err.println("Grabbed "+v.beginLoc+" ~ "+v.call); 304 if(temp.isEmpty()){ 305 // System.err.println("Adding "+v.beginLoc+" ~ "+v.call); 306 temp.add(v); 307 }else{ 308 if(v.equals(temp.get(0))){ 309 temp.add(v); 310 // System.err.println("Adding "+v.beginLoc+" ~ "+v.call); 311 }else{ 312 // System.err.println("Merging "+temp.size()+" x "+v.beginLoc+" ~ "+v.call); 313 Varlet result=mergeEqualVarlets(temp); 314 if(result.numReads>MIN_READS_TO_KEEP){ 315 out.add(result); 316 }else if(result.numReads==MIN_READS_TO_KEEP){ 317 if(result.maxVarQuality()>=MIN_QUALITY_AT_MIN_READS && 318 result.errors<=MAX_ERRORS_AT_MIN_READS && 319 result.expectedErrors<=MAX_EXPECTED_ERRORS_AT_MIN_READS && 320 (result.paired>0 || !REQUIRE_PAIRED_AT_MIN_READS)){ 321 out.add(result); 322 } 323 } 324 temp.clear(); 325 temp.add(v); 326 } 327 } 328 329 330 } 331 332 if(!temp.isEmpty()){ 333 if(temp.size()>=MIN_READS_TO_KEEP){ 334 Varlet result=mergeEqualVarlets(temp); 335 out.add(result); 336 } 337 temp.clear(); 338 } 339 340 {//For testing 341 Shared.sort(out); //Should already be sorted... 342 for(int i=1; i<out.size(); i++){ 343 assert(!out.get(i).equals(out.get(i-1))); 344 } 345 } 346 347 348 if(verbose){System.err.println("out.size="+out.size());} 349 350 return out; 351 } 352 353 mergeEqualVarlets(ArrayList<Varlet> vars)354 public static Varlet mergeEqualVarlets(ArrayList<Varlet> vars){ 355 356 // System.err.println("Merging "+vars.size()+" vars."); 357 358 if(vars.size()==1){return vars.get(0);} 359 360 HashMap<Integer, ArrayList<Varlet>> plus=new HashMap<Integer, ArrayList<Varlet>>(Tools.min(8, vars.size())); 361 HashMap<Integer, ArrayList<Varlet>> minus=new HashMap<Integer, ArrayList<Varlet>>(Tools.min(8, vars.size())); 362 363 int numReads=0; 364 int numSemiUniqueReads=0; 365 int numUniqueReads=0; 366 int pairedReads=0; 367 int plusReads1=0; 368 int minusReads1=0; 369 int plusReads2=0; 370 int minusReads2=0; 371 372 int totalQuality=0; 373 int totalVarQuality=0; 374 375 int maxReadQuality=0; 376 int maxVarQuality=0; 377 378 int maxMapScore=0; 379 int bestLen=0; 380 int minReadStart=Integer.MAX_VALUE; 381 int maxReadStop=-999999; 382 383 int maxHeadDist=-1; 384 int maxTailDist=-1; 385 int maxEndDist=-1; 386 387 Varlet bestVar=null; 388 389 int minErrors=999; 390 float minExpectedErrors=999f; 391 392 for(Varlet v : vars){ 393 394 numReads+=v.numReads; 395 numSemiUniqueReads+=v.numSemiUniqueReads; 396 plusReads1+=v.numPlusReads1; 397 minusReads1+=v.numMinusReads1; 398 plusReads2+=v.numPlusReads2; 399 minusReads2+=v.numMinusReads2; 400 401 if(v.errors<minErrors || (v.errors<=minErrors && v.maxReadQuality()>maxReadQuality)){ 402 bestVar=v; 403 } 404 405 totalQuality+=v.avgReadQuality()*v.numReads; 406 maxReadQuality=Tools.max(maxReadQuality, v.maxReadQuality()); 407 408 totalVarQuality+=v.avgVarQuality()*v.numReads; 409 maxVarQuality=Tools.max(maxVarQuality, v.maxVarQuality()); 410 411 if(bestLen==0 || (v.mapScore>=maxMapScore && v.readLen>=bestLen)){ 412 bestLen=v.readLen; 413 } 414 415 maxHeadDist=Tools.max(maxHeadDist, v.headDist); 416 maxTailDist=Tools.max(maxTailDist, v.tailDist); 417 maxEndDist=Tools.max(maxEndDist, v.endDist); 418 419 minErrors=Tools.min(minErrors, v.errors); 420 minExpectedErrors=Tools.min(minExpectedErrors, v.expectedErrors); 421 maxMapScore=Tools.max(maxMapScore, v.mapScore); 422 minReadStart=Tools.min(minReadStart, v.readStart); 423 maxReadStop=Tools.max(maxReadStop, v.readStop); 424 assert(minReadStart<maxReadStop) : "\n"+minReadStart+"\n"+maxReadStop+"\n"+v.toText(); 425 426 pairedReads+=v.paired; 427 428 if(v.strand==Shared.PLUS){ 429 ArrayList<Varlet> value=plus.get(v.readStart); 430 if(value==null){ 431 numUniqueReads++; 432 value=new ArrayList<Varlet>(2); 433 plus.put(v.readStart, value); 434 } 435 value.add(v); 436 }else{ 437 ArrayList<Varlet> value=minus.get(v.readStop); 438 if(value==null){ 439 numUniqueReads++; 440 value=new ArrayList<Varlet>(2); 441 minus.put(v.readStop, value); 442 } 443 value.add(v); 444 } 445 } 446 447 // byte plusReads=(byte) ((plus.isEmpty() ? 0 : 1)+(minus.isEmpty() ? 0 : 1)); 448 449 float avgVarQuality=totalVarQuality/(float)numReads; 450 float avgReadQuality=totalQuality/(float)numReads; 451 452 int netQuality=(int)Math.ceil((avgVarQuality+maxVarQuality)/2); 453 int netReadQuality=(int)Math.ceil((avgReadQuality+maxReadQuality)/2); 454 455 Varlet v=new Varlet(bestVar.chromosome, ((plusReads1+plusReads2>0) && (minusReads1+minusReads2>0) ? Shared.PLUS : bestVar.strand), 456 bestVar.beginLoc, bestVar.endLoc, bestVar.matchStart, bestVar.matchStop, bestVar.varType, bestVar.ref, bestVar.call, 457 netQuality, netReadQuality, maxMapScore, minErrors, minExpectedErrors, pairedReads, bestVar.readID, bestLen, 458 minReadStart, maxReadStop, numReads, maxHeadDist, maxTailDist, maxEndDist, bestVar.pairNum()); 459 460 461 v.setMaxReadQuality(maxReadQuality); 462 v.setMaxVarQuality(maxVarQuality); 463 v.setAvgReadQuality((int)Math.ceil(avgReadQuality)); 464 v.setAvgVarQuality((int)Math.ceil(avgVarQuality)); 465 466 v.numSemiUniqueReads=numSemiUniqueReads; 467 v.numUniqueReads=numUniqueReads; 468 v.numPlusReads1=plusReads1; 469 v.numMinusReads1=minusReads1; 470 v.numPlusReads2=plusReads2; 471 v.numMinusReads2=minusReads2; 472 assert(plusReads1+minusReads1+plusReads2+minusReads2==numSemiUniqueReads); 473 474 assert(v.numReads>=v.numSemiUniqueReads); 475 assert(v.numSemiUniqueReads>=v.numUniqueReads); 476 477 //This assertion is only correct if stacking is done from raw, uncombined varlets. 478 assert(v.numSemiUniqueReads==vars.size()) : "\n"+vars.size()+", "+v.numReads+", "+v.numSemiUniqueReads+", "+v.numUniqueReads 479 +"\n"+v.toText(); 480 481 assert(v.numUniqueReads<=v.numReads && v.numUniqueReads>0); 482 assert(v.numUniqueReads==plus.size()+minus.size()) : "numUniqueReads="+numUniqueReads+ 483 ", v.numUniqueReads="+v.numUniqueReads+", v.numReads="+v.numReads 484 +", plus.size()="+plus.size()+", minus.size()="+minus.size()+"\n"+vars+"\n"; 485 486 return v; 487 } 488 489 490 private static class SVThread implements Runnable { 491 SVThread(String fname1_, String fname2_, final int chrom_, boolean filter_)492 public SVThread(String fname1_, String fname2_, final int chrom_, boolean filter_){ 493 fname1=fname1_; 494 fname2=fname2_; 495 filter=filter_; 496 chrom=chrom_; 497 } 498 499 @Override run()500 public void run() { 501 // addThread(1); 502 assert(activeThreads>0); 503 processFile(fname1, fname2); 504 addThread(-1); 505 } 506 processFile(final String inName, final String outName)507 private final void processFile(final String inName, final String outName){ 508 509 final long[] keys=GenerateVarlets2.keys(chrom); 510 final TextStreamWriter tsw=(inName==null ? null : new TextStreamWriter(outName, true, false, false)); 511 if(tsw!=null){ 512 tsw.start(); 513 tsw.println(Varlet.textHeader()); 514 } 515 516 for(final long key : keys){ 517 String blockname=GenerateVarlets2.fname(key, inName); 518 519 ArrayList<Varlet> initial=Varlet.fromTextFile(blockname); 520 521 for(Varlet v : initial){ 522 if(v.varType==Variation.NOREF){totalInNR++;} 523 totalIn++; 524 525 int dif=v.lengthDif(); 526 deltaLenIn+=dif; 527 } 528 529 if(verbose){System.err.println("Initial: \t"+initial.size());} 530 531 int merged=mergeAll2(initial, tsw); 532 533 initial=null; 534 if(verbose){System.err.println("Merged: \t"+merged);} 535 536 } 537 538 if(tsw!=null){ 539 tsw.poison(); 540 if(DELETE_INPUT){ 541 for(int i=0; i<10 && tsw.isAlive(); i++){ 542 try { 543 tsw.join(10000); 544 } catch (InterruptedException e) { 545 // TODO Auto-generated catch block 546 e.printStackTrace(); 547 } 548 } 549 if(tsw.isAlive()){ 550 System.err.println(tsw.getClass().getName()+" for "+outName+" refused to die."); 551 assert(false); 552 } 553 } 554 } 555 556 if(DELETE_INPUT){ 557 for(final long key : keys){ 558 String blockname=GenerateVarlets2.fname(key, inName); 559 // System.out.println("Deleting "+blockname); 560 new File(blockname).delete(); 561 } 562 } 563 } 564 565 566 567 568 mergeAll2(ArrayList<Varlet> vars, TextStreamWriter tsw)569 private final int mergeAll2(ArrayList<Varlet> vars, TextStreamWriter tsw){ 570 if(vars==null || vars.size()==0){return 0;} 571 572 Shared.sort(vars); 573 int out=0; 574 575 ArrayList<Varlet> temp=new ArrayList<Varlet>(64); 576 for(int i=0; i<vars.size(); i++){ 577 // while(vars.get(i).beginLoc<3746582){i++;} 578 // Varlet v=vars.get(i); 579 final Varlet v=vars.set(i, null); 580 // System.err.println("Grabbed "+v.beginLoc+" ~ "+v.call); 581 if(temp.isEmpty()){ 582 // System.err.println("Adding "+v.beginLoc+" ~ "+v.call); 583 temp.add(v); 584 }else{ 585 if(v.equals(temp.get(0))){ 586 temp.add(v); 587 // System.err.println("Adding "+v.beginLoc+" ~ "+v.call); 588 }else{ 589 // System.err.println("Merging "+temp.size()+" x "+v.beginLoc+" ~ "+v.call); 590 Varlet result=mergeEqualVarlets(temp); 591 592 processMergedVar(result, tsw); 593 out++; 594 595 temp.clear(); 596 temp.add(v); 597 } 598 } 599 } 600 601 if(!temp.isEmpty()){ 602 if(temp.size()>=MIN_READS_TO_KEEP){ 603 Varlet result=mergeEqualVarlets(temp); 604 out++; 605 processMergedVar(result, tsw); 606 } 607 temp.clear(); 608 } 609 610 return out; 611 } 612 613 processMergedVar(Varlet v, TextStreamWriter tsw)614 private final boolean processMergedVar(Varlet v, TextStreamWriter tsw){ 615 616 if(v==null){return false;} 617 if(v.numReads<MIN_READS_TO_KEEP){return false;} 618 if(v.numReads==MIN_READS_TO_KEEP){ 619 if(v.maxVarQuality()<MIN_QUALITY_AT_MIN_READS || 620 v.errors<=MAX_ERRORS_AT_MIN_READS || 621 v.expectedErrors<=MAX_EXPECTED_ERRORS_AT_MIN_READS || 622 (v.paired<1 && REQUIRE_PAIRED_AT_MIN_READS)){ 623 return false; 624 } 625 } 626 627 boolean keep; 628 629 if(filter){ 630 keep=filterLight(v); 631 }else{ 632 keep=true; 633 totalKept++; 634 scoreKept+=v.score(); 635 } 636 637 if(keep){ 638 StringBuilder sb=v.toText(); 639 sb.append('\n'); 640 tsw.print(sb); 641 } 642 return keep; 643 } 644 645 filterLight(Varlet v)646 private final boolean filterLight(Varlet v){ 647 int dropped=0; 648 649 int dif=v.lengthDif(); 650 // deltaLenIn+=dif; 651 652 boolean passes=true; 653 if(v.varType==Variation.NOCALL){ 654 passes=false; 655 }else if(v.numSemiUniqueReads<2){ 656 passes=false; 657 }else if(v.endDist<6 || v.tailDist<10){ 658 passes=false; 659 }else if(v.maxVarQuality()<24){ 660 passes=false; 661 }else if(v.expectedErrors>2){ 662 passes=false; 663 } 664 665 if(passes && STRICT){ 666 passes=passesFilterLight(v); 667 } 668 669 if(passes){ 670 if(v.varType==Variation.NOREF){totalKeptNR++;} 671 else if(v.varType==Variation.SNP){snpKept++;} 672 else if(v.varType==Variation.DEL){ 673 delKept++; 674 // delLenKept-=v.lengthRef(); 675 delLenKept+=dif; 676 } 677 else if(v.varType==Variation.INS){ 678 insKept++; 679 // insLenKept+=v.lengthVar(); 680 insLenKept+=dif; 681 } 682 else if(v.varType==Variation.DELINS){ 683 subKept++; 684 // subLenKept+=(v.lengthRef()-v.lengthVar()); 685 subLenKept+=dif; 686 } 687 totalKept++; 688 scoreKept+=v.score(); 689 deltaLenKept+=dif; 690 }else{ 691 if(v.varType==Variation.NOREF){totalDroppedNR++;} 692 dropped++; 693 scoreDropped+=v.score(); 694 } 695 696 totalDropped+=dropped; 697 return passes; 698 } 699 passesFilterLight(Varlet v)700 private static boolean passesFilterLight(Varlet v){ 701 if(v.endDist<4){return false;} 702 if(v.tailDist<10){return false;} 703 704 //NOTE! Last thing I did was make this more strict by adding 1 to all the num reads/unique reads required. 705 if(v.minStrandReads()>=2){ 706 707 if(v.errors>2){return false;} 708 if(v.expectedErrors>1.4f){return false;} 709 // if(v.expectedErrors-v.errors>3f){return false;} 710 if(v.maxReadQuality()<17){return false;} 711 if(v.avgReadQuality()<13){return false;} 712 if(v.maxVarQuality()<26){return false;} 713 if(v.avgVarQuality()<17){return false;} 714 if(v.numReads<3){return false;} 715 if(v.numSemiUniqueReads<3){return false;} 716 if(v.numUniqueReads<2){return false;} 717 // if(v.paired<3){return false;} 718 if(v.score()<8200){return false;} 719 720 }else if(v.minStrandReads()>=1){ 721 if(v.endDist<7){return false;} 722 if(v.tailDist<12){return false;} 723 724 if(v.errors>2){return false;} 725 if(v.expectedErrors>1.1f){return false;} 726 // if(v.expectedErrors-v.errors>3f){return false;} 727 if(v.maxReadQuality()<18){return false;} 728 if(v.avgReadQuality()<14){return false;} 729 if(v.maxVarQuality()<28){return false;} 730 if(v.avgVarQuality()<18){return false;} 731 if(v.numReads<4){return false;} 732 if(v.numSemiUniqueReads<3){return false;} 733 if(v.numUniqueReads<2){return false;} 734 // if(v.paired<3){return false;} 735 if(v.score()<8020){return false;} 736 }else{ 737 if(v.endDist<8){return false;} 738 if(v.tailDist<14){return false;} 739 740 if(v.errors>0){return false;} 741 if(v.expectedErrors>0.5f){return false;} 742 // if(v.expectedErrors-v.errors>2f){return false;} 743 if(v.maxReadQuality()<21){return false;} 744 if(v.avgReadQuality()<17){return false;} 745 if(v.maxVarQuality()<30){return false;} 746 if(v.avgVarQuality()<21){return false;} 747 if(v.numReads<6){return false;} 748 if(v.numSemiUniqueReads<5){return false;} 749 if(v.numUniqueReads<3){return false;} 750 // if(v.paired<5){return false;} 751 if(v.score()<7670){return false;} 752 } 753 return true; 754 } 755 756 private long deltaLenKept=0; 757 private long snpKept=0; 758 private long delKept=0; 759 private long insKept=0; 760 private long subKept=0; 761 private long delLenKept=0; 762 private long insLenKept=0; 763 private long subLenKept=0; 764 765 private long deltaLenIn=0; 766 private long totalIn=0; 767 private long totalInNR=0; 768 769 private long totalKept=0; 770 private long totalKeptNR=0; 771 private long totalDropped=0; 772 private long totalDroppedNR=0; 773 private long scoreKept=0; 774 private long scoreDropped=0; 775 776 private final String fname1; 777 private final String fname2; 778 private final boolean filter; 779 private final int chrom; 780 } 781 addThread(int x)782 private static int addThread(int x){ 783 synchronized(THREADLOCK){ 784 while(x>0 && activeThreads>=THREADS){ 785 try { 786 THREADLOCK.wait(200); 787 } catch (InterruptedException e) { 788 // TODO Auto-generated catch block 789 e.printStackTrace(); 790 } 791 } 792 activeThreads+=x; 793 return activeThreads; 794 } 795 } 796 797 798 public static long deltaLenKept_global=0; 799 public static long deltaLenIn_global=0; 800 801 public static long snpKept_global=0; 802 public static long delKept_global=0; 803 public static long insKept_global=0; 804 public static long subKept_global=0; 805 public static long delLenKept_global=0; 806 public static long insLenKept_global=0; 807 public static long subLenKept_global=0; 808 809 public static long totalIn_global=0; 810 public static long totalInNR_global=0; 811 public static long totalKept_global=0; 812 public static long totalDropped_global=0; 813 public static long totalKeptNR_global=0; 814 public static long totalDroppedNR_global=0; 815 public static long scoreKept_global=0; 816 public static long scoreDropped_global=0; 817 818 private static int activeThreads=0; 819 820 private static final String THREADLOCK=new String("THREADLOCK"); 821 private static int THREADS=7; 822 private static boolean DELETE_INPUT=false; 823 public static int MIN_READS_TO_KEEP=1; 824 public static final int MIN_QUALITY_AT_MIN_READS=14; 825 public static final int MAX_ERRORS_AT_MIN_READS=2; 826 public static final int MAX_EXPECTED_ERRORS_AT_MIN_READS=4; 827 public static final boolean REQUIRE_PAIRED_AT_MIN_READS=false; 828 public static boolean STRICT=false; 829 public static boolean VSTRICT=false; 830 public static boolean USTRICT=false; 831 832 public static final boolean verbose=false; 833 } 834