1 package pacbio; 2 3 import java.util.ArrayList; 4 import java.util.Locale; 5 6 import aligner.MultiStateAligner9PacBioAdapter; 7 import aligner.MultiStateAligner9PacBioAdapter2; 8 import dna.AminoAcid; 9 import dna.Data; 10 import fileIO.FileFormat; 11 import fileIO.ReadWrite; 12 import shared.KillSwitch; 13 import shared.Parse; 14 import shared.Parser; 15 import shared.PreParser; 16 import shared.ReadStats; 17 import shared.Shared; 18 import shared.Timer; 19 import shared.Tools; 20 import stream.ConcurrentReadInputStream; 21 import stream.ConcurrentReadOutputStream; 22 import stream.FastaReadInputStream; 23 import stream.Read; 24 import structures.ListNum; 25 26 /** 27 * Increased sensitivity to nearby adapters. 28 * @author Brian Bushnell 29 * @date Nov 5, 2012 30 * 31 */ 32 public class RemoveAdapters2 { 33 main(String[] args)34 public static void main(String[] args){ 35 {//Preparse block for help, config files, and outstream 36 PreParser pp=new PreParser(args, new Object() { }.getClass().getEnclosingClass(), false); 37 args=pp.args; 38 //outstream=pp.outstream; 39 } 40 41 boolean verbose=false; 42 43 String in1=null; 44 String in2=null; 45 long maxReads=-1; 46 47 String outname1=null; 48 String outname2=null; 49 50 String query=pacbioAdapter; 51 Shared.capBufferLen(20); 52 53 boolean splitReads=true; 54 55 for(int i=0; i<args.length; i++){ 56 final String arg=args[i]; 57 final String[] split=arg.split("="); 58 String a=split[0].toLowerCase(); 59 String b=split.length>1 ? split[1] : null; 60 61 if(Parser.parseCommonStatic(arg, a, b)){ 62 //do nothing 63 }else if(Parser.parseZip(arg, a, b)){ 64 //do nothing 65 }else if(Parser.parseQuality(arg, a, b)){ 66 //do nothing 67 }else if(Parser.parseFasta(arg, a, b)){ 68 //do nothing 69 }else if(a.equals("path") || a.equals("root") || a.equals("tempdir")){ 70 Data.setPath(b); 71 }else if(a.equals("usealtmsa")){ 72 USE_ALT_MSA=Parse.parseBoolean(b); 73 }else if(a.equals("fasta") || a.equals("in") || a.equals("input") || a.equals("in1") || a.equals("input1")){ 74 in1=b; 75 if(b.indexOf('#')>-1){ 76 in1=b.replace("#", "1"); 77 in2=b.replace("#", "2"); 78 } 79 }else if(a.equals("in2") || a.equals("input2")){ 80 in2=b; 81 }else if(a.equals("query") || a.equals("adapter")){ 82 query=b; 83 }else if(a.equals("split")){ 84 splitReads=Parse.parseBoolean(b); 85 }else if(a.equals("plusonly")){ 86 boolean x=Parse.parseBoolean(b); 87 if(x){TRY_PLUS=true; TRY_MINUS=false;} 88 }else if(a.equals("minusonly")){ 89 boolean x=Parse.parseBoolean(b); 90 if(x){TRY_PLUS=false; TRY_MINUS=true;} 91 }else if(a.startsWith("mincontig")){ 92 minContig=Integer.parseInt(b); 93 }else if(a.equals("append") || a.equals("app")){ 94 append=ReadStats.append=Parse.parseBoolean(b); 95 }else if(a.equals("overwrite") || a.equals("ow")){ 96 overwrite=Parse.parseBoolean(b); 97 System.out.println("Set overwrite to "+overwrite); 98 }else if(a.equals("threads") || a.equals("t")){ 99 if(b.equalsIgnoreCase("auto")){THREADS=Shared.LOGICAL_PROCESSORS;} 100 else{THREADS=Integer.parseInt(b);} 101 System.out.println("Set threads to "+THREADS); 102 }else if(a.equals("reads") || a.equals("maxreads")){ 103 maxReads=Parse.parseKMG(b); 104 }else if(a.startsWith("outname") || a.startsWith("outfile") || a.equals("out")){ 105 if(b==null || b.equalsIgnoreCase("null") || b.equalsIgnoreCase("none") || split.length==1){ 106 System.out.println("No output file."); 107 outname1=null; 108 OUTPUT_READS=false; 109 }else{ 110 OUTPUT_READS=true; 111 if(b.indexOf('#')>-1){ 112 outname1=b.replace('#', '1'); 113 outname2=b.replace('#', '2'); 114 }else{ 115 outname1=b; 116 } 117 } 118 }else if(a.equals("minratio")){ 119 MINIMUM_ALIGNMENT_SCORE_RATIO=Float.parseFloat(b); 120 System.out.println("Set MINIMUM_ALIGNMENT_SCORE_RATIO to "+MINIMUM_ALIGNMENT_SCORE_RATIO); 121 }else if(a.equals("suspectratio")){ 122 SUSPECT_RATIO=Float.parseFloat(b); 123 }else if(a.startsWith("verbose")){ 124 verbose=Parse.parseBoolean(b); 125 }else{ 126 throw new RuntimeException("Unknown parameter: "+args[i]); 127 } 128 } 129 130 {//Process parser fields 131 Parser.processQuality(); 132 } 133 134 assert(FastaReadInputStream.settingsOK()); 135 if(in1==null){throw new RuntimeException("Please specify input file.");} 136 137 138 final ConcurrentReadInputStream cris; 139 { 140 FileFormat ff1=FileFormat.testInput(in1, FileFormat.FASTQ, null, true, true); 141 FileFormat ff2=FileFormat.testInput(in2, FileFormat.FASTQ, null, true, true); 142 cris=ConcurrentReadInputStream.getReadInputStream(maxReads, true, ff1, ff2); 143 // if(verbose){System.err.println("Started cris");} 144 // cris.start(); //4567 145 // th.start(); 146 } 147 boolean paired=cris.paired(); 148 if(verbose){System.err.println("Paired: "+paired);} 149 150 ConcurrentReadOutputStream ros=null; 151 if(OUTPUT_READS){ 152 final int buff=(!ordered ? THREADS : Tools.max(24, 2*THREADS)); 153 154 FileFormat ff1=FileFormat.testOutput(outname1, FileFormat.FASTQ, null, true, overwrite, append, ordered); 155 FileFormat ff2=FileFormat.testOutput(outname2, FileFormat.FASTQ, null, true, overwrite, append, ordered); 156 ros=ConcurrentReadOutputStream.getStream(ff1, ff2, buff, null, true); 157 } 158 process(cris, ros, query, splitReads); 159 } 160 process(ConcurrentReadInputStream cris, ConcurrentReadOutputStream ros, String query, boolean split)161 public static void process(ConcurrentReadInputStream cris, ConcurrentReadOutputStream ros, String query, boolean split){ 162 163 Timer t=new Timer(); 164 165 cris.start(); //4567 166 167 System.out.println("Started read stream."); 168 169 170 if(ros!=null){ 171 ros.start(); 172 System.out.println("Started output threads."); 173 } 174 ProcessThread[] pts=new ProcessThread[THREADS]; 175 for(int i=0; i<pts.length; i++){ 176 pts[i]=new ProcessThread(cris, ros, MINIMUM_ALIGNMENT_SCORE_RATIO, query, split); 177 pts[i].start(); 178 } 179 System.out.println("Started "+pts.length+" processing thread"+(pts.length==1 ? "" : "s")+"."); 180 181 for(int i=0; i<pts.length; i++){ 182 ProcessThread pt=pts[i]; 183 synchronized(pt){ 184 while(pt.getState()!=Thread.State.TERMINATED){ 185 try { 186 pt.join(); 187 } catch (InterruptedException e) { 188 // TODO Auto-generated catch block 189 e.printStackTrace(); 190 } 191 } 192 } 193 if(i==0){ 194 System.out.print("Detecting finished threads: 0"); 195 }else{ 196 System.out.print(", "+i); 197 } 198 } 199 System.out.println('\n'); 200 ReadWrite.closeStreams(cris, ros); 201 202 printStatistics(pts); 203 204 t.stop(); 205 System.out.println("Time: \t"+t); 206 207 } 208 printStatistics(ProcessThread[] pts)209 public static void printStatistics(ProcessThread[] pts){ 210 211 long plusAdaptersFound=0; 212 long minusAdaptersFound=0; 213 long goodReadsFound=0; 214 long badReadsFound=0; 215 216 long truepositive=0; 217 long truenegative=0; 218 long falsepositive=0; 219 long falsenegative=0; 220 long expected=0; 221 long unexpected=0; 222 long basesIn=0; 223 long basesOut=0; 224 long readsOut=0; 225 226 for(ProcessThread pt : pts){ 227 plusAdaptersFound+=pt.plusAdaptersFound; 228 minusAdaptersFound+=pt.minusAdaptersFound; 229 goodReadsFound+=pt.goodReadsFound; 230 badReadsFound+=pt.badReadsFound; 231 basesIn+=pt.basesIn; 232 basesOut+=pt.basesOut; 233 readsOut+=pt.readsOut; 234 235 truepositive+=pt.truepositive; 236 truenegative+=pt.truenegative; 237 falsepositive+=pt.falsepositive; 238 falsenegative+=pt.falsenegative; 239 expected+=pt.expected; 240 unexpected+=pt.unexpected; 241 } 242 243 long totalReads=goodReadsFound+badReadsFound; 244 long totalAdapters=plusAdaptersFound+minusAdaptersFound; 245 if(expected<1){expected=1;} 246 if(unexpected<1){unexpected=1;} 247 248 System.out.println("Reads In: \t"+totalReads+" \t("+basesIn+" bases, avg length "+(basesIn/totalReads)+")"); 249 System.out.println("Good reads: \t"+goodReadsFound); 250 System.out.println("Bad reads: \t"+badReadsFound+" \t("+totalAdapters+" adapters)"); 251 System.out.println("Plus adapters: \t"+plusAdaptersFound); 252 System.out.println("Minus adapters: \t"+minusAdaptersFound); 253 System.out.println("Adapters per megabase: \t"+String.format(Locale.ROOT, "%.3f",totalAdapters*1000000f/basesIn)); 254 if(readsOut>0){System.out.println("Reads Out: \t"+readsOut+" \t("+basesOut+" bases, avg length "+(basesOut/readsOut)+")");} 255 System.out.println(); 256 if(truepositive>0 || truenegative>0 || falsepositive>0 || falsenegative>0){ 257 System.out.println("Adapters Expected: \t"+expected); 258 System.out.println("True Positive: \t"+truepositive+" \t"+String.format(Locale.ROOT, "%.3f%%", truepositive*100f/expected)); 259 System.out.println("True Negative: \t"+truenegative+" \t"+String.format(Locale.ROOT, "%.3f%%", truenegative*100f/unexpected)); 260 System.out.println("False Positive: \t"+falsepositive+" \t"+String.format(Locale.ROOT, "%.3f%%", falsepositive*100f/unexpected)); 261 System.out.println("False Negative: \t"+falsenegative+" \t"+String.format(Locale.ROOT, "%.3f%%", falsenegative*100f/expected)); 262 } 263 264 } 265 266 private static class ProcessThread extends Thread{ 267 ProcessThread(ConcurrentReadInputStream cris_, ConcurrentReadOutputStream ros_, float minRatio_, String query_, boolean split_)268 public ProcessThread(ConcurrentReadInputStream cris_, 269 ConcurrentReadOutputStream ros_, float minRatio_, String query_, boolean split_) { 270 cris=cris_; 271 ros=ros_; 272 minRatio=minRatio_; 273 query1=query_.getBytes(); 274 query2=AminoAcid.reverseComplementBases(query1); 275 ALIGN_ROWS=query1.length+1; 276 ALIGN_COLUMNS=ALIGN_ROWS*3+20; 277 SPLIT=split_; 278 279 stride=(int)(query1.length*0.95f); 280 window=(int)(query1.length*2.5f+10); 281 assert(window<ALIGN_COLUMNS); 282 283 msa=new MultiStateAligner9PacBioAdapter(ALIGN_ROWS, ALIGN_COLUMNS); 284 msa2=USE_ALT_MSA ? new MultiStateAligner9PacBioAdapter2() : null; 285 286 maxSwScore=msa.maxQuality(query1.length); 287 minSwScore=(int)(maxSwScore*MINIMUM_ALIGNMENT_SCORE_RATIO); 288 minSwScoreSuspect=(int)(maxSwScore*Tools.min(MINIMUM_ALIGNMENT_SCORE_RATIO*SUSPECT_RATIO, MINIMUM_ALIGNMENT_SCORE_RATIO-((1-SUSPECT_RATIO)*.2f))); 289 maxImperfectSwScore=msa.maxImperfectScore(query1.length); 290 291 suspectMidpoint=(minSwScoreSuspect+minSwScore)/2; 292 } 293 294 @Override 295 public void run(){ 296 ListNum<Read> ln=cris.nextList(); 297 ArrayList<Read> readlist=ln.list; 298 299 while(!readlist.isEmpty()){ 300 301 //System.err.println("Got a list of size "+readlist.size()); 302 for(int i=0; i<readlist.size(); i++){ 303 Read r=readlist.get(i); 304 305 if(r.length()<minContig && (r.mate==null || r.mateLength()<minContig)){ 306 readlist.set(i, null); 307 }else{ 308 309 //System.out.println("Got read: "+r.toText()); 310 //System.out.println("Synthetic: "+r.synthetic()); 311 312 if(r.synthetic()){syntheticReads++;} 313 314 processRead(r); 315 if(r.mate!=null){processRead(r.mate);} 316 } 317 318 } 319 320 // System.err.println("outputStream = "+outputStream==null ? "null" : "real"); 321 if(ros!=null){ //Important to send all lists to output, even empty ones, to keep list IDs straight. 322 if(DONT_OUTPUT_BROKEN_READS){removeDiscarded(readlist);} 323 for(Read r : readlist){ 324 if(r!=null){ 325 r.obj=null;//Not sure what r.obj is here 326 assert(r.bases!=null); 327 if(r.sites!=null && r.sites.isEmpty()){r.sites=null;} 328 } 329 } 330 // System.err.println("Adding list of length "+readlist.size()); 331 332 ArrayList<Read> out=SPLIT ? split(readlist) : readlist; 333 for(Read r : out){ 334 if(r!=null){ 335 Read r2=r.mate; 336 basesOut+=r.length(); 337 readsOut++; 338 if(r2!=null){ 339 basesOut+=r2.length(); 340 readsOut++; 341 } 342 } 343 } 344 ros.add(out, ln.id); 345 } 346 347 cris.returnList(ln.id, readlist.isEmpty()); 348 349 //System.err.println("Waiting on a list..."); 350 ln=cris.nextList(); 351 readlist=ln.list; 352 } 353 354 //System.err.println("Returning a list... (final)"); 355 assert(readlist.isEmpty()); 356 cris.returnList(ln.id, readlist.isEmpty()); 357 } 358 359 /** 360 * @param readlist 361 * @return 362 */ 363 private static ArrayList<Read> split(ArrayList<Read> in) { 364 ArrayList<Read> out=new ArrayList<Read>(in.size()); 365 for(Read r : in){ 366 if(r!=null){ 367 // assert(r.mate==null); 368 if(!r.hasAdapter()){out.add(r);} 369 else{out.addAll(split(r));} 370 Read r2=r.mate; 371 if(r2!=null){ 372 if(!r2.hasAdapter()){out.add(r2);} 373 else{out.addAll(split(r2));} 374 } 375 } 376 } 377 return out; 378 } 379 380 /** 381 * @param r 382 * @return 383 */ 384 private static ArrayList<Read> split(Read r) { 385 ArrayList<Read> sections=new ArrayList<Read>(); 386 387 int lastX=-1; 388 for(int i=0; i<r.length(); i++){ 389 if(r.bases[i]=='X'){ 390 if(i-lastX>minContig){ 391 byte[] b=KillSwitch.copyOfRange(r.bases, lastX+1, i); 392 byte[] q=r.quality==null ? null : KillSwitch.copyOfRange(r.quality, lastX+1, i); 393 Read r2=new Read(b, q, r.id+"_"+(lastX+1), r.numericID); 394 sections.add(r2); 395 } 396 lastX=i; 397 } 398 } 399 int i=r.length(); 400 if(i-lastX>minContig){ 401 byte[] b=KillSwitch.copyOfRange(r.bases, lastX+1, i); 402 byte[] q=r.quality==null ? null : KillSwitch.copyOfRange(r.quality, lastX+1, i); 403 Read r2=new Read(b, q, r.id+"_"+(lastX+1), r.numericID); 404 sections.add(r2); 405 } 406 return sections; 407 } 408 409 /** 410 * @param r 411 */ 412 private int processRead(Read r) { 413 414 int begin=0; 415 while(begin<r.length() && r.bases[begin]=='N'){begin++;} //Skip reads made of 'N' 416 if(begin>=r.length()){return 0;} 417 418 basesIn+=r.length(); 419 420 final byte[] array=npad(r.bases, npad); 421 422 int lim=array.length-npad-stride; 423 424 int plusFound=0; 425 int minusFound=0; 426 427 int lastSuspect=-1; 428 int lastConfirmed=-1; 429 430 for(int i=begin; i<lim; i+=stride){ 431 int j=Tools.min(i+window, array.length-1); 432 if(j-i<window){ 433 lim=0; //Last loop cycle 434 // i=Tools.max(0, array.length-2*query1.length); 435 } 436 437 if(TRY_MINUS){ 438 int[] rvec=msa.fillAndScoreLimited(query2, array, i, j, minSwScoreSuspect); 439 if(rvec!=null && rvec[0]>=minSwScoreSuspect){ 440 int score=rvec[0]; 441 int start=rvec[1]; 442 int stop=rvec[2]; 443 assert(score>=minSwScoreSuspect); 444 if((i==0 || start>i) && (j==array.length-1 || stop<j)){ 445 boolean kill=(score>=minSwScore || 446 (score>=suspectMidpoint && lastSuspect>0 && start>=lastSuspect && start-lastSuspect<suspectDistance) || 447 (lastConfirmed>0 && start>=lastConfirmed && start-lastConfirmed<suspectDistance)); 448 449 if(!kill && USE_LOCALITY && array.length-stop>window){//Look ahead 450 rvec=msa.fillAndScoreLimited(query2, array, stop, stop+window, minSwScoreSuspect); 451 if(rvec!=null){ 452 if(score>=suspectMidpoint && rvec[0]>=minSwScoreSuspect && rvec[1]-stop<suspectDistance){kill=true;} 453 else if(score>=minSwScoreSuspect && rvec[0]>=minSwScore && rvec[1]-stop<suspectDistance){kill=true;} 454 } 455 } 456 457 if(!kill && USE_ALT_MSA){//Try alternate msa 458 rvec=msa2.fillAndScoreLimited(query2, array, Tools.max(0, start-4), Tools.min(stop+4, array.length-1), minSwScoreSuspect); 459 if(rvec!=null && rvec[0]>=(minSwScore)){kill=true;} 460 } 461 462 if(kill){ 463 // System.out.println("-:"+score+", "+minSwScore+", "+minSwScoreSuspect+"\t"+lastSuspect+", "+start+", "+stop); 464 minusFound++; 465 for(int x=Tools.max(0, start); x<=stop; x++){array[x]='X';} 466 if(USE_LOCALITY && score>=minSwScore){lastConfirmed=Tools.max(lastConfirmed, stop);} 467 } 468 } 469 // System.out.println("Set lastSuspect="+stop+" on score "+score); 470 if(USE_LOCALITY){lastSuspect=Tools.max(lastSuspect, stop);} 471 } 472 } 473 474 if(TRY_PLUS){ 475 int[] rvec=msa.fillAndScoreLimited(query1, array, i, j, minSwScoreSuspect); 476 if(rvec!=null && rvec[0]>=minSwScoreSuspect){ 477 int score=rvec[0]; 478 int start=rvec[1]; 479 int stop=rvec[2]; 480 if((i==0 || start>i) && (j==array.length-1 || stop<j)){ 481 boolean kill=(score>=minSwScore || 482 (score>=suspectMidpoint && lastSuspect>0 && start>=lastSuspect && start-lastSuspect<suspectDistance) || 483 (lastConfirmed>0 && start>=lastConfirmed && start-lastConfirmed<suspectDistance)); 484 485 if(!kill && USE_LOCALITY && array.length-stop>window){//Look ahead 486 rvec=msa.fillAndScoreLimited(query1, array, stop, stop+window, minSwScoreSuspect); 487 if(rvec!=null){ 488 if(score>=suspectMidpoint && rvec[0]>=minSwScoreSuspect && rvec[1]-stop<suspectDistance){kill=true;} 489 else if(score>=minSwScoreSuspect && rvec[0]>=minSwScore && rvec[1]-stop<suspectDistance){kill=true;} 490 } 491 } 492 493 if(!kill && USE_ALT_MSA){//Try alternate msa 494 rvec=msa2.fillAndScoreLimited(query1, array, Tools.max(0, start-4), Tools.min(stop+4, array.length-1), minSwScoreSuspect); 495 if(rvec!=null && rvec[0]>=(minSwScore)){kill=true;} 496 } 497 498 if(kill){ 499 // System.out.println("+:"+score+", "+minSwScore+", "+minSwScoreSuspect+"\t"+lastSuspect+", "+start+", "+stop); 500 plusFound++; 501 for(int x=Tools.max(0, start); x<=stop; x++){array[x]='X';} 502 if(USE_LOCALITY && score>=minSwScore){lastConfirmed=Tools.max(lastConfirmed, stop);} 503 } 504 } 505 // System.out.println("Set lastSuspect="+stop+" on score "+score); 506 if(USE_LOCALITY){lastSuspect=Tools.max(lastSuspect, stop);} 507 } 508 } 509 } 510 511 int found=plusFound+minusFound; 512 513 // if(r.synthetic()){ 514 // if(/*r.hasadapter() && */(r.numericID&3)==0){ 515 // if(plusFound>0){truepositive++;}else{falsenegative++;} 516 // if(plusFound>1){falsepositive+=(plusFound-1);} 517 // falsepositive+=minusFound; 518 // expected++; 519 // }else if(/*r.hasadapter() && */(r.numericID&3)==1){ 520 // if(minusFound>0){truepositive++;}else{falsenegative++;} 521 // if(minusFound>1){falsepositive+=(minusFound-1);} 522 // falsepositive+=plusFound; 523 // expected++; 524 // }else{ 525 // falsepositive=falsepositive+plusFound+minusFound; 526 // if(plusFound+minusFound==0){truenegative++;} 527 // unexpected++; 528 // } 529 // } 530 531 if(r.synthetic()){ 532 if(/*r.hasadapter() && */(r.numericID&3)==0){ 533 if(found>0){truepositive++;}else{falsenegative++;} 534 if(found>1){falsepositive+=(found-1);} 535 expected++; 536 }else if(/*r.hasadapter() && */(r.numericID&3)==1){ 537 if(found>0){truepositive++;}else{falsenegative++;} 538 if(found>1){falsepositive+=(found-1);} 539 expected++; 540 }else{ 541 falsepositive+=found; 542 if(found==0){truenegative++;} 543 unexpected++; 544 } 545 } 546 547 plusAdaptersFound+=plusFound; 548 minusAdaptersFound+=minusFound; 549 if(found>0){ 550 for(int i=npad, j=0; j<r.length(); i++, j++){r.bases[j]=array[i];} 551 if(DONT_OUTPUT_BROKEN_READS){r.setDiscarded(true);} 552 badReadsFound++; 553 }else{ 554 goodReadsFound++; 555 } 556 557 r.setHasAdapter(found>0); 558 559 return found; 560 561 } 562 563 private byte[] npad(final byte[] array, final int pad){ 564 final int len=array.length+2*pad; 565 if(padbuffer==null || padbuffer.length!=len){padbuffer=new byte[len];} 566 byte[] r=padbuffer; 567 for(int i=0; i<pad; i++){r[i]='N';} 568 for(int i=pad, j=0; j<array.length; i++, j++){r[i]=array[j];} 569 for(int i=array.length+pad, limit=Tools.min(r.length, len+50); i<limit; i++){r[i]='N';} 570 padbuffer=null; //Kills the buffer. Causes more memory allocation, but better cache/NUMA locality if threads move around. 571 return r; 572 } 573 574 private byte[] padbuffer=null; 575 private final byte[] query1, query2; 576 private final ConcurrentReadInputStream cris; 577 private final ConcurrentReadOutputStream ros; 578 private final float minRatio; 579 private final MultiStateAligner9PacBioAdapter msa; 580 private final MultiStateAligner9PacBioAdapter2 msa2; 581 private final int ALIGN_ROWS; 582 private final int ALIGN_COLUMNS; 583 private final int stride; 584 private final int window; 585 private final boolean SPLIT; 586 587 long plusAdaptersFound=0; 588 long minusAdaptersFound=0; 589 long goodReadsFound=0; 590 long badReadsFound=0; 591 long truepositive=0; 592 long truenegative=0; 593 long falsepositive=0; 594 long falsenegative=0; 595 long expected=0; 596 long unexpected=0; 597 long basesIn=0; 598 long basesOut=0; 599 long readsOut=0; 600 601 private final int maxSwScore; 602 private final int minSwScore; 603 private final int minSwScoreSuspect; 604 private final int suspectMidpoint; 605 private final int maxImperfectSwScore; 606 607 long syntheticReads=0; 608 609 } 610 611 private static int removeDiscarded(ArrayList<Read> list){ 612 int removed=0; 613 for(int i=0; i<list.size(); i++){ 614 Read r=list.get(i); 615 if(r.discarded()){ 616 if(r.mate==null || r.mate.discarded()){ 617 list.set(i, null); 618 removed++; 619 } 620 } 621 } 622 return removed; 623 } 624 625 public static boolean DONT_OUTPUT_BROKEN_READS; 626 /** Permission to overwrite existing files */ 627 private static boolean overwrite=false; 628 /** Permission to append to existing files */ 629 private static boolean append=false; 630 private static int THREADS=Shared.LOGICAL_PROCESSORS; 631 private static boolean OUTPUT_READS=false; 632 private static boolean ordered=false; 633 private static boolean PERFECTMODE=false; 634 private static float MINIMUM_ALIGNMENT_SCORE_RATIO=0.31f; //0.31f: At 250bp reads, approx 0.01% false-positive and 94% true-positive. 635 private static float SUSPECT_RATIO=0.85F; 636 public static boolean USE_LOCALITY=true; 637 public static boolean USE_ALT_MSA=true; 638 public static boolean TRY_PLUS=true; 639 public static boolean TRY_MINUS=true; 640 private static int npad=35; 641 public static int minContig=50; 642 public static int suspectDistance=100; 643 644 public static final String pacbioAdapter="ATCTCTCTCTTTTCCTCCTCCTCCGTTGTTGTTGTTGAGAGAGAT"; 645 public static final String pacbioStandard_v1="TCCTCCTCCTCCGTTGTTGTTGTTGAGAGAGAGAAGGCTGGGCAGGCTATGCACCCTGGTCCAGGTCAAA" + 646 "AGCTGCGGAACCCGCTAGCGGCCATCTTGGCCACTAGGGGTCCCGCAGATTCATATTGTCGTCTAGCATGCACAATGCTGCAAACCCAGCTTGCAATGCCCACAGCA" + 647 "AGCGGCCAATCTTTACGCCACGTTGAATTGTTTATTACCTGTGACTGGCTATGGCTTGCAACGCCACTCGTAAAACTAGTACTTTGCGGTTAGGGGAAGTAGACAAA" + 648 "CCCATTACTCCACTTCCCGGAAGTTCAACTCATTCCAACACGAAATAAAAGTAAACTCAACACCCCAAGCAGGCTATGTGGGGGGGTGATAGGGGTGGATTCTATTT" + 649 "CCTATCCCATCCCCTAGGATCTCAATTAAGTTACTAGCGAGTTAAATGTCTGTAGCGATCCCGTCAGTCCTATCGCGCGCATCAAGACCTGGTTGGTTGAGCGTGCA" + 650 "GTAGATCATCGATAAGCTGCGAGTTAGGTCATCCCAGACCGCATCTGGCGCCTAAACGTTCAGTGGTAGCTAAGGCGTCACCTTCGACTGTCTAAAGGCAATATGTC" + 651 "GTCCTTAGCTCCAAGTCCCTAGCAAGCGTGTCGGGTCTCTCTCTTTTCCTCCTCCTCCGTTGTTGTTGTTGAGAGAGACCCGACACGCTTGCTAGGGACTTGGAGCT" + 652 "AAGGACGACATATTGCCTTTAGACAGTCGAAGGTGACGCCTTAGCTACCACTGAACGTTTAGGCGCCAGATGCGGTCTGGGATGACCTAACTCGCAGCTTATCGATG" + 653 "ATCTACTGCACGCTCAACCAACCAGGTCTTGATGCGCGCGATAGGACTGACGGGATCGCTACAGACATTTAACTCGCTAGTAACTTAATTGAGATCCTAGGGGATGG" + 654 "GATAGGAAATAGAATCCACCCCTATCACCCCCCCACATAGCCTGCTTGGGGTGTTGAGTTTACTTTTATTTCGTGTTGGAATGAGTTGAACTTCCGGGAAGTGGAGT" + 655 "AATGGGTTTGTCTACTTCCCCTAACCGCAAAGTACTAGTTTTACGAGTGGCGTTGCAAGCCATAGCCAGTCACAGGTAATAAACAATTCAACGTGGCGTAAAGATTG" + 656 "GCCGCTTGCTGTGGGCATTGCAAGCTGGGTTTGCAGCATTGTGCATGCTAGACGACAATATGAATCTGCGGGACCCCTAGTGGCCAAGATGGCCGCTAGCGGGTTCC" + 657 "GCAGCTTTTGACCTGGACCAGGGTGCATAGCCTGCCCAGCCTTCTCTCTCTCTTT"; 658 659 660 } 661