package jgi;

import java.io.File;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.Locale;

import fileIO.ByteFile;
import fileIO.FileFormat;
import fileIO.ReadWrite;
import shared.Parse;
import shared.Parser;
import shared.PreParser;
import shared.ReadStats;
import shared.Shared;
import shared.Timer;
import shared.Tools;
import shared.TrimRead;
import stream.ConcurrentReadInputStream;
import stream.ConcurrentReadOutputStream;
import stream.DualCris;
import stream.FASTQ;
import stream.FastaReadInputStream;
import stream.Read;
import stream.SamLine;
import structures.ListNum;

/**
 * Separates sequencing reads into a paired output and a singleton output.
 * <p>
 * Three mutually exclusive modes are supported:
 * <ul>
 * <li>default: each incoming read and its mate are optionally quality-trimmed;
 * if both are at least {@code minReadLength} long they are written as a pair,
 * otherwise any surviving read is written as a singleton;</li>
 * <li>{@code fixInterleaving}: consecutive reads of a single input are paired
 * when {@link FASTQ#testPairNames} accepts their names;</li>
 * <li>{@code repair}: reads in arbitrary order are matched by name prefix using
 * an in-memory map; reads whose mate never shows up become singletons.</li>
 * </ul>
 *
 * @author Brian Bushnell
 * @date Sep 4, 2013
 *
 */
public final class SplitPairsAndSingles {

    /**
     * Program entry point: parses the arguments, runs the job, and closes the
     * message stream.
     * @param args Command-line arguments in {@code key=value} form (or bare file names)
     */
    public static void main(String[] args){
        SplitPairsAndSingles x=new SplitPairsAndSingles(args);
        x.process();

        //Close the print stream if it was redirected
        Shared.closeStream(outstream);
    }

    /**
     * Parses the command line and configures input/output paths and the
     * processing mode.
     * @param args Command-line arguments
     * @throws RuntimeException if an argument is not recognized, no input file
     * is given, or out1 exists and may not be overwritten
     */
    public SplitPairsAndSingles(String[] args){

        {//Preparse block for help, config files, and outstream
            PreParser pp=new PreParser(args, getClass(), false);
            args=pp.args;
            outstream=pp.outstream;
        }

        ReadWrite.ZIPLEVEL=2;
        ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
        ReadWrite.MAX_ZIP_THREADS=Shared.threads();

        if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){
            ByteFile.FORCE_MODE_BF2=true;
        }

        Parser parser=new Parser();
        parser.trimq=trimq;
        parser.minReadLength=minReadLength;
        boolean setOut=false;
        boolean fixInterleaving_=false, repair_=false, allowIdenticalPairNames_=false;

        for(int i=0; i<args.length; i++){

            final String arg=args[i];
            String[] split=arg.split("=");
            //Locale.ROOT keeps key matching independent of the JVM's default locale
            String a=split[0].toLowerCase(Locale.ROOT);
            String b=split.length>1 ? split[1] : null;

            if(Parser.parseCommonStatic(arg, a, b)){
                //do nothing
            }else if(Parser.parseZip(arg, a, b)){
                //do nothing
            }else if(Parser.parseQuality(arg, a, b)){
                //do nothing
            }else if(Parser.parseFasta(arg, a, b)){
                //do nothing
            }else if(parser.parseInterleaved(arg, a, b)){
                //do nothing
            }else if(parser.parseTrim(arg, a, b)){
                //do nothing
            }else if(a.equals("in") || a.equals("in1")){
                in1=b;
            }else if(a.equals("in2")){
                in2=b;
            }else if(a.equals("out") || a.equals("out1") || a.equals("outp") || a.equals("outp1") || a.equals("outpair") || a.equals("outpair1")){
                out1=b;
                setOut=true;
            }else if(a.equals("out2") || a.equals("outp2") || a.equals("outpair2")){
                out2=b;
            }else if(a.equals("outs") || a.equals("outsingle") || a.equals("outb") || a.equals("outbad")){
                outsingle=b;
                setOut=true;
            }else if(a.equals("append") || a.equals("app")){
                append=ReadStats.append=Parse.parseBoolean(b);
            }else if(a.equals("overwrite") || a.equals("ow")){
                overwrite=Parse.parseBoolean(b);
            }else if(a.equals("showspeed") || a.equals("ss")){
                showSpeed=Parse.parseBoolean(b);
            }else if(a.equals("verbose")){
                verbose=Parse.parseBoolean(b);
            }else if(a.equals("addslash")){
                addslash=Parse.parseBoolean(b);
            }else if(a.equals("addcolon")){
                addcolon=Parse.parseBoolean(b);
            }else if(a.equals("reads") || a.startsWith("maxreads")){
                maxReads=Parse.parseKMG(b);
            }else if(a.equals("fixinterleaving") || a.equals("fi") || a.equals("fint") || a.equals("fixint")){
                fixInterleaving_=Parse.parseBoolean(b);
                if(fixInterleaving_){repair_=false;}
            }else if(a.equals("allowidenticalnames") || a.equals("ain")){
                allowIdenticalPairNames_=Parse.parseBoolean(b);
            }else if(a.equals("repair") || a.equals("rp")){
                repair_=Parse.parseBoolean(b);
                if(repair_){fixInterleaving_=false;}
            }else if(i==0 && in1==null && arg.indexOf('=')<0 && arg.lastIndexOf('.')>0){
                in1=args[i];
            }else if(i==1 && out1==null && arg.indexOf('=')<0 && arg.lastIndexOf('.')>0){
                out1=args[i];
                setOut=true;
            }else if(i==2 && outsingle==null && arg.indexOf('=')<0 && arg.lastIndexOf('.')>0){
                outsingle=args[i];
                //A positional singleton output counts as an output, same as outs=
                setOut=true;
            }else{
                throw new RuntimeException("Unknown parameter "+args[i]);
            }
        }

        {//Process parser fields
            Parser.processQuality();

            qtrimLeft=parser.qtrimLeft;
            qtrimRight=parser.qtrimRight;
            trimq=parser.trimq;
            trimE=parser.trimE();
            minReadLength=parser.minReadLength;
        }

        allowIdenticalPairNames=allowIdenticalPairNames_;
        fixInterleaving=fixInterleaving_;
        repair=repair_;
        assert(!repair || ! fixInterleaving) : "ERROR: Choose 'fixInterleaving' or 'repair', but not both.";

        assert(FastaReadInputStream.settingsOK());

        if(in1==null){throw new RuntimeException("Error - at least one input file is required.");}

        //Expand a '#' placeholder into read-1 and read-2 file names unless a file with that literal name exists
        if(in1!=null && in1.contains("#") && !new File(in1).exists()){
            int pound=in1.lastIndexOf('#');
            String a=in1.substring(0, pound);
            String b=in1.substring(pound+1);
            in1=a+1+b;
            in2=a+2+b;
        }
        if(in2!=null && (in2.contains("=") || in2.equalsIgnoreCase("null"))){in2=null;}

        if(fixInterleaving){
            if(in2!=null){
                System.err.println("ERROR: 'FixInterleaving' mode only works with a single interleaved input file, not paired input files.");
                System.err.println("Aborting.");
                System.exit(1);
            }
            parser.setInterleaved=true;
            FASTQ.FORCE_INTERLEAVED=FASTQ.TEST_INTERLEAVED=false;
            outstream.println("Paired input disabled; running in FixInterleaving mode");
        }

        if(repair){
            FASTQ.FORCE_INTERLEAVED=FASTQ.TEST_INTERLEAVED=false;
            outstream.println("Set INTERLEAVED to "+FASTQ.FORCE_INTERLEAVED);
        }else{
            if(!parser.setInterleaved && in2==null){
                FASTQ.FORCE_INTERLEAVED=FASTQ.TEST_INTERLEAVED=true;
                outstream.println("Set INTERLEAVED to "+FASTQ.FORCE_INTERLEAVED);
            }
            if(in2!=null){
                if(FASTQ.FORCE_INTERLEAVED){System.err.println("Reset INTERLEAVED to false because paired input files were specified.");}
                FASTQ.FORCE_INTERLEAVED=FASTQ.TEST_INTERLEAVED=false;
            }
        }

        if(out1!=null && out1.contains("#")){
            int pound=out1.lastIndexOf('#');
            String a=out1.substring(0, pound);
            String b=out1.substring(pound+1);
            out1=a+1+b;
            out2=a+2+b;
        }

        if(!setOut){
            System.err.println("No output stream specified. To write to stdout, please specify 'out=stdout.fq' or similar.");
//          out1="stdout.fq";
            outstream=System.err;
            out2=null;
        }else if("stdout".equalsIgnoreCase(out1) || "standardout".equalsIgnoreCase(out1)
                || "standarddout".equalsIgnoreCase(out1)){//Misspelled alias retained for backward compatibility
            out1="stdout.fq";
            outstream=System.err;
            out2=null;
        }
        if(out1!=null && !Tools.canWrite(out1, overwrite)){throw new RuntimeException("Output file "+out1+" already exists, and overwrite="+overwrite);}

        assert(!in1.equalsIgnoreCase(out1));
        assert(!in1.equalsIgnoreCase(outsingle));
        assert(!in1.equalsIgnoreCase(in2));
        assert(out1==null || !out1.equalsIgnoreCase(out2)) : "out2 may not be defined without out1, and out1 may not equal out2.";
        assert(out1==null || !out1.equalsIgnoreCase(outsingle));

        pairMap=(repair ? new LinkedHashMap<String, Read>() : null);

        //outstream must stay open here: process() still writes the statistics to it.
        //main() closes it once processing has finished.
    }

    /**
     * Runs the configured mode and prints read/base statistics to the message stream.
     * @throws RuntimeException if closing the streams reported an error
     */
    public void process(){

        Timer t=new Timer();

        process2();

        t.stop();

        outstream.println("\nInput: \t"+readsIn+" reads \t\t"+basesIn+" bases.");

        if(qtrimLeft || qtrimRight){
            outstream.println("Trimmed: \t"+readsTrimmed+" reads ("+percent(readsTrimmed, readsIn)+"%) \t"+
                    basesTrimmed+" bases ("+percent(basesTrimmed, basesIn)+"%)");
        }
        outstream.println("Result: \t"+readsOut+" reads ("+percent(readsOut, readsIn)+"%) \t"+
                basesOut+" bases ("+percent(basesOut, basesIn)+"%)");
        outstream.println("Pairs: \t"+pairsOut+" reads ("+percent(pairsOut, readsIn)+"%) \t"+
                pairBasesOut+" bases ("+percent(pairBasesOut, basesIn)+"%)");
        outstream.println("Singletons: \t"+singlesOut+" reads ("+percent(singlesOut, readsIn)+"%) \t"+
                singleBasesOut+" bases ("+percent(singleBasesOut, basesIn)+"%)");

        if(showSpeed){
            outstream.println();
            outstream.println(Tools.timeReadsBasesProcessed(t, readsIn, basesIn, 8));
        }

        if(errorState){
            throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
        }
    }

    /**
     * Formats {@code part} as a percentage of {@code whole} with two decimals.
     * A zero {@code whole} is not special-cased; the floating-point division result is formatted as-is.
     */
    private static String percent(long part, long whole){
        return String.format(Locale.ROOT, "%.2f", part*100.0/whole);
    }

    /** Opens the input and output streams, dispatches to the selected mode, and closes the streams. */
    private void process2(){
        final ConcurrentReadInputStream cris;
        if(in2!=null && repair){
            FileFormat ff1=FileFormat.testInput(in1, FileFormat.FASTQ, null, true, true);
            FileFormat ff2=FileFormat.testInput(in2, FileFormat.FASTQ, null, true, true);
            cris=DualCris.getReadInputStream(maxReads, true, ff1, ff2, null, null);
        }else{
            //NOTE(review): in2 is not handed to the stream on this path - confirm that is intended when repair is off
            FileFormat ff1=FileFormat.testInput(in1, FileFormat.FASTQ, null, true, true);
            cris=ConcurrentReadInputStream.getReadInputStream(maxReads, true, ff1, null, null, null);
        }
        if(verbose){System.err.println("Started cris");}
        cris.start();
        boolean paired=cris.paired();
        if(verbose){System.err.println("Paired: "+paired);}

        final ConcurrentReadOutputStream ros, rosb;
        final int buff=4;
        if(out1!=null){
            FileFormat ff1=FileFormat.testOutput(out1, FileFormat.FASTQ, null, true, overwrite, append, false);
            FileFormat ff2=FileFormat.testOutput(out2, FileFormat.FASTQ, null, true, overwrite, append, false);
            ros=ConcurrentReadOutputStream.getStream(ff1, ff2, buff, null, true);
            ros.start();
        }else{ros=null;}
        if(outsingle!=null){
            FileFormat ff1=FileFormat.testOutput(outsingle, FileFormat.FASTQ, null, true, overwrite, append, false);
            rosb=ConcurrentReadOutputStream.getStream(ff1, null, buff, null, true);
            rosb.start();
        }else{rosb=null;}
        if(ros!=null || rosb!=null){
            outstream.println("Started output stream.");
        }

//      assert(false) : out1+", "+out2+", "+outsingle;
        if(fixInterleaving){
            process3_fixInterleaving(cris, ros, rosb);
        }else if(repair){
            if(cris.getClass()==DualCris.class){
                process3_repair((DualCris)cris, ros, rosb);
            }else{
                process3_repair(cris, ros, rosb);
            }
        }else{
            process3(cris, ros, rosb);
        }

        //Record stream failures so process() can report them; previously errorState was never set
        errorState|=ReadWrite.closeStreams(cris, ros, rosb);
    }

    /**
     * Default mode: sends every read and its mate through {@link #processPair}.
     * @param cris Input stream
     * @param ros Paired output, or null
     * @param rosb Singleton output, or null
     */
    private void process3(final ConcurrentReadInputStream cris, final ConcurrentReadOutputStream ros, final ConcurrentReadOutputStream rosb){

        ListNum<Read> ln=cris.nextList();
        ArrayList<Read> reads=(ln!=null ? ln.list : null);

        final ArrayList<Read> pairs=(ros==null ? null : new ArrayList<Read>(Shared.bufferLen()));
        final ArrayList<Read> singles=(rosb==null ? null : new ArrayList<Read>(Shared.bufferLen()));

        while(ln!=null && reads!=null && reads.size()>0){
            for(int i=0; i<reads.size(); i++){
                Read r1=reads.get(i);
                Read r2=r1.mate;
                processPair(r1, r2, pairs, singles);
            }

            //Remember the batch just handled: its id labels the output, and the next list may be null
            final ListNum<Read> done=ln;
            cris.returnList(done);
            ln=cris.nextList();
            reads=(ln!=null ? ln.list : null);

            writeBatch(rosb, singles, done, "single");
            writeBatch(ros, pairs, done, "pair");
        }
        if(ln!=null){cris.returnList(ln);}

        tallyOutput();
    }

    /**
     * FixInterleaving mode: walks the reads in input order and pairs each read
     * with its successor when {@link FASTQ#testPairNames} accepts the two names;
     * a read that does not pair with its successor is treated as a singleton.
     * @param cris Input stream
     * @param ros Paired output, or null
     * @param rosb Singleton output, or null
     */
    private void process3_fixInterleaving(final ConcurrentReadInputStream cris, final ConcurrentReadOutputStream ros, final ConcurrentReadOutputStream rosb){

        ListNum<Read> ln=cris.nextList();
        ArrayList<Read> reads=(ln!=null ? ln.list : null);

        final ArrayList<Read> pairs=(ros==null ? null : new ArrayList<Read>(Shared.bufferLen()));
        final ArrayList<Read> singles=(rosb==null ? null : new ArrayList<Read>(Shared.bufferLen()));

        //prev carries an as-yet-unpaired read across iterations and across batches
        Read current=null, prev=null;

        while(ln!=null && reads!=null && reads.size()>0){
            for(int i=0; i<reads.size(); i++){

                current=reads.get(i);
//              if(verbose){System.err.println("Fetched "+current);}

                if(prev!=null){
                    boolean b=FASTQ.testPairNames(prev, current, allowIdenticalPairNames);
                    if(b){
                        if(verbose){System.err.println("A");}
                        processPair(prev, current, pairs, singles);
                        prev=null;
                        current=null;
                    }else{
                        if(verbose){System.err.println("B");}
                        processPair(prev, null, null, singles);
                        prev=null;
                    }
                }
                prev=current;
                current=null;
            }

//          if(verbose){System.err.println("X\n"+current+"\n"+prev+"\n");}

            final ListNum<Read> done=ln;
            cris.returnList(done);
            ln=cris.nextList();
            reads=(ln!=null ? ln.list : null);

            if((ln==null || reads==null || reads.isEmpty()) && prev!=null){ //Process last read
                //current is always null here, so either branch ends with prev handled as a singleton
                boolean b=FASTQ.testPairNames(prev, current, allowIdenticalPairNames);
                if(b){
                    if(verbose){System.err.println("C");}
                    processPair(prev, current, pairs, singles);
                    prev=null;
                    current=null;
                }else{
                    if(verbose){System.err.println("D");}
                    processPair(prev, null, null, singles);
                    prev=null;
                }
            }

            writeBatch(rosb, singles, done, "single");
            writeBatch(ros, pairs, done, "pair");
        }
        if(ln!=null){cris.returnList(ln);}

        tallyOutput();
    }

    /**
     * Repair mode for two input files read through a {@link DualCris}.
     * Reports back to the stream, per batch, whether reads with pairnum 0
     * and/or nonzero pairnum were seen.
     * @param cris Dual input stream
     * @param ros Paired output, or null
     * @param rosb Singleton output, or null
     */
    private void process3_repair(final DualCris cris, final ConcurrentReadOutputStream ros, final ConcurrentReadOutputStream rosb){

        ListNum<Read> ln=cris.nextList();
        ArrayList<Read> reads=(ln!=null ? ln.list : null);

        final ArrayList<Read> pairs=(ros==null ? null : new ArrayList<Read>(Shared.bufferLen()));

        boolean foundR1=false, foundR2=false;
        while(ln!=null && reads!=null && reads.size()>0){
            for(Read r1 : reads){
                Read r2=r1.mate; //Must be captured first; repair() clears r1.mate

                if(r1.pairnum()==0){foundR1=true;}
                else{foundR2=true;}
                if(r2!=null){
                    if(r2.pairnum()==0){foundR1=true;}
                    else{foundR2=true;}
                }

                repairInto(r1, pairs);
                repairInto(r2, pairs);
            }

            final ListNum<Read> done=ln;
            cris.returnList(done.id, foundR1, foundR2);
            foundR1=foundR2=false;
            ln=cris.nextList();
            reads=(ln!=null ? ln.list : null);

            writeBatch(ros, pairs, done, "pair");
        }
        if(ln!=null){cris.returnList(ln.id, foundR1, foundR2);}

        flushUnpaired(rosb);
        tallyOutput();
    }

    /**
     * Repair mode for a single input stream.
     * @param cris Input stream
     * @param ros Paired output, or null
     * @param rosb Singleton output, or null
     */
    private void process3_repair(final ConcurrentReadInputStream cris, final ConcurrentReadOutputStream ros, final ConcurrentReadOutputStream rosb){

        ListNum<Read> ln=cris.nextList();
        ArrayList<Read> reads=(ln!=null ? ln.list : null);

        final ArrayList<Read> pairs=(ros==null ? null : new ArrayList<Read>(Shared.bufferLen()));

        while(ln!=null && reads!=null && reads.size()>0){
            for(Read r1 : reads){
                Read r2=r1.mate; //Must be captured first; repair() clears r1.mate
                repairInto(r1, pairs);
                repairInto(r2, pairs);
            }

            final ListNum<Read> done=ln;
            cris.returnList(done);
            ln=cris.nextList();
            reads=(ln!=null ? ln.list : null);

            writeBatch(ros, pairs, done, "pair");
        }
        if(ln!=null){cris.returnList(ln);}

        flushUnpaired(rosb);
        tallyOutput();
    }

    /** Runs {@link #repair} on one read and buffers the completed pair, if any. */
    private void repairInto(final Read r, final ArrayList<Read> pairs){
        final Read pair=repair(r);
        if(pair!=null && pairs!=null){pairs.add(pair);}
    }

    /**
     * Hands a copy of {@code buffer} to {@code stream}, labelled with the id of
     * {@code batch}, then empties the buffer. Does nothing when the stream is null.
     * @param label "single" or "pair"; only used in the verbose message
     */
    private static void writeBatch(final ConcurrentReadOutputStream stream, final ArrayList<Read> buffer,
            final ListNum<Read> batch, final String label){
        if(stream==null){return;}
        if(verbose){System.err.println("Adding "+buffer.size()+" to "+label+" out.");}
        stream.add(new ArrayList<Read>(buffer), batch.id);
        buffer.clear();
    }

    /** Counts every read still waiting in pairMap as a singleton, writes them to rosb (if present), and empties the map. */
    private void flushUnpaired(final ConcurrentReadOutputStream rosb){
        if(pairMap.isEmpty()){return;}
        //values() of a LinkedHashMap iterates in insertion order, same as walking keySet() and calling get()
        final ArrayList<Read> singles=new ArrayList<Read>(pairMap.values());
        for(Read r : singles){
            singlesOut++;
            singleBasesOut+=r.length();
        }
        pairMap.clear();
        if(verbose){System.err.println("Adding "+singles.size()+" to single out.");}
        if(rosb!=null){rosb.add(singles, 0);}
    }

    /** Adds the pair and singleton counters into the overall output counters. */
    private void tallyOutput(){
        readsOut+=singlesOut+pairsOut;
        basesOut+=singleBasesOut+pairBasesOut;
    }

    /**
     * Counts, optionally trims and renames, and routes one read and its (possibly
     * null) mate. Both reads go to {@code pairs} when each is at least
     * minReadLength long after trimming; otherwise whichever read is long enough
     * goes to {@code singles}.
     * @param r1 First read
     * @param r2 Mate, or null
     * @param pairs Buffer for surviving pairs, or null to only count them
     * @param singles Buffer for surviving singletons, or null to only count them
     * @return Number of non-null reads that were not sent to either output category
     */
    private int processPair(Read r1, Read r2, ArrayList<Read> pairs, ArrayList<Read> singles){
        int removed=0;
        if(r1!=null){
            readsIn++;
            basesIn+=r1.length();
        }
        if(r2!=null){
            readsIn++;
            basesIn+=r2.length();
        }

        if(qtrimLeft || qtrimRight){
            if(r1!=null){
                int x=TrimRead.trimFast(r1, qtrimLeft, qtrimRight, trimq, trimE, 1);
                basesTrimmed+=x;
                readsTrimmed+=(x>0 ? 1 : 0);
            }
            if(r2!=null){
                int x=TrimRead.trimFast(r2, qtrimLeft, qtrimRight, trimq, trimE, 1);
                basesTrimmed+=x;
                readsTrimmed+=(x>0 ? 1 : 0);
            }
        }

        //Renaming is independent of trimming; it used to be nested in the trim branch
        //and was silently skipped whenever quality trimming was disabled.
        if(addcolon){
            if(r1!=null && r1.id!=null && !r1.id.contains(colon1)){r1.id+=colon1;}
            if(r2!=null && r2.id!=null && !r2.id.contains(colon2)){r2.id+=colon2;}
        }

        final int rlen1=(r1==null ? -1 : r1.length());
        final int rlen2=(r2==null ? -1 : r2.length());
        if(verbose){System.err.println("rlen="+rlen1+", rlen2="+rlen2);}

        //A missing read never qualifies, even if minReadLength is negative
        final boolean keep1=(r1!=null && rlen1>=minReadLength);
        final boolean keep2=(r2!=null && rlen2>=minReadLength);

        if(keep1 && keep2){
            if(verbose){System.err.println("Sending to pair out:\t"+r1.id+"\t"+r2.id);}
            r1.mate=r2;
            r2.mate=r1;
            r1.setPairnum(0);
            r2.setPairnum(1);
            if(pairs!=null){pairs.add(r1);}
            pairsOut+=2;
            pairBasesOut+=(rlen1+rlen2);
        }else if(keep1){
            if(verbose){System.err.println("Sending r1 to single out:\t"+r1.id+"\t"+(r2==null ? "*" : r2.id));}
            r1.mate=null;
            r1.setPairnum(0);
            if(singles!=null){singles.add(r1);}
            singlesOut++;
            singleBasesOut+=rlen1;
            if(r2!=null){removed++;}
        }else if(keep2){
            if(verbose){System.err.println("Sending r2 to single out:\t"+(r1==null ? "*" : r1.id)+"\t"+r2.id);}
            r2.mate=null;
            r2.setPairnum(0);
            if(singles!=null){singles.add(r2);}
            singlesOut++;
            singleBasesOut+=rlen2;
            if(r1!=null){removed++;}
        }else{
            if(verbose){System.err.println("Removed both reads:\t"+(r1==null ? "*" : r1.id)+"\t"+(r2==null ? "*" : r2.id));}
            if(r1!=null){removed++;}
            if(r2!=null){removed++;}
        }
        return removed;
    }

    /**
     * Tries to mate {@code r} with a previously seen read that has the same name prefix.
     * The prefix is the first whitespace-delimited token of the name, or the part
     * before the first '/' when the name contains no whitespace. Unmatched reads
     * are parked in pairMap until their mate arrives.
     * @param r Read to process; its mate link is cleared. May be null.
     * @return The read with pairnum 0 of a newly completed pair (mates linked),
     * or null if r is null, is a non-primary or supplementary alignment, has no
     * name, or has no partner yet
     */
    private Read repair(Read r){
        if(r==null){return null;}
        r.mate=null;

        readsIn++;
        basesIn+=r.length();
        final String id=r.id;

        final SamLine sl=r.samline;
        if(sl!=null && (!sl.primary() || sl.supplementary())){return null;}

        assert(id!=null) : "Read number "+r.numericID+" has no name and thus cannot be re-paired. To ignore this, run with the -da flag.";
        if(id==null){return null;}
        final int slash=id.indexOf('/');
        String[] split=id.split("\\s+");

        if(split.length==1 && slash>0){
            split=new String[] {id.substring(0, slash), id.substring(slash)};
        }

        assert(split.length>0);
        String prefix=split[0];
        String suffix=(split.length==1 ? null : split[split.length-1]);

        //Determine the pair number from the sam line if present, otherwise from the name suffix
        if(sl!=null){
            r.setPairnum(sl.pairnum());
        }else if(suffix!=null){
            if(suffix.startsWith("/1") || suffix.startsWith("1:")){
                r.setPairnum(0);
            }else if(suffix.startsWith("/2") || suffix.startsWith("2:")){
                r.setPairnum(1);
            }else if(id.contains("/1") || id.contains("/2")){
                split=id.split("/");
                prefix=split[0];
                suffix=(split.length==1 ? null : split[split.length-1]);

                if(suffix!=null){
                    if(suffix.startsWith("1")){
                        r.setPairnum(0);
                    }else if(suffix.startsWith("2")){
                        r.setPairnum(1);
                    }
                }else{
                    //pairnum cannot be determined
                }
            }else{
                //pairnum cannot be determined
            }
        }else{
            //pairnum cannot be determined
        }

        if(addcolon){
            String colon=(r.pairnum()==0 ? colon1 : colon2);
            if(!r.id.contains(colon)){r.id+=colon;}
        }

        Read old=pairMap.remove(prefix);

//      System.out.println("Processing:\n"+r+"\n"+old+"\n"+readsIn+", "+readsOut+", "+pairsOut);

        if(old==null){
            pairMap.put(prefix, r);
            return null;
        }else{
            r.mate=old;
            old.mate=r;

            int len=r.length()+old.length();
            pairsOut+=2;
            pairBasesOut+=len;

            //The earlier read keeps pairnum 1 only if it was already marked so; otherwise it becomes read 1
            if(old.pairnum()==1){
                r.setPairnum(0);
                return r;
            }else{
                old.setPairnum(0);
                r.setPairnum(1);
                return old;
            }
        }
    }


    private String in1=null, in2=null;
    private String out1=null, out2=null;
    private String outsingle=null;
    private long maxReads=-1;
    /** Set when closing the streams reports a failure */
    public boolean errorState=false;

    long readsIn=0;
    long basesIn=0;
    long readsOut=0;
    long basesOut=0;
    long pairsOut=0;
    long pairBasesOut=0;
    long singlesOut=0;
    long singleBasesOut=0;
    long readsTrimmed=0;
    long basesTrimmed=0;

    /** Repair mode only: reads awaiting a mate, keyed by name prefix; null in other modes */
    private final LinkedHashMap<String, Read> pairMap;

    private float trimq=6;
    /** Error rate for trimming (derived from trimq) */
    private final float trimE;
    private int minReadLength=20;
    private final boolean qtrimLeft, qtrimRight;

    private final boolean fixInterleaving;
    private final boolean allowIdenticalPairNames;
    private final boolean repair;

    //NOTE(review): addslash is parsed from the command line but not applied anywhere in this class
    private boolean addslash=false;
    private boolean addcolon=false;

    private static PrintStream outstream=System.err;
    /** Permission to overwrite existing files */
    public static boolean overwrite=false;
    /** Permission to append to existing files */
    public static boolean append=false;
    public static boolean showSpeed=true;
    public static boolean verbose=false;

    private static final String slash1=" /1";
    private static final String slash2=" /2";
    private static final String colon1=" 1:";
    private static final String colon2=" 2:";

}