1 package jgi; 2 3 import java.io.File; 4 import java.io.PrintStream; 5 import java.util.ArrayList; 6 import java.util.Locale; 7 8 import align2.QualityTools; 9 import dna.AminoAcid; 10 import fileIO.ByteFile; 11 import fileIO.ByteFile1; 12 import fileIO.ByteFile2; 13 import fileIO.FileFormat; 14 import fileIO.ReadWrite; 15 import shared.Parse; 16 import shared.Parser; 17 import shared.PreParser; 18 import shared.ReadStats; 19 import shared.Shared; 20 import shared.Timer; 21 import shared.Tools; 22 import stream.ConcurrentGenericReadInputStream; 23 import stream.ConcurrentReadInputStream; 24 import stream.ConcurrentReadOutputStream; 25 import stream.FASTQ; 26 import stream.FastaReadInputStream; 27 import stream.Read; 28 import structures.ListNum; 29 30 /** 31 * @author Brian Bushnell 32 * @date Mar 16, 2014 33 * 34 */ 35 public class AddAdapters { 36 main(String[] args)37 public static void main(String[] args){ 38 Timer t=new Timer(); 39 AddAdapters x=new AddAdapters(args); 40 if(x.writeMode){ 41 x.write(t); 42 }else{ 43 x.read(t); 44 } 45 46 //Close the print stream if it was redirected 47 Shared.closeStream(x.outstream); 48 } 49 AddAdapters(String[] args)50 public AddAdapters(String[] args){ 51 52 {//Preparse block for help, config files, and outstream 53 PreParser pp=new PreParser(args, getClass(), false); 54 args=pp.args; 55 outstream=pp.outstream; 56 } 57 58 Parser parser=new Parser(); 59 60 61 Shared.capBuffers(4); 62 ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true; 63 ReadWrite.MAX_ZIP_THREADS=Shared.threads(); 64 FASTQ.PARSE_CUSTOM=true; 65 66 67 for(int i=0; i<args.length; i++){ 68 String arg=args[i]; 69 String[] split=arg.split("="); 70 String a=split[0].toLowerCase(); 71 String b=split.length>1 ? split[1] : null; 72 73 if(Parser.parseCommonStatic(arg, a, b)){ 74 //do nothing 75 }else if(Parser.parseZip(arg, a, b)){ 76 //do nothing 77 }else if(Parser.parseQuality(arg, a, b)){ 78 //do nothing 79 }else if(Parser.parseFasta(arg, a, b)){ 80 //do nothing 81 }else if(parser.parseInterleaved(arg, a, b)){ 82 //do nothing 83 }else if(a.equals("verbose")){ 84 verbose=Parse.parseBoolean(b); 85 ByteFile1.verbose=verbose; 86 ByteFile2.verbose=verbose; 87 stream.FastaReadInputStream.verbose=verbose; 88 ConcurrentGenericReadInputStream.verbose=verbose; 89 stream.FastqReadInputStream.verbose=verbose; 90 ReadWrite.verbose=verbose; 91 }else if(a.equals("reads") || a.equals("maxreads")){ 92 maxReads=Parse.parseKMG(b); 93 }else if(a.equals("t") || a.equals("threads")){ 94 Shared.setThreads(b); 95 }else if(a.equals("in") || a.equals("input") || a.equals("in1") || a.equals("input1")){ 96 in1=b; 97 }else if(a.equals("in2") || a.equals("input2")){ 98 in2=b; 99 }else if(a.equals("out") || a.equals("output") || a.equals("out1") || a.equals("output1")){ 100 out1=b; 101 }else if(a.equals("out2") || a.equals("output2")){ 102 out2=b; 103 }else if(a.equals("extin")){ 104 extin=b; 105 }else if(a.equals("extout")){ 106 extout=b; 107 }else if(a.equals("adapter") || a.equals("adapters") || a.equals("ref")){ 108 adapterFile=b; 109 }else if(a.equals("literal") || a.equals("literals")){ 110 literals=(b==null ? null : b.split(",")); 111 }else if(a.equals("rate") || a.equals("prob")){ 112 adapterProb=Float.parseFloat(b); 113 }else if(a.equals("minlength") || a.equals("minlen") || a.equals("ml")){ 114 minlen=Integer.parseInt(b); 115 }else if(a.equals("3'") || a.equalsIgnoreCase("3prime") || a.equalsIgnoreCase("3-prime") || a.equalsIgnoreCase("right") || a.equalsIgnoreCase("r")){ 116 right=Parse.parseBoolean(b); 117 }else if(a.equals("5'") || a.equalsIgnoreCase("5prime") || a.equalsIgnoreCase("5-prime") || a.equalsIgnoreCase("left") || a.equalsIgnoreCase("l")){ 118 right=!Parse.parseBoolean(b); 119 }else if(a.equals("end")){ 120 assert(b!=null) : "Bad parameter: "+arg; 121 if(b.equals("3'") || b.equalsIgnoreCase("3prime") || b.equalsIgnoreCase("3-prime") || b.equalsIgnoreCase("right") || a.equalsIgnoreCase("r")){ 122 right=true; 123 }else if(b.equals("5'") || b.equalsIgnoreCase("5prime") || b.equalsIgnoreCase("5-prime") || b.equalsIgnoreCase("left") || a.equalsIgnoreCase("l")){ 124 right=true; 125 } 126 }else if(a.equals("addslash")){ 127 addslash=Parse.parseBoolean(b); 128 }else if(a.equals("adderrors")){ 129 adderrors=Parse.parseBoolean(b); 130 }else if(a.equals("addreversecomplement") || a.equals("arc")){ 131 addRC=Parse.parseBoolean(b); 132 }else if(a.equals("addpaired")){ 133 addPaired=Parse.parseBoolean(b); 134 }else if(a.equals("append") || a.equals("app")){ 135 append=ReadStats.append=Parse.parseBoolean(b); 136 }else if(a.equals("overwrite") || a.equals("ow")){ 137 overwrite=Parse.parseBoolean(b); 138 }else if(a.equals("write")){ 139 writeMode=Parse.parseBoolean(b); 140 }else if(a.equals("grade")){ 141 writeMode=!Parse.parseBoolean(b); 142 }else if(a.equals("mode")){ 143 if("grade".equalsIgnoreCase(b) || "read".equalsIgnoreCase(b)){ 144 writeMode=false; 145 }else if("generate".equalsIgnoreCase(b) || "write".equalsIgnoreCase(b) || "add".equalsIgnoreCase(b)){ 146 writeMode=true; 147 }else{ 148 throw new RuntimeException("Unknown mode "+b); 149 } 150 }else if(in1==null && i==0 && !arg.contains("=") && (arg.toLowerCase().startsWith("stdin") || new File(arg).exists())){ 151 in1=arg; 152 }else{ 153 System.err.println("Unknown parameter "+args[i]); 154 assert(false) : "Unknown parameter "+args[i]; 155 // throw new RuntimeException("Unknown parameter "+args[i]); 156 } 157 } 158 159 {//Process parser fields 160 Parser.processQuality(); 161 } 162 163 if(in1!=null && in2==null && in1.indexOf('#')>-1 && !new File(in1).exists()){ 164 in2=in1.replace("#", "2"); 165 in1=in1.replace("#", "1"); 166 } 167 if(out1!=null && out2==null && out1.indexOf('#')>-1){ 168 out2=out1.replace("#", "2"); 169 out1=out1.replace("#", "1"); 170 } 171 if(in2!=null){ 172 if(FASTQ.FORCE_INTERLEAVED){System.err.println("Reset INTERLEAVED to false because paired input files were specified.");} 173 FASTQ.FORCE_INTERLEAVED=FASTQ.TEST_INTERLEAVED=false; 174 } 175 176 assert(FastaReadInputStream.settingsOK()); 177 178 if(in1==null){throw new RuntimeException("Error - at least one input file is required.");} 179 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){ 180 // if(ReadWrite.isCompressed(in1)){ByteFile.FORCE_MODE_BF2=true;} 181 ByteFile.FORCE_MODE_BF2=true; 182 } 183 184 if(writeMode && out1==null){ 185 if(out2!=null){throw new RuntimeException("Error - cannot define out2 without defining out1.");} 186 System.err.println("No output stream specified. To write to stdout, please specify 'out=stdout.fq' or similar."); 187 } 188 189 if(!parser.setInterleaved){ 190 assert(in1!=null && (!writeMode || out1!=null)) : "\nin1="+in1+"\nin2="+in2+"\nout1="+out1+"\nout2="+out2+"\n"; 191 if(in2!=null){ //If there are 2 input streams. 192 FASTQ.FORCE_INTERLEAVED=FASTQ.TEST_INTERLEAVED=false; 193 outstream.println("Set INTERLEAVED to "+FASTQ.FORCE_INTERLEAVED); 194 }else if(writeMode){ //There is one input stream. 195 if(out2!=null){ 196 FASTQ.FORCE_INTERLEAVED=true; 197 FASTQ.TEST_INTERLEAVED=false; 198 outstream.println("Set INTERLEAVED to "+FASTQ.FORCE_INTERLEAVED); 199 } 200 } 201 } 202 203 if(out1!=null && out1.equalsIgnoreCase("null")){out1=null;} 204 if(out2!=null && out2.equalsIgnoreCase("null")){out2=null;} 205 206 if(!Tools.testOutputFiles(overwrite, append, false, out1, out2)){ 207 throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+out1+", "+out2+"\n"); 208 } 209 210 ffout1=FileFormat.testOutput(out1, FileFormat.FASTQ, extout, true, overwrite, append, false); 211 ffout2=FileFormat.testOutput(out2, FileFormat.FASTQ, extout, true, overwrite, append, false); 212 213 ffin1=FileFormat.testInput(in1, FileFormat.FASTQ, extin, true, true); 214 ffin2=FileFormat.testInput(in2, FileFormat.FASTQ, extin, true, true); 215 216 ffa=FileFormat.testInput(adapterFile, FileFormat.FASTA, null, true, true); 217 218 adapters=makeAdapterList(); 219 220 if(writeMode){ 221 if(adapters==null || adapters.isEmpty()){ 222 throw new RuntimeException("\n\nPlease specify adapters with 'adapters=file.fa' or 'literal=AGCTACGT'\n"); 223 } 224 randy=Shared.threadLocalRandom(); 225 } 226 } 227 makeAdapterList()228 private final ArrayList<byte[]> makeAdapterList(){ 229 boolean oldTI=FASTQ.TEST_INTERLEAVED; 230 boolean oldFI=FASTQ.FORCE_INTERLEAVED; 231 FASTQ.TEST_INTERLEAVED=false; 232 FASTQ.FORCE_INTERLEAVED=false; 233 ArrayList<byte[]> x=makeAdapterList2(); 234 FASTQ.TEST_INTERLEAVED=oldTI; 235 FASTQ.FORCE_INTERLEAVED=oldFI; 236 return x; 237 } 238 makeAdapterList2()239 private final ArrayList<byte[]> makeAdapterList2(){ 240 if(ffa==null && literals==null){return null;} 241 ArrayList<byte[]> list=new ArrayList<byte[]>(); 242 if(ffa!=null){ 243 FastaReadInputStream fris=new FastaReadInputStream(ffa, false, false, -1); 244 for(Read r=fris.next(); r!=null; r=fris.next()){ 245 if(r.bases!=null){ 246 list.add(r.bases); 247 } 248 } 249 fris.close(); 250 } 251 if(literals!=null){ 252 for(String s : literals){ 253 if(s!=null && !"null".equalsIgnoreCase(s)){ 254 list.add(s.getBytes()); 255 } 256 } 257 } 258 259 if(addRC){ 260 int x=list.size(); 261 for(int i=0; i<x; i++){ 262 list.add(AminoAcid.reverseComplementBases(list.get(i))); 263 } 264 } 265 266 return list.size()>0 ? list : null; 267 } 268 write(Timer t)269 void write(Timer t){ 270 271 final ConcurrentReadInputStream cris; 272 { 273 cris=ConcurrentReadInputStream.getReadInputStream(maxReads, true, ffin1, ffin2, null, null); 274 if(verbose){System.err.println("Started cris");} 275 cris.start(); //4567 276 } 277 boolean paired=cris.paired(); 278 if(verbose){System.err.println("Input is "+(paired ? "paired" : "unpaired"));} 279 280 ConcurrentReadOutputStream ros=null; 281 if(out1!=null){ 282 final int buff=4; 283 284 if(cris.paired() && out2==null && (in1==null || !in1.contains(".sam"))){ 285 outstream.println("Writing interleaved."); 286 } 287 288 assert(!out1.equalsIgnoreCase(in1) && !out1.equalsIgnoreCase(in1)) : "Input file and output file have same name."; 289 assert(out2==null || (!out2.equalsIgnoreCase(in1) && !out2.equalsIgnoreCase(in2))) : "out1 and out2 have same name."; 290 291 ros=ConcurrentReadOutputStream.getStream(ffout1, ffout2, null, null, buff, null, false); 292 ros.start(); 293 } 294 295 { 296 297 ListNum<Read> ln=cris.nextList(); 298 ArrayList<Read> reads=(ln!=null ? ln.list : null); 299 300 // System.err.println("Fetched "+reads); 301 302 if(reads!=null && !reads.isEmpty()){ 303 Read r=reads.get(0); 304 assert((ffin1==null || ffin1.samOrBam()) || (r.mate!=null)==cris.paired()); 305 } 306 307 while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning 308 309 for(int idx=0; idx<reads.size(); idx++){ 310 final Read r1=reads.get(idx); 311 final Read r2=r1.mate; 312 313 final int initialLength1=r1.length(); 314 final int initialLength2=(r1.mateLength()); 315 316 addAdapter(r1, addPaired); 317 if(r2!=null && !addPaired){ 318 addAdapter(r2, addPaired); 319 } 320 321 if(r2==null){ 322 r1.id=r1.numericID+"_"+r1.id; 323 }else{ 324 String base=r1.numericID+"_"+r1.id+"_"+r2.id; 325 if(addslash){ 326 r1.id=base+" /1"; 327 r2.id=base+" /2"; 328 }else{ 329 r1.id=base; 330 r2.id=base; 331 } 332 } 333 } 334 335 if(ros!=null){ros.add(reads, ln.id);} 336 337 cris.returnList(ln); 338 ln=cris.nextList(); 339 reads=(ln!=null ? ln.list : null); 340 } 341 if(ln!=null){ 342 cris.returnList(ln.id, ln.list==null || ln.list.isEmpty()); 343 } 344 } 345 346 errorState|=ReadWrite.closeStreams(cris, ros); 347 348 // System.err.println(cris.errorState()+", "+(ros==null ? "null" : (ros.errorState()+", "+ros.finishedSuccessfully()))); 349 // if(ros!=null){ 350 // ReadStreamWriter rs1=ros.getRS1(); 351 // ReadStreamWriter rs2=ros.getRS2(); 352 // System.err.println(rs1==null ? "null" : rs1.finishedSuccessfully()); 353 // System.err.println(rs2==null ? "null" : rs2.finishedSuccessfully()); 354 // } 355 // assert(false); 356 357 t.stop(); 358 359 outstream.println("Adapters Added: \t"+adaptersAdded+" reads ("+String.format(Locale.ROOT, "%.2f",adaptersAdded*100.0/readsProcessed)+"%) \t"+ 360 adapterBasesAdded+" bases ("+String.format(Locale.ROOT, "%.2f",adapterBasesAdded*100.0/basesProcessed)+"%)"); 361 362 outstream.println("Valid Output: \t"+validReads+" reads ("+String.format(Locale.ROOT, "%.2f",validReads*100.0/readsProcessed)+"%) \t"+ 363 validBases+" bases ("+String.format(Locale.ROOT, "%.2f",validBases*100.0/basesProcessed)+"%)"); 364 365 outstream.println(); 366 outstream.println(Tools.timeReadsBasesProcessed(t, readsProcessed, basesProcessed, 8)); 367 368 if(errorState){ 369 throw new RuntimeException("ReformatReads terminated in an error state; the output may be corrupt."); 370 } 371 } 372 addAdapter(Read r, final int loc)373 private void addAdapter(Read r, final int loc){ 374 final byte[] bases=r.bases; 375 final byte[] quals=r.quality; 376 final int remaining, initial=(bases==null ? 0 : bases.length); 377 final byte[] adapter; 378 int ab=0, rb=0; 379 380 readsProcessed++; 381 basesProcessed+=initial; 382 383 if(bases==null){assert(false); return;} 384 if(initial>0 && loc>=0 && loc<initial){ 385 adapter=adapters.get(randy.nextInt(adapters.size())); 386 adaptersAdded++; 387 388 if(right){ 389 final int lim=Tools.min(initial, adapter.length+loc); 390 for(int i=loc, j=0; i<lim; i++, j++){ 391 if(AminoAcid.isFullyDefined(bases[i])){ 392 bases[i]=adapter[j]; 393 if(adderrors){ 394 byte q=(quals==null ? 30 : quals[i]); 395 if(randy.nextFloat()<QualityTools.PROB_ERROR[q]){ 396 int old=AminoAcid.baseToNumber[bases[i]]; 397 bases[i]=AminoAcid.numberToBase[(old+randy.nextInt(3)+1)&3]; 398 } 399 } 400 } 401 ab++; 402 } 403 for(int i=lim; i<initial; i++){ 404 if(AminoAcid.isFullyDefined(bases[i])){ 405 bases[i]=AminoAcid.numberToBase[randy.nextInt(4)]; 406 } 407 rb++; 408 } 409 remaining=loc; 410 }else{ 411 final int lim=Tools.max(-1, loc-adapter.length); 412 for(int i=loc, j=adapter.length-1; i>lim; i--, j--){ 413 if(AminoAcid.isFullyDefined(bases[i])){ 414 bases[i]=adapter[j]; 415 if(adderrors){ 416 byte q=(quals==null ? 30 : quals[i]); 417 if(randy.nextFloat()<QualityTools.PROB_ERROR[q]){ 418 int old=AminoAcid.baseToNumber[bases[i]]; 419 bases[i]=AminoAcid.numberToBase[(old+randy.nextInt(3)+1)&3]; 420 } 421 } 422 } 423 ab++; 424 } 425 for(int i=lim; i>-1; i--){ 426 if(AminoAcid.isFullyDefined(bases[i])){ 427 bases[i]=AminoAcid.numberToBase[randy.nextInt(4)]; 428 } 429 rb++; 430 } 431 remaining=initial-loc-1; 432 } 433 assert(remaining<initial) : "\nremaining="+remaining+", initial="+initial+", rb="+rb+", ab="+ab+ 434 ", loc="+loc+", adapter.length="+(adapter==null ? 0 : adapter.length)+"\n"; 435 }else{ 436 adapter=null; 437 remaining=initial; 438 } 439 440 assert(remaining==initial-(rb+ab)); 441 assert(remaining>=0); 442 443 adapterBasesAdded+=ab; 444 randomBasesAdded+=rb; 445 r.id=initial+"_"+remaining; 446 if(remaining>=minlen){ 447 validReads++; 448 validBases+=remaining; 449 } 450 } 451 addAdapter(Read r, boolean addPaired)452 private void addAdapter(Read r, boolean addPaired){ 453 final byte[] bases=r.bases; 454 final int initial=(bases==null ? 0 : bases.length); 455 final int loc; 456 457 if(initial>0 && randy.nextFloat()<adapterProb){ 458 loc=randy.nextInt(initial); 459 }else{ 460 loc=-1; 461 } 462 463 addAdapter(r, loc); 464 if(addPaired && r.mate!=null){addAdapter(r.mate, loc);} 465 } 466 467 /*--------------------------------------------------------------*/ 468 read(Timer t)469 void read(Timer t){ 470 471 final ConcurrentReadInputStream cris; 472 { 473 cris=ConcurrentReadInputStream.getReadInputStream(maxReads, true, ffin1, ffin2, null, null); 474 if(verbose){System.err.println("Started cris");} 475 cris.start(); //4567 476 } 477 boolean paired=cris.paired(); 478 if(verbose){System.err.println("Input is "+(paired ? "paired" : "unpaired"));} 479 480 { 481 482 ListNum<Read> ln=cris.nextList(); 483 ArrayList<Read> reads=(ln!=null ? ln.list : null); 484 485 // System.err.println("Fetched "+reads); 486 487 if(reads!=null && !reads.isEmpty()){ 488 Read r=reads.get(0); 489 assert((ffin1==null || ffin1.samOrBam()) || (r.mate!=null)==cris.paired()); 490 } 491 492 while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning 493 494 for(int idx=0; idx<reads.size(); idx++){ 495 final Read r1=reads.get(idx); 496 final Read r2=r1.mate; 497 498 grade(r1, r2); 499 } 500 501 cris.returnList(ln); 502 ln=cris.nextList(); 503 reads=(ln!=null ? ln.list : null); 504 } 505 if(ln!=null){ 506 cris.returnList(ln.id, ln.list==null || ln.list.isEmpty()); 507 } 508 } 509 510 errorState|=ReadWrite.closeStream(cris); 511 512 t.stop(); 513 514 long validBasesRemoved=validBasesExpected-validBasesCounted; 515 long incorrect=readsProcessed-correct; 516 long incorrectBases=basesProcessed-correctBases; 517 518 outstream.println("Total output: \t"+readsProcessed+" reads \t"+basesProcessed+" bases "); 519 outstream.println("Perfectly Correct (% of output): \t"+correct+" reads ("+String.format(Locale.ROOT, "%.3f",correct*100.0/readsProcessed)+ 520 "%) \t"+correctBases+" bases ("+String.format(Locale.ROOT, "%.3f",correctBases*100.0/basesProcessed)+"%)"); 521 outstream.println("Incorrect (% of output): \t"+incorrect+" reads ("+String.format(Locale.ROOT, "%.3f",incorrect*100.0/readsProcessed)+ 522 "%) \t"+incorrectBases+" bases ("+String.format(Locale.ROOT, "%.3f",incorrectBases*100.0/basesProcessed)+"%)"); 523 outstream.println(); 524 // outstream.println("Too Short: \t"+tooShort+" reads ("+String.format(Locale.ROOT, "%.3f",tooShort*100.0/readsProcessed)+"%) \t"+ 525 // tooShortBases+" bases ("+String.format(Locale.ROOT, "%.3f",tooShortBases*100.0/basesProcessed)+"%)"); 526 // outstream.println("Too Long: \t"+tooLong+" reads ("+String.format(Locale.ROOT, "%.3f",tooLong*100.0/readsProcessed)+"%) \t"+ 527 // tooLongBases+" bases ("+String.format(Locale.ROOT, "%.3f",tooLongBases*100.0/basesProcessed)+"%)"); 528 529 outstream.println("Adapters Remaining (% of adapters): \t"+(adapterReadsRemaining)+" reads ("+String.format(Locale.ROOT, "%.3f",adapterReadsRemaining*100.0/adapterReadsTotal)+ 530 "%) \t"+adapterBasesRemaining+" bases ("+String.format(Locale.ROOT, "%.3f",adapterBasesRemaining*100.0/basesProcessed)+"%)"); 531 outstream.println("Non-Adapter Removed (% of valid): \t"+tooShort+" reads ("+String.format(Locale.ROOT, "%.4f",tooShort*100.0/readsProcessed)+ 532 "%) \t"+validBasesRemoved+" bases ("+String.format(Locale.ROOT, "%.4f",validBasesRemoved*100.0/validBasesExpected)+"%)"); 533 534 if(broken>0 || mispaired>0){ 535 outstream.println("Broken: \t"+broken+" reads ("+String.format(Locale.ROOT, "%.2f",broken*100.0/readsProcessed)+"%)"); 536 outstream.println("Mispaired: \t"+mispaired+" reads ("+String.format(Locale.ROOT, "%.2f",mispaired*100.0/readsProcessed)+"%)"); 537 } 538 539 if(errorState){ 540 throw new RuntimeException("ReformatReads terminated in an error state; the output may be corrupt."); 541 } 542 } 543 544 // private void grade_old(Read r1, Read r2){ 545 // 546 // final String a=r1.id.split(" ")[0]; 547 // final String b=(r2==null ? a : r2.id.split(" ")[0]); 548 // final int len=a.split("_").length; 549 // 550 // if(r2!=null){ 551 // if(r1.id.endsWith(" /2") || r2.id.endsWith(" /1") || !a.equals(b)){ 552 // mispaired+=2; 553 // } 554 // if(len==3){ 555 // r2.setPairnum(0); 556 // }else if(len==5){ 557 // if(r1.id.endsWith(" /2")){r1.setPairnum(1);} 558 // if(r2.id.endsWith(" /1")){r2.setPairnum(0);} 559 // }else{ 560 // throw new RuntimeException("Headers are corrupt. They must be generated by AddAdapters or RenameReads."); 561 // } 562 // }else{ 563 // if(len!=3){ 564 // throw new RuntimeException("Headers are corrupt, or paired reads are being processed as unpaired. Try running with 'int=t' or with 'in1=' and 'in2='"); 565 // } 566 // } 567 // grade(r1); 568 // grade(r2); 569 // } 570 grade(Read r1, Read r2)571 private void grade(Read r1, Read r2){ 572 grade(r1); 573 grade(r2); 574 } 575 grade(Read r)576 private void grade(Read r){ 577 if(r==null){return;} 578 579 int insert=r.insert(); 580 int originalLength=r.stop-r.start+1; 581 int length=r.length(); 582 583 final int offset=(2*r.pairnum()); 584 585 // String[] sa=r.id.split(" ")[0].split("_"); 586 // final long id=Long.parseLong(sa[0]); 587 final int initial=originalLength; 588 final int remaining=Tools.min(initial, insert); 589 final int actual=length; 590 591 readsProcessed++; 592 basesProcessed+=actual; 593 594 assert(initial>=remaining); 595 596 if(actual>initial){broken++;} 597 598 validBasesExpected+=remaining; 599 600 601 // System.err.println("initial="+initial+", remaining="+remaining+", actual="+actual); 602 603 if(initial==remaining){//Should not have trimmed 604 if(actual==remaining || (actual<2 && (remaining<1 || remaining<minlen))){ 605 correct++; 606 correctBases+=remaining; 607 validBasesCounted+=remaining; 608 trueNeg++; 609 }else if(actual<remaining){ 610 tooShort++; 611 tooShortReadBases+=actual; 612 tooShortBases+=(remaining-actual); 613 validBasesCounted+=actual; 614 falsePos++; 615 }else if(actual>remaining){ 616 tooLong++; 617 tooLongReadBases+=remaining; 618 tooLongBases+=(actual-remaining); 619 validBasesCounted+=remaining; 620 falseNeg++; 621 } 622 }else{//Should have trimmed 623 624 adapterBasesTotal+=(initial-remaining); 625 adapterReadsTotal++; 626 627 if(actual==remaining || (actual<2 && (remaining<1 || remaining<minlen))){ 628 correct++; 629 correctBases+=remaining; 630 validBasesCounted+=remaining; 631 truePos++; 632 }else if(actual<remaining){ 633 tooShort++; 634 tooShortReadBases+=actual; 635 tooShortBases+=(remaining-actual); 636 validBasesCounted+=actual; 637 truePos++; 638 }else if(actual>remaining){ 639 tooLong++; 640 tooLongReadBases+=actual; 641 tooLongBases+=(actual-remaining); 642 adapterBasesRemaining+=(actual-remaining); 643 validBasesCounted+=remaining; 644 falseNeg++; 645 adapterReadsRemaining++; 646 } 647 } 648 } 649 650 /*--------------------------------------------------------------*/ 651 652 public boolean errorState=false; 653 654 private String in1=null; 655 private String in2=null; 656 657 private String out1=null; 658 private String out2=null; 659 660 private String extin=null; 661 private String extout=null; 662 663 private String adapterFile=null; 664 private String[] literals=null; 665 666 private boolean overwrite=false; 667 private boolean append=false; 668 669 /** Add /1 and /2 to paired reads */ 670 private boolean addslash=true; 671 /** Encode correct answer in read ID field */ 672 private boolean changename=true; 673 /** Add errors from quality value */ 674 private boolean adderrors=true; 675 676 /** Add adapters to the same location for read 1 and read 2 */ 677 private boolean addPaired=true; 678 /** Add reverse-complemented adapters also */ 679 private boolean addRC=false; 680 /** aka 3' */ 681 private boolean right=true; 682 683 private long maxReads=-1; 684 private int minlen=1; 685 686 private boolean writeMode=true; 687 private float adapterProb=0.5f; 688 689 private long readsProcessed=0; 690 private long basesProcessed=0; 691 private long adaptersAdded=0; 692 private long adapterBasesAdded=0; 693 private long randomBasesAdded=0; 694 private long validReads=0; 695 private long validBases=0; 696 697 private long truePos=0; 698 private long trueNeg=0; 699 private long falsePos=0; 700 private long falseNeg=0; 701 private long broken=0; 702 private long mispaired=0; 703 704 private long tooShort=0; 705 private long tooLong=0; 706 private long correct=0; 707 private long fullyRemoved=0; 708 709 private long tooShortBases=0; 710 private long tooLongBases=0; 711 private long tooShortReadBases=0; 712 private long tooLongReadBases=0; 713 private long correctBases=0; 714 715 private long validBasesCounted=0; 716 private long validBasesExpected=0; 717 718 // private long invalidBasesCounted=0; 719 private long adapterBasesTotal=0; 720 private long adapterReadsTotal=0; 721 private long adapterReadsRemaining=0; 722 private long adapterBasesRemaining=0; 723 724 private final FileFormat ffin1; 725 private final FileFormat ffin2; 726 727 private final FileFormat ffout1; 728 private final FileFormat ffout2; 729 730 private final FileFormat ffa; 731 732 private final ArrayList<byte[]> adapters; 733 734 /*--------------------------------------------------------------*/ 735 736 private PrintStream outstream=System.err; 737 public static boolean verbose=false; 738 739 private java.util.Random randy; 740 741 } 742