1 package jgi;
2 
3 import java.io.File;
4 import java.io.PrintStream;
5 import java.util.ArrayList;
6 import java.util.LinkedHashMap;
7 import java.util.Locale;
8 
9 import fileIO.ByteFile;
10 import fileIO.FileFormat;
11 import fileIO.ReadWrite;
12 import shared.Parse;
13 import shared.Parser;
14 import shared.PreParser;
15 import shared.ReadStats;
16 import shared.Shared;
17 import shared.Timer;
18 import shared.Tools;
19 import shared.TrimRead;
20 import stream.ConcurrentReadInputStream;
21 import stream.ConcurrentReadOutputStream;
22 import stream.DualCris;
23 import stream.FASTQ;
24 import stream.FastaReadInputStream;
25 import stream.Read;
26 import stream.SamLine;
27 import structures.ListNum;
28 
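/**
 * Separates sequencing reads into properly paired reads and singletons.
 * Supports three modes: plain splitting of paired/interleaved input (with
 * optional quality trimming and a minimum length filter), fixing broken
 * interleaving by testing the names of adjacent reads, and repairing
 * arbitrarily disordered pairs by matching read names.
 *
 * @author Brian Bushnell
 * @date Sep 4, 2013
 *
 */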
34 public final class SplitPairsAndSingles {
35 
main(String[] args)36 	public static void main(String[] args){
37 		SplitPairsAndSingles x=new SplitPairsAndSingles(args);
38 		x.process();
39 
40 		//Close the print stream if it was redirected
41 		Shared.closeStream(outstream);
42 	}
43 
SplitPairsAndSingles(String[] args)44 	public SplitPairsAndSingles(String[] args){
45 
46 		{//Preparse block for help, config files, and outstream
47 			PreParser pp=new PreParser(args, getClass(), false);
48 			args=pp.args;
49 			outstream=pp.outstream;
50 		}
51 
52 		ReadWrite.ZIPLEVEL=2;
53 		ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
54 		ReadWrite.MAX_ZIP_THREADS=Shared.threads();
55 
56 		if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){
57 			ByteFile.FORCE_MODE_BF2=true;
58 		}
59 
60 		Parser parser=new Parser();
61 		parser.trimq=trimq;
62 		parser.minReadLength=minReadLength;
63 		boolean setOut=false, setOuts=false;
64 		boolean fixInterleaving_=false, repair_=false, allowIdenticalPairNames_=false;
65 
66 		for(int i=0; i<args.length; i++){
67 
68 			final String arg=args[i];
69 			String[] split=arg.split("=");
70 			String a=split[0].toLowerCase();
71 			String b=split.length>1 ? split[1] : null;
72 
73 			if(Parser.parseCommonStatic(arg, a, b)){
74 				//do nothing
75 			}else if(Parser.parseZip(arg, a, b)){
76 				//do nothing
77 			}else if(Parser.parseQuality(arg, a, b)){
78 				//do nothing
79 			}else if(Parser.parseFasta(arg, a, b)){
80 				//do nothing
81 			}else if(parser.parseInterleaved(arg, a, b)){
82 				//do nothing
83 			}else if(parser.parseTrim(arg, a, b)){
84 				//do nothing
85 			}else if(a.equals("in") || a.equals("in1")){
86 				in1=b;
87 			}else if(a.equals("in2")){
88 				in2=b;
89 			}else if(a.equals("out") || a.equals("out1") || a.equals("outp") || a.equals("outp1") || a.equals("outpair") || a.equals("outpair1")){
90 				out1=b;
91 				setOut=true;
92 			}else if(a.equals("out2") || a.equals("outp2") || a.equals("outpair2")){
93 				out2=b;
94 			}else if(a.equals("outs") || a.equals("outsingle") || a.equals("outb") || a.equals("outbad")){
95 				outsingle=b;
96 				setOut=true;
97 			}else if(a.equals("append") || a.equals("app")){
98 				append=ReadStats.append=Parse.parseBoolean(b);
99 			}else if(a.equals("overwrite") || a.equals("ow")){
100 				overwrite=Parse.parseBoolean(b);
101 			}else if(a.equals("showspeed") || a.equals("ss")){
102 				showSpeed=Parse.parseBoolean(b);
103 			}else if(a.equals("verbose")){
104 				verbose=Parse.parseBoolean(b);
105 			}else if(a.equals("addslash")){
106 				addslash=Parse.parseBoolean(b);
107 			}else if(a.equals("addcolon")){
108 				addcolon=Parse.parseBoolean(b);
109 			}else if(a.equals("reads") || a.startsWith("maxreads")){
110 				maxReads=Parse.parseKMG(b);
111 			}else if(a.equals("fixinterleaving") || a.equals("fi") || a.equals("fint") || a.equals("fixint")){
112 				fixInterleaving_=Parse.parseBoolean(b);
113 				if(fixInterleaving_){repair_=false;}
114 			}else if(a.equals("allowidenticalnames") || a.equals("ain")){
115 				allowIdenticalPairNames_=Parse.parseBoolean(b);
116 			}else if(a.equals("repair") || a.equals("rp")){
117 				repair_=Parse.parseBoolean(b);
118 				if(repair_){fixInterleaving_=false;}
119 			}else if(i==0 && in1==null && arg.indexOf('=')<0 && arg.lastIndexOf('.')>0){
120 				in1=args[i];
121 			}else if(i==1 && out1==null && arg.indexOf('=')<0 && arg.lastIndexOf('.')>0){
122 				out1=args[i];
123 				setOut=true;
124 			}else if(i==2 && outsingle==null && arg.indexOf('=')<0 && arg.lastIndexOf('.')>0){
125 				outsingle=args[i];
126 				setOuts=true;
127 			}else{
128 				throw new RuntimeException("Unknown parameter "+args[i]);
129 			}
130 		}
131 
132 		{//Process parser fields
133 			Parser.processQuality();
134 
135 			qtrimLeft=parser.qtrimLeft;
136 			qtrimRight=parser.qtrimRight;
137 			trimq=parser.trimq;
138 			trimE=parser.trimE();
139 			minReadLength=parser.minReadLength;
140 		}
141 
142 		allowIdenticalPairNames=allowIdenticalPairNames_;
143 		fixInterleaving=fixInterleaving_;
144 		repair=repair_;
145 		assert(!repair || ! fixInterleaving) : "ERROR: Choose 'fixInterleaving' or 'repair', but not both.";
146 
147 		assert(FastaReadInputStream.settingsOK());
148 
149 		if(in1==null){throw new RuntimeException("Error - at least one input file is required.");}
150 
151 		if(in1!=null && in1.contains("#") && !new File(in1).exists()){
152 			int pound=in1.lastIndexOf('#');
153 			String a=in1.substring(0, pound);
154 			String b=in1.substring(pound+1);
155 			in1=a+1+b;
156 			in2=a+2+b;
157 		}
158 		if(in2!=null && (in2.contains("=") || in2.equalsIgnoreCase("null"))){in2=null;}
159 
160 		if(fixInterleaving){
161 			if(in2!=null){
162 				System.err.println("ERROR: 'FixInterleaving' mode only works with a single interleaved input file, not paired input files.");
163 				System.err.println("Aborting.");
164 				System.exit(1);
165 			}
166 			parser.setInterleaved=true;
167 			FASTQ.FORCE_INTERLEAVED=FASTQ.TEST_INTERLEAVED=false;
168 			outstream.println("Paired input disabled; running in FixInterleaving mode");
169 		}
170 
171 		if(repair){
172 			FASTQ.FORCE_INTERLEAVED=FASTQ.TEST_INTERLEAVED=false;
173 			outstream.println("Set INTERLEAVED to "+FASTQ.FORCE_INTERLEAVED);
174 		}else{
175 			if(!parser.setInterleaved && in2==null){
176 				FASTQ.FORCE_INTERLEAVED=FASTQ.TEST_INTERLEAVED=true;
177 				outstream.println("Set INTERLEAVED to "+FASTQ.FORCE_INTERLEAVED);
178 			}
179 			if(in2!=null){
180 				if(FASTQ.FORCE_INTERLEAVED){System.err.println("Reset INTERLEAVED to false because paired input files were specified.");}
181 				FASTQ.FORCE_INTERLEAVED=FASTQ.TEST_INTERLEAVED=false;
182 			}
183 		}
184 
185 		if(out1!=null && out1.contains("#")){
186 			int pound=out1.lastIndexOf('#');
187 			String a=out1.substring(0, pound);
188 			String b=out1.substring(pound+1);
189 			out1=a+1+b;
190 			out2=a+2+b;
191 		}
192 
193 		if(!setOut){
194 			System.err.println("No output stream specified.  To write to stdout, please specify 'out=stdout.fq' or similar.");
195 //			out1="stdout.fq";
196 			outstream=System.err;
197 			out2=null;
198 		}else if("stdout".equalsIgnoreCase(out1) || "standarddout".equalsIgnoreCase(out1)){
199 			out1="stdout.fq";
200 			outstream=System.err;
201 			out2=null;
202 		}
203 		if(out1!=null && !Tools.canWrite(out1, overwrite)){throw new RuntimeException("Output file "+out1+" already exists, and overwrite="+overwrite);}
204 
205 		assert(!in1.equalsIgnoreCase(out1));
206 		assert(!in1.equalsIgnoreCase(outsingle));
207 		assert(!in1.equalsIgnoreCase(in2));
208 		assert(out1==null || !out1.equalsIgnoreCase(out2)) : "out2 may not be defined without out1, and out1 may not equal out2.";
209 		assert(out1==null || !out1.equalsIgnoreCase(outsingle));
210 
211 		pairMap=(repair ? new LinkedHashMap<String, Read>() : null);
212 
213 		//Close the print stream if it was redirected
214 		Shared.closeStream(outstream);
215 	}
216 
process()217 	public void process(){
218 
219 		Timer t=new Timer();
220 
221 		process2();
222 
223 		t.stop();
224 
225 		outstream.println("\nInput:                  \t"+readsIn+" reads \t\t"+basesIn+" bases.");
226 
227 		if(qtrimLeft || qtrimRight){
228 			outstream.println("Trimmed:                \t"+readsTrimmed+" reads ("+String.format(Locale.ROOT, "%.2f",readsTrimmed*100.0/readsIn)+"%) \t"+
229 					basesTrimmed+" bases ("+String.format(Locale.ROOT, "%.2f",basesTrimmed*100.0/basesIn)+"%)");
230 		}
231 		outstream.println("Result:                 \t"+readsOut+" reads ("+String.format(Locale.ROOT, "%.2f",readsOut*100.0/readsIn)+"%) \t"+
232 				basesOut+" bases ("+String.format(Locale.ROOT, "%.2f",basesOut*100.0/basesIn)+"%)");
233 		outstream.println("Pairs:                  \t"+pairsOut+" reads ("+String.format(Locale.ROOT, "%.2f",pairsOut*100.0/readsIn)+"%) \t"+
234 				pairBasesOut+" bases ("+String.format(Locale.ROOT, "%.2f",pairBasesOut*100.0/basesIn)+"%)");
235 		outstream.println("Singletons:             \t"+singlesOut+" reads ("+String.format(Locale.ROOT, "%.2f",singlesOut*100.0/readsIn)+"%) \t"+
236 				singleBasesOut+" bases ("+String.format(Locale.ROOT, "%.2f",singleBasesOut*100.0/basesIn)+"%)");
237 
238 		if(showSpeed){
239 			outstream.println();
240 			outstream.println(Tools.timeReadsBasesProcessed(t, readsIn, basesIn, 8));
241 		}
242 
243 		if(errorState){
244 			throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
245 		}
246 	}
247 
process2()248 	private void process2(){
249 		final ConcurrentReadInputStream cris;
250 		if(in2!=null && repair){
251 			FileFormat ff1=FileFormat.testInput(in1, FileFormat.FASTQ, null, true, true);
252 			FileFormat ff2=FileFormat.testInput(in2, FileFormat.FASTQ, null, true, true);
253 			cris=DualCris.getReadInputStream(maxReads, true, ff1, ff2, null, null);
254 		}else{
255 			FileFormat ff1=FileFormat.testInput(in1, FileFormat.FASTQ, null, true, true);
256 			cris=ConcurrentReadInputStream.getReadInputStream(maxReads, true, ff1, null, null, null);
257 		}
258 		if(verbose){System.err.println("Started cris");}
259 		cris.start();
260 		boolean paired=cris.paired();
261 		if(verbose){System.err.println("Paired: "+paired);}
262 
263 		final ConcurrentReadOutputStream ros, rosb;
264 		final int buff=4;
265 		if(out1!=null){
266 			FileFormat ff1=FileFormat.testOutput(out1, FileFormat.FASTQ, null, true, overwrite, append, false);
267 			FileFormat ff2=FileFormat.testOutput(out2, FileFormat.FASTQ, null, true, overwrite, append, false);
268 			ros=ConcurrentReadOutputStream.getStream(ff1, ff2, buff, null, true);
269 			ros.start();
270 		}else{ros=null;}
271 		if(outsingle!=null){
272 			FileFormat ff1=FileFormat.testOutput(outsingle, FileFormat.FASTQ, null, true, overwrite, append, false);
273 			rosb=ConcurrentReadOutputStream.getStream(ff1, null, buff, null, true);
274 			rosb.start();
275 		}else{rosb=null;}
276 		if(ros!=null || rosb!=null){
277 			outstream.println("Started output stream.");
278 		}
279 
280 //		assert(false) : out1+", "+out2+", "+outsingle;
281 		if(fixInterleaving){
282 			process3_fixInterleaving(cris, ros, rosb);
283 		}else if(repair){
284 			if(cris.getClass()==DualCris.class){
285 				process3_repair((DualCris)cris, ros, rosb);
286 			}else{
287 				process3_repair(cris, ros, rosb);
288 			}
289 		}else{
290 			process3(cris, ros, rosb);
291 		}
292 
293 
294 		ReadWrite.closeStreams(cris, ros, rosb);
295 	}
296 //
297 //	private void process3_old(final ConcurrentReadInputStream cris, final ConcurrentReadOutputStream ros, final ConcurrentReadOutputStream rosb){
298 //
299 //		ListNum<Read> ln=cris.nextList();
300 //		ArrayList<Read> reads0=(ln!=null ? ln.list : null);
301 //		ArrayList<Read> single=(rosb==null ? null : new ArrayList<Read>(Shared.bufferLen()));
302 //
303 //		while(reads0!=null && reads0.size()>0){
304 //			ArrayList<Read> reads=(ArrayList<Read>) reads0.clone();
305 //			int removed=0;
306 //			for(int i=0; i<reads.size(); i++){
307 //				Read r1=reads.get(i);
308 //				Read r2=r1.mate;
309 //
310 //				readsIn++;
311 //				basesIn+=r1.length();
312 //				if(r2!=null){
313 //					readsIn++;
314 //					basesIn+=r2.length();
315 //				}
316 //
317 //				{
318 //					if(trimLeft || trimRight){
319 //						if(r1!=null){
320 //							int x=TrimRead.trimFast(r1, trimLeft, trimRight, trimq, 1);
321 //							basesTrimmed+=x;
322 //							readsTrimmed+=(x>0 ? 1 : 0);
323 //						}
324 //						if(r2!=null){
325 //							int x=TrimRead.trimFast(r2, trimLeft, trimRight, trimq, 1);
326 //							basesTrimmed+=x;
327 //							readsTrimmed+=(x>0 ? 1 : 0);
328 //						}
329 //					}
330 //
331 //					final int rlen1=(r1==null ? -1 : r1.length());
332 //					final int rlen2=(r2==null ? -1 : r2.length());
333 //
334 //					if(verbose){System.err.println("rlen1="+rlen1+", rlen2="+rlen2);}
335 //
336 //					if(rlen1<minReadLength || rlen2<minReadLength){
337 //						reads.set(i, null);
338 //						removed++;
339 //						r1.mate=null;
340 //						if(r2!=null){
341 //							r2.mate=null;
342 //						}
343 //
344 //						if(rlen1>=minReadLength){
345 //							single.add(r1);
346 //							singlesOut++;
347 //							singleBasesOut+=rlen1;
348 //						}
349 //						if(rlen2>=minReadLength){
350 //							single.add(r2);
351 //							singlesOut++;
352 //							singleBasesOut+=rlen2;
353 //						}
354 //					}else{
355 //						if(r1!=null){
356 //							pairsOut++;
357 //							pairBasesOut+=rlen2;
358 //						}
359 //						if(r2!=null){
360 //							pairsOut++;
361 //							pairBasesOut+=rlen2;
362 //						}
363 //					}
364 //				}
365 //			}
366 //
367 //			if(rosb!=null){
368 //				if(verbose){System.err.println("Adding "+single.size()+" to single out.");}
369 //				rosb.add(new ArrayList<Read>(single), ln.id);
370 //				single.clear();
371 //			}
372 //
373 //			if(ros!=null){
374 //				if(removed>0){Tools.condenseStrict(reads);}
375 //				ArrayList<Read> x=new ArrayList<Read>(reads.size());
376 //				x.addAll(reads);
377 //				if(verbose){System.err.println("Adding "+x.size()+" to pair out.");}
378 //				ros.add(x, ln.id);
379 //			}
380 //
381 //			cris.returnList(ln);
382 //			ln=cris.nextList();
383 //			reads0=(ln!=null ? ln.list : null);
384 //		}
385 //		cris.returnList(ln);
386 //
387 //		readsOut+=singlesOut+pairsOut;
388 //		basesOut+=singleBasesOut+pairBasesOut;
389 //	}
390 
process3(final ConcurrentReadInputStream cris, final ConcurrentReadOutputStream ros, final ConcurrentReadOutputStream rosb)391 	private void process3(final ConcurrentReadInputStream cris, final ConcurrentReadOutputStream ros, final ConcurrentReadOutputStream rosb){
392 
393 		ListNum<Read> ln=cris.nextList();
394 		ArrayList<Read> reads=ln.list;
395 
396 		final ArrayList<Read> pairs=(ros==null ? null : new ArrayList<Read>(Shared.bufferLen()));
397 		final ArrayList<Read> singles=(rosb==null ? null : new ArrayList<Read>(Shared.bufferLen()));
398 
399 		while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning
400 			for(int i=0; i<reads.size(); i++){
401 				Read r1=reads.get(i);
402 				Read r2=r1.mate;
403 				processPair(r1, r2, pairs, singles);
404 			}
405 
406 			cris.returnList(ln);
407 			ln=cris.nextList();
408 			reads=(ln!=null ? ln.list : null);
409 
410 			if(rosb!=null){
411 				if(verbose){System.err.println("Adding "+singles.size()+" to single out.");}
412 				rosb.add(new ArrayList<Read>(singles), ln.id);
413 				singles.clear();
414 			}
415 
416 			if(ros!=null){
417 				if(verbose){System.err.println("Adding "+pairs.size()+" to pair out.");}
418 				ros.add(new ArrayList<Read>(pairs), ln.id);
419 				pairs.clear();
420 			}
421 		}
422 		cris.returnList(ln);
423 
424 		readsOut+=singlesOut+pairsOut;
425 		basesOut+=singleBasesOut+pairBasesOut;
426 	}
427 
process3_fixInterleaving(final ConcurrentReadInputStream cris, final ConcurrentReadOutputStream ros, final ConcurrentReadOutputStream rosb)428 	private void process3_fixInterleaving(final ConcurrentReadInputStream cris, final ConcurrentReadOutputStream ros, final ConcurrentReadOutputStream rosb){
429 
430 		ListNum<Read> ln=cris.nextList();
431 		ArrayList<Read> reads=ln.list;
432 
433 		final ArrayList<Read> pairs=(ros==null ? null : new ArrayList<Read>(Shared.bufferLen()));
434 		final ArrayList<Read> singles=(rosb==null ? null : new ArrayList<Read>(Shared.bufferLen()));
435 
436 		Read current=null, prev=null;
437 
438 		while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning
439 			for(int i=0; i<reads.size(); i++){
440 
441 				current=reads.get(i);
442 //				if(verbose){System.err.println("Fetched "+current);}
443 
444 				if(prev!=null){
445 					boolean b=FASTQ.testPairNames(prev, current, allowIdenticalPairNames);
446 					if(b){
447 						if(verbose){System.err.println("A");}
448 						processPair(prev, current, pairs, singles);
449 						prev=null;
450 						current=null;
451 					}else{
452 						if(verbose){System.err.println("B");}
453 						processPair(prev, null, null, singles);
454 						prev=null;
455 					}
456 				}
457 				prev=current;
458 				current=null;
459 			}
460 
461 //			if(verbose){System.err.println("X\n"+current+"\n"+prev+"\n");}
462 
463 			cris.returnList(ln);
464 			ln=cris.nextList();
465 			reads=(ln!=null ? ln.list : null);
466 
467 			if((ln==null || reads==null || reads.isEmpty()) && prev!=null){ //Process last read
468 				boolean b=FASTQ.testPairNames(prev, current, allowIdenticalPairNames);
469 				if(b){
470 					if(verbose){System.err.println("C");}
471 					processPair(prev, current, pairs, singles);
472 					prev=null;
473 					current=null;
474 				}else{
475 					if(verbose){System.err.println("D");}
476 					processPair(prev, null, null, singles);
477 					prev=null;
478 				}
479 			}
480 
481 			if(rosb!=null){
482 				if(verbose){System.err.println("Adding "+singles.size()+" to single out.");}
483 				rosb.add(new ArrayList<Read>(singles), ln.id);
484 				singles.clear();
485 			}
486 
487 			if(ros!=null){
488 				if(verbose){System.err.println("Adding "+pairs.size()+" to pair out.");}
489 				ros.add(new ArrayList<Read>(pairs), ln.id);
490 				pairs.clear();
491 			}
492 		}
493 		cris.returnList(ln);
494 
495 		readsOut+=singlesOut+pairsOut;
496 		basesOut+=singleBasesOut+pairBasesOut;
497 	}
498 
process3_repair(final DualCris cris, final ConcurrentReadOutputStream ros, final ConcurrentReadOutputStream rosb)499 	private void process3_repair(final DualCris cris, final ConcurrentReadOutputStream ros, final ConcurrentReadOutputStream rosb){
500 
501 		ListNum<Read> ln=cris.nextList();
502 		ArrayList<Read> reads=ln.list;
503 
504 		final ArrayList<Read> pairs=(ros==null ? null : new ArrayList<Read>(Shared.bufferLen()));
505 
506 		boolean foundR1=false, foundR2=false;
507 		while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning
508 			for(Read r1 : reads){
509 				Read r2=r1.mate;
510 
511 				if(r1.pairnum()==0){foundR1=true;}
512 				else{foundR2=true;}
513 				if(r2!=null){
514 					if(r2.pairnum()==0){foundR1=true;}
515 					else{foundR2=true;}
516 				}
517 
518 				{
519 					Read pair=repair(r1);
520 					if(pair!=null && pairs!=null){pairs.add(pair);}
521 				}
522 				{
523 					Read pair=repair(r2);
524 					if(pair!=null && pairs!=null){pairs.add(pair);}
525 				}
526 			}
527 
528 //			if(verbose){System.err.println("X\n"+current+"\n"+prev+"\n");}
529 
530 			cris.returnList(ln.id, foundR1, foundR2);
531 			foundR1=foundR2=false;
532 			ln=cris.nextList();
533 			reads=(ln!=null ? ln.list : null);
534 
535 			if(ros!=null){
536 				if(verbose){System.err.println("Adding "+pairs.size()+" to pair out.");}
537 				ros.add(new ArrayList<Read>(pairs), ln.id);
538 				pairs.clear();
539 			}
540 		}
541 		cris.returnList(ln.id, foundR1, foundR2);
542 
543 		if(!pairMap.isEmpty()){
544 			final ArrayList<Read> singles=new ArrayList<Read>(pairMap.size());
545 			for(String key : pairMap.keySet()){
546 				Read r=pairMap.get(key);
547 				singles.add(r);
548 				singlesOut++;
549 				singleBasesOut+=r.length();
550 			}
551 			pairMap.clear();
552 			if(verbose){System.err.println("Adding "+singles.size()+" to single out.");}
553 			if(rosb!=null){rosb.add(singles, 0);}
554 		}
555 
556 		readsOut+=singlesOut+pairsOut;
557 		basesOut+=singleBasesOut+pairBasesOut;
558 	}
559 
process3_repair(final ConcurrentReadInputStream cris, final ConcurrentReadOutputStream ros, final ConcurrentReadOutputStream rosb)560 	private void process3_repair(final ConcurrentReadInputStream cris, final ConcurrentReadOutputStream ros, final ConcurrentReadOutputStream rosb){
561 
562 		ListNum<Read> ln=cris.nextList();
563 		ArrayList<Read> reads=ln.list;
564 
565 		final ArrayList<Read> pairs=(ros==null ? null : new ArrayList<Read>(Shared.bufferLen()));
566 
567 		while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning
568 			for(Read r1 : reads){
569 				Read r2=r1.mate;
570 
571 				{
572 					Read pair=repair(r1);
573 					if(pair!=null && pairs!=null){pairs.add(pair);}
574 				}
575 				{
576 					Read pair=repair(r2);
577 					if(pair!=null && pairs!=null){pairs.add(pair);}
578 				}
579 			}
580 
581 //			if(verbose){System.err.println("X\n"+current+"\n"+prev+"\n");}
582 
583 			cris.returnList(ln);
584 			ln=cris.nextList();
585 			reads=(ln!=null ? ln.list : null);
586 
587 			if(ros!=null && pairs!=null){//pairs!=null is implied
588 				if(verbose){System.err.println("Adding "+pairs.size()+" to pair out.");}
589 				ros.add(new ArrayList<Read>(pairs), ln.id);
590 				pairs.clear();
591 			}
592 		}
593 		cris.returnList(ln);
594 
595 		if(!pairMap.isEmpty()){
596 			final ArrayList<Read> singles=new ArrayList<Read>(pairMap.size());
597 			for(String key : pairMap.keySet()){
598 				Read r=pairMap.get(key);
599 				singles.add(r);
600 				singlesOut++;
601 				singleBasesOut+=r.length();
602 			}
603 			pairMap.clear();
604 			if(verbose){System.err.println("Adding "+singles.size()+" to single out.");}
605 			if(rosb!=null){rosb.add(singles, 0);}
606 		}
607 
608 		readsOut+=singlesOut+pairsOut;
609 		basesOut+=singleBasesOut+pairBasesOut;
610 	}
611 
612 
processPair(Read r1, Read r2, ArrayList<Read> pairs, ArrayList<Read> singles)613 	private int processPair(Read r1, Read r2, ArrayList<Read> pairs, ArrayList<Read> singles){
614 		int removed=0;
615 		readsIn++;
616 		basesIn+=r1.length();
617 		if(r2!=null){
618 			readsIn++;
619 			basesIn+=r2.length();
620 		}
621 
622 		if(qtrimLeft || qtrimRight){
623 			if(r1!=null){
624 				int x=TrimRead.trimFast(r1, qtrimLeft, qtrimRight, trimq, trimE, 1);
625 				basesTrimmed+=x;
626 				readsTrimmed+=(x>0 ? 1 : 0);
627 				if(addcolon){
628 					String colon=colon1;
629 					if(!r1.id.contains(colon)){r1.id+=colon;}
630 				}
631 			}
632 			if(r2!=null){
633 				int x=TrimRead.trimFast(r2, qtrimLeft, qtrimRight, trimq, trimE, 1);
634 				basesTrimmed+=x;
635 				readsTrimmed+=(x>0 ? 1 : 0);
636 				if(addcolon){
637 					String colon=colon2;
638 					if(!r2.id.contains(colon)){r2.id+=colon;}
639 				}
640 			}
641 		}
642 		final int rlen1=(r1==null ? -1 : r1.length());
643 		final int rlen2=(r2==null ? -1 : r2.length());
644 		if(verbose){System.err.println("rlen="+rlen1+", rlen2="+rlen2);}
645 
646 		if(rlen1>=minReadLength && rlen2>=minReadLength){
647 			if(verbose){System.err.println("Sending to pair out:\t"+r1.id+"\t"+r2.id);}
648 			r1.mate=r2;
649 			r2.mate=r1;
650 			r1.setPairnum(0);
651 			r2.setPairnum(1);
652 			if(pairs!=null){pairs.add(r1);}
653 			pairsOut+=2;
654 			pairBasesOut+=(rlen1+rlen2);
655 		}else if(rlen1>=minReadLength){
656 			if(verbose){System.err.println("Sending r1 to single out:\t"+r1.id+"\t"+(r2==null ? "*" : r2.id));}
657 			r1.mate=null;
658 			r1.setPairnum(0);
659 			if(singles!=null){singles.add(r1);}
660 			singlesOut++;
661 			singleBasesOut+=rlen1;
662 			if(r2!=null){removed++;}
663 		}else if(rlen2>=minReadLength){
664 			if(verbose){System.err.println("Sending r2 to single out:\t"+(r1==null ? "*" : r1.id)+"\t"+r2.id);}
665 			r2.mate=null;
666 			r2.setPairnum(0);
667 			if(singles!=null){singles.add(r2);}
668 			singlesOut++;
669 			singleBasesOut+=rlen2;
670 			if(r1!=null){removed++;}
671 		}else{
672 			if(verbose){System.err.println("Removed both reads:\t"+(r1==null ? "*" : r1.id)+"\t"+(r2==null ? "*" : r2.id));}
673 			if(r1!=null){removed++;}
674 			if(r2!=null){removed++;}
675 		}
676 		return removed;
677 	}
678 
679 
repair(Read r)680 	private Read repair(Read r){
681 		if(r==null){return null;}
682 		r.mate=null;
683 
684 		readsIn++;
685 		basesIn+=r.length();
686 		final String id=r.id;
687 
688 		final SamLine sl=r.samline;
689 		if(sl!=null && (!sl.primary() || sl.supplementary())){return null;}
690 
691 		assert(id!=null) : "Read number "+r.numericID+" has no name and thus cannot be re-paired.  To ignore this, run with the -da flag.";
692 		if(id==null){return null;}
693 		final int slash=id.indexOf('/');
694 		String[] split=id.split("\\s+");
695 
696 		if(split.length==1 && slash>0){
697 			split=new String[] {id.substring(0, slash), id.substring(slash)};
698 		}
699 
700 		assert(split.length>0);
701 		String prefix=split[0];
702 		String suffix=(split.length==1 ? null : split[split.length-1]);
703 
704 		if(sl!=null){
705 			r.setPairnum(sl.pairnum());
706 		}else if(suffix!=null){
707 			if(suffix.startsWith("/1") || suffix.startsWith("1:")){
708 				r.setPairnum(0);
709 			}else if(suffix.startsWith("/2") || suffix.startsWith("2:")){
710 				r.setPairnum(1);
711 			}else if(id.contains("/1") || id.contains("/2")){
712 				split=id.split("/");
713 				prefix=split[0];
714 				suffix=(split.length==1 ? null : split[split.length-1]);
715 
716 				if(suffix!=null){
717 					if(suffix.startsWith("1")){
718 						r.setPairnum(0);
719 					}else if(suffix.startsWith("2")){
720 						r.setPairnum(1);
721 					}
722 				}else{
723 					//pairnum cannot be determined
724 				}
725 			}else{
726 				//pairnum cannot be determined
727 			}
728 		}else{
729 			//pairnum cannot be determined
730 		}
731 
732 		if(addcolon){
733 			String colon=(r.pairnum()==0 ? colon1 : colon2);
734 			if(!r.id.contains(colon)){r.id+=colon;}
735 		}
736 
737 		Read old=pairMap.remove(prefix);
738 
739 //		System.out.println("Processing:\n"+r+"\n"+old+"\n"+readsIn+", "+readsOut+", "+pairsOut);
740 
741 		if(old==null){
742 			pairMap.put(prefix, r);
743 			return null;
744 		}else{
745 			r.mate=old;
746 			old.mate=r;
747 
748 			int len=r.length()+old.length();
749 			pairsOut+=2;
750 			pairBasesOut+=len;
751 
752 			if(old.pairnum()==1){
753 				r.setPairnum(0);
754 				return r;
755 			}else{
756 				old.setPairnum(0);
757 				r.setPairnum(1);
758 				return old;
759 			}
760 		}
761 	}
762 
763 
764 	private String in1=null, in2=null;
765 	private String out1=null, out2=null;
766 	private String outsingle=null;
767 	private long maxReads=-1;
768 	public boolean errorState=false;
769 
770 	long readsIn=0;
771 	long basesIn=0;
772 	long readsOut=0;
773 	long basesOut=0;
774 	long pairsOut=0;
775 	long pairBasesOut=0;
776 	long singlesOut=0;
777 	long singleBasesOut=0;
778 	long readsTrimmed=0;
779 	long basesTrimmed=0;
780 
781 	private final LinkedHashMap<String, Read> pairMap;
782 
783 	private float trimq=6;
784 	/** Error rate for trimming (derived from trimq) */
785 	private final float trimE;
786 	private int minReadLength=20;
787 	private final boolean qtrimLeft, qtrimRight;
788 
789 	private final boolean fixInterleaving;
790 	private final boolean allowIdenticalPairNames;
791 	private final boolean repair;
792 
793 	private boolean addslash=false;
794 	private boolean addcolon=false;
795 
796 	private static PrintStream outstream=System.err;
797 	/** Permission to overwrite existing files */
798 	public static boolean overwrite=false;
799 	/** Permission to append to existing files */
800 	public static boolean append=false;
801 	public static boolean showSpeed=true;
802 	public static boolean verbose=false;
803 
804 	private static final String slash1=" /1";
805 	private static final String slash2=" /2";
806 	private static final String colon1=" 1:";
807 	private static final String colon2=" 2:";
808 
809 }
810