1 package align2;
2 
3 import java.io.IOException;
4 import java.io.OutputStream;
5 import java.io.PrintWriter;
6 import java.util.ArrayList;
7 import java.util.zip.ZipOutputStream;
8 
9 import dna.Data;
10 import fileIO.ReadWrite;
11 import shared.Timer;
12 import stream.ConcurrentLegacyReadInputStream;
13 import stream.RTextInputStream;
14 import stream.Read;
15 import stream.SiteScore;
16 import structures.ListNum;
17 
18 public class SplitMappedReads {
19 
20 
main(String[] args)21 	public static void main(String[] args){
22 
23 		String reads1=args[0];
24 		String reads2=args[1].equalsIgnoreCase("null") ?  null : args[1];
25 		String outname=args[2].equalsIgnoreCase("null") ?  "" : args[2];
26 
27 		int minChrom=1;
28 		int maxChrom=25;
29 		if(args.length>3){
30 			minChrom=maxChrom=Byte.parseByte(args[3]);
31 			if(args.length>4){
32 				maxChrom=Byte.parseByte(args[4]);
33 			}
34 		}
35 		assert(minChrom<=maxChrom && minChrom>=0);
36 
37 		SplitMappedReads smr=new SplitMappedReads(reads1, reads2, outname, minChrom, maxChrom);
38 		smr.process();
39 
40 	}
41 
SplitMappedReads(String fname1, String fname2, String outname_, int minChrom, int maxChrom)42 	public SplitMappedReads(String fname1, String fname2, String outname_, int minChrom, int maxChrom){
43 		this(new RTextInputStream(fname1, fname2, -1), outname_, minChrom, maxChrom);
44 		assert(fname2==null || !fname1.equals(fname2)) : "Error - input files have same name.";
45 	}
46 
SplitMappedReads(RTextInputStream stream_, String outname_, int minChrom, int maxChrom)47 	public SplitMappedReads(RTextInputStream stream_, String outname_, int minChrom, int maxChrom){
48 		stream=stream_;
49 		outname=outname_;
50 		paired=stream.paired();
51 //		assert(outname.contains("#")) : "Output file name must contain the character '#' to be used for chromosome number.";
52 
53 		MIN_CHROM=minChrom;
54 		MAX_CHROM=maxChrom;
55 		assert(MIN_CHROM>=0);
56 		assert(MAX_CHROM>=MIN_CHROM);
57 
58 		outArraySingle1=new OutputStream[maxChrom+1];
59 		printArraySingle1=new PrintWriter[maxChrom+1];
60 		bufferArraySingle1=new ArrayList[maxChrom+1];
61 		for(int i=minChrom; i<outArraySingle1.length; i++){
62 			bufferArraySingle1[i]=new ArrayList<Read>(WRITE_BUFFER);
63 			outArraySingle1[i]=ReadWrite.getOutputStream(outname.replace("#", "single_1_chr"+i), false, true, false);
64 			printArraySingle1[i]=new PrintWriter(outArraySingle1[i]);
65 			printArraySingle1[i].println("#Chromosome "+i+" Read 1 Singletons");
66 			printArraySingle1[i].println("#"+Read.header());
67 		}
68 
69 		if(!paired){
70 			outArraySingle2=null;
71 			printArraySingle2=null;
72 			bufferArraySingle2=null;
73 			outArrayPaired1=null;
74 			printArrayPaired1=null;
75 			bufferArrayPaired1=null;
76 			outArrayPaired2=null;
77 			printArrayPaired2=null;
78 			bufferArrayPaired2=null;
79 		}else{
80 
81 			outArraySingle2=new OutputStream[maxChrom+1];
82 			printArraySingle2=new PrintWriter[maxChrom+1];
83 			bufferArraySingle2=new ArrayList[maxChrom+1];
84 			for(int i=minChrom; i<outArraySingle2.length; i++){
85 				bufferArraySingle2[i]=new ArrayList<Read>(WRITE_BUFFER);
86 				outArraySingle2[i]=ReadWrite.getOutputStream(outname.replace("#", "single_2_chr"+i), false, true, false);
87 				printArraySingle2[i]=new PrintWriter(outArraySingle2[i]);
88 				printArraySingle2[i].println("#Chromosome "+i+" Read 2 Singletons");
89 				printArraySingle2[i].println("#"+Read.header());
90 			}
91 
92 			outArrayPaired1=new OutputStream[maxChrom+1];
93 			printArrayPaired1=new PrintWriter[maxChrom+1];
94 			bufferArrayPaired1=new ArrayList[maxChrom+1];
95 			for(int i=minChrom; i<outArrayPaired1.length; i++){
96 				bufferArrayPaired1[i]=new ArrayList<Read>(WRITE_BUFFER);
97 				outArrayPaired1[i]=ReadWrite.getOutputStream(outname.replace("#", "paired_1_chr"+i), false, true, false);
98 				printArrayPaired1[i]=new PrintWriter(outArrayPaired1[i]);
99 				printArrayPaired1[i].println("#Chromosome "+i+" Read 1 Paired");
100 				printArrayPaired1[i].println("#"+Read.header());
101 			}
102 
103 			outArrayPaired2=new OutputStream[maxChrom+1];
104 			printArrayPaired2=new PrintWriter[maxChrom+1];
105 			bufferArrayPaired2=new ArrayList[maxChrom+1];
106 			for(int i=minChrom; i<outArrayPaired2.length; i++){
107 				bufferArrayPaired2[i]=new ArrayList<Read>(WRITE_BUFFER);
108 				outArrayPaired2[i]=ReadWrite.getOutputStream(outname.replace("#", "paired_2_chr"+i), false, true, false);
109 				printArrayPaired2[i]=new PrintWriter(outArrayPaired2[i]);
110 				printArrayPaired2[i].println("#Chromosome "+i+" Read 2 Paired");
111 				printArrayPaired2[i].println("#"+Read.header());
112 			}
113 
114 		}
115 
116 		cris=(USE_CRIS ? new ConcurrentLegacyReadInputStream(stream, -1) : null);
117 	}
118 
process()119 	public void process(){
120 
121 		Timer t=new Timer();
122 
123 		if(cris!=null){
124 			cris.start();
125 			ListNum<Read> ln=cris.nextList();
126 			ArrayList<Read> reads=(ln!=null ? ln.list : null);
127 
128 			while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning
129 				processReads(reads);
130 				cris.returnList(ln);
131 				ln=cris.nextList();
132 				reads=(ln!=null ? ln.list : null);
133 			}
134 			cris.returnList(ln);
135 		}else{
136 			ArrayList<Read> reads=stream.nextList();
137 			while(reads!=null && reads.size()>0){
138 				processReads(reads);
139 				reads=stream.nextList();
140 			}
141 		}
142 
143 		synchronized(this){this.notifyAll();}
144 
145 		finish();
146 
147 		t.stop();
148 		Data.sysout.println("Time:\t"+t);
149 	}
150 
151 
152 
processReads(ArrayList<Read> reads)153 	private void processReads(ArrayList<Read> reads){
154 		for(Read r : reads){
155 			addRead(r, 1);
156 			if(r.mate!=null){
157 				addRead(r.mate, 2);
158 			}
159 		}
160 	}
161 
162 
addRead(Read r, int side)163 	private void addRead(Read r, int side){
164 
165 		if(r.chrom<1 && r.numSites()>0){
166 			SiteScore ss=r.topSite(); //Should not be necessary
167 			r.start=ss.start;
168 			r.stop=ss.stop;
169 			r.chrom=ss.chrom;
170 			r.setStrand(ss.strand);
171 		}
172 
173 		//Ensure no superfluous data is written
174 		r.sites=null;
175 		r.originalSite=null;
176 		r.samline=null;
177 
178 //		System.err.println("Adding to chrom "+r.chrom+", side "+side+", paired="+r.paired+", "+(r.list==null ? "null" : r.list.size()));
179 		if(r.chrom<MIN_CHROM || r.chrom>MAX_CHROM){return;}
180 
181 		final PrintWriter writer;
182 		final ArrayList<Read> list;
183 
184 		if(side==1){
185 			if(r.paired()){
186 				writer=printArrayPaired1[r.chrom];
187 				list=bufferArrayPaired1[r.chrom];
188 			}else{
189 				writer=printArraySingle1[r.chrom];
190 				list=bufferArraySingle1[r.chrom];
191 			}
192 		}else{
193 			assert(side==2);
194 			if(r.paired()){
195 				writer=printArrayPaired2[r.chrom];
196 				list=bufferArrayPaired2[r.chrom];
197 			}else{
198 				writer=printArraySingle2[r.chrom];
199 				list=bufferArraySingle2[r.chrom];
200 			}
201 		}
202 
203 		assert(list.size()<WRITE_BUFFER);
204 		list.add(r);
205 
206 		if(list.size()>=WRITE_BUFFER){
207 			writeList((ArrayList<Read>)list.clone(), writer);
208 			list.clear();
209 		}
210 	}
211 
212 
213 	private static void writeList(ArrayList<Read> list, PrintWriter writer){
214 		synchronized(writer){
215 			for(Read r : list){
216 				writer.println(r.toText(true));
217 			}
218 		}
219 	}
220 
221 
222 	public void finish(){
223 
224 		final PrintWriter[][] writers=new PrintWriter[][] {printArraySingle1, printArraySingle2, printArrayPaired1, printArrayPaired2};
225 		final OutputStream[][] streams=new OutputStream[][] {outArraySingle1, outArraySingle2, outArrayPaired1, outArrayPaired2};
226 		final ArrayList<Read>[][] buffers=new ArrayList[][] {bufferArraySingle1, bufferArraySingle2, bufferArrayPaired1, bufferArrayPaired2};
227 
228 
229 		for(int x=0; x<buffers.length; x++){
230 
231 
232 			PrintWriter[] printArray=writers[x];
233 			ArrayList<Read>[] bufferArray=buffers[x];
234 
235 			for(int i=0; printArray!=null && i<printArray.length; i++){
236 				PrintWriter writer=printArray[i];
237 				ArrayList<Read> list=bufferArray[i];
238 
239 				if(list!=null && !list.isEmpty()){
240 					writeList(list, writer);
241 					list=null;
242 				}
243 			}
244 		}
245 
246 		//TODO: Wait for writing to finish, if it is done in threads.
247 
248 
249 		for(int x=0; x<writers.length; x++){
250 
251 
252 			PrintWriter[] printArray=writers[x];
253 			OutputStream[] outArray=streams[x];
254 
255 			for(int i=0; printArray!=null && i<printArray.length; i++){
256 				if(printArray[i]!=null){
257 					synchronized(printArray[i]){
258 						printArray[i].flush();
259 						if(outArray[i].getClass()==ZipOutputStream.class){
260 							ZipOutputStream zos=(ZipOutputStream)outArray[i];
261 							try {
262 								zos.closeEntry();
263 								zos.finish();
264 							} catch (IOException e) {
265 								// TODO Auto-generated catch block
266 								e.printStackTrace();
267 							}
268 						}
269 						printArray[i].close();
270 						try {
271 							outArray[i].close();
272 						} catch (IOException e) {
273 							// TODO Auto-generated catch block
274 							e.printStackTrace();
275 						}
276 					}
277 				}
278 			}
279 		}
280 
281 //		if(cris!=null){cris.shutdown();}
282 //		stream.shutdown();
283 
284 		if(cris!=null){ReadWrite.closeStream(cris);}
285 		else{stream.close();}
286 	}
287 
288 
289 	public final String outname;
290 	private final RTextInputStream stream;
291 	private final ConcurrentLegacyReadInputStream cris;
292 
293 	private final OutputStream[] outArraySingle1;
294 	private final PrintWriter[] printArraySingle1;
295 	private final ArrayList<Read>[] bufferArraySingle1;
296 
297 	private final OutputStream[] outArraySingle2;
298 	private final PrintWriter[] printArraySingle2;
299 	private final ArrayList<Read>[] bufferArraySingle2;
300 
301 	private final OutputStream[] outArrayPaired1;
302 	private final PrintWriter[] printArrayPaired1;
303 	private final ArrayList<Read>[] bufferArrayPaired1;
304 
305 	private final OutputStream[] outArrayPaired2;
306 	private final PrintWriter[] printArrayPaired2;
307 	private final ArrayList<Read>[] bufferArrayPaired2;
308 
309 	private final int MIN_CHROM;
310 	private final int MAX_CHROM;
311 
312 	public final boolean paired;
313 
314 	public static boolean USE_CRIS=true; //Similar speed either way.  "true" may be better with many threads.
315 
316 	public static final int WRITE_BUFFER=400; //Bigger number uses more memory, for less frequent writes.
317 
318 
319 }
320