package align2;

import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.zip.ZipOutputStream;

import dna.Data;
import fileIO.ReadWrite;
import shared.Timer;
import stream.ConcurrentLegacyReadInputStream;
import stream.RTextInputStream;
import stream.Read;
import stream.SiteScore;
import structures.ListNum;

/**
 * Splits a stream of mapped reads into per-chromosome text files.
 * <p>
 * For every chromosome index in {@code [MIN_CHROM, MAX_CHROM]} one output is opened for
 * read-1 singletons and, when the input stream reports itself as paired, three more:
 * read-2 singletons, read-1 paired and read-2 paired. The file name of each output is
 * {@link #outname} with every {@code "#"} replaced by a tag such as {@code "single_1_chr7"}.
 * Reads are buffered per output and written in batches of {@link #WRITE_BUFFER}.
 */
public class SplitMappedReads {

	/**
	 * Command-line entry point.
	 *
	 * @param args {@code args[0]} = first reads file; {@code args[1]} = second reads file, or
	 *             the literal {@code "null"} for none; {@code args[2]} = output name pattern, or
	 *             the literal {@code "null"} for an empty pattern; optional {@code args[3]} =
	 *             minimum chromosome (also used as the maximum when {@code args[4]} is absent);
	 *             optional {@code args[4]} = maximum chromosome. Without the optional
	 *             arguments the range is 1 to 25.
	 * @throws IllegalArgumentException if fewer than three arguments are supplied
	 * @throws NumberFormatException if a chromosome argument is not a valid integer
	 */
	public static void main(String[] args){

		if(args==null || args.length<3){
			throw new IllegalArgumentException(
					"Usage: SplitMappedReads <reads1> <reads2|null> <outname|null> [minChrom [maxChrom]]");
		}

		String reads1=args[0];
		String reads2=args[1].equalsIgnoreCase("null") ? null : args[1];
		String outname=args[2].equalsIgnoreCase("null") ? "" : args[2];

		int minChrom=1;
		int maxChrom=25;
		if(args.length>3){
			//Parsed as int (previously byte) so that indices above 127 are accepted.
			minChrom=maxChrom=Integer.parseInt(args[3]);
			if(args.length>4){
				maxChrom=Integer.parseInt(args[4]);
			}
		}
		assert(minChrom<=maxChrom && minChrom>=0);

		SplitMappedReads smr=new SplitMappedReads(reads1, reads2, outname, minChrom, maxChrom);
		smr.process();

	}

	/**
	 * Creates a splitter that reads from one or two files.
	 *
	 * @param fname1   first input file
	 * @param fname2   second input file, or {@code null}
	 * @param outname_ output name pattern; each {@code "#"} is replaced per output file
	 * @param minChrom lowest chromosome index to write
	 * @param maxChrom highest chromosome index to write
	 */
	public SplitMappedReads(String fname1, String fname2, String outname_, int minChrom, int maxChrom){
		this(new RTextInputStream(fname1, fname2, -1), outname_, minChrom, maxChrom);
		assert(fname2==null || !fname1.equals(fname2)) : "Error - input files have same name.";
	}

	/**
	 * Creates a splitter that reads from an existing stream and opens all output files,
	 * writing a two-line header to each.
	 *
	 * @param stream_  source of reads; {@code stream_.paired()} decides whether the
	 *                 read-2 and paired outputs are created
	 * @param outname_ output name pattern; each {@code "#"} is replaced per output file
	 * @param minChrom lowest chromosome index to write
	 * @param maxChrom highest chromosome index to write
	 */
	public SplitMappedReads(RTextInputStream stream_, String outname_, int minChrom, int maxChrom){
		stream=stream_;
		outname=outname_;
		paired=stream.paired();
//		assert(outname.contains("#")) : "Output file name must contain the character '#' to be used for chromosome number.";

		MIN_CHROM=minChrom;
		MAX_CHROM=maxChrom;
		assert(MIN_CHROM>=0);
		assert(MAX_CHROM>=MIN_CHROM);

		//Arrays are indexed directly by chromosome number; slots below minChrom stay null.
		final int slots=maxChrom+1;

		outArraySingle1=new OutputStream[slots];
		printArraySingle1=new PrintWriter[slots];
		bufferArraySingle1=newBufferArray(slots);
		openOutputs(outname, minChrom, "single_1_chr", "Read 1 Singletons",
				outArraySingle1, printArraySingle1, bufferArraySingle1);

		if(!paired){
			outArraySingle2=null;
			printArraySingle2=null;
			bufferArraySingle2=null;
			outArrayPaired1=null;
			printArrayPaired1=null;
			bufferArrayPaired1=null;
			outArrayPaired2=null;
			printArrayPaired2=null;
			bufferArrayPaired2=null;
		}else{

			outArraySingle2=new OutputStream[slots];
			printArraySingle2=new PrintWriter[slots];
			bufferArraySingle2=newBufferArray(slots);
			openOutputs(outname, minChrom, "single_2_chr", "Read 2 Singletons",
					outArraySingle2, printArraySingle2, bufferArraySingle2);

			outArrayPaired1=new OutputStream[slots];
			printArrayPaired1=new PrintWriter[slots];
			bufferArrayPaired1=newBufferArray(slots);
			openOutputs(outname, minChrom, "paired_1_chr", "Read 1 Paired",
					outArrayPaired1, printArrayPaired1, bufferArrayPaired1);

			outArrayPaired2=new OutputStream[slots];
			printArrayPaired2=new PrintWriter[slots];
			bufferArrayPaired2=newBufferArray(slots);
			openOutputs(outname, minChrom, "paired_2_chr", "Read 2 Paired",
					outArrayPaired2, printArrayPaired2, bufferArrayPaired2);

		}

		cris=(USE_CRIS ? new ConcurrentLegacyReadInputStream(stream, -1) : null);
	}

	/** Allocates an array of read buffers; isolates the unavoidable unchecked generic-array creation. */
	@SuppressWarnings("unchecked")
	private static ArrayList<Read>[] newBufferArray(int length){
		return new ArrayList[length];
	}

	/**
	 * Fills one family of parallel arrays for chromosomes {@code minChrom..outs.length-1}:
	 * creates the buffer, opens the output stream and writer, and writes the header lines.
	 *
	 * @param outname  output name pattern whose {@code "#"} is replaced by {@code fileTag+chrom}
	 * @param minChrom first chromosome index to open
	 * @param fileTag  file-name fragment, e.g. {@code "single_1_chr"}
	 * @param label    header description, e.g. {@code "Read 1 Singletons"}
	 * @param outs     receives the output streams
	 * @param printers receives the writers wrapping {@code outs}
	 * @param buffers  receives the per-chromosome read buffers
	 */
	private static void openOutputs(String outname, int minChrom, String fileTag, String label,
			OutputStream[] outs, PrintWriter[] printers, ArrayList<Read>[] buffers){
		for(int i=minChrom; i<outs.length; i++){
			buffers[i]=new ArrayList<Read>(WRITE_BUFFER);
			outs[i]=ReadWrite.getOutputStream(outname.replace("#", fileTag+i), false, true, false);
			printers[i]=new PrintWriter(outs[i]);
			printers[i].println("#Chromosome "+i+" "+label);
			printers[i].println("#"+Read.header());
		}
	}

	/**
	 * Consumes the whole input, routing every read (and its mate, if any) to its output,
	 * then calls {@link #finish()} and prints the elapsed time to {@code Data.sysout}.
	 * Input is pulled through {@code cris} when it exists, otherwise directly from the stream.
	 */
	public void process(){

		Timer t=new Timer();

		if(cris!=null){
			cris.start();
			ListNum<Read> ln=cris.nextList();
			ArrayList<Read> reads=(ln!=null ? ln.list : null);

			while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning
				processReads(reads);
				cris.returnList(ln);
				ln=cris.nextList();
				reads=(ln!=null ? ln.list : null);
			}
			//Hand back the final (empty or null) list that terminated the loop.
			cris.returnList(ln);
		}else{
			ArrayList<Read> reads=stream.nextList();
			while(reads!=null && reads.size()>0){
				processReads(reads);
				reads=stream.nextList();
			}
		}

		//Wakes anything waiting on this object's monitor; no waiter is visible in this file.
		synchronized(this){this.notifyAll();}

		finish();

		t.stop();
		Data.sysout.println("Time:\t"+t);
	}


	/** Routes each read as side 1 and, when present, its mate as side 2. */
	private void processReads(ArrayList<Read> reads){
		for(Read r : reads){
			addRead(r, 1);
			if(r.mate!=null){
				addRead(r.mate, 2);
			}
		}
	}


	/**
	 * Buffers one read for the output selected by its chromosome, its side and
	 * {@code r.paired()}, flushing the buffer once it holds {@link #WRITE_BUFFER} reads.
	 * Reads whose chromosome lies outside {@code [MIN_CHROM, MAX_CHROM]} are dropped.
	 * <p>
	 * NOTE(review): the side-2 and paired arrays are {@code null} when the input stream is
	 * unpaired (see constructor), so this method relies on such input never producing a
	 * mate or a read for which {@code paired()} is true - confirm against the stream classes.
	 *
	 * @param r    the read to route; its {@code sites}, {@code originalSite} and
	 *             {@code samline} fields are cleared as a side effect
	 * @param side 1 for the read itself, 2 for a mate
	 */
	private void addRead(Read r, int side){

		//No chromosome assigned yet but candidate sites exist: adopt the top site's location.
		if(r.chrom<1 && r.numSites()>0){
			SiteScore ss=r.topSite(); //Should not be necessary
			r.start=ss.start;
			r.stop=ss.stop;
			r.chrom=ss.chrom;
			r.setStrand(ss.strand);
		}

		//Ensure no superfluous data is written
		r.sites=null;
		r.originalSite=null;
		r.samline=null;

//		System.err.println("Adding to chrom "+r.chrom+", side "+side+", paired="+r.paired+", "+(r.list==null ? "null" : r.list.size()));
		if(r.chrom<MIN_CHROM || r.chrom>MAX_CHROM){return;}

		final PrintWriter writer;
		final ArrayList<Read> list;

		if(side==1){
			if(r.paired()){
				writer=printArrayPaired1[r.chrom];
				list=bufferArrayPaired1[r.chrom];
			}else{
				writer=printArraySingle1[r.chrom];
				list=bufferArraySingle1[r.chrom];
			}
		}else{
			assert(side==2);
			if(r.paired()){
				writer=printArrayPaired2[r.chrom];
				list=bufferArrayPaired2[r.chrom];
			}else{
				writer=printArraySingle2[r.chrom];
				list=bufferArraySingle2[r.chrom];
			}
		}

		assert(list.size()<WRITE_BUFFER);
		list.add(r);

		if(list.size()>=WRITE_BUFFER){
			//writeList returns only after every read is printed, so no defensive copy is needed.
			writeList(list, writer);
			list.clear();
		}
	}


	/** Prints every read in {@code list} as one text line, holding the writer's monitor throughout. */
	private static void writeList(ArrayList<Read> list, PrintWriter writer){
		synchronized(writer){
			for(Read r : list){
				writer.println(r.toText(true));
			}
		}
	}


	/**
	 * Writes all partially filled buffers, then flushes and closes every writer and
	 * output stream, and finally closes the input. I/O errors while closing are reported
	 * via {@code printStackTrace()} and do not stop the remaining outputs from being closed.
	 */
	public void finish(){

		final PrintWriter[][] writers=new PrintWriter[][] {printArraySingle1, printArraySingle2, printArrayPaired1, printArrayPaired2};
		final OutputStream[][] streams=new OutputStream[][] {outArraySingle1, outArraySingle2, outArrayPaired1, outArrayPaired2};
		@SuppressWarnings("unchecked")
		final ArrayList<Read>[][] buffers=new ArrayList[][] {bufferArraySingle1, bufferArraySingle2, bufferArrayPaired1, bufferArrayPaired2};

		//Pass 1: drain whatever is still buffered. Families that were never created are null.
		for(int x=0; x<buffers.length; x++){

			PrintWriter[] printArray=writers[x];
			ArrayList<Read>[] bufferArray=buffers[x];

			for(int i=0; printArray!=null && i<printArray.length; i++){
				PrintWriter writer=printArray[i];
				ArrayList<Read> list=bufferArray[i];

				if(list!=null && !list.isEmpty()){
					writeList(list, writer);
					//Empty the buffer so these reads cannot be written a second time.
					list.clear();
				}
			}
		}

		//TODO: Wait for writing to finish, if it is done in threads.

		//Pass 2: flush and close every open output.
		for(int x=0; x<writers.length; x++){

			PrintWriter[] printArray=writers[x];
			OutputStream[] outArray=streams[x];

			for(int i=0; printArray!=null && i<printArray.length; i++){
				if(printArray[i]!=null){
					synchronized(printArray[i]){
						printArray[i].flush();
						//Zip outputs get their entry and archive finalized before closing.
						if(outArray[i].getClass()==ZipOutputStream.class){
							ZipOutputStream zos=(ZipOutputStream)outArray[i];
							try {
								zos.closeEntry();
								zos.finish();
							} catch (IOException e) {
								//Best effort: report and keep closing the remaining outputs.
								e.printStackTrace();
							}
						}
						printArray[i].close();
						try {
							outArray[i].close();
						} catch (IOException e) {
							//Best effort: report and keep closing the remaining outputs.
							e.printStackTrace();
						}
					}
				}
			}
		}

//		if(cris!=null){cris.shutdown();}
//		stream.shutdown();

		if(cris!=null){ReadWrite.closeStream(cris);}
		else{stream.close();}
	}


	/** Output name pattern; every {@code "#"} is replaced with a per-file tag. */
	public final String outname;
	/** Underlying source of reads. */
	private final RTextInputStream stream;
	/** Concurrent wrapper around {@link #stream}; {@code null} when {@link #USE_CRIS} was false at construction. */
	private final ConcurrentLegacyReadInputStream cris;

	//Read-1 singleton outputs, indexed by chromosome.
	private final OutputStream[] outArraySingle1;
	private final PrintWriter[] printArraySingle1;
	private final ArrayList<Read>[] bufferArraySingle1;

	//Read-2 singleton outputs, indexed by chromosome; null for unpaired input.
	private final OutputStream[] outArraySingle2;
	private final PrintWriter[] printArraySingle2;
	private final ArrayList<Read>[] bufferArraySingle2;

	//Read-1 paired outputs, indexed by chromosome; null for unpaired input.
	private final OutputStream[] outArrayPaired1;
	private final PrintWriter[] printArrayPaired1;
	private final ArrayList<Read>[] bufferArrayPaired1;

	//Read-2 paired outputs, indexed by chromosome; null for unpaired input.
	private final OutputStream[] outArrayPaired2;
	private final PrintWriter[] printArrayPaired2;
	private final ArrayList<Read>[] bufferArrayPaired2;

	/** Lowest chromosome index written (inclusive). */
	private final int MIN_CHROM;
	/** Highest chromosome index written (inclusive). */
	private final int MAX_CHROM;

	/** Value of {@code stream.paired()} at construction time. */
	public final boolean paired;

	public static boolean USE_CRIS=true; //Similar speed either way.  "true" may be better with many threads.

	public static final int WRITE_BUFFER=400; //Bigger number uses more memory, for less frequent writes.


}