1 package tax;
2 
3 import java.io.File;
4 import java.io.PrintStream;
5 import java.util.LinkedHashMap;
6 import java.util.Locale;
7 
8 import fileIO.ByteFile;
9 import fileIO.ByteStreamWriter;
10 import fileIO.FileFormat;
11 import fileIO.ReadWrite;
12 import fileIO.TextStreamWriter;
13 import shared.Parse;
14 import shared.Parser;
15 import shared.PreParser;
16 import shared.Shared;
17 import shared.Timer;
18 import shared.Tools;
19 import stream.FastaReadInputStream;
20 
21 /**
22  * Constructs a directory and file tree of sequences
23  * corresponding to a taxonomic tree.
24  *
25  * @author Brian Bushnell
26  * @date December 12, 2017
27  *
28  */
29 public class ExplodeTree {
30 
31 	/*--------------------------------------------------------------*/
32 	/*----------------        Initialization        ----------------*/
33 	/*--------------------------------------------------------------*/
34 
35 	/**
36 	 * Code entrance from the command line.
37 	 * @param args Command line arguments
38 	 */
main(String[] args)39 	public static void main(String[] args){
40 		Timer t=new Timer();
41 		ExplodeTree x=new ExplodeTree(args);
42 		x.process(t);
43 
44 		//Close the print stream if it was redirected
45 		Shared.closeStream(x.outstream);
46 	}
47 
48 	/**
49 	 * Constructor.
50 	 * @param args Command line arguments
51 	 */
ExplodeTree(String[] args)52 	public ExplodeTree(String[] args){
53 
54 		{//Preparse block for help, config files, and outstream
55 			PreParser pp=new PreParser(args, getClass(), false);
56 			args=pp.args;
57 			outstream=pp.outstream;
58 		}
59 
60 		//Set shared static variables
61 		ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
62 		ReadWrite.MAX_ZIP_THREADS=Shared.threads();
63 
64 		//Create a parser object
65 		Parser parser=new Parser();
66 
67 		//Parse each argument
68 		for(int i=0; i<args.length; i++){
69 			String arg=args[i];
70 
71 			//Break arguments into their constituent parts, in the form of "a=b"
72 			String[] split=arg.split("=");
73 			String a=split[0].toLowerCase();
74 			String b=split.length>1 ? split[1] : null;
75 
76 			if(a.equals("verbose")){
77 				verbose=Parse.parseBoolean(b);
78 			}else if(a.equals("out") || a.equals("path") || a.equals("outpath")){
79 				outPath=b;
80 			}else if(a.equals("prefix")){
81 				prefix=b;
82 			}else if(a.equals("results") || a.equals("result")){
83 				resultsFile=b;
84 			}else if(a.equals("makedirectories") || a.equals("mkdirs") || a.equals("mkdir")){
85 				makeDirectories=Parse.parseBoolean(b);
86 			}else if(a.equals("tree") || a.equals("taxtree")){
87 				taxTreeFile=b;
88 			}else if(parser.parse(arg, a, b)){//Parse standard flags in the parser
89 				//do nothing
90 			}else{
91 				outstream.println("Unknown parameter "+args[i]);
92 				assert(false) : "Unknown parameter "+args[i];
93 				//				throw new RuntimeException("Unknown parameter "+args[i]);
94 			}
95 		}
96 		if(prefix==null){prefix="";}
97 		if("auto".equalsIgnoreCase(taxTreeFile)){taxTreeFile=TaxTree.defaultTreeFile();}
98 
99 		{//Process parser fields
100 			Parser.processQuality();
101 
102 			maxReads=parser.maxReads;
103 
104 			overwrite=parser.overwrite;
105 
106 			in1=parser.in1;
107 
108 			extin=parser.extin;
109 		}
110 
111 		if(outPath==null || outPath.trim().length()==0){outPath="";}
112 		else{
113 			outPath=outPath.trim().replace('\\', '/').replaceAll("/+", "/");
114 			if(!outPath.endsWith("/")){outPath=outPath+"/";}
115 		}
116 
117 		assert(FastaReadInputStream.settingsOK());
118 
119 		//Ensure there is an input file
120 		if(in1==null){throw new RuntimeException("Error - at least one input file is required.");}
121 
122 		//Adjust the number of threads for input file reading
123 		if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){
124 			ByteFile.FORCE_MODE_BF2=true;
125 		}
126 
127 		//Ensure output files can be written
128 		if(!Tools.testOutputFiles(overwrite, false, false, resultsFile)){
129 			outstream.println(resultsFile);
130 			throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+resultsFile+"\n");
131 		}
132 
133 		//Ensure input files can be read
134 		if(!Tools.testInputFiles(false, true, in1)){
135 			throw new RuntimeException("\nCan't read some input files.\n");
136 		}
137 
138 		//Ensure that no file was specified multiple times
139 		if(!Tools.testForDuplicateFiles(true, in1, resultsFile)){
140 			throw new RuntimeException("\nSome file names were specified multiple times.\n");
141 		}
142 
143 		//Create input FileFormat objects
144 		ffin1=FileFormat.testInput(in1, FileFormat.FASTA, extin, true, true);
145 
146 		tree=TaxTree.loadTaxTree(taxTreeFile, outstream, true, false);
147 	}
148 
149 	/*--------------------------------------------------------------*/
150 	/*----------------         Outer Methods        ----------------*/
151 	/*--------------------------------------------------------------*/
152 
makeDirectoryTree(String root, boolean writeNames)153 	public void makeDirectoryTree(String root, boolean writeNames){
154 		for(TaxNode node : tree.nodes){
155 			if(node!=null){
156 				String dir=tree.toDir(node, root);
157 				File df=new File(dir);
158 				if(!df.exists()){df.mkdirs();}
159 				if(writeNames){
160 					try {
161 						String fname=node.simpleName()+".name";
162 						File nf=new File(fname);
163 						if(!nf.exists()){
164 							ReadWrite.writeString(node.name, dir+fname);
165 						}
166 					} catch (Exception e) {
167 						// TODO Auto-generated catch block
168 						e.printStackTrace();
169 					}
170 				}
171 			}
172 		}
173 	}
174 
175 	/** Create read streams and process all data */
process(Timer t)176 	public void process(Timer t){
177 
178 		Timer t2=new Timer();
179 		if(makeDirectories){
180 			makeDirectoryTree(outPath, true);
181 			t2.stop("Finished making directories. ");
182 			t2.start();
183 		}
184 		processInner();
185 		t2.stop();
186 		t2.stop("Finished writing data. ");
187 
188 		//Do anything necessary after processing
189 
190 		if(resultsFile!=null){
191 			TextStreamWriter tsw=new TextStreamWriter(resultsFile, overwrite, false, false);
192 			tsw.start();
193 			for(TaxNode tn : nodes.keySet()){
194 				Long data=nodes.get(tn);
195 				if(data==null){data=0L;}
196 				tsw.println(tn.id+"\t"+data+"\t"+tn.levelStringExtended(false)+"\t"+tn.name);
197 			}
198 			errorState|=tsw.poisonAndWait();
199 		}
200 
201 		//Report timing and results
202 		{
203 			t.stop();
204 
205 			//Calculate units per nanosecond
206 			double rpnano=readsProcessed/(double)(t.elapsed);
207 			double lpnano=linesProcessed/(double)(t.elapsed);
208 			double bpnano=basesProcessed/(double)(t.elapsed);
209 
210 			//Add "k" and "m" for large numbers
211 			String rpstring=Tools.padKM(readsProcessed, 8);
212 			String lpstring=Tools.padKM(linesProcessed, 8);
213 			String bpstring=Tools.padKM(basesProcessed, 8);
214 
215 			String li="Lines In:               \t"+linesProcessed+" lines";
216 			String lo="Lines Out:              \t"+linesOut+" lines";
217 			while(lo.length()<li.length()){lo=lo+" ";}
218 
219 			String ri="Reads In:               \t"+readsProcessed+" reads";
220 			String ro="Reads Out:              \t"+readsOut+" reads";
221 			while(ro.length()<ri.length()){ro=ro+" ";}
222 
223 			outstream.println(ri+"\t"+basesProcessed+" bases");
224 			outstream.println(ro+"\t"+basesOut+" bases");
225 			outstream.println(li);
226 			outstream.println(lo);
227 			outstream.println();
228 
229 			outstream.println("Time:                         \t"+t);
230 			outstream.println("Reads Processed:    "+rpstring+" \t"+String.format(Locale.ROOT, "%.2fk reads/sec", rpnano*1000000));
231 			outstream.println("Lines Processed:    "+lpstring+" \t"+String.format(Locale.ROOT, "%.2fk reads/sec", lpnano*1000000));
232 			outstream.println("Bases Processed:    "+bpstring+" \t"+String.format(Locale.ROOT, "%.2fm bases/sec", bpnano*1000));
233 		}
234 
235 		//Throw an exception of there was an error in a thread
236 		if(errorState){
237 			throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
238 		}
239 	}
240 
241 	/*--------------------------------------------------------------*/
242 	/*----------------         Inner Methods        ----------------*/
243 	/*--------------------------------------------------------------*/
244 
245 	/** Iterate through the reads */
processInner()246 	void processInner(){
247 		ByteFile bf=ByteFile.makeByteFile(ffin1);
248 		TaxNode currentNode=null;
249 		long currentSize=0;
250 		ByteStreamWriter bsw=null;
251 		for(byte[] line=bf.nextLine(); line!=null; line=bf.nextLine()){
252 			linesProcessed++;
253 			if(line.length>0){
254 				final boolean header=(line[0]=='>');
255 				if(header){
256 					if(maxReads>0 && readsProcessed>=maxReads){break;}
257 					readsProcessed++;
258 					if(currentNode!=null){nodes.put(currentNode, nodes.get(currentNode)+currentSize);}
259 
260 					final TaxNode tn=tree.parseNodeFromHeader(new String(line, 1, line.length-1), false);
261 
262 					if(tn==null || tn!=currentNode){
263 						if(bsw!=null){errorState=bsw.poisonAndWait()|errorState; bsw=null;}
264 					}
265 					if(tn!=null && tn!=currentNode){
266 						String dir=tree.toDir(tn, outPath);
267 						final boolean found=nodes.containsKey(tn);
268 						if(!found){nodes.put(tn, 0L);}
269 						FileFormat ff=FileFormat.testOutput(dir+prefix+tn.id+".fa.gz", FileFormat.FASTA, null, true, overwrite && !found, found, false);
270 						bsw=new ByteStreamWriter(ff);
271 						bsw.start();
272 					}
273 
274 					currentNode=tn;
275 					currentSize=0;
276 					if(bsw!=null){readsOut++;}
277 				}else{
278 					basesProcessed+=line.length;
279 					currentSize+=line.length;
280 				}
281 				if(bsw!=null){
282 					linesOut++;
283 					if(!header){basesOut+=line.length;}
284 					bsw.println(line);
285 				}
286 			}
287 		}
288 		if(bsw!=null){
289 			errorState=bsw.poisonAndWait()|errorState; bsw=null;
290 			if(currentNode!=null){nodes.put(currentNode, nodes.get(currentNode)+currentSize);}
291 		}
292 		bf.close();
293 	}
294 
295 	/*--------------------------------------------------------------*/
296 	/*----------------            Fields            ----------------*/
297 	/*--------------------------------------------------------------*/
298 
299 	/** Primary input file path */
300 	private String in1=null;
301 
302 	/** Primary output file path */
303 	private String outPath=null;
304 
305 	private String prefix;
306 
307 	/** Override input file extension */
308 	private String extin=null;
309 
310 	/** For listing what is present in the output */
311 	public String resultsFile=null;
312 
313 	public String taxTreeFile=null;
314 
315 	public boolean makeDirectories=true;
316 
317 	public LinkedHashMap<TaxNode, Long> nodes=new LinkedHashMap<TaxNode, Long>();
318 
319 	/*--------------------------------------------------------------*/
320 
321 	/** Number of reads processed */
322 	protected long readsProcessed=0;
323 	/** Number of lines processed */
324 	protected long linesProcessed=0;
325 	/** Number of bases processed */
326 	protected long basesProcessed=0;
327 
328 	/** Number of reads out */
329 	public long readsOut=0;
330 	/** Number of lines out */
331 	public long linesOut=0;
332 	/** Number of bases out */
333 	public long basesOut=0;
334 
335 	/** Quit after processing this many input reads; -1 means no limit */
336 	private long maxReads=-1;
337 
338 	/*--------------------------------------------------------------*/
339 	/*----------------         Final Fields         ----------------*/
340 	/*--------------------------------------------------------------*/
341 
342 	/** Primary input file */
343 	private final FileFormat ffin1;
344 
345 	private final TaxTree tree;
346 
347 	/*--------------------------------------------------------------*/
348 	/*----------------        Common Fields         ----------------*/
349 	/*--------------------------------------------------------------*/
350 
351 	/** Print status messages to this output stream */
352 	private PrintStream outstream=System.err;
353 	/** Print verbose messages */
354 	public static boolean verbose=false;
355 	/** True if an error was encountered */
356 	public boolean errorState=false;
357 	/** Overwrite existing output files */
358 	private boolean overwrite=true;
359 
360 }
361