1 package sketch;
2 
3 import java.io.File;
4 import java.io.PrintStream;
5 import java.util.ArrayList;
6 
7 import dna.AminoAcid;
8 import fileIO.ByteFile;
9 import fileIO.ByteFile1;
10 import fileIO.ByteFile2;
11 import fileIO.ByteStreamWriter;
12 import fileIO.FileFormat;
13 import fileIO.ReadWrite;
14 import shared.Parse;
15 import shared.Parser;
16 import shared.PreParser;
17 import shared.ReadStats;
18 import shared.Shared;
19 import shared.Timer;
20 import shared.Tools;
21 import stream.ConcurrentGenericReadInputStream;
22 import stream.ConcurrentReadInputStream;
23 import stream.FASTQ;
24 import stream.FastaReadInputStream;
25 import stream.Read;
26 import structures.ListNum;
27 import structures.LongHashSet;
28 
29 /**
30  * @author Brian Bushnell
31  * @date Oct 17, 2014
32  *
33  */
34 public class InvertKey extends SketchObject {
35 
main(String[] args)36 	public static void main(String[] args){
37 		Timer t=new Timer();
38 		InvertKey x=new InvertKey(args);
39 		x.process(t);
40 
41 		//Close the print stream if it was redirected
42 		Shared.closeStream(x.outstream);
43 	}
44 
InvertKey(String[] args)45 	public InvertKey(String[] args){
46 
47 		{//Preparse block for help, config files, and outstream
48 			PreParser pp=new PreParser(args, getClass(), false);
49 			args=pp.args;
50 			outstream=pp.outstream;
51 		}
52 
53 		Shared.capBuffers(4);
54 		ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
55 		ReadWrite.MAX_ZIP_THREADS=Shared.threads();
56 
57 		FASTQ.TEST_INTERLEAVED=FASTQ.FORCE_INTERLEAVED=false;
58 		int k_=32, k2_=0;
59 
60 		Parser parser=new Parser();
61 		for(int i=0; i<args.length; i++){
62 			String arg=args[i];
63 			String[] split=arg.split("=");
64 			String a=split[0].toLowerCase();
65 			String b=split.length>1 ? split[1] : null;
66 
67 			if(a.equals("verbose")){
68 				verbose=Parse.parseBoolean(b);
69 				ByteFile1.verbose=verbose;
70 				ByteFile2.verbose=verbose;
71 				stream.FastaReadInputStream.verbose=verbose;
72 				ConcurrentGenericReadInputStream.verbose=verbose;
73 				stream.FastqReadInputStream.verbose=verbose;
74 				ReadWrite.verbose=verbose;
75 			}else if(a.equals("key")){
76 				keyString=b;
77 			}else if(a.equals("out")){
78 				out1=b;
79 			}else if(a.equalsIgnoreCase("k")){
80 				assert(b!=null) : "Bad parameter: "+arg;
81 				if(b.indexOf(',')>=0){
82 					String[] bsplit=b.split(",");
83 					assert(bsplit.length==2) : "Bad argument "+arg;
84 					int x=Integer.parseInt(bsplit[0]);
85 					int y=Integer.parseInt(bsplit[1]);
86 					k_=Tools.max(x, y);
87 					k2_=Tools.min(x, y);
88 					if(k_==k2_){k2_=0;}
89 				}else{
90 					k_=Integer.parseInt(b);
91 					k2_=0;
92 				}
93 			}else if(a.equalsIgnoreCase("printonce")){
94 				printOnce=Parse.parseBoolean(b);
95 			}else if(parser.in1==null && i==0 && !arg.contains("=") && (arg.toLowerCase().startsWith("stdin") || new File(arg).exists())){
96 				parser.in1=arg;
97 			}else if(parser.out1==null && i==1 && !arg.contains("=")){
98 				out1=arg;
99 			}else if(parser.parse(arg, a, b)){
100 				//do nothing
101 			}else{
102 				outstream.println("Unknown parameter "+args[i]);
103 				assert(false) : "Unknown parameter "+args[i];
104 				//				throw new RuntimeException("Unknown parameter "+args[i]);
105 			}
106 		}
107 
108 		k=k_;
109 		k2=k2_;
110 		shift=2*k;
111 		shift2=shift-2;
112 		mask=(shift>63 ? -1L : ~((-1L)<<shift)); //Conditional allows K=32
113 
114 		{//Process parser fields
115 			Parser.processQuality();
116 
117 			maxReads=parser.maxReads;
118 
119 			overwrite=ReadStats.overwrite=parser.overwrite;
120 			append=ReadStats.append=parser.append;
121 
122 			in1=parser.in1;
123 		}
124 
125 		assert(FastaReadInputStream.settingsOK());
126 
127 		if(in1==null){throw new RuntimeException("Error - at least one input file is required.");}
128 		if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2){
129 			ByteFile.FORCE_MODE_BF2=false;
130 			ByteFile.FORCE_MODE_BF1=true;
131 		}
132 
133 		if(out1!=null && out1.equalsIgnoreCase("null")){out1=null;}
134 
135 		if(!Tools.testOutputFiles(overwrite, append, false, out1)){
136 			outstream.println((out1==null)+", "+out1);
137 			throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+out1+"\n");
138 		}
139 
140 		ffout1=FileFormat.testOutput(out1, FileFormat.FASTA, null, true, overwrite, append, false);
141 
142 		ffin1=FileFormat.testInput(in1, FileFormat.FASTA, null, true, true);
143 
144 		SketchObject.postParse();
145 
146 		if(keyString.indexOf(',')>0){
147 			String[] split=keyString.split(",");
148 			set=new LongHashSet(split.length*2);
149 			for(String s : split){
150 				long x=Long.MAX_VALUE-Sketch.parseA48(s);
151 				set.add(x);
152 //				assert(set.contains(x)) : x+", "+set.size()+", "+set.toStringListView();
153 			}
154 			key0=-1;
155 //			System.err.println(set.toStringListView()+", "+set.size());
156 			assert(!set.isEmpty());
157 		}else if(keyString.endsWith(".sketch")){
158 			SketchTool tool=new SketchTool(10000, 0, false, false);
159 			Sketch sk=tool.loadSketchesFromFile(keyString, null, 0, 1000000, SketchObject.ONE_SKETCH, 1f, 0f, 0f, (byte)0, false).get(0);
160 			set=new LongHashSet(sk.length()*2);
161 			for(long x : sk.keys){set.add(Long.MAX_VALUE-x);}
162 			key0=-1;
163 //			System.err.println(set.toStringListView()+", "+set.size());
164 			assert(!set.isEmpty());
165 		}else{
166 			key0=Long.MAX_VALUE-Sketch.parseA48(keyString);
167 			set=null;
168 //			System.err.println(key0);
169 		}
170 	}
171 
process(Timer t)172 	void process(Timer t){
173 
174 		final ConcurrentReadInputStream cris;
175 		{
176 			cris=ConcurrentReadInputStream.getReadInputStream(maxReads, true, ffin1, null, null, null);
177 			cris.start();
178 			if(verbose){outstream.println("Started cris");}
179 		}
180 		boolean paired=cris.paired();
181 //		if(verbose){
182 			if(!ffin1.samOrBam()){outstream.println("Input is being processed as "+(paired ? "paired" : "unpaired"));}
183 //		}
184 
185 		final ByteStreamWriter bsw;
186 		if(out1!=null){
187 			fasta=ffout1.fasta() && !out1.endsWith(".txt");
188 			bsw=new ByteStreamWriter(ffout1);
189 			bsw.start();
190 		}else{bsw=null;}
191 
192 		long readsProcessed=0;
193 		long basesProcessed=0;
194 		boolean finished=false;
195 
196 		{
197 
198 			ListNum<Read> ln=cris.nextList();
199 			ArrayList<Read> reads=(ln!=null ? ln.list : null);
200 
201 //			outstream.println("Fetched "+reads);
202 
203 			if(reads!=null && !reads.isEmpty()){
204 				Read r=reads.get(0);
205 				assert((ffin1==null || ffin1.samOrBam()) || (r.mate!=null)==cris.paired());
206 			}
207 
208 			while(reads!=null && reads.size()>0 && !finished){
209 
210 				for(int idx=0; idx<reads.size() && !finished; idx++){
211 					final Read r1=reads.get(idx);
212 
213 					finished=invert(key0, r1, bsw);
214 
215 					final int initialLength1=r1.length();
216 
217 					readsProcessed++;
218 					basesProcessed+=initialLength1;
219 				}
220 
221 				cris.returnList(ln);
222 				ln=cris.nextList();
223 				reads=(ln!=null ? ln.list : null);
224 			}
225 			if(ln!=null){
226 				cris.returnList(ln.id, ln.list==null || ln.list.isEmpty());
227 			}
228 		}
229 
230 		errorState|=(ReadWrite.closeStream(cris));
231 		if(bsw!=null){errorState|=bsw.poisonAndWait();}
232 
233 		t.stop();
234 		outstream.println(Tools.timeReadsBasesProcessed(t, readsProcessed, basesProcessed, 8));
235 
236 		if(errorState && !finished && maxReads<1){
237 			throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
238 		}
239 	}
240 
invert(long key2, Read r, ByteStreamWriter bsw)241 	private boolean invert(long key2, Read r, ByteStreamWriter bsw) {
242 		final byte[] bases=r.bases;
243 
244 		long kmer=0;
245 		long rkmer=0;
246 		int len=0;
247 
248 
249 //		System.err.println("Looking for "+key+"\t"+Sketch.toA48(key)+"\t"+Sketch.toA48(Long.MAX_VALUE-key));
250 
251 		for(int i=0; i<bases.length; i++){
252 			byte b=bases[i];
253 			long x=AminoAcid.baseToNumber[b];
254 			long x2=AminoAcid.baseToComplementNumber[b];
255 			kmer=((kmer<<2)|x)&mask;
256 			rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
257 			if(x<0){len=0; rkmer=0;}else{len++;}
258 			if(len>=k){
259 				kmersProcessed++;
260 				final long hashcode=hash(kmer, rkmer);
261 				boolean found=(key0>=0 ? hashcode==key0 : set.contains(hashcode));
262 				if(found){
263 					if(fasta){bsw.println(">"+Sketch.toA48(Long.MAX_VALUE-hashcode)+" "+(i-k+1)+" "+r.id);}
264 					bsw.println(AminoAcid.kmerToString(Tools.min(kmer, rkmer), k));
265 					if(printOnce){
266 						if(key0>=0){return true;}
267 						else{
268 							set.remove(hashcode);
269 							return set.isEmpty();
270 						}
271 					}
272 				}
273 			}
274 		}
275 		return false;
276 	}
277 
278 	/*--------------------------------------------------------------*/
279 
280 	final long key0;
281 	final LongHashSet set;
282 
283 	final int shift;
284 	final int shift2;
285 	final long mask;
286 
287 	boolean printOnce=true;
288 	long kmersProcessed=0;
289 
290 	private String in1=null;
291 	boolean fasta;
292 	boolean sketch;
293 	private String keyString=null;
294 
295 	private String out1="stdout.fa";
296 
297 	/*--------------------------------------------------------------*/
298 
299 	private long maxReads=-1;
300 
301 	/*--------------------------------------------------------------*/
302 
303 	private final FileFormat ffin1;
304 
305 	private final FileFormat ffout1;
306 
307 
308 	/*--------------------------------------------------------------*/
309 
310 	private PrintStream outstream=System.err;
311 	public static boolean verbose=false;
312 	public boolean errorState=false;
313 	private boolean overwrite=false;
314 	private boolean append=false;
315 
316 }
317