1 package sketch;
2 
3 import java.io.PrintStream;
4 import java.util.ArrayList;
5 import java.util.Arrays;
6 import java.util.Collections;
7 import java.util.Random;
8 
9 import aligner.SingleStateAlignerFlat2;
10 import aligner.SingleStateAlignerFlatFloat;
11 import dna.AminoAcid;
12 import dna.Data;
13 import prok.GeneCaller;
14 import prok.GeneModel;
15 import prok.GeneModelParser;
16 import prok.ProkObject;
17 import shared.Parse;
18 import shared.Shared;
19 import shared.Timer;
20 import shared.Tools;
21 import stream.Read;
22 import structures.EntropyTracker;
23 import structures.IntList3;
24 import tax.TaxTree;
25 
26 public class SketchObject {
27 
28 	/*--------------------------------------------------------------*/
29 	/*----------------           Parsing            ----------------*/
30 	/*--------------------------------------------------------------*/
31 
main(String[] args)32 	public static void main(String[] args){
33 		Timer t=new Timer();
34 		for(int i=0; i<1000000; i++){
35 			wkidToAniExact(0.5, 32, 23, 0.000005);
36 		}
37 		t.stop();
38 		System.out.println(t);
39 	}
40 
parseSketchFlags(String arg, String a, String b)41 	public static boolean parseSketchFlags(String arg, String a, String b){
42 
43 		if(parseCoding(arg, a, b)){
44 			//
45 		}
46 
47 		else if(a.equalsIgnoreCase("k")){
48 			if(b.indexOf(',')>=0){
49 				String[] split=b.split(",");
50 				assert(split.length==2) : "Bad argument "+arg;
51 				int x=Integer.parseInt(split[0]);
52 				int y=Integer.parseInt(split[1]);
53 				k=Tools.max(x, y);
54 				k2=Tools.min(x, y);
55 				if(k==k2){k2=0;}
56 			}else{
57 				k=Integer.parseInt(b);
58 				k2=0;
59 			}
60 			setK=true;
61 		}else if(a.equalsIgnoreCase("rcomp")){
62 			rcomp=Parse.parseBoolean(b);
63 		}else if(a.equalsIgnoreCase("minfakeid")){
64 			minFakeID=Integer.parseInt(b);
65 		}else if(a.equalsIgnoreCase("hashNames")){
66 			hashNames=Parse.parseBoolean(b);
67 		}else if(a.equalsIgnoreCase("preferSSUMap")){
68 			preferSSUMap=Parse.parseBoolean(b);
69 		}else if(a.equalsIgnoreCase("preferSSUMapForEuks") || a.equalsIgnoreCase("preferSSUMapEuks")){
70 			preferSSUMapForEuks=Parse.parseBoolean(b);
71 		}else if(a.equalsIgnoreCase("useSSUMapOnly")){
72 			useSSUMapOnly=Parse.parseBoolean(b);
73 		}else if(a.equalsIgnoreCase("useSSUMapOnlyForEuks") || a.equalsIgnoreCase("SSUMapOnlyEuks")){
74 			useSSUMapOnlyForEuks=Parse.parseBoolean(b);
75 		}else if(a.equalsIgnoreCase("skipNonCanonical")){
76 			skipNonCanonical=Parse.parseBoolean(b);
77 		}else if(a.equalsIgnoreCase("useSizeEstimateInScore") || a.equalsIgnoreCase("useSizeEstimate")){
78 			useSizeEstimate=Parse.parseBoolean(b);
79 		}else if(a.equalsIgnoreCase("blacklist") || a.equalsIgnoreCase("bl")){
80 			blacklist=b;
81 		}else if(a.equalsIgnoreCase("whitelist") || a.equalsIgnoreCase("usewhitelist")){
82 			useWhitelist=Parse.parseBoolean(b);
83 		}
84 
85 		else if(a.equalsIgnoreCase("sampleseed")){
86 			sampleseed=Long.parseLong(b);
87 		}
88 
89 		else if(a.equalsIgnoreCase("size") || a.equalsIgnoreCase("sketchsize")){
90 			if("auto".equalsIgnoreCase(b)){
91 				AUTOSIZE=true;
92 				AUTOSIZE_LINEAR=false;
93 				SET_AUTOSIZE=true;
94 			}else if("linear".equalsIgnoreCase(b)){
95 				AUTOSIZE=false;
96 				AUTOSIZE_LINEAR=true;
97 				SET_AUTOSIZE=true;
98 			}else{
99 				AUTOSIZE=false;
100 				AUTOSIZE_LINEAR=false;
101 				targetSketchSize=Parse.parseIntKMG(b);
102 				SET_TARGET_SIZE=true;
103 			}
104 		}else if(a.equalsIgnoreCase("maxfraction") || a.equalsIgnoreCase("maxgenomefraction") || a.equalsIgnoreCase("mgf")){
105 			maxGenomeFraction=Float.parseFloat(b);
106 		}else if(a.equalsIgnoreCase("smallsketchsize")){
107 			smallSketchSize=Integer.parseInt(b);
108 		}else if(a.equalsIgnoreCase("minSketchSize")){
109 			minSketchSize=Tools.max(1, Integer.parseInt(b));
110 		}else if(a.equalsIgnoreCase("autosize")){
111 			AUTOSIZE=Parse.parseBoolean(b);
112 			if(AUTOSIZE){AUTOSIZE_LINEAR=false;}
113 			SET_AUTOSIZE=true;
114 		}else if(a.equalsIgnoreCase("autosizefactor") || a.equalsIgnoreCase("autosizefraction") || a.equalsIgnoreCase("autosizemult") || a.equalsIgnoreCase("sizemult")){
115 			AUTOSIZE_FACTOR=Float.parseFloat(b);
116 			SET_AUTOSIZE_FACTOR=true;
117 			AUTOSIZE_LINEAR=false;
118 			SET_AUTOSIZE=true;
119 		}else if(a.equalsIgnoreCase("linear") || a.equalsIgnoreCase("lineardensity") || a.equalsIgnoreCase("density")){
120 			if(Tools.isNumeric(b)){
121 				AUTOSIZE_LINEAR_DENSITY=Double.parseDouble(b);
122 				AUTOSIZE_LINEAR=true;
123 				AUTOSIZE=false;
124 				SET_AUTOSIZE=true;
125 			}else{
126 				AUTOSIZE_LINEAR=Parse.parseBoolean(b);
127 				if(AUTOSIZE_LINEAR){AUTOSIZE=false;}
128 				SET_AUTOSIZE=true;
129 			}
130 		}else if(a.equalsIgnoreCase("maxGenomeFractionSmall")){
131 			maxGenomeFractionSmall=Float.parseFloat(b);
132 		}else if(a.equalsIgnoreCase("keyFraction")){
133 			double d=Double.parseDouble(b);
134 			setKeyFraction(d);
135 		}else if(a.equalsIgnoreCase("prealloc")){
136 			if(b==null || Character.isLetter(b.charAt(0))){
137 				if(Parse.parseBoolean(b)){
138 					prealloc=1.0f;
139 				}else{
140 					prealloc=0;
141 				}
142 			}else{
143 				prealloc=Float.parseFloat(b);
144 			}
145 		}
146 
147 		else if(a.equalsIgnoreCase("intmap")){
148 			SketchIndex.useIntMap=Parse.parseBoolean(b);
149 		}else if(a.equalsIgnoreCase("intmapsize")){
150 			SketchIndex.intMapSize=Parse.parseIntKMG(b);
151 		}else if(a.equalsIgnoreCase("bitsetbits")){
152 			assert(false) : "bitsetbits should be 2.";
153 //			bitSetBits=Integer.parseInt(b);
154 		}
155 
156 //		else if(a.equalsIgnoreCase("minkmercount") || a.equalsIgnoreCase("minkeycount")){
157 //			minKeyOccuranceCount=Integer.parseInt(b);
158 //		}
159 		else if(a.equalsIgnoreCase("sketchHeapFactor")){
160 			sketchHeapFactor=Tools.max(1.0, Double.parseDouble(b));
161 		}
162 
163 		else if(a.equalsIgnoreCase("killok")){
164 			KILL_OK=Parse.parseBoolean(b);
165 		}else if(a.equalsIgnoreCase("ssa") || a.equalsIgnoreCase("usessa")){
166 			useSSA=Parse.parseBoolean(b);
167 		}else if(a.equalsIgnoreCase("ssa3") || a.equalsIgnoreCase("usessa3")){
168 			useSSA3=Parse.parseBoolean(b);
169 		}else if(a.equalsIgnoreCase("exactani")){
170 			EXACT_ANI=Parse.parseBoolean(b);
171 		}else if(a.equalsIgnoreCase("translate") || a.equalsIgnoreCase("callgenes")){
172 			translate=Parse.parseBoolean(b);
173 			defaultParams.translate=translate;
174 		}else if(a.equalsIgnoreCase("sixframes") || a.equalsIgnoreCase("6frames")){
175 			sixframes=Parse.parseBoolean(b);
176 			defaultParams.sixframes=sixframes;
177 			if(sixframes){
178 				translate=defaultParams.translate=true;
179 			}
180 		}else if(a.equalsIgnoreCase("processSSU") || a.equalsIgnoreCase("process16S") || a.equalsIgnoreCase("16S") || a.equalsIgnoreCase("SSU")){
181 			processSSU=Parse.parseBoolean(b);
182 		}else if(a.equalsIgnoreCase("hashSeed")){
183 			hashSeed=Long.parseLong(b);
184 			//remakeCodes(hashSeed); //Happens in postParse
185 		}
186 
187 		else if(a.equalsIgnoreCase("forceDisableMultithreadedFastq")){
188 			forceDisableMultithreadedFastq=Parse.parseBoolean(b);
189 		}
190 
191 		else if(a.equalsIgnoreCase("verifyentropy")){
192 			EntropyTracker.verify=Parse.parseBoolean(b);
193 		}else if(a.equalsIgnoreCase("entropyK")){
194 			entropyK=Integer.parseInt(b);
195 		}else if(a.equalsIgnoreCase("fastentropy") || a.equalsIgnoreCase("fentropy")){
196 			if(Parse.parseBoolean(b)){EntropyTracker.speed=EntropyTracker.FAST;}
197 		}else if(a.equalsIgnoreCase("mediumentropy") || a.equalsIgnoreCase("mentropy")){
198 			if(Parse.parseBoolean(b)){EntropyTracker.speed=EntropyTracker.MEDIUM;}
199 		}else if(a.equalsIgnoreCase("slowentropy") || a.equalsIgnoreCase("sentropy")){
200 			if(Parse.parseBoolean(b)){EntropyTracker.speed=EntropyTracker.SLOW;}
201 		}else if(a.equalsIgnoreCase("superslowentropy") || a.equalsIgnoreCase("ssentropy")){
202 			if(Parse.parseBoolean(b)){EntropyTracker.speed=EntropyTracker.SUPERSLOW;}
203 		}else if(a.equalsIgnoreCase("verbose2")){
204 			verbose2=Parse.parseBoolean(b);
205 		}else if(a.equalsIgnoreCase("loadSketchesFromSketchFile2")){
206 			LOADER2=Parse.parseBoolean(b);
207 		}
208 
209 		else if(a.equalsIgnoreCase("useToValue2") || a.equalsIgnoreCase("ToValue2")){
210 			useToValue2=Parse.parseBoolean(b);
211 		}else if(a.equals("ssu") || a.equals("ssufile")){
212 			assert(false) : "ssufile is deprecated; please specify 16Sfile and/or 18Sfile independently";
213 		}else if(a.equalsIgnoreCase("16Sfile")/* || a.equalsIgnoreCase("ssufile") || a.equalsIgnoreCase("silvafile")*/){
214 			SSUMap.r16SFile=b;
215 			if("auto".equalsIgnoreCase(SSUMap.r16SFile)){SSUMap.r16SFile=TaxTree.default16SFile();}
216 			assert(SSUMap.r16SMap==null);
217 		}else if(a.equalsIgnoreCase("18Sfile")){
218 			SSUMap.r18SFile=b;
219 			if("auto".equalsIgnoreCase(SSUMap.r18SFile)){SSUMap.r18SFile=TaxTree.default18SFile();}
220 			assert(SSUMap.r18SMap==null);
221 		}
222 
223 //		else if(a.equalsIgnoreCase("minHashValue")){
224 //			minHashValue=Tools.max(1, Long.parseLong(b));
225 //		}
226 
227 		else{
228 			return false;
229 		}
230 		return true;
231 	}
232 
parseCoding(String arg, String a, String b)233 	private static boolean parseCoding(String arg, String a, String b){
234 		if(a.equalsIgnoreCase("deltaout") || a.equalsIgnoreCase("delta")){
235 			deltaOut=Parse.parseBoolean(b);
236 		}else if(a.equalsIgnoreCase("a33") || a.equalsIgnoreCase("a48")){
237 			boolean x=Parse.parseBoolean(b);
238 			if(x){CODING=A48;}
239 			else if(CODING==A48){CODING=HEX;}
240 		}else if(a.equalsIgnoreCase("hex")){
241 			boolean x=Parse.parseBoolean(b);
242 			if(x){CODING=HEX;}
243 			else if(CODING==HEX){CODING=A48;}
244 		}else if(a.equalsIgnoreCase("raw")){
245 			boolean x=Parse.parseBoolean(b);
246 			if(x){CODING=RAW;}
247 			else if(CODING==RAW){CODING=A48;}
248 		}else{
249 			return false;
250 		}
251 		return true;
252 	}
253 
parseMode(String[] args)254 	static int parseMode(String[] args){
255 		int mode=defaultParams.mode;
256 		for(String arg : args){
257 			String[] split=arg.split("=");
258 			String a=split[0].toLowerCase();
259 			String b=split.length>1 ? split[1] : null;
260 			int x=parseMode(arg, a, b);
261 			if(x>-1){mode=x;}
262 		}
263 		return mode;
264 	}
265 
parseMode(String arg, String a, String b)266 	static int parseMode(String arg, String a, String b){
267 		int mode_=-1;
268 		if(a.equalsIgnoreCase("mode")){
269 			assert(b!=null) : "Bad parameter: "+arg;
270 			if(Tools.isDigit(b.charAt(0))){
271 				mode_=Integer.parseInt(b);
272 			}else if(b.equalsIgnoreCase("single") || b.equalsIgnoreCase("onesketch")){
273 				mode_=ONE_SKETCH;
274 			}else if(b.equalsIgnoreCase("taxa") || b.equalsIgnoreCase("pertaxa")){
275 				mode_=PER_TAXA;
276 			}else if(b.equalsIgnoreCase("sequence") || b.equalsIgnoreCase("persequence") || b.equalsIgnoreCase("header") || b.equalsIgnoreCase("perheader")){
277 				mode_=PER_SEQUENCE;
278 //			}else if(b.equalsIgnoreCase("header") || b.equalsIgnoreCase("perheader")){
279 //				mode_=PER_HEADER;
280 			}else if(b.equalsIgnoreCase("img")){
281 				mode_=PER_IMG;
282 			}else if(b.equalsIgnoreCase("perfile") || b.equalsIgnoreCase("file")){
283 				mode_=PER_FILE;
284 			}else{
285 				assert(false) : "Bad parameter: "+arg;
286 			}
287 		}else if(a.equalsIgnoreCase("single") || a.equalsIgnoreCase("onesketch")){
288 			mode_=ONE_SKETCH;
289 		}else if(a.equalsIgnoreCase("taxa") || a.equalsIgnoreCase("pertaxa")){
290 			mode_=PER_TAXA;
291 		}else if(a.equalsIgnoreCase("sequence") || a.equalsIgnoreCase("persequence") || a.equalsIgnoreCase("header") || a.equalsIgnoreCase("perheader")){
292 			mode_=PER_SEQUENCE;
293 //		}else if(a.equalsIgnoreCase("header") || a.equalsIgnoreCase("perheader")){
294 //			mode_=PER_HEADER;
295 		}else if(a.equalsIgnoreCase("perfile") || a.equalsIgnoreCase("file")){
296 			mode_=PER_FILE;
297 		}
298 		return mode_;
299 	}
300 
301 	/*--------------------------------------------------------------*/
302 	/*----------------        Initialization        ----------------*/
303 	/*--------------------------------------------------------------*/
304 
setTaxtree(String taxTreeFile, PrintStream outstream)305 	static synchronized void setTaxtree(String taxTreeFile, PrintStream outstream){
306 		if(taxTreeFile==null){
307 			taxtree=null;
308 			return;
309 		}
310 		if(treefile!=null){
311 			assert(!treefile.equals(taxTreeFile));
312 			if(treefile.equals(taxTreeFile)){return;}
313 			treefile=taxTreeFile;
314 		}
315 		taxtree=TaxTree.loadTaxTree(taxTreeFile, outstream, hashNames, false);
316 	}
317 
reset()318 	public static void reset(){
319 		postparsed=false;
320 		blacklist=null;
321 		useWhitelist=false;
322 		defaultParams=new DisplayParams();
323 		amino=false;
324 		translate=false;
325 		sixframes=false;
326 	}
327 
postParse()328 	public static void postParse(){
329 		if(postparsed){return;}
330 		postparsed=true;
331 		IntList3.defaultMode=IntList3.UNIQUE; //Not really safe, if Seal uses Sketch...
332 
333 		if(defaultParams.amino()){amino=true;}
334 		if(amino){Shared.AMINO_IN=true;}
335 
336 		if(amino8){
337 			AminoAcid.AMINO_SHIFT=3;
338 			System.err.println("Set AMINO_SHIFT to "+AminoAcid.AMINO_SHIFT);
339 		}
340 
341 		if(amino || translate){
342 			rcomp=false;
343 			if(k>12){
344 				int oldk=k;
345 				int oldk2=k2;
346 //				k=Tools.min(k, 63/AminoAcid.AMINO_SHIFT);
347 //				k2=Tools.min(k2, k);
348 				k=defaultKAmino;
349 				k2=defaultK2Amino;
350 				if(k==k2){k2=0;}
351 				if(k!=oldk || k2!=oldk2){System.err.println("Set k to "+k+(k2>0 ? ","+k2 : ""));}
352 			}
353 //			AUTOSIZE_FACTOR=(AUTOSIZE_FACTOR*defaultK)/k;
354 //			System.err.println("Set AUTOSIZE_FACTOR to "+AUTOSIZE_FACTOR);
355 		}
356 
357 		if(aminoOrTranslate()){
358 			bitsPerBase=5;//Maybe it is safe to keep these at 4 and 8 or 5 and 8; need to check.
359 			bitsPerCycle=10;
360 		}else{
361 			bitsPerBase=2;
362 			bitsPerCycle=8;
363 		}
364 		basesPerCycle=bitsPerCycle/bitsPerBase;
365 		hashCycles=((k*bitsPerBase)+bitsPerCycle-1)/bitsPerCycle;
366 
367 		cycleMask=~((-1L)<<bitsPerCycle);
368 		maxCycles=(64+bitsPerCycle-1)/bitsPerCycle;
369 		codeIncrement=(int)(cycleMask+1);
370 		codes=makeCodes(maxCycles, codeIncrement, hashSeed, false);
371 		codes1D=makeCodes1D(codes);
372 
373 		if(k2>0){
374 			assert(k2<k) : "k2 must be less than k.";
375 //			assert(k2%basesPerCycle==0) : "k2 must be a multiple of "+basesPerCycle; //No longer required!  Still recommended for speed though.
376 
377 			k2mask=~((-1L)<<(bitsPerBase*k2));
378 			k2submask=~((-1L)<<(bitsPerBase*(k2%basesPerCycle)));
379 			k2shift=(k-k2); //for toValue2
380 			k2midmask=(k2mask<<((k2shift*bitsPerBase)/2)); //for toValue2
381 			hashCycles2=k2/basesPerCycle;
382 		}else{
383 			useToValue2=false;
384 		}
385 		codeMax=hashCycles*codeIncrement;
386 		codeMax2=hashCycles2*codeIncrement;
387 
388 //		hasher=k2<1 ? new Hasher1() : k2submask==0 ? new Hasher2() : new Hasher3();
389 
390 		if(translate || processSSU){
391 			if(pgmFile==null){
392 				pgmFile=Data.findPath("?model.pgm");
393 			}
394 			pgm=GeneModelParser.loadModel(pgmFile);
395 			GeneCaller.call16S=processSSU;
396 			GeneCaller.call18S=false;//processSSU;
397 			GeneCaller.call23S=false;
398 			GeneCaller.call5S=false;
399 			GeneCaller.calltRNA=false;
400 			GeneCaller.callCDS=translate;
401 
402 			if(GeneCaller.call16S || GeneCaller.call18S || GeneCaller.call23S){
403 				ProkObject.loadLongKmers();
404 				ProkObject.loadConsensusSequenceFromFile(true, true);
405 			}
406 		}
407 
408 //		if(HASH_VERSION<2 && useToValue2){HASH_VERSION=2;}
409 //		else if(HASH_VERSION==2 && !useToValue2){HASH_VERSION=1;}
410 //		assert(blacklist!=null) : blacklist+", "+(blacklist==null);
411 		if(blacklist!=null){Blacklist.addFiles(blacklist);}
412 
413 //		System.err.println("amino="+amino+", k="+k+", k2="+k2+", bitsPerCycle="+bitsPerCycle+", bitsPerBase="+bitsPerBase+
414 //				", basesPerCycle="+basesPerCycle+", hashCycles="+hashCycles+", k2mask="+k2mask+", k2submask="+k2submask+", hashCycles2="+hashCycles2+
415 //				", codeMax="+codeMax+", codeMax2="+codeMax2);
416 //		structures.IntList3.ascending=false;//123 for testing
417 
418 		alignerPool=new AlignmentThreadPool(Shared.threads());
419 	}
420 
421 	/*--------------------------------------------------------------*/
422 	/*----------------           Hashing            ----------------*/
423 	/*--------------------------------------------------------------*/
424 
425 	private static void remakeCodes(long hashSeed){
426 		long[][] codes2=makeCodes(maxCycles, codeIncrement, hashSeed, false);
427 		long[] codes1D2=makeCodes1D(codes2);
428 		for(int i=0; i<codes2.length; i++){
429 			for(int j=0; j<codes2[i].length; j++){
430 				codes[i][j]=codes2[i][j];
431 			}
432 		}
433 		for(int i=0; i<codes1D2.length; i++){
434 				codes1D[i]=codes1D2[i];
435 		}
436 	}
437 
438 	public static long[][] makeCodes(int symbols, int modes, long seed, boolean positive){
439 		Random randy=new Random(seed);
440 		long mask=positive ? Long.MAX_VALUE : -1L;
441 		long[][] r=new long[symbols][modes];
442 		for(int i=0; i<symbols; i++){
443 			for(int j=0; j<modes; j++){
444 				r[i][j]=randy.nextLong()&mask;
445 			}
446 		}
447 		for(int i=0; i<3; i++) {
448 			antialias(r, randy);
449 		}
450 		return r;
451 	}
452 
453 	private static void antialias(long[][] matrix, Random randy){
454 		for(long[] array : matrix){
455 			antialias(array, randy);
456 		}
457 	}
458 
459 	private static void antialias(long[] array, Random randy){
460 		for(int i=0; i<64; i++){
461 			antialiasNumbers(array, randy);
462 			antialiasBit(array, randy, i);
463 		}
464 	}
465 
466 	private static void antialiasBit(long[] array, Random randy, int bit){
467 		int half=array.length/2;
468 		long ones=0;
469 		for(int i=0; i<array.length; i++){
470 			ones+=(array[i]>>bit)&1L;
471 		}
472 		final long orMask=1L<<bit;
473 		final long andMask=~orMask;
474 		while(ones<half-1){
475 			int loc=randy.nextInt(array.length);
476 			while((array[loc]&orMask)!=0){
477 				loc=randy.nextInt(array.length);
478 			}
479 			array[loc]|=orMask;
480 			ones++;
481 		}
482 		while(ones>half+1){
483 			int loc=randy.nextInt(array.length);
484 			while((array[loc]&orMask)==0){
485 				loc=randy.nextInt(array.length);
486 			}
487 			array[loc]&=andMask;
488 			ones--;
489 		}
490 	}
491 
492 	private static void antialiasNumbers(long[] array, Random randy){
493 		for(int i=0; i<array.length; i++){
494 			array[i]=antialiasNumber(array[i], randy);
495 		}
496 	}
497 
498 	private static long antialiasNumber(long number, Random randy){
499 		int ones=Long.bitCount(number);
500 		while(Long.bitCount(number)<31){
501 			number=number|(1L<<randy.nextInt(64));
502 		}
503 		while(Long.bitCount(number)>33){
504 			number=number&~(1L<<randy.nextInt(64));
505 		}
506 		return number;
507 	}
508 
509 //	public static long[] makeCodes1D(int symbols, int modes, long seed, boolean positive){
510 //		Random randy=Shared.threadLocalRandom(seed);
511 //		long mask=positive ? Long.MAX_VALUE : -1L;
512 //		long[] r=new long[symbols*modes];
513 //		for(int i=0; i<r.length; i++){
514 //			r[i]=randy.nextLong()&mask;
515 //		}
516 //		return r;
517 //	}
518 
519 	public static long[] makeCodes1D(long[][] codes2D){
520 		final int len=codes2D.length*codes2D[0].length;
521 		long[] codes1D=new long[len];
522 		int k=0;
523 		for(long[] array : codes2D){
524 			for(long x : array){
525 				codes1D[k]=x;
526 				k++;
527 			}
528 		}
529 		return codes1D;
530 	}
531 
532 	public static final long hash(long kmer){//Avoid using this.
533 		return rcomp ? hash(kmer, AminoAcid.reverseComplementBinaryFast(kmer,  k)) : hash(kmer, kmer);
534 	}
535 
536 	public static final long hash(long kmer, long rkmer){
537 		assert(postparsed);
538 //		return hasher.hash_inner(kmer); //Slow
539 //		return k2<1 ? hash1(kmer) : hash2(kmer);
540 		if(useToValue2){return hashToValue2(kmer, rkmer);}
541 		final long key=toValue(kmer, rkmer);
542 		return k2<1 ? hash1(key) : k2submask==0 ? hash2(key) : hash3(key);
543 	}
544 
545 	/**
546 	 * New version.
547 	 * Generates a hash code from a kmer.
548 	 * @param kmer Kmer to hash
549 	 * @return Hash code
550 	 */
551 	private static final long hashToValue2(final long kmer0, final long rkmer0){
552 //		return Tools.max(hash64shift(kmer0), hash64shift(kmer0&0xFFFFFFFFFFFFL));//Something like this would be around 10% faster; not worth it.
553 		long kmer=kmer0, rkmer=rkmer0;
554 		final long key;
555 		final boolean useK1;
556 		if(rcomp){
557 			assert(!amino && !translate);
558 			final long kmer2=((kmer&k2midmask)>>>k2shift);
559 			final long rkmer2=((rkmer&k2midmask)>>>k2shift);
560 			final long max2=Tools.max(kmer2, rkmer2);
561 //			long cmasked=max2&kmerChoiceMask;
562 //			useK1=kmerChoiceBitset.get((int)cmasked);
563 			useK1=((max2%4999L)&1L)==0L;
564 			key=useK1 ? Tools.max(kmer, rkmer) : max2;
565 //			System.err.println("\n"+longer+", "+max2+", "+(max2%4999L)+", "+((max2%4999L)&1L)+
566 //					", "+Long.toHexString(kmer)+", "+Long.toHexString(max2)+", "+Long.toHexString(value));
567 //			long value=longer ? (kmer2>rkmer2 ? kmer : rkmer) : max2;
568 //			System.err.println("\n"+AminoAcid.kmerToString(kmer, k)+"\n"+AminoAcid.kmerToString(rkmer, k)+
569 //					"\n"+AminoAcid.kmerToString(kmer2, k2)+"\n"+AminoAcid.kmerToString(rkmer2, k2)+"\n"+AminoAcid.kmerToString(value, k)+"\n");
570 //			assert(kmer2==AminoAcid.reverseComplementBinaryFast(rkmer2, k2));)
571 		}else{
572 			assert(amino || translate);
573 			final long kmer2=(kmer&k2mask);
574 			useK1=((kmer2%4999)&1L)==0L;
575 			key=useK1 ? kmer : kmer2;
576 		}
577 //		if(key%5!=0){return -1;}//This makes it a little faster.
578 		final long bit=useK1 ? 0 : 1; //Note that this gets reversed later during the subtraction process
579 
580 //		System.err.println(bit+", "+Long.toHexString(kmer)+", "+Long.toHexString(k2mask));
581 //		assert(bit==0) : bit+", "+Long.toHexString(kmer);
582 
583 		long hashcode=key, data=key;
584 //		for(int i=0; i<codeMax; i+=codeIncrement){
585 //			int x=(int)(data&cycleMask);
586 //			data>>>=bitsPerCycle;
587 //			hashcode^=codes1D[i+x];
588 //		}
589 //		for(int i=0; data!=0; i+=codeIncrement){
590 //			int x=(int)(data&cycleMask);
591 //			data>>>=bitsPerCycle;
592 //			hashcode^=codes1D[i+x];
593 //		}
594 		{//5% faster than other loops.
595 			int i=0;
596 			do{
597 				int x=(int)(data&cycleMask);
598 				data>>>=bitsPerCycle;
599 				hashcode^=codes1D[i+x];
600 				i+=codeIncrement;
601 			}while(data!=0);
602 		}
603 		hashcode=(hashcode&(-2L))|bit;
604 
605 		return hashcode;
606 	}
607 
608 	/** Taken from Thomas Wang, Jan 1997:
609 	 * http://web.archive.org/web/20071223173210/http://www.concentric.net/~Ttwang/tech/inthash.htm
610 	 *
611 	 *  This is much faster than the table version.  Results seem similar y.
612 	 */
613 	private static long hash64shift(long key){
614 		key = (~key) + (key << 21); // key = (key << 21) - key - 1;
615 		key = key ^ (key >>> 24);
616 		key = (key + (key << 3)) + (key << 8); // key * 265
617 		key = key ^ (key >>> 14);
618 		key = (key + (key << 2)) + (key << 4); // key * 21
619 		key = key ^ (key >>> 28);
620 		key = key + (key << 31);
621 		return key;
622 	}
623 
624 	/**
625 	 * Fastest version!
626 	 * Generates a hash code from a kmer.
627 	 * @param kmer Kmer to hash
628 	 * @return Hash code
629 	 */
630 	private static final long hash1(long kmer){
631 //		if(kmer%5!=0){return -1;}
632 		long code=kmer;
633 		for(int i=0; i<codeMax; i+=codeIncrement){
634 			int x=(int)(kmer&cycleMask);
635 			kmer>>=bitsPerCycle;
636 			code^=codes1D[i+x];
637 		}
638 
639 		return code;
640 	}
641 
642 
643 	/**
644 	 * Generates a hash code from a kmer, using dual kmer lengths.
645 	 * @param kmer Kmer to hash
646 	 * @return Hash code
647 	 */
648 	private static final long hash2(final long kmer0){
649 //		return hash64shift(kmer0);
650 		long kmer=kmer0;
651 		long code=0;
652 
653 		for(int i=0; i<codeMax2; i+=codeIncrement){
654 			int x=(int)(kmer&cycleMask);
655 			kmer>>=bitsPerCycle;
656 			code^=codes1D[i+x];
657 		}
658 		final long code2=code^(k2mask&kmer0);
659 		for(int i=codeMax2; i<codeMax; i+=codeIncrement){
660 			int x=(int)(kmer&cycleMask);
661 			kmer>>=bitsPerCycle;
662 			code^=codes1D[i+x];
663 		}
664 		return ((code&1)==1) ? code2 : code^kmer0; //Random; faster than max.
665 	}
666 
667 
668 	/**
669 	 * Generates a hash code from a kmer, using dual kmer lengths, allowing K2 to be a non-multiple-of-4.
670 	 * Uses one additional and, xor, and lookup.
671 	 * @param kmer Kmer to hash
672 	 * @return Hash code
673 	 */
674 	private static final long hash3(final long kmer0){
675 //		return hash64shift(kmer0);
676 
677 		long kmer=kmer0;
678 		long code=0;
679 		assert(k2submask!=0);
680 
681 		int i=0;
682 		for(; i<codeMax2; i+=codeIncrement){
683 			int x=(int)(kmer&cycleMask);
684 			kmer>>=bitsPerCycle;
685 			code^=codes1D[i+x];
686 		}
687 		final int residual=(int)(kmer&k2submask);
688 		final long code2=code^(k2mask&kmer0)^codes1D[i+residual];
689 		for(; i<codeMax; i+=codeIncrement){
690 			int x=(int)(kmer&cycleMask);
691 			kmer>>=bitsPerCycle;
692 			code^=codes1D[i+x];
693 		}
694 		return ((code&1)==1) ? code2 : code^kmer0; //Random; faster than max.
695 	}
696 
697 
698 	/*--------------------------------------------------------------*/
699 	/*----------------      Convenience Methods     ----------------*/
700 	/*--------------------------------------------------------------*/
701 
702 	public static final float align(byte[] query, byte[] ref){
703 		SingleStateAlignerFlat2 ssa=GeneCaller.getSSA();
704 		int a=0, b=ref.length-1;
705 		int[] max=ssa.fillUnlimited(query, ref, a, b, -9999);
706 		if(max==null){return 0;}
707 
708 		final int rows=max[0];
709 		final int maxCol=max[1];
710 		final int maxState=max[2];
711 		final float id=ssa.tracebackIdentity(query, ref, a, b, rows, maxCol, maxState, null);
712 		return id;
713 	}
714 
715 	public static final float alignAndMakeMatch(Read r, byte[] ref){
716 		byte[] query=r.bases;
717 		SingleStateAlignerFlat2 ssa=GeneCaller.getSSA();
718 		int a=0, b=ref.length-1;
719 		int[] max=ssa.fillUnlimited(query, ref, a, b, -9999);
720 		if(max==null){return 0;}
721 
722 		final int rows=max[0];
723 		final int maxCol=max[1];
724 		final int maxState=max[2];
725 		byte[] match=ssa.traceback(query, ref, a, b, rows, maxCol, 0);
726 		int[] score=ssa.score(query, ref, a, b, rows, maxCol, 0);
727 		final float id=Read.identity(match);
728 		r.match=match;
729 		r.start=score[1];
730 		r.stop=score[2];
731 		r.setMapped(true);
732 		return id;
733 	}
734 
735 	public static final float alignAndMakeMatch(Read r, byte[] ref, float[] refWeights/*, float[] insWeights, float[] delWeights*/){
736 		byte[] query=r.bases;
737 		SingleStateAlignerFlatFloat ssa=GeneCaller.getSSAF();
738 //		ssa.setWeights(refWeights, insWeights, delWeights);
739 		ssa.setWeights(refWeights);
740 		int a=0, b=ref.length-1;
741 		int[] max=ssa.fillUnlimited(query, ref, a, b, -9999);
742 		if(max==null){return 0;}
743 
744 		final int rows=max[0];
745 		final int maxCol=max[1];
746 		final int maxState=max[2];
747 		byte[] match=ssa.traceback(query, ref, a, b, rows, maxCol, 0);
748 		int[] score=ssa.score(query, ref, a, b, rows, maxCol, 0);
749 		final float id=Read.identity(match);
750 		r.match=match;
751 		r.start=score[1];
752 		r.stop=score[2];
753 		r.setMapped(true);
754 		return id;
755 	}
756 
757 	static String fixMeta(String s){
758 		if(s==null){return null;}
759 		int colon=s.indexOf(':');
760 		assert(colon>=0);
761 		if(s.length()==colon+5 && s.endsWith(":null")){return null;}
762 		return s.replace('\t', ' ');
763 	}
764 
765 	static ArrayList<String> fixMeta(ArrayList<String> list){
766 		if(list==null || list.isEmpty()){return null;}
767 		for(int i=0; i<list.size(); i++){
768 			String s=list.get(i);
769 			s=fixMeta(s);
770 			if(s==null){
771 				list.remove(i);
772 				i--;
773 			}else{
774 				list.set(i, s);
775 			}
776 		}
777 		if(list.isEmpty()){return null;}
778 		list.trimToSize();
779 		Collections.sort(list);
780 		return list;
781 	}
782 
783 	public static final float aniToWkid(final double ani){
784 		final float wkid;
785 		if(ani<=0){
786 			wkid=0;
787 		}else if(k2<1){
788 			wkid=(float)Math.pow(ani, k);
789 		}else{
790 			wkid=0.5f*(float)(Math.pow(ani, k)+Math.pow(ani, k2));
791 		}
792 		return wkid;
793 	}
794 
795 	public static final float wkidToAniExact(final double wkid, final int A, final int B, final double maxDeviation){
796 		assert(A>B);
797 		assert(maxDeviation>0);
798 		final double logWkid=Math.log(wkid);
799 		final double invA=1.0/A;
800 		final double invB=1.0/B;
801 
802 		double aniUpper=Math.exp(logWkid*invA);
803 		double aniLower=Math.exp(logWkid*invB);
804 		assert(aniLower<=aniUpper);
805 		double aniEst=(aniLower+aniUpper)*0.5f;
806 //		System.err.println(aniEst);
807 //		if(B>16){//Fast mode
808 //			aniUpper=(aniUpper+3*aniEst)*0.25;
809 //			aniLower=(aniLower+3*aniEst)*0.25;
810 //		}
811 		double wkidEst=aniToWkid(aniEst, A, B);
812 		int iters=1;
813 //		System.err.println(aniLower+"\t"+aniUpper+"\t"+aniEst+"\t"+aniToWkid(aniEst, A, B)+"\t"+(wkidEst-wkid));
814 		while(Math.abs(wkidEst-wkid)>maxDeviation && iters<40){
815 			if(wkidEst<wkid){aniLower=aniEst;}
816 			else{aniUpper=aniEst;}
817 			aniEst=(aniLower+aniUpper)*0.5f;
818 			wkidEst=aniToWkid(aniEst, A, B);
819 //			System.err.println(aniLower+"\t"+aniUpper+"\t"+aniEst+"\t"+wkidEst+"\t"+(wkidEst-wkid));
820 			iters++;
821 		}
822 //		System.err.println("Iterations: "+iters);
823 		return (float)aniEst;
824 	}
825 
826 	public static double aniToWkid(double ani, int A, int B){
827 		if(A<B){int C=A; A=B; B=C;}//Swap
828 		double aPow=Math.pow(ani, A);
829 		double bPow=Math.pow(ani, B);
830 //		return 0.5*(aPow+bPow);
831 		return useToValue2 ? 0.5*(aPow+bPow) : 0.5*(aPow+(bPow*0.4+aPow*0.6));
832 	}
833 
834 	//From Kayla
835 //	public static double aniToWkid(double ANI, int K2, int K1){
836 //		if(K2<K1){int C=K2; K2=K1; K1=C;}//Swap
837 //
838 //		return 0.5*((1 - (1 - Math.pow(ANI, K2-K1))*Math.pow(ANI, K1))*Math.pow(ANI, K2) +
839 //                (1 + (1 - Math.pow(ANI, K2-K1))*Math.pow(ANI, K1))*Math.pow(ANI, K1));
840 //	}
841 
842 	public static double aniToWkid(double ani, int A){
843 		return Math.pow(ani, A);
844 	}
845 
846 	public static final float wkidToAniExact(final double wkid, final int k){
847 		return (float)Math.exp(Math.log(wkid)/(k));
848 	}
849 
850 	public static final float wkidToAni(final double wkid){
851 		final float ani;
852 		if(wkid<=0){
853 			ani=0;
854 		}else if(k2<1){
855 			ani=(float)Math.exp(Math.log(wkid)/k);
856 		}else{
857 			if(EXACT_ANI){return wkidToAniExact(wkid, k, k2, 0.00005);}
858 
859 			//This is linear interpolation which is not correct.  But for some reason it works really well.
860 			final double log=Math.log(wkid);
861 
862 //			double ani1=Math.exp(log/k);
863 //			double ani2=Math.exp(log/k2);
864 //			ani=(float)(0.5*(ani1+ani2));
865 
866 			//alternatively...  this one seems to work better.
867 //			ani=(float)Math.exp(2*log/(k*1.1+k2*0.9));
868 			ani=(float)Math.exp(2*log/(1.2*k+.8*k2));
869 		}
870 		return ani;
871 	}
872 
873 //	public static void kmerArrayToSketchArray(long[] kmers){
874 //		for(int i=0; i<kmers.length; i++){
875 //			long kmer=kmers[i];
876 //			long hash=SketchTool.hash(kmer);
877 //			assert(hash>=minHashValue);
878 //			hash=Long.MAX_VALUE-hash;
879 //			kmers[i]=hash;
880 //		}
881 //		Shared.sort(kmers);
882 //	}
883 
884 	public static void hashArrayToSketchArray(long[] keys){
885 		for(int i=0; i<keys.length; i++){
886 			long hash=keys[i];
887 			assert(hash>=minHashValue);
888 			hash=Long.MAX_VALUE-hash;
889 			keys[i]=hash;
890 		}
891 		Shared.sort(keys);
892 	}
893 
894 	public static final long genomeSizeEstimate(long min, int length) {
895 		if(length==0){return 0;}
896 		double est=((double)Long.MAX_VALUE)*2*length/(Tools.max(min, 1));
897 //		if(k2>0){est*=0.5;} //This is necessary if the hash function uses max, but not random.
898 //		System.err.println("max="+Long.MAX_VALUE+", min="+min+", len="+length+", est="+(long)est);
899 //		new Exception().printStackTrace(System.err);
900 		return (long)Math.ceil(est);
901 	}
902 
903 	public static final int toSketchSize(long genomeSizeBases, long genomeSizeKmers, long genomeSizeEstimate, int maxSketchSize){
904 //		assert(false) : genomeSizeBases+", "+genomeSizeKmers+", "+genomeSizeEstimate+", "+maxSketchSize+", "+useSizeEstimate;
905 		if(genomeSizeEstimate>0 && useSizeEstimate){
906 			return toSketchSizeKmers(genomeSizeEstimate, maxSketchSize);
907 		}
908 		if(genomeSizeKmers>0){
909 			return toSketchSizeKmers(genomeSizeKmers, maxSketchSize);
910 		}
911 		assert(genomeSizeBases>0) : "BBSketch does not currently support empty files.\n"
912 				+genomeSizeBases+", "+genomeSizeKmers+", "+genomeSizeEstimate+", "+maxSketchSize+", "+useSizeEstimate;
913 		return toSketchSizeBases(genomeSizeBases, maxSketchSize);
914 	}
915 
916 	private static final int toSketchSizeBases(long genomeSizeBases, int maxSketchSize) {
917 		return toSketchSizeKmers(Tools.max(0, genomeSizeBases-k+1), maxSketchSize);
918 	}
919 
920 	private static final int toSketchSizeKmers(long genomeSizeKmers, int maxSketchSize) {
921 //		System.err.println(genomeSizeKmers+", "+maxSketchSize);
922 //		new Exception().printStackTrace();
923 		if(AUTOSIZE){
924 			if(aminoOrTranslate()){
925 				float linear1=Tools.min(60+0.5f*(float)Math.sqrt(genomeSizeKmers),
926 						maxGenomeFractionSmall*genomeSizeKmers*2);
927 				float linear2=genomeSizeKmers*maxGenomeFraction*2;
928 				float poly=0+1f*(float)Math.sqrt(genomeSizeKmers)+90f*(float)Math.pow(genomeSizeKmers, 0.3);
929 				float log=Tools.max(1000, -4000+3500*(float)Math.log(Tools.max(1, genomeSizeKmers)));
930 				float min=Tools.min(Tools.max(linear1, linear2), poly, log);
931 				assert(min>=Integer.MIN_VALUE && min<=Integer.MAX_VALUE) : min;
932 
933 				//			System.err.println(genomeSizeKmers+" -> "+linear1+", "+linear2+", "+poly+", "+log);
934 
935 				return (int)Tools.min(genomeSizeKmers*keyFraction2, min*AUTOSIZE_FACTOR);
936 				//			return (int)Tools.max(1, min*AUTOSIZE_FACTOR);
937 			}else{
938 				float linear1=Tools.min(smallSketchSize+0.5f*(float)Math.sqrt(genomeSizeKmers),
939 						maxGenomeFractionSmall*genomeSizeKmers-10);
940 				float linear2=genomeSizeKmers*maxGenomeFraction;
941 				float poly=0+1f*(float)Math.sqrt(genomeSizeKmers)+90f*(float)Math.pow(genomeSizeKmers, 0.3);
942 				float log=Tools.max(1000, -4000+3500*(float)Math.log(Tools.max(1, genomeSizeKmers))+8f*(float)Math.pow(genomeSizeKmers, 0.3));
943 				float min=Tools.min(Tools.max(linear1, linear2), poly, log);
944 				assert(min>=Integer.MIN_VALUE && min<=Integer.MAX_VALUE) : min;
945 
946 				//			System.err.println(genomeSizeKmers+" -> "+linear1+", "+linear2+", "+poly+", "+log);
947 
948 				int result=(int)Tools.min(genomeSizeKmers*keyFraction2, min*AUTOSIZE_FACTOR);
949 //				System.err.println(result);
950 				return result;
951 				//			return (int)Tools.max(1, min*AUTOSIZE_FACTOR);
952 			}
953 		}else if(AUTOSIZE_LINEAR){
954 			if(aminoOrTranslate()){
955 				return (int)(Tools.min(10000000, Tools.min(maxGenomeFractionSmall, AUTOSIZE_LINEAR_DENSITY)*genomeSizeKmers));
956 			}else{
957 				return (int)(Tools.min(10000000, Tools.min(maxGenomeFractionSmall, AUTOSIZE_LINEAR_DENSITY)*genomeSizeKmers));
958 			}
959 		}else{
960 			if(aminoOrTranslate()){//kmers are roughly 3x smaller...
961 				return Tools.min((int)(2+3*maxGenomeFraction*genomeSizeKmers), maxSketchSize);
962 			}else{
963 				return Tools.min((int)(2+maxGenomeFraction*genomeSizeKmers), maxSketchSize);
964 			}
965 		}
966 	}
967 
968 //	static final long toValue2(long kmer, long rkmer){
969 //		assert(k2>0 && k2<k) : "k="+k+","+k2;
970 //
971 //		final long value;
972 //		if(rcomp){
973 //			assert(!amino && !translate);
974 ////			if(!rcomp){return kmer;}
975 //			long kmer2=((kmer&k2midmask)>>>k2shift);
976 //			long rkmer2=((rkmer&k2midmask)>>>k2shift);
977 //
978 ////			assert(kmer2>=0);
979 ////			assert(kmer<0 || kmer2<=kmer);
980 ////			assert(rkmer<0 || rkmer2<=kmer);
981 //
982 ////			assert(false) : "\n"+Long.toBinaryString(kmer)+
983 ////				"\n"+Long.toBinaryString(rkmer)+
984 ////				"\n"+Long.toBinaryString(kmer2)+
985 ////				"\n"+Long.toBinaryString(rkmer2);
986 //
987 //			//TODO: Slow
988 ////			assert(kmer==AminoAcid.reverseComplementBinaryFast(rkmer, k)) :
989 ////				"\n"+AminoAcid.kmerToString(kmer, k)+"\n"+AminoAcid.kmerToString(rkmer, k)+"\n"+AminoAcid.kmerToString(AminoAcid.reverseComplementBinaryFast(rkmer, k), k)+"\n";
990 ////			assert(kmer2==AminoAcid.reverseComplementBinaryFast(rkmer2, k2));
991 //			long max2=Tools.max(kmer2, rkmer2);
992 ////			long cmasked=max2&kmerChoiceMask;
993 ////			boolean longer=kmerChoiceBitset.get((int)cmasked);
994 //			boolean longer=((max2%4999L)&1L)==0L;
995 //			value=longer ? Tools.max(kmer, rkmer) : max2;
996 ////			System.err.println("\n"+longer+", "+max2+", "+(max2%4999L)+", "+((max2%4999L)&1L)+
997 ////					", "+Long.toHexString(kmer)+", "+Long.toHexString(max2)+", "+Long.toHexString(value));
998 ////			long value=longer ? (kmer2>rkmer2 ? kmer : rkmer) : max2;
999 ////			System.err.println("\n"+AminoAcid.kmerToString(kmer, k)+"\n"+AminoAcid.kmerToString(rkmer, k)+
1000 ////					"\n"+AminoAcid.kmerToString(kmer2, k2)+"\n"+AminoAcid.kmerToString(rkmer2, k2)+"\n"+AminoAcid.kmerToString(value, k)+"\n");
1001 ////			assert(kmer2==AminoAcid.reverseComplementBinaryFast(rkmer2, k2));)
1002 //		}else{
1003 //			assert(amino || translate);
1004 ////			long kmer2=(kmer&k2mask);
1005 //			long kmer2=((kmer&k2midmask)>>>k2shift);
1006 //			boolean longer=((kmer2%4999)&1)==0;
1007 //			value=longer ? kmer : kmer2;
1008 //		}
1009 //		return value;
1010 //	}
1011 
1012 	private static final long toValue(long kmer, long rkmer){
1013 //		assert(toValue2(kmer, rkmer)==toValue2(rkmer, kmer));
1014 		assert(!useToValue2);
1015 //		if(useToValue2){return toValue2(kmer, rkmer);}
1016 		long value=(rcomp ? Tools.max(kmer, rkmer) : kmer);
1017 		return value;
1018 	}
1019 
1020 	/*--------------------------------------------------------------*/
1021 	/*----------------          Constants           ----------------*/
1022 	/*--------------------------------------------------------------*/
1023 
1024 	public static final int RAW=0, HEX=1, A48=2;
1025 	public static final char[] codingArray={'R', 'H', 'A'};
1026 
1027 	static final byte[] hexTable=new byte[128];
1028 	static {
1029 		Arrays.fill(hexTable, (byte)-1);
1030 		for(int i='0'; i<='9'; i++){
1031 			hexTable[i]=(byte)(i-'0');
1032 		}
1033 		for(int i='A'; i<='F'; i++){
1034 			hexTable[i]=hexTable[i+'a'-'A']=(byte)(i-'A'+10);
1035 		}
1036 		hexTable['x']=hexTable['X']=hexTable['-']=hexTable['+']=0;
1037 	}
1038 
1039 	public static final int ONE_SKETCH=1, PER_SEQUENCE=2, PER_TAXA=3, PER_IMG=4/*, PER_HEADER=5*/, PER_FILE=6;
1040 
1041 	/*--------------------------------------------------------------*/
1042 	/*----------------      Default Locations       ----------------*/
1043 	/*--------------------------------------------------------------*/
1044 
1045 //	public static ArrayList<String> IMG_PATH(){return makePaths(IMG_PATH, 31);}
1046 //	public static ArrayList<String> NT_PATH(){return makePaths(NT_PATH, 31);}
1047 //	public static ArrayList<String> NR_PATH(){throw new RuntimeException("NR is not currently available.");}
1048 //	public static ArrayList<String> REFSEQ_PATH(){return makePaths(REFSEQ_PATH, 31);}
1049 //	public static ArrayList<String> SILVA_PATH(){return makePaths(SILVA_PATH, 31);}
1050 
1051 	private static ArrayList<String> makePaths(String pattern, int files){
1052 		ArrayList<String> list=new ArrayList<String>(files);
1053 		for(int i=0; i<files; i++){
1054 			list.add(pattern.replace("#", ""+i));
1055 		}
1056 		return list;
1057 	}
1058 
1059 	private static final String IMG_PATH="/global/projectb/sandbox/gaag/bbtools/img/current/img#.sketch";
1060 	private static final String NT_PATH="/global/projectb/sandbox/gaag/bbtools/nt/current/taxa#.sketch";
1061 	private static final String NR_PATH="/global/projectb/sandbox/gaag/bbtools/nr/current/taxa#.sketch";
1062 	private static final String REFSEQ_PATH="/global/projectb/sandbox/gaag/bbtools/refseq/current/taxa#.sketch";
1063 	private static final String REFSEQ_PATH_BIG="/global/projectb/sandbox/gaag/bbtools/refseq/current/big#.sketch";
1064 	private static final String SILVA_PATH="/global/projectb/sandbox/gaag/bbtools/silva/latest/both_taxa#.sketch";
1065 	private static final String PROKPROT_PATH="/global/projectb/sandbox/gaag/bbtools/refseq/current/prot/taxa#.sketch";
1066 	private static final String PROKPROT_PATH_BIG="/global/projectb/sandbox/gaag/bbtools/refseq/current/prot/big#.sketch";
1067 	private static final String MITO_PATH="/global/projectb/sandbox/gaag/bbtools/mito2/taxa#.sketch";
1068 	private static final String FUNGI_PATH="/global/projectb/sandbox/gaag/bbtools/mito2/fungi#.sketch";
1069 
1070 	private static final String IMG_PATH_AWS=null;
1071 	private static final String NT_PATH_AWS="/test1/sketch/latest/nt/taxa#.sketch";
1072 	private static final String NR_PATH_AWS=null;
1073 	private static final String REFSEQ_PATH_AWS="/test1/sketch/latest/refseq/taxa#.sketch";
1074 	private static final String SILVA_PATH_AWS="/test1/sketch/latest/ribo/both_taxa#.sketch";
1075 	private static final String PROKPROT_PATH_AWS="/test1/sketch/latest/protein/taxa#.sketch";
1076 	private static final String MITO_PATH_AWS=null;
1077 	private static final String FUNGI_PATH_AWS=null;
1078 
1079 	public static final String IMG_PATH(){return Shared.AWS ? IMG_PATH_AWS : IMG_PATH;}
1080 	public static final String NT_PATH(){return Shared.AWS ? NT_PATH_AWS : NT_PATH;}
1081 	public static final String NR_PATH(){return Shared.AWS ? NR_PATH_AWS : NR_PATH;}
1082 	public static final String REFSEQ_PATH(){return Shared.AWS ? REFSEQ_PATH_AWS : REFSEQ_PATH;}
1083 	public static final String REFSEQ_PATH_BIG(){return REFSEQ_PATH_BIG;}
1084 	public static final String SILVA_PATH(){return Shared.AWS ? SILVA_PATH_AWS : SILVA_PATH;}
1085 	public static final String PROKPROT_PATH(){return Shared.AWS ? PROKPROT_PATH_AWS : PROKPROT_PATH;}
1086 	public static final String PROKPROT_PATH_BIG(){return PROKPROT_PATH_BIG;}
1087 	public static final String MITO_PATH(){return Shared.AWS ? MITO_PATH_AWS : MITO_PATH;}
1088 	public static final String FUNGI_PATH(){return Shared.AWS ? FUNGI_PATH_AWS : FUNGI_PATH;}
1089 
1090 	/*--------------------------------------------------------------*/
1091 	/*----------------           Getters            ----------------*/
1092 	/*--------------------------------------------------------------*/
1093 
1094 	public static boolean useWhitelist() {return useWhitelist;}
1095 	public static String blacklist() {return blacklist;}
1096 
1097 	/*--------------------------------------------------------------*/
1098 	/*----------------        Static Fields         ----------------*/
1099 	/*--------------------------------------------------------------*/
1100 
1101 	public static int CODING=A48;
1102 	public static boolean deltaOut=true;
1103 	public static boolean rcomp=true;
1104 
1105 	public static final int defaultK=32;
1106 	public static final int defaultK2=24;
1107 	public static final int defaultKAmino=12;
1108 	public static final int defaultK2Amino=9;
1109 	public static int k=defaultK;
1110 	public static int k2=defaultK2;
1111 	public static int entropyK=3;
1112 	public static boolean setK=false;
1113 	public static boolean amino=false;
1114 	public static boolean amino8=false;
1115 	public static boolean translate=false;
1116 	public static boolean sixframes=false;
1117 	public static boolean processSSU=true;
1118 	public static int min_SSU_len=1000;
1119 	public static int HASH_VERSION=2;
1120 	public static String pgmFile=null;
1121 	public static GeneModel pgm=null;
1122 
1123 	static boolean aminoOrTranslate(){return amino | translate;}
1124 
1125 	private static int bitsPerCycle=8; //Optimal speed for K=31 is 9bpc (15% faster than 8). 9, 10, and 11 are similar.
1126 	private static long cycleMask=~((-1L)<<bitsPerCycle);
1127 	private static final long codeOrMask=1L;
1128 	private static final long codeAndMask=~1L;
1129 	private static int maxCycles=(64+bitsPerCycle-1)/bitsPerCycle;
1130 	private static int codeIncrement=(int)(cycleMask+1);
1131 	private static int codeMax;
1132 	private static int codeMax2;
1133 
1134 	private static long hashSeed=12345;
1135 	private static long[][] codes;//=makeCodes(maxCycles, codeIncrement, hashSeed, false);
1136 	private static long[] codes1D;//=makeCodes1D(codes);
1137 	static boolean useToValue2=true;
1138 
1139 	//These are set in postParse()
1140 	private static int bitsPerBase=2;
1141 	private static int basesPerCycle=bitsPerCycle/bitsPerBase;
1142 	private static int hashCycles=64/bitsPerCycle; //Note: Needs to be variable based on k to make k2 codes compatible with k codes
1143 	private static int hashCycles2;
1144 	private static long k2mask;
1145 	private static long k2submask;
1146 	//Difference in length of k and k2, in bits
1147 	private static int k2shift;
1148 	private static long k2midmask;
1149 
1150 	/**
1151 	 * Make the SketchHeap size this factor bigger,
1152 	 * when minKeyOccuranceCount is used
1153 	 */
1154 	public static double sketchHeapFactor=8;
1155 //	static int minKeyOccuranceCount=1;
1156 
1157 	public static int minSketchSize=3;
1158 	public static int targetSketchSize=10000;
1159 	public static boolean AUTOSIZE=true;
1160 	public static float AUTOSIZE_FACTOR=1;
1161 	public static boolean SET_AUTOSIZE_FACTOR=false;
1162 	public static boolean SET_AUTOSIZE=false;
1163 	public static boolean SET_TARGET_SIZE=false;
1164 	public static boolean AUTOSIZE_LINEAR=false;
1165 	public static double AUTOSIZE_LINEAR_DENSITY=0.001;
1166 	public static float maxGenomeFraction=0.04f;//Was 0.015, but that is often too sparse
1167 	public static float maxGenomeFractionSmall=0.10f;
1168 	public static int smallSketchSize=150;
1169 	public static boolean makeIndex=true;
1170 	public static float prealloc=0;
1171 	public static boolean allToAll=false;
1172 	public static boolean compareSelf=false;
1173 	public static boolean skipCompare=false;
1174 	public static final int bitSetBits=2; //Needs to be 2 for unique counts.
1175 
1176 	private static double keyFraction=0.16;
1177 	private static double keyFraction2=keyFraction*1.2;
1178 	public static long minHashValue=setMinHashValue();
1179 	public static double keyFraction(){return keyFraction;}
1180 	public static void setKeyFraction(double d){
1181 		assert(d>0);
1182 		keyFraction=Tools.mid(0.0001, d, 0.5);
1183 		keyFraction2=Tools.mid(0.0001, keyFraction*1.2, 0.5);
1184 	}
1185 	public static long setMinHashValue(){
1186 		double mult=1-2*keyFraction;
1187 		minHashValue=(long)(mult*Long.MAX_VALUE);
1188 		assert(minHashValue>=0 && minHashValue<Long.MAX_VALUE);
1189 		return minHashValue;
1190 	}
1191 
1192 	public static int minFakeID=1900000000;
1193 	static boolean hashNames=false;
1194 	static boolean skipNonCanonical=true;
1195 	static boolean useSizeEstimate=true;
1196 	public static boolean allowMultithreadedFastq=false;
1197 	static boolean forceDisableMultithreadedFastq=false;
1198 
1199 	static boolean preferSSUMap=false;
1200 	static boolean preferSSUMapForEuks=true;
1201 	static boolean useSSUMapOnly=false;
1202 	static boolean useSSUMapOnlyForEuks=false;
1203 	static boolean ban18SForProks=true;
1204 	static boolean ban16SForEuks=true;
1205 
1206 	static long sampleseed=-1L;
1207 
1208 	public static TaxTree taxtree=null;
1209 	private static String treefile=null;
1210 	static String blacklist=null;
1211 	static boolean useWhitelist=false;
1212 	private static boolean postparsed=false;
1213 	public static boolean KILL_OK=false;
1214 	public static boolean EXACT_ANI=true;
1215 	public static boolean useSSA=true;
1216 	public static boolean useSSA3=false;
1217 
1218 	//Needs to be last due to dependencies.
1219 	public static DisplayParams defaultParams=new DisplayParams();
1220 
1221 	static AlignmentThreadPool alignerPool=null;
1222 
1223 	public static boolean verbose2=false;
1224 	public static boolean LOADER2=true;
1225 
1226 }
1227