1 package shared;
2 
3 import structures.ByteBuilder;
4 import structures.LongList;
5 
6 public class Parse {
7 
8 
parseIntKMG(String b)9 	public static int parseIntKMG(String b){
10 		long x=parseKMG(b);
11 		assert(x<=Integer.MAX_VALUE && x>Integer.MIN_VALUE) : "Value "+x+" is out of range for integers: "+b;
12 		return (int)x;
13 	}
14 
parseKMG(String b)15 	public static long parseKMG(String b){
16 		if(b==null){return 0;}
17 		assert(b.length()>0);
18 		final char c=Tools.toLowerCase(b.charAt(b.length()-1));
19 		final boolean dot=b.indexOf('.')>=0;
20 		if(!dot && !Tools.isLetter(c)){return Long.parseLong(b);}
21 //		if(!Tools.isLetter(c) && !dot){return Long.parseLong(b);}
22 
23 		if(b.equalsIgnoreCase("big") || b.equalsIgnoreCase("inf") || b.equalsIgnoreCase("infinity") || b.equalsIgnoreCase("max") || b.equalsIgnoreCase("huge")){
24 			return Long.MAX_VALUE;
25 		}
26 
27 		long mult=1;
28 		if(Tools.isLetter(c)){
29 			if(c=='k'){mult=1000;}
30 			else if(c=='m'){mult=1000000;}
31 			else if(c=='g' || c=='b'){mult=1000000000;}
32 			else if(c=='t'){mult=1000000000000L;}
33 			else if(c=='p' || c=='q'){mult=1000000000000000L;}
34 			else if(c=='e'){mult=1000000000000000000L;}
35 //			else if(c=='z'){mult=1000000000000000000000L;}//Out of range
36 			else if(c=='c' || c=='h'){mult=100;}
37 			else if(c=='d'){mult=10;}
38 			else{throw new RuntimeException(b);}
39 			b=b.substring(0, b.length()-1);
40 		}
41 
42 		//Calculate product, check for overflow, and return
43 		if(!dot){
44 			long m=Long.parseLong(b);
45 			long p=m*mult;
46 			assert(p>=m) : p+", "+m+", "+b;
47 			return p;
48 		}else{
49 			double m=Double.parseDouble(b);
50 			long p=(long)(m*mult);
51 			assert(p>=m) : p+", "+m+", "+b;
52 			return p;
53 		}
54 	}
55 
parseKMGBinary(String b)56 	public static long parseKMGBinary(String b){
57 		if(b==null){return 0;}
58 		char c=Tools.toLowerCase(b.charAt(b.length()-1));
59 		boolean dot=b.indexOf('.')>=0;
60 		if(!Tools.isLetter(c) && !dot){return Long.parseLong(b);}
61 
62 		long mult=1;
63 		if(Tools.isLetter(c)){
64 			if(c=='k'){mult=1024;}
65 			else if(c=='m'){mult=1024*1024;}
66 			else if(c=='g' || c=='b'){mult=1024*1024*1024;}
67 			else if(c=='t'){mult=1024L*1024L*1024L*1024L;}
68 			else{throw new RuntimeException(b);}
69 			b=b.substring(0, b.length()-1);
70 		}
71 
72 		if(!dot){return Long.parseLong(b)*mult;}
73 
74 		return (long)(Double.parseDouble(b)*mult);
75 	}
76 
isNumber(String s)77 	public static boolean isNumber(String s){
78 		if(s==null || s.length()==0){return false;}
79 		char c=s.charAt(0);
80 		return Tools.isDigit(c) || c=='.' || c=='-';
81 	}
82 
83 	/**
84 	 * Parse this argument.  More liberal than Boolean.parseBoolean.
85 	 * Null, t, true, or 1 all yield true.
86 	 * Everything else, including the String "null", is false.
87 	 * @param s Argument to parse
88 	 * @return boolean form
89 	 */
parseBoolean(String s)90 	public static boolean parseBoolean(String s){
91 		if(s==null || s.length()<1){return true;}
92 		if(s.length()==1){
93 			char c=Tools.toLowerCase(s.charAt(0));
94 			return c=='t' || c=='1';
95 		}
96 		if(s.equalsIgnoreCase("null") || s.equalsIgnoreCase("none")){return false;}
97 		return Boolean.parseBoolean(s);
98 	}
99 
parseYesNo(String s)100 	public static boolean parseYesNo(String s){
101 		if(s==null || s.length()<1){return true;}
102 		if(s.length()==1){
103 			char c=Tools.toLowerCase(s.charAt(0));
104 			if(c=='y'){return true;}
105 			if(c=='n'){return false;}
106 			throw new RuntimeException(s);
107 		}
108 
109 		if(s.equalsIgnoreCase("yes")){return true;}
110 		if(s.equalsIgnoreCase("no")){return false;}
111 		if(s.equalsIgnoreCase("unknown")){return false;} //Special case for IMG database
112 
113 		throw new RuntimeException(s);
114 	}
115 
parseIntArray(String s, String regex)116 	public static int[] parseIntArray(String s, String regex){
117 		if(s==null){return null;}
118 		String[] split=s.split(regex);
119 		int[] array=new int[split.length];
120 		for(int i=0; i<split.length; i++){
121 			array[i]=Integer.parseInt(split[i]);
122 		}
123 		return array;
124 	}
125 
parseByteArray(String s, String regex)126 	public static byte[] parseByteArray(String s, String regex){
127 		if(s==null){return null;}
128 		String[] split=s.split(regex);
129 		byte[] array=new byte[split.length];
130 		for(int i=0; i<split.length; i++){
131 			array[i]=Byte.parseByte(split[i]);
132 		}
133 		return array;
134 	}
135 
parseIntHexDecOctBin(final String s)136 	public static int parseIntHexDecOctBin(final String s){
137 		if(s==null || s.length()<1){return 0;}
138 		int radix=10;
139 		if(s.length()>1 && s.charAt(1)=='0'){
140 			final char c=s.charAt(1);
141 			if(c=='x' || c=='X'){radix=16;}
142 			else if(c=='b' || c=='B'){radix=2;}
143 			else if(c=='o' || c=='O'){radix=8;}
144 		}
145 		return Integer.parseInt(s, radix);
146 	}
147 
148 	/**
149 	 * @param array Text
150 	 * @param a Index of first digit
151 	 * @param b Index after last digit (e.g., array.length)
152 	 * @return Parsed number
153 	 */
parseFloat(byte[] array, int a, int b)154 	public static float parseFloat(byte[] array, int a, int b){
155 		return (float)parseDouble(array, a, b);
156 	}
157 
158 	/**
159 	 * @param array Text
160 	 * @param a Index of first digit
161 	 * @param b Index after last digit (e.g., array.length)
162 	 * @return Parsed number
163 	 */
parseDoubleSlow(byte[] array, int a, int b)164 	public static double parseDoubleSlow(byte[] array, int a, int b){
165 		String s=new String(array, a, b-a);
166 		return Double.parseDouble(s);
167 	}
168 
parseDouble(final byte[] array, final int start)169 	public static double parseDouble(final byte[] array, final int start){
170 		return parseDouble(array, start, array.length);
171 	}
172 
173 	/**
174 	 * @param array Text
175 	 * @param a0 Index of first digit
176 	 * @param b Index after last digit (e.g., array.length)
177 	 * @return Parsed number
178 	 */
parseDouble(final byte[] array, final int a0, final int b)179 	public static double parseDouble(final byte[] array, final int a0, final int b){
180 		if(Tools.FORCE_JAVA_PARSE_DOUBLE){
181 			return Double.parseDouble(new String(array, a0, b-a0));
182 		}
183 		int a=a0;
184 		assert(b>a);
185 		long upper=0;
186 		final byte z='0';
187 		long mult=1;
188 		if(array[a]=='-'){mult=-1; a++;}
189 
190 		for(; a<b; a++){
191 			final byte c=array[a];
192 			if(c=='.'){break;}
193 			final int x=(c-z);
194 			assert(x<10 && x>=0) : x+" = "+(char)c+"\narray="+new String(array)+", start="+a+", stop="+b;
195 			upper=(upper*10)+x;
196 		}
197 
198 		long lower=0;
199 		int places=0;
200 		for(a++; a<b; a++){
201 			final byte c=array[a];
202 			final int x=(c-z);
203 			assert(x<10 && x>=0) : x+" = "+(char)c+"\narray="+new String(array)+", start="+a+", stop="+b+
204 				"\nThis function does not support exponents; if the input has an exponent, add the flag 'forceJavaParseDouble'.";
205 			lower=(lower*10)+x;
206 			places++;
207 		}
208 
209 		double d=mult*(upper+lower*ByteBuilder.decimalInvMult[places]);
210 //		assert(d==parseDoubleSlow(array, a0, b)) : d+", "+parseDoubleSlow(array, a0, b);
211 		return d;
212 	}
213 
parseInt(byte[] array, int start)214 	public static int parseInt(byte[] array, int start){
215 		return parseInt(array, start, array.length);
216 	}
217 
218 //	/**
219 //	 * @param array Text
220 //	 * @param a Index of first digit
221 //	 * @param b Index after last digit (e.g., array.length)
222 //	 * @return Parsed number
223 //	 */
224 //	public static int parseInt(byte[] array, int a, int b){
225 //		assert(b>a);
226 //		int r=0;
227 //		final byte z='0';
228 //		int mult=1;
229 //		if(array[a]=='-'){mult=-1; a++;}
230 //		for(; a<b; a++){
231 //			int x=(array[a]-z);
232 //			assert(x<10 && x>=0) : x+" = "+(char)array[a]+"\narray="+new String(array)+", start="+a+", stop="+b;
233 //			r=(r*10)+x;
234 //		}
235 //		return r*mult;
236 //	}
237 
238 	/**
239 	 * Returns the int representation of a number represented in ASCII text, from position a to b.
240 	 * This function is much faster than creating a substring and calling Integer.parseInt()
241 	 * Throws Assertions rather than Exceptions for invalid input.
242 	 * This function does NOT detect overflows, e.g., values over 2^31-1 (Integer.MAX_VALUE).
243 	 * This function has no side-effects.
244 	 * @param array byte array containing the text to parse.
245 	 * @param a Index of the first digit of the number.
246 	 * @param b Index after the last digit (e.g., array.length).
247 	 * @return int representation of the parsed number.
248 	 * @throws Assertions rather than Exceptions for invalid input.
249 	 *
250 	 * @TODO Correctly represent Integer.MIN_VALUE
251 	 * @TODO Detect overflow.
252 	 */
parseInt(byte[] array, int a, int b)253 	public static int parseInt(byte[] array, int a, int b){
254 		assert(b>a) : "The start position of the text to parse must come before the stop position: "+
255 			a+","+b+","+new String(array);
256 		int r=0; //Initialize the return value to 0.
257 
258 		//z holds the ASCII code for 0, which is subtracted from other ASCII codes
259 		//to yield the int value of a character.  For example, '7'-'0'=7,
260 		//because ASCII '7'=55, while ASCII '0'=48, and 55-48=7.
261 		final byte z='0';
262 
263 		//mult is 1 for positive numbers, or -1 for negative numbers.
264 		//It will be multiplied by the unsigned result to yield the final signed result.
265 		int mult=1;
266 
267 		//If the term starts with a minus sign, set the multiplier to -1 and increment the position.
268 		if(array[a]=='-'){mult=-1; a++;}
269 
270 		//Iterate through every position, incrementing a, up to b (exclusive).
271 		for(; a<b; a++){
272 			//x is the numeric value of the character at position a.
273 			//In other words, if array[a]='7',
274 			//x would be 7, not the ASCII code for '7' (which is 55).
275 			int x=(array[a]-z);
276 
277 			//Assert that x is in the range of 0-9; otherwise, the character was not a digit.
278 			//The ASCII code will be printed here because in some cases the character could be
279 			//a control character (like carriage return or vertical tab or bell) which is unprintable.
280 			//But if possible the character will be printed to, as well as the position,
281 			//and the entire String from which the number is to be parsed.
282 			assert(x<10 && x>=0) : "Non-digit character with ASCII code "+(int)array[a]+" was encountered.\n"
283 					+"x="+x+"; char="+(char)array[a]+"\narray="+new String(array)+", start="+a+", stop="+b;
284 
285 			//Multiply the old value by 10, then add the new 1's digit.
286 			//This is because the text is assumed to be base-10,
287 			//so each subsequent character will represent 1/10th the significance of the previous character.
288 			r=(r*10)+x;
289 		}
290 
291 		//Change the unsigned value into a signed result, and return it.
292 		return r*mult;
293 	}
294 
295 	/**
296 	 * @param array Text
297 	 * @param a Index of first digit
298 	 * @param b Index after last digit (e.g., array.length)
299 	 * @return Parsed number
300 	 */
parseInt(String array, int a, int b)301 	public static int parseInt(String array, int a, int b){
302 //		assert(false) : Character.toString(array.charAt(a));
303 		assert(b>a);
304 		int r=0;
305 		final byte z='0';
306 		int mult=1;
307 		if(array.charAt(a)=='-'){mult=-1; a++;}
308 		for(; a<b; a++){
309 			int x=(array.charAt(a)-z);
310 			assert(x<10 && x>=0) : x+" = "+array.charAt(a)+"\narray="+new String(array)+", start="+a+", stop="+b;
311 			r=(r*10)+x;
312 		}
313 		return r*mult;
314 	}
315 
parseLong(byte[] array)316 	public static long parseLong(byte[] array){return parseLong(array, 0, array.length);}
317 
parseLong(byte[] array, int start)318 	public static long parseLong(byte[] array, int start){return parseLong(array, start, array.length);}
319 
320 	/**
321 	 * @param array Text
322 	 * @param a Index of first digit
323 	 * @param b Index after last digit (e.g., array.length)
324 	 * @return Parsed number
325 	 */
parseLong(byte[] array, int a, int b)326 	public static long parseLong(byte[] array, int a, int b){
327 		assert(b>a);
328 		long r=0;
329 		final byte z='0';
330 		long mult=1;
331 		if(array[a]=='-'){mult=-1; a++;}
332 		for(; a<b; a++){
333 			int x=(array[a]-z);
334 			assert(x<10 && x>=0) : x+" = "+(char)array[a]+"\narray="+new String(array)+", start="+a+", stop="+b;
335 			r=(r*10)+x;
336 		}
337 		return r*mult;
338 	}
339 
340 	/**
341 	 * @param array Text
342 	 * @param a Index of first digit
343 	 * @param b Index after last digit (e.g., array.length)
344 	 * @return Parsed number
345 	 */
parseLong(String array, int a, int b)346 	public static long parseLong(String array, int a, int b){
347 		assert(b>a);
348 		long r=0;
349 		final byte z='0';
350 		long mult=1;
351 		if(array.charAt(a)=='-'){mult=-1; a++;}
352 		for(; a<b; a++){
353 			int x=(array.charAt(a)-z);
354 			assert(x<10 && x>=0) : x+" = "+array.charAt(a)+"\narray="+new String(array)+", start="+a+", stop="+b;
355 			r=(r*10)+x;
356 		}
357 		return r*mult;
358 	}
359 
360 
361 	//Note: clen is optional, but allows poorly-formatted input like trailing whitespace
362 	//Without clen ",,," would become {0,0,0,0}
parseLongArray(String sub)363 	public static long[] parseLongArray(String sub) {
364 		if(sub==null || sub.length()<1){return null;}
365 		long current=0;
366 //		int clen=0;
367 		LongList list=new LongList(min(8, 1+sub.length()/2));
368 		for(int i=0, len=sub.length(); i<len; i++){
369 //			System.err.println();
370 			int c=sub.charAt(i)-'0';
371 			if(c<0 || c>9){
372 //				System.err.println('A');
373 				//assert(clen>0);
374 				list.add(current);
375 				current=0;
376 //				clen=0;
377 			}else{
378 //				System.err.println('B');
379 				current=(current*10)+c;
380 //				clen++;
381 			}
382 //			System.err.println("i="+i+", c="+c+", current="+current+", list="+list);
383 		}
384 //		if(clen>0){
385 			list.add(current);
386 //		}
387 //		assert(false) : "\n'"+sub+"'\n"+Arrays.toString(list.toArray());
388 		return list.toArray();
389 	}
390 
parseZmw(String id)391 	public static int parseZmw(String id){
392 		//Example: m54283_190403_183820/4194374/919_2614
393 		//Run ID is m54283_190403_183820
394 		//zmw ID is 4194374.
395 		//Read start/stop coordinates are 919_2614
396 		int under=id.indexOf('_');
397 		int slash=id.indexOf('/');
398 		if(under<0 || slash<0){return -1;}
399 		String[] split=id.split("/");
400 		String z=split[1];
401 		return Integer.parseInt(z);
402 	}
403 
parseSymbolToCharacter(String b)404 	public static char parseSymbolToCharacter(String b){
405 		b=parseSymbol(b);
406 		while(b.length()>1 && b.charAt(0)=='\\'){
407 			b=b.substring(1);
408 		}
409 		return b.charAt(0);
410 	}
411 
parseSymbol(String b)412 	public static String parseSymbol(String b){
413 		if(b==null || b.length()<2){return b;}
414 
415 		//Convenience characters
416 		if(b.equalsIgnoreCase("space")){
417 			return " ";
418 		}else if(b.equalsIgnoreCase("tab")){
419 			return "\t";
420 		}else if(b.equalsIgnoreCase("whitespace")){
421 			return "\\s+";
422 		}else if(b.equalsIgnoreCase("pound")){
423 			return "#";
424 		}else if(b.equalsIgnoreCase("greaterthan")){
425 			return ">";
426 		}else if(b.equalsIgnoreCase("lessthan")){
427 			return "<";
428 		}else if(b.equalsIgnoreCase("equals")){
429 			return "=";
430 		}else if(b.equalsIgnoreCase("colon")){
431 			return ":";
432 		}else if(b.equalsIgnoreCase("semicolon")){
433 			return ";";
434 		}else if(b.equalsIgnoreCase("bang")){
435 			return "!";
436 		}else if(b.equalsIgnoreCase("and") || b.equalsIgnoreCase("ampersand")){
437 			return "&";
438 		}else if(b.equalsIgnoreCase("quote") || b.equalsIgnoreCase("doublequote")){
439 			return "\"";
440 		}else if(b.equalsIgnoreCase("singlequote") || b.equalsIgnoreCase("apostrophe")){
441 			return "'";
442 		}
443 
444 		//Java meta characters
445 		if(b.equalsIgnoreCase("backslash")){
446 			return "\\\\";
447 		}else if(b.equalsIgnoreCase("hat") || b.equalsIgnoreCase("caret")){
448 			return "\\^";
449 		}else if(b.equalsIgnoreCase("dollar")){
450 			return "\\$";
451 		}else if(b.equalsIgnoreCase("dot")){
452 			return "\\.";
453 		}else if(b.equalsIgnoreCase("pipe") || b.equalsIgnoreCase("or")){
454 			return "\\|";
455 		}else if(b.equalsIgnoreCase("questionmark")){
456 			return "\\?";
457 		}else if(b.equalsIgnoreCase("star") || b.equalsIgnoreCase("asterisk")){
458 			return "\\*";
459 		}else if(b.equalsIgnoreCase("plus")){
460 			return "\\+";
461 		}else if(b.equalsIgnoreCase("openparen")){
462 			return "\\(";
463 		}else if(b.equalsIgnoreCase("closeparen")){
464 			return "\\)";
465 		}else if(b.equalsIgnoreCase("opensquare")){
466 			return "\\[";
467 		}else if(b.equalsIgnoreCase("opencurly")){
468 			return "\\{";
469 		}
470 
471 		//No matches, return the literal
472 		return b;
473 	}
474 
parseRemap(String b)475 	public static byte[] parseRemap(String b){
476 		final byte[] remap;
477 		if(b==null || ("f".equalsIgnoreCase(b) || "false".equalsIgnoreCase(b))){
478 			remap=null;
479 		}else{
480 			assert((b.length()&1)==0) : "Length of remap argument must be even.  No whitespace is allowed.";
481 
482 			remap=new byte[128];
483 			for(int j=0; j<remap.length; j++){remap[j]=(byte)j;}
484 			for(int j=0; j<b.length(); j+=2){
485 				char x=b.charAt(j), y=b.charAt(j+1);
486 				remap[x]=(byte)y;
487 			}
488 		}
489 		return remap;
490 	}
491 
min(int x, int y)492 	public static final int min(int x, int y){return x<y ? x : y;}
max(int x, int y)493 	public static final int max(int x, int y){return x>y ? x : y;}
494 
495 }
496