1 package shared; 2 3 import structures.ByteBuilder; 4 import structures.LongList; 5 6 public class Parse { 7 8 parseIntKMG(String b)9 public static int parseIntKMG(String b){ 10 long x=parseKMG(b); 11 assert(x<=Integer.MAX_VALUE && x>Integer.MIN_VALUE) : "Value "+x+" is out of range for integers: "+b; 12 return (int)x; 13 } 14 parseKMG(String b)15 public static long parseKMG(String b){ 16 if(b==null){return 0;} 17 assert(b.length()>0); 18 final char c=Tools.toLowerCase(b.charAt(b.length()-1)); 19 final boolean dot=b.indexOf('.')>=0; 20 if(!dot && !Tools.isLetter(c)){return Long.parseLong(b);} 21 // if(!Tools.isLetter(c) && !dot){return Long.parseLong(b);} 22 23 if(b.equalsIgnoreCase("big") || b.equalsIgnoreCase("inf") || b.equalsIgnoreCase("infinity") || b.equalsIgnoreCase("max") || b.equalsIgnoreCase("huge")){ 24 return Long.MAX_VALUE; 25 } 26 27 long mult=1; 28 if(Tools.isLetter(c)){ 29 if(c=='k'){mult=1000;} 30 else if(c=='m'){mult=1000000;} 31 else if(c=='g' || c=='b'){mult=1000000000;} 32 else if(c=='t'){mult=1000000000000L;} 33 else if(c=='p' || c=='q'){mult=1000000000000000L;} 34 else if(c=='e'){mult=1000000000000000000L;} 35 // else if(c=='z'){mult=1000000000000000000000L;}//Out of range 36 else if(c=='c' || c=='h'){mult=100;} 37 else if(c=='d'){mult=10;} 38 else{throw new RuntimeException(b);} 39 b=b.substring(0, b.length()-1); 40 } 41 42 //Calculate product, check for overflow, and return 43 if(!dot){ 44 long m=Long.parseLong(b); 45 long p=m*mult; 46 assert(p>=m) : p+", "+m+", "+b; 47 return p; 48 }else{ 49 double m=Double.parseDouble(b); 50 long p=(long)(m*mult); 51 assert(p>=m) : p+", "+m+", "+b; 52 return p; 53 } 54 } 55 parseKMGBinary(String b)56 public static long parseKMGBinary(String b){ 57 if(b==null){return 0;} 58 char c=Tools.toLowerCase(b.charAt(b.length()-1)); 59 boolean dot=b.indexOf('.')>=0; 60 if(!Tools.isLetter(c) && !dot){return Long.parseLong(b);} 61 62 long mult=1; 63 if(Tools.isLetter(c)){ 64 if(c=='k'){mult=1024;} 65 else if(c=='m'){mult=1024*1024;} 66 else if(c=='g' || c=='b'){mult=1024*1024*1024;} 67 else if(c=='t'){mult=1024L*1024L*1024L*1024L;} 68 else{throw new RuntimeException(b);} 69 b=b.substring(0, b.length()-1); 70 } 71 72 if(!dot){return Long.parseLong(b)*mult;} 73 74 return (long)(Double.parseDouble(b)*mult); 75 } 76 isNumber(String s)77 public static boolean isNumber(String s){ 78 if(s==null || s.length()==0){return false;} 79 char c=s.charAt(0); 80 return Tools.isDigit(c) || c=='.' || c=='-'; 81 } 82 83 /** 84 * Parse this argument. More liberal than Boolean.parseBoolean. 85 * Null, t, true, or 1 all yield true. 86 * Everything else, including the String "null", is false. 87 * @param s Argument to parse 88 * @return boolean form 89 */ parseBoolean(String s)90 public static boolean parseBoolean(String s){ 91 if(s==null || s.length()<1){return true;} 92 if(s.length()==1){ 93 char c=Tools.toLowerCase(s.charAt(0)); 94 return c=='t' || c=='1'; 95 } 96 if(s.equalsIgnoreCase("null") || s.equalsIgnoreCase("none")){return false;} 97 return Boolean.parseBoolean(s); 98 } 99 parseYesNo(String s)100 public static boolean parseYesNo(String s){ 101 if(s==null || s.length()<1){return true;} 102 if(s.length()==1){ 103 char c=Tools.toLowerCase(s.charAt(0)); 104 if(c=='y'){return true;} 105 if(c=='n'){return false;} 106 throw new RuntimeException(s); 107 } 108 109 if(s.equalsIgnoreCase("yes")){return true;} 110 if(s.equalsIgnoreCase("no")){return false;} 111 if(s.equalsIgnoreCase("unknown")){return false;} //Special case for IMG database 112 113 throw new RuntimeException(s); 114 } 115 parseIntArray(String s, String regex)116 public static int[] parseIntArray(String s, String regex){ 117 if(s==null){return null;} 118 String[] split=s.split(regex); 119 int[] array=new int[split.length]; 120 for(int i=0; i<split.length; i++){ 121 array[i]=Integer.parseInt(split[i]); 122 } 123 return array; 124 } 125 parseByteArray(String s, String regex)126 public static byte[] parseByteArray(String s, String regex){ 127 if(s==null){return null;} 128 String[] split=s.split(regex); 129 byte[] array=new byte[split.length]; 130 for(int i=0; i<split.length; i++){ 131 array[i]=Byte.parseByte(split[i]); 132 } 133 return array; 134 } 135 parseIntHexDecOctBin(final String s)136 public static int parseIntHexDecOctBin(final String s){ 137 if(s==null || s.length()<1){return 0;} 138 int radix=10; 139 if(s.length()>1 && s.charAt(1)=='0'){ 140 final char c=s.charAt(1); 141 if(c=='x' || c=='X'){radix=16;} 142 else if(c=='b' || c=='B'){radix=2;} 143 else if(c=='o' || c=='O'){radix=8;} 144 } 145 return Integer.parseInt(s, radix); 146 } 147 148 /** 149 * @param array Text 150 * @param a Index of first digit 151 * @param b Index after last digit (e.g., array.length) 152 * @return Parsed number 153 */ parseFloat(byte[] array, int a, int b)154 public static float parseFloat(byte[] array, int a, int b){ 155 return (float)parseDouble(array, a, b); 156 } 157 158 /** 159 * @param array Text 160 * @param a Index of first digit 161 * @param b Index after last digit (e.g., array.length) 162 * @return Parsed number 163 */ parseDoubleSlow(byte[] array, int a, int b)164 public static double parseDoubleSlow(byte[] array, int a, int b){ 165 String s=new String(array, a, b-a); 166 return Double.parseDouble(s); 167 } 168 parseDouble(final byte[] array, final int start)169 public static double parseDouble(final byte[] array, final int start){ 170 return parseDouble(array, start, array.length); 171 } 172 173 /** 174 * @param array Text 175 * @param a0 Index of first digit 176 * @param b Index after last digit (e.g., array.length) 177 * @return Parsed number 178 */ parseDouble(final byte[] array, final int a0, final int b)179 public static double parseDouble(final byte[] array, final int a0, final int b){ 180 if(Tools.FORCE_JAVA_PARSE_DOUBLE){ 181 return Double.parseDouble(new String(array, a0, b-a0)); 182 } 183 int a=a0; 184 assert(b>a); 185 long upper=0; 186 final byte z='0'; 187 long mult=1; 188 if(array[a]=='-'){mult=-1; a++;} 189 190 for(; a<b; a++){ 191 final byte c=array[a]; 192 if(c=='.'){break;} 193 final int x=(c-z); 194 assert(x<10 && x>=0) : x+" = "+(char)c+"\narray="+new String(array)+", start="+a+", stop="+b; 195 upper=(upper*10)+x; 196 } 197 198 long lower=0; 199 int places=0; 200 for(a++; a<b; a++){ 201 final byte c=array[a]; 202 final int x=(c-z); 203 assert(x<10 && x>=0) : x+" = "+(char)c+"\narray="+new String(array)+", start="+a+", stop="+b+ 204 "\nThis function does not support exponents; if the input has an exponent, add the flag 'forceJavaParseDouble'."; 205 lower=(lower*10)+x; 206 places++; 207 } 208 209 double d=mult*(upper+lower*ByteBuilder.decimalInvMult[places]); 210 // assert(d==parseDoubleSlow(array, a0, b)) : d+", "+parseDoubleSlow(array, a0, b); 211 return d; 212 } 213 parseInt(byte[] array, int start)214 public static int parseInt(byte[] array, int start){ 215 return parseInt(array, start, array.length); 216 } 217 218 // /** 219 // * @param array Text 220 // * @param a Index of first digit 221 // * @param b Index after last digit (e.g., array.length) 222 // * @return Parsed number 223 // */ 224 // public static int parseInt(byte[] array, int a, int b){ 225 // assert(b>a); 226 // int r=0; 227 // final byte z='0'; 228 // int mult=1; 229 // if(array[a]=='-'){mult=-1; a++;} 230 // for(; a<b; a++){ 231 // int x=(array[a]-z); 232 // assert(x<10 && x>=0) : x+" = "+(char)array[a]+"\narray="+new String(array)+", start="+a+", stop="+b; 233 // r=(r*10)+x; 234 // } 235 // return r*mult; 236 // } 237 238 /** 239 * Returns the int representation of a number represented in ASCII text, from position a to b. 240 * This function is much faster than creating a substring and calling Integer.parseInt() 241 * Throws Assertions rather than Exceptions for invalid input. 242 * This function does NOT detect overflows, e.g., values over 2^31-1 (Integer.MAX_VALUE). 243 * This function has no side-effects. 244 * @param array byte array containing the text to parse. 245 * @param a Index of the first digit of the number. 246 * @param b Index after the last digit (e.g., array.length). 247 * @return int representation of the parsed number. 248 * @throws Assertions rather than Exceptions for invalid input. 249 * 250 * @TODO Correctly represent Integer.MIN_VALUE 251 * @TODO Detect overflow. 252 */ parseInt(byte[] array, int a, int b)253 public static int parseInt(byte[] array, int a, int b){ 254 assert(b>a) : "The start position of the text to parse must come before the stop position: "+ 255 a+","+b+","+new String(array); 256 int r=0; //Initialize the return value to 0. 257 258 //z holds the ASCII code for 0, which is subtracted from other ASCII codes 259 //to yield the int value of a character. For example, '7'-'0'=7, 260 //because ASCII '7'=55, while ASCII '0'=48, and 55-48=7. 261 final byte z='0'; 262 263 //mult is 1 for positive numbers, or -1 for negative numbers. 264 //It will be multiplied by the unsigned result to yield the final signed result. 265 int mult=1; 266 267 //If the term starts with a minus sign, set the multiplier to -1 and increment the position. 268 if(array[a]=='-'){mult=-1; a++;} 269 270 //Iterate through every position, incrementing a, up to b (exclusive). 271 for(; a<b; a++){ 272 //x is the numeric value of the character at position a. 273 //In other words, if array[a]='7', 274 //x would be 7, not the ASCII code for '7' (which is 55). 275 int x=(array[a]-z); 276 277 //Assert that x is in the range of 0-9; otherwise, the character was not a digit. 278 //The ASCII code will be printed here because in some cases the character could be 279 //a control character (like carriage return or vertical tab or bell) which is unprintable. 280 //But if possible the character will be printed to, as well as the position, 281 //and the entire String from which the number is to be parsed. 282 assert(x<10 && x>=0) : "Non-digit character with ASCII code "+(int)array[a]+" was encountered.\n" 283 +"x="+x+"; char="+(char)array[a]+"\narray="+new String(array)+", start="+a+", stop="+b; 284 285 //Multiply the old value by 10, then add the new 1's digit. 286 //This is because the text is assumed to be base-10, 287 //so each subsequent character will represent 1/10th the significance of the previous character. 288 r=(r*10)+x; 289 } 290 291 //Change the unsigned value into a signed result, and return it. 292 return r*mult; 293 } 294 295 /** 296 * @param array Text 297 * @param a Index of first digit 298 * @param b Index after last digit (e.g., array.length) 299 * @return Parsed number 300 */ parseInt(String array, int a, int b)301 public static int parseInt(String array, int a, int b){ 302 // assert(false) : Character.toString(array.charAt(a)); 303 assert(b>a); 304 int r=0; 305 final byte z='0'; 306 int mult=1; 307 if(array.charAt(a)=='-'){mult=-1; a++;} 308 for(; a<b; a++){ 309 int x=(array.charAt(a)-z); 310 assert(x<10 && x>=0) : x+" = "+array.charAt(a)+"\narray="+new String(array)+", start="+a+", stop="+b; 311 r=(r*10)+x; 312 } 313 return r*mult; 314 } 315 parseLong(byte[] array)316 public static long parseLong(byte[] array){return parseLong(array, 0, array.length);} 317 parseLong(byte[] array, int start)318 public static long parseLong(byte[] array, int start){return parseLong(array, start, array.length);} 319 320 /** 321 * @param array Text 322 * @param a Index of first digit 323 * @param b Index after last digit (e.g., array.length) 324 * @return Parsed number 325 */ parseLong(byte[] array, int a, int b)326 public static long parseLong(byte[] array, int a, int b){ 327 assert(b>a); 328 long r=0; 329 final byte z='0'; 330 long mult=1; 331 if(array[a]=='-'){mult=-1; a++;} 332 for(; a<b; a++){ 333 int x=(array[a]-z); 334 assert(x<10 && x>=0) : x+" = "+(char)array[a]+"\narray="+new String(array)+", start="+a+", stop="+b; 335 r=(r*10)+x; 336 } 337 return r*mult; 338 } 339 340 /** 341 * @param array Text 342 * @param a Index of first digit 343 * @param b Index after last digit (e.g., array.length) 344 * @return Parsed number 345 */ parseLong(String array, int a, int b)346 public static long parseLong(String array, int a, int b){ 347 assert(b>a); 348 long r=0; 349 final byte z='0'; 350 long mult=1; 351 if(array.charAt(a)=='-'){mult=-1; a++;} 352 for(; a<b; a++){ 353 int x=(array.charAt(a)-z); 354 assert(x<10 && x>=0) : x+" = "+array.charAt(a)+"\narray="+new String(array)+", start="+a+", stop="+b; 355 r=(r*10)+x; 356 } 357 return r*mult; 358 } 359 360 361 //Note: clen is optional, but allows poorly-formatted input like trailing whitespace 362 //Without clen ",,," would become {0,0,0,0} parseLongArray(String sub)363 public static long[] parseLongArray(String sub) { 364 if(sub==null || sub.length()<1){return null;} 365 long current=0; 366 // int clen=0; 367 LongList list=new LongList(min(8, 1+sub.length()/2)); 368 for(int i=0, len=sub.length(); i<len; i++){ 369 // System.err.println(); 370 int c=sub.charAt(i)-'0'; 371 if(c<0 || c>9){ 372 // System.err.println('A'); 373 //assert(clen>0); 374 list.add(current); 375 current=0; 376 // clen=0; 377 }else{ 378 // System.err.println('B'); 379 current=(current*10)+c; 380 // clen++; 381 } 382 // System.err.println("i="+i+", c="+c+", current="+current+", list="+list); 383 } 384 // if(clen>0){ 385 list.add(current); 386 // } 387 // assert(false) : "\n'"+sub+"'\n"+Arrays.toString(list.toArray()); 388 return list.toArray(); 389 } 390 parseZmw(String id)391 public static int parseZmw(String id){ 392 //Example: m54283_190403_183820/4194374/919_2614 393 //Run ID is m54283_190403_183820 394 //zmw ID is 4194374. 395 //Read start/stop coordinates are 919_2614 396 int under=id.indexOf('_'); 397 int slash=id.indexOf('/'); 398 if(under<0 || slash<0){return -1;} 399 String[] split=id.split("/"); 400 String z=split[1]; 401 return Integer.parseInt(z); 402 } 403 parseSymbolToCharacter(String b)404 public static char parseSymbolToCharacter(String b){ 405 b=parseSymbol(b); 406 while(b.length()>1 && b.charAt(0)=='\\'){ 407 b=b.substring(1); 408 } 409 return b.charAt(0); 410 } 411 parseSymbol(String b)412 public static String parseSymbol(String b){ 413 if(b==null || b.length()<2){return b;} 414 415 //Convenience characters 416 if(b.equalsIgnoreCase("space")){ 417 return " "; 418 }else if(b.equalsIgnoreCase("tab")){ 419 return "\t"; 420 }else if(b.equalsIgnoreCase("whitespace")){ 421 return "\\s+"; 422 }else if(b.equalsIgnoreCase("pound")){ 423 return "#"; 424 }else if(b.equalsIgnoreCase("greaterthan")){ 425 return ">"; 426 }else if(b.equalsIgnoreCase("lessthan")){ 427 return "<"; 428 }else if(b.equalsIgnoreCase("equals")){ 429 return "="; 430 }else if(b.equalsIgnoreCase("colon")){ 431 return ":"; 432 }else if(b.equalsIgnoreCase("semicolon")){ 433 return ";"; 434 }else if(b.equalsIgnoreCase("bang")){ 435 return "!"; 436 }else if(b.equalsIgnoreCase("and") || b.equalsIgnoreCase("ampersand")){ 437 return "&"; 438 }else if(b.equalsIgnoreCase("quote") || b.equalsIgnoreCase("doublequote")){ 439 return "\""; 440 }else if(b.equalsIgnoreCase("singlequote") || b.equalsIgnoreCase("apostrophe")){ 441 return "'"; 442 } 443 444 //Java meta characters 445 if(b.equalsIgnoreCase("backslash")){ 446 return "\\\\"; 447 }else if(b.equalsIgnoreCase("hat") || b.equalsIgnoreCase("caret")){ 448 return "\\^"; 449 }else if(b.equalsIgnoreCase("dollar")){ 450 return "\\$"; 451 }else if(b.equalsIgnoreCase("dot")){ 452 return "\\."; 453 }else if(b.equalsIgnoreCase("pipe") || b.equalsIgnoreCase("or")){ 454 return "\\|"; 455 }else if(b.equalsIgnoreCase("questionmark")){ 456 return "\\?"; 457 }else if(b.equalsIgnoreCase("star") || b.equalsIgnoreCase("asterisk")){ 458 return "\\*"; 459 }else if(b.equalsIgnoreCase("plus")){ 460 return "\\+"; 461 }else if(b.equalsIgnoreCase("openparen")){ 462 return "\\("; 463 }else if(b.equalsIgnoreCase("closeparen")){ 464 return "\\)"; 465 }else if(b.equalsIgnoreCase("opensquare")){ 466 return "\\["; 467 }else if(b.equalsIgnoreCase("opencurly")){ 468 return "\\{"; 469 } 470 471 //No matches, return the literal 472 return b; 473 } 474 parseRemap(String b)475 public static byte[] parseRemap(String b){ 476 final byte[] remap; 477 if(b==null || ("f".equalsIgnoreCase(b) || "false".equalsIgnoreCase(b))){ 478 remap=null; 479 }else{ 480 assert((b.length()&1)==0) : "Length of remap argument must be even. No whitespace is allowed."; 481 482 remap=new byte[128]; 483 for(int j=0; j<remap.length; j++){remap[j]=(byte)j;} 484 for(int j=0; j<b.length(); j+=2){ 485 char x=b.charAt(j), y=b.charAt(j+1); 486 remap[x]=(byte)y; 487 } 488 } 489 return remap; 490 } 491 min(int x, int y)492 public static final int min(int x, int y){return x<y ? x : y;} max(int x, int y)493 public static final int max(int x, int y){return x>y ? x : y;} 494 495 } 496