/*
 * Jalview - A Sequence Alignment Editor and Viewer (2.11.1.4)
 * Copyright (C) 2021 The Jalview Authors
 * 
 * This file is part of Jalview.
 * 
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License 
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *  
 * Jalview is distributed in the hope that it will be useful, but 
 * WITHOUT ANY WARRANTY; without even the implied warranty 
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
 * PURPOSE.  See the GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */
package jalview.util;

import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;

/**
 * Static helper methods for character array editing, delimited list parsing
 * and formatting, version comparison, and simple HTML / URL text handling.
 */
public class StringUtils
{
  /*
   * matches a string holding a name-value pair whose single-quoted value
   * has been opened but not yet closed, e.g. a='b
   */
  private static final Pattern DELIMITERS_PATTERN = Pattern
          .compile(".*='[^']*(?!')");

  private static final char PERCENT = '%';

  private static final boolean DEBUG = false;

  /*
   * URL encoded characters, indexed by char value
   * e.g. urlEncodings['='] = urlEncodings[61] = "%3D"
   * Entries are filled lazily; characters outside the table are not encoded.
   */
  private static final String[] urlEncodings = new String[256];

  /**
   * Returns a new character array, after inserting characters into the given
   * character array.
   * 
   * @param in
   *          the character array to insert into
   * @param position
   *          the 0-based position for insertion; a position at or beyond the
   *          end of the array appends the characters
   * @param count
   *          the number of characters to insert
   * @param ch
   *          the character to insert
   * @return a new array of length {@code in.length + count}
   */
  public static final char[] insertCharAt(char[] in, int position,
          int count, char ch)
  {
    int insertAt = Math.min(position, in.length);
    char[] result = new char[in.length + count];

    System.arraycopy(in, 0, result, 0, insertAt);
    Arrays.fill(result, insertAt, insertAt + count, ch);
    System.arraycopy(in, insertAt, result, insertAt + count,
            in.length - insertAt);

    return result;
  }

  /**
   * Returns a new character array with the characters at positions
   * {@code from} (inclusive) to {@code to} (exclusive) removed. The input
   * array itself is returned, unchanged, if {@code from} is not a valid
   * index or if {@code to} is less than {@code from}.
   * 
   * @param in
   *          the character array to delete from
   * @param from
   *          0-based index of the first character to delete
   * @param to
   *          0-based index of the first character to keep after the deleted
   *          range; values beyond the end of the array delete to the end
   * @return the array with the range removed
   */
  public static final char[] deleteChars(char[] in, int from, int to)
  {
    if (from >= in.length || from < 0 || to < from)
    {
      return in;
    }

    if (to >= in.length)
    {
      // delete everything from 'from' onwards
      return Arrays.copyOf(in, from);
    }

    char[] result = new char[in.length - to + from];
    System.arraycopy(in, 0, result, 0, from);
    System.arraycopy(in, to, result, from, in.length - to);
    return result;
  }

  /**
   * Returns the last part of 'input' after the last occurrence of 'token'. For
   * example to extract only the filename from a full path or URL. Trailing
   * empty parts are discarded (as by {@link String#split(String)}), so
   * {@code getLastToken("a/b/", "/")} answers {@code "b"}; an input consisting
   * only of delimiters answers the empty string.
   * 
   * @param input
   *          the string to search; null answers null
   * @param token
   *          a delimiter which must be in regular expression format; null
   *          answers the input unchanged
   * @return the last non-trailing part of the input
   */
  public static String getLastToken(String input, String token)
  {
    if (input == null)
    {
      return null;
    }
    if (token == null)
    {
      return input;
    }
    String[] parts = input.split(token);
    // split() yields no parts at all when the input is only delimiters
    return parts.length == 0 ? "" : parts[parts.length - 1];
  }

  /**
   * Parses the input string into components separated by the delimiter. Unlike
   * String.split(), this method will ignore occurrences of the delimiter which
   * are nested within single quotes in name-value pair values, e.g. a='b,c'.
   * A delimiter immediately preceded by a backslash is also not treated as a
   * separator; the backslash is dropped and the delimiter kept in the item.
   * 
   * @param input
   *          the delimited string
   * @param delimiter
   *          the separator; if null or empty the whole input is answered as a
   *          single element
   * @return elements separated by separator, or null if the input is null,
   *         empty or equal to the delimiter
   */
  public static String[] separatorListToArray(String input,
          String delimiter)
  {
    if (input == null || input.equals("") || input.equals(delimiter))
    {
      return null;
    }
    if (delimiter == null || delimiter.isEmpty())
    {
      // nothing to split on (an empty delimiter would otherwise never advance)
      return new String[] { input };
    }

    int seplen = delimiter.length();
    List<String> items = new ArrayList<>();
    int cp = 0;
    int pos;
    boolean wasEscaped = false;
    boolean wasQuoted = false;
    String lastItem = null;

    while ((pos = input.indexOf(delimiter, cp)) >= cp)
    {
      /*
       * 'pos > cp' so that the final character of the previous delimiter
       * is never mistaken for an escape of this one
       */
      boolean escaped = pos > cp && input.charAt(pos - 1) == '\\';
      String piece = input.substring(cp, escaped ? pos - 1 : pos);
      if (wasEscaped || wasQuoted)
      {
        // previous delimiter was not a separator: rejoin to previous item
        lastItem = lastItem + delimiter + piece;
        items.set(items.size() - 1, lastItem);
      }
      else
      {
        lastItem = piece;
        items.add(lastItem);
      }
      cp = pos + seplen;
      wasEscaped = escaped;
      // last separator may be in an unmatched quote
      wasQuoted = DELIMITERS_PATTERN.matcher(lastItem).matches();
    }

    if (cp < input.length())
    {
      String rest = input.substring(cp);
      if (wasEscaped || wasQuoted)
      {
        // append final separator
        items.set(items.size() - 1, lastItem + delimiter + rest);
      }
      else if (!rest.equals(delimiter))
      {
        items.add(rest);
      }
    }

    if (items.isEmpty())
    {
      if (DEBUG)
      {
        System.err.println(
                "Empty Array from '" + delimiter + "' separated List");
      }
      return null;
    }

    String[] v = items.toArray(new String[0]);
    if (DEBUG)
    {
      System.err.println("Array from '" + delimiter
              + "' separated List:\n" + v.length);
      for (int i = 0; i < v.length; i++)
      {
        System.err.println("item " + i + " '" + v[i] + "'");
      }
    }
    return v;
  }

  /**
   * Returns a string which contains the list elements delimited by the
   * separator. Null items are ignored. If the input is null or has length zero,
   * a single delimiter is returned. (A non-empty array holding only nulls
   * answers the empty string.)
   * 
   * @param list
   *          the items to join
   * @param separator
   *          the text placed between items
   * @return concatenated string
   */
  public static String arrayToSeparatorList(String[] list, String separator)
  {
    if (list == null || list.length == 0)
    {
      if (DEBUG)
      {
        System.err.println(
                "Returning empty '" + separator + "' separated List\n");
      }
      return "" + separator;
    }

    StringBuilder v = new StringBuilder();
    for (String item : list)
    {
      if (item != null)
      {
        if (v.length() > 0)
        {
          v.append(separator);
        }
        // TODO - escape any separator values in item
        v.append(item);
      }
    }
    if (DEBUG)
    {
      System.err
              .println("Returning '" + separator + "' separated List:\n");
      System.err.println(v);
    }
    return v.toString();
  }

  /**
   * Converts a list to a string with a delimiter before each term except the
   * first. Returns an empty string given a null or zero-length argument. This
   * can be replaced with StringJoiner in Java 8.
   * 
   * @param terms
   *          the terms to join
   * @param delim
   *          the text placed between terms
   * @return the joined string
   */
  public static String listToDelimitedString(List<String> terms,
          String delim)
  {
    StringBuilder sb = new StringBuilder(32);
    if (terms != null && !terms.isEmpty())
    {
      boolean appended = false;
      for (String term : terms)
      {
        if (appended)
        {
          sb.append(delim);
        }
        appended = true;
        sb.append(term);
      }
    }
    return sb.toString();
  }

  /**
   * Convenience method to parse a string to an integer, returning 0 if the
   * input is null or not a valid integer
   * 
   * @param s
   *          the text to parse
   * @return the parsed value, or 0
   */
  public static int parseInt(String s)
  {
    int result = 0;
    if (s != null && s.length() > 0)
    {
      try
      {
        result = Integer.parseInt(s);
      } catch (NumberFormatException ignored)
      {
        // by contract, unparseable input answers 0
      }
    }
    return result;
  }

  /**
   * Compares two versions formatted as e.g. "3.4.5" and returns -1, 0 or 1 as
   * the first version precedes, is equal to, or follows the second
   * 
   * @param v1
   *          first version
   * @param v2
   *          second version
   * @return -1, 0 or 1; 0 if either argument is null or not parseable
   */
  public static int compareVersions(String v1, String v2)
  {
    return compareVersions(v1, v2, null);
  }

  /**
   * Compares two versions formatted as e.g. "3.4.5b1" and returns -1, 0 or 1 as
   * the first version precedes, is equal to, or follows the second. Versions
   * are compared token by token (tokens being delimited by '.'); if all
   * shared tokens are equal, the version with more tokens is the greater.
   * 
   * @param v1
   *          first version
   * @param v2
   *          second version
   * @param pointSeparator
   *          a string used to delimit point increments in sub-tokens of the
   *          version
   * @return -1, 0 or 1; 0 if either version is null, or if a token that has
   *         to be compared is not numeric (a message is then written to
   *         stderr)
   */
  public static int compareVersions(String v1, String v2,
          String pointSeparator)
  {
    if (v1 == null || v2 == null)
    {
      return 0;
    }
    String[] toks1 = v1.split("\\.");
    String[] toks2 = v2.split("\\.");
    int i = 0;
    for (; i < toks1.length; i++)
    {
      if (i >= toks2.length)
      {
        /*
         * extra tokens in v1
         */
        return 1;
      }
      String tok1 = toks1[i];
      String tok2 = toks2[i];
      if (pointSeparator != null)
      {
        /*
         * convert e.g. 5b2 into decimal 5.2 for comparison purposes
         */
        tok1 = tok1.replace(pointSeparator, ".");
        tok2 = tok2.replace(pointSeparator, ".");
      }
      try
      {
        /*
         * double (not float) so that long numeric tokens, e.g. date-stamped
         * build numbers above 2^24, are compared exactly
         */
        int comp = Double.compare(Double.parseDouble(tok1),
                Double.parseDouble(tok2));
        if (comp != 0)
        {
          return comp < 0 ? -1 : 1;
        }
      } catch (NumberFormatException e)
      {
        System.err
                .println("Invalid version format found: " + e.getMessage());
        return 0;
      }
    }

    if (i < toks2.length)
    {
      /*
       * extra tokens in v2 
       */
      return -1;
    }

    /*
     * same length, all tokens match
     */
    return 0;
  }

  /**
   * Converts the string to all lower-case except the first character which is
   * upper-cased
   * 
   * @param s
   *          the text to convert; null answers null
   * @return the converted text
   */
  public static String toSentenceCase(String s)
  {
    if (s == null)
    {
      return s;
    }
    if (s.length() <= 1)
    {
      return s.toUpperCase();
    }
    return s.substring(0, 1).toUpperCase() + s.substring(1).toLowerCase();
  }

  /**
   * A helper method that strips off any leading or trailing html and body tags.
   * If no html tag is found, then also html-encodes angle bracket characters.
   * Tags are matched without regard to case. Text before an opening html tag,
   * and text from the first closing body or html tag onwards, is discarded.
   * 
   * @param text
   *          the text to process; null answers null
   * @return the stripped (or encoded) text
   */
  public static String stripHtmlTags(String text)
  {
    if (text == null)
    {
      return null;
    }
    final String openHtml = "<html>";
    int startTag = indexOfIgnoreCase(text, openHtml);
    if (startTag > -1)
    {
      text = text.substring(startTag + openHtml.length());
    }
    // NOTE: an opening "<body>" tag is not removed (as in the original code)
    int endTag = indexOfIgnoreCase(text, "</body>");
    if (endTag > -1)
    {
      text = text.substring(0, endTag);
    }
    endTag = indexOfIgnoreCase(text, "</html>");
    if (endTag > -1)
    {
      text = text.substring(0, endTag);
    }

    if (startTag == -1 && (text.contains("<") || text.contains(">")))
    {
      text = text.replace("<", "&lt;").replace(">", "&gt;");
    }
    return text;
  }

  /**
   * Answers the index of the first occurrence of {@code target} in
   * {@code text}, ignoring case, or -1 if there is none. Searching the
   * original string (rather than an upper-cased copy) keeps the index valid
   * for characters whose case conversion changes the string length.
   * 
   * @param text
   *          the string to search
   * @param target
   *          the string to look for
   * @return the 0-based match position, or -1
   */
  private static int indexOfIgnoreCase(String text, String target)
  {
    int width = target.length();
    int last = text.length() - width;
    for (int i = 0; i <= last; i++)
    {
      if (text.regionMatches(true, i, target, 0, width))
      {
        return i;
      }
    }
    return -1;
  }

  /**
   * Answers the input string with any occurrences of the 'encodeable' characters
   * replaced by their URL encoding. The input is processed in a single pass,
   * so that the output of one replacement (e.g. the '%' of "%3D", or the '+'
   * that encodes a space) is never itself encoded again.
   * 
   * @param s
   *          the text to encode; null or empty is answered unchanged
   * @param encodable
   *          the characters which should be encoded; if null or empty the
   *          input is answered unchanged
   * @return the encoded text
   */
  public static String urlEncode(String s, String encodable)
  {
    if (s == null || s.isEmpty() || encodable == null
            || encodable.isEmpty())
    {
      return s;
    }

    StringBuilder sb = new StringBuilder(s.length() + 16);
    for (int i = 0; i < s.length(); i++)
    {
      char c = s.charAt(i);
      if (encodable.indexOf(c) == -1)
      {
        sb.append(c);
      }
      else
      {
        sb.append(getUrlEncoding(c));
      }
    }
    return sb.toString();
  }

  /**
   * Answers the input string with any occurrences of {@code c} replaced with
   * their url encoding. Answers the input string if it is unchanged.
   * 
   * @param s
   *          the text to encode; null answers null
   * @param c
   *          the character to encode
   * @return the encoded text
   */
  static String urlEncode(String s, char c)
  {
    if (s == null)
    {
      return null;
    }
    String decoded = String.valueOf(c);
    if (s.indexOf(decoded) != -1)
    {
      String encoded = getUrlEncoding(c);
      if (!encoded.equals(decoded))
      {
        s = s.replace(decoded, encoded);
      }
    }
    return s;
  }

  /**
   * Answers the input string with any occurrences of the specified (unencoded)
   * characters replaced by their URL decoding. The input is processed in a
   * single pass, so text produced by one decoding is never decoded again
   * (for example {@code urlDecode("%253D", "%=")} answers {@code "%3D"}).
   * <p>
   * Example: {@code urlDecode("a%3Db%3Bc", "-;=,")} should answer
   * {@code "a=b;c"}.
   * 
   * @param s
   *          the text to decode; null or empty is answered unchanged
   * @param encodable
   *          the characters whose encodings should be decoded; if null or
   *          empty the input is answered unchanged
   * @return the decoded text
   */
  public static String urlDecode(String s, String encodable)
  {
    if (s == null || s.isEmpty() || encodable == null
            || encodable.isEmpty())
    {
      return s;
    }

    char[] chars = encodable.toCharArray();
    String[] encodings = new String[chars.length];
    for (int j = 0; j < chars.length; j++)
    {
      encodings[j] = getUrlEncoding(chars[j]);
    }

    StringBuilder sb = new StringBuilder(s.length());
    int i = 0;
    while (i < s.length())
    {
      int matched = -1;
      for (int j = 0; j < chars.length; j++)
      {
        if (s.startsWith(encodings[j], i))
        {
          matched = j;
          break;
        }
      }
      if (matched == -1)
      {
        sb.append(s.charAt(i));
        i++;
      }
      else
      {
        sb.append(chars[matched]);
        i += encodings[matched].length();
      }
    }
    return sb.toString();
  }

  /**
   * Does a lazy lookup of the url encoding of the given character, saving the
   * value for repeat lookups. Characters with a value beyond the lookup table
   * are answered unencoded.
   * 
   * @param c
   *          the character to encode
   * @return the URL encoding (UTF-8 based) of the character
   */
  private static String getUrlEncoding(char c)
  {
    if (c >= urlEncodings.length)
    {
      return String.valueOf(c);
    }

    String enc = urlEncodings[c];
    if (enc == null)
    {
      String raw = String.valueOf(c);
      try
      {
        enc = URLEncoder.encode(raw, "UTF-8");
      } catch (UnsupportedEncodingException e)
      {
        // not expected for UTF-8; fall back to the unencoded character
        enc = raw;
      }
      urlEncodings[c] = enc;
    }
    return enc;
  }
}