1 package shared; 2 3 import java.io.File; 4 import java.io.PrintStream; 5 import java.io.PrintWriter; 6 import java.io.StringWriter; 7 import java.util.ArrayList; 8 import java.util.Arrays; 9 import java.util.BitSet; 10 import java.util.Collection; 11 import java.util.HashSet; 12 import java.util.LinkedHashSet; 13 import java.util.Locale; 14 import java.util.Random; 15 import java.util.concurrent.atomic.AtomicIntegerArray; 16 import java.util.concurrent.atomic.AtomicLongArray; 17 import java.util.regex.Pattern; 18 19 import dna.AminoAcid; 20 import dna.Data; 21 import fileIO.ByteFile; 22 import fileIO.FileFormat; 23 import fileIO.ReadWrite; 24 import sketch.Sketch; 25 import stream.Read; 26 import stream.ReadInputStream; 27 import stream.SamLine; 28 import stream.SiteScore; 29 import structures.CoverageArray; 30 import structures.IntHashSet; 31 import structures.IntList; 32 33 public final class Tools { 34 main(String[] args)35 public static void main(String[] args){ 36 37 long[] array=new long[1000]; 38 for(int i=0; i<10; i++){ 39 for(int j=0; j<array.length; j++){ 40 array[j]=(int)Math.round((Math.random()*100)); 41 } 42 System.err.println(String.format(Locale.ROOT, "%.2f",weightedAverage(array))); 43 System.err.println(String.format(Locale.ROOT, "%.2f",mean(array))); 44 // System.err.println(String.format(Locale.ROOT, "%.2f",median(array))); 45 Arrays.sort(array); 46 System.err.println(String.format(Locale.ROOT, "%.2f",weightedAverage(array))); 47 System.err.println(String.format(Locale.ROOT, "%.2f",mean(array))); 48 // System.err.println(Arrays.toString(array)); 49 System.err.println("\n"); 50 } 51 52 } 53 loadIntSet(String numbers)54 public static IntHashSet loadIntSet(String numbers){ 55 IntList list=loadIntegers(numbers); 56 if(list==null || list.isEmpty()){return null;} 57 IntHashSet set=new IntHashSet((int)((list.size*5L)/4)); 58 set.addAll(list); 59 return set; 60 } 61 62 /** 63 * Loads integers. 64 * Can be comma-delimited, or a comma-delimited list of files. 65 * Files should contain one integer per line. 66 * @param numbers 67 * @return 68 */ loadIntegers(String numbers)69 public static IntList loadIntegers(String numbers){ 70 return loadIntegers(numbers, null); 71 } 72 loadIntegers(String numbers, IntList list)73 private static IntList loadIntegers(String numbers, IntList list){ 74 if(numbers==null){return list;} 75 if(list==null){list=new IntList();} 76 String[] array=commaPattern.split(numbers); 77 for(String s : array){ 78 if(Tools.isDigit(s.charAt(0)) && Tools.isDigit(s.charAt(s.length()-1))){ 79 final int x=Integer.parseInt(s); 80 list.add(x); 81 }else if(new File(s).exists()){ 82 ByteFile tf=ByteFile.makeByteFile(s, false); 83 for(byte[] line=tf.nextLine(); line!=null; line=tf.nextLine()){ 84 try { 85 list.add(Parse.parseInt(line, 0)); 86 } catch (Throwable e) {//In case there is a comma-delimited line in the file 87 loadIntegers(new String(line), list); 88 } 89 } 90 // } 91 }else{ 92 assert(false) : "Invalid number or file "+s; 93 } 94 } 95 return list; 96 } 97 existsInput(String fname)98 public static boolean existsInput(String fname) { 99 if(fname==null){return false;} 100 if(fname.equalsIgnoreCase("stdin") || fname.startsWith("stdin.")){return true;} 101 File f=new File(fname); 102 return f.exists(); 103 } 104 105 /** Checks for permission to read files, and input name collisions. */ testOutputFiles(boolean overwrite, boolean append, boolean allowDuplicates, ArrayList<String>...args)106 public static boolean testOutputFiles(boolean overwrite, boolean append, boolean allowDuplicates, ArrayList<String>...args){ 107 if(args==null || args.length==0){return true;} 108 ArrayList<String> list=new ArrayList<String>(); 109 for(ArrayList<String> als : args){ 110 if(als!=null){ 111 list.addAll(als); 112 } 113 } 114 return testOutputFiles(overwrite, append, allowDuplicates, list.toArray(new String[list.size()])); 115 } 116 117 /** Checks for permission to overwrite files, and output name collisions. */ testOutputFiles(boolean overwrite, boolean append, boolean allowDuplicates, String...args)118 public static boolean testOutputFiles(boolean overwrite, boolean append, boolean allowDuplicates, String...args){ 119 if(args==null || args.length==0){return true;} 120 HashSet<String> set=new HashSet<String>(args.length*2); 121 int terms=0; 122 for(String s : args){ 123 if(s!=null){ 124 if(isOutputFileName(s)){ 125 terms++; 126 127 if(!overwrite && !append && new File(s).exists()){ 128 assert(overwrite) : "File "+s+" exists and overwrite=false"; 129 return false; 130 } 131 132 if(!allowDuplicates && set.contains(s)){ 133 assert(false) : "Duplicate file "+s+" was specified for multiple output streams."; 134 return false; 135 } 136 137 set.add(s); 138 } 139 } 140 } 141 return true; 142 } 143 144 /** Checks for permission to read files, and input name collisions. */ testInputFiles(boolean allowDuplicates, boolean throwException, ArrayList<String>...args)145 public static boolean testInputFiles(boolean allowDuplicates, boolean throwException, ArrayList<String>...args){ 146 if(args==null || args.length==0){return true;} 147 ArrayList<String> list=new ArrayList<String>(); 148 for(ArrayList<String> als : args){ 149 if(als!=null){ 150 list.addAll(als); 151 } 152 } 153 return testInputFiles(allowDuplicates, throwException, list.toArray(new String[list.size()])); 154 } 155 156 /** Checks for permission to read files, and input name collisions. */ testInputFiles(boolean allowDuplicates, boolean throwException, String[]...args)157 public static boolean testInputFiles(boolean allowDuplicates, boolean throwException, String[]...args){ 158 if(args==null || args.length==0){return true;} 159 for(String[] s : args){ 160 if(!testInputFiles(allowDuplicates, throwException, s)){return false;} 161 } 162 return true; 163 } 164 165 /** Checks for permission to read files, and input name collisions. */ testInputFilesALA(boolean allowDuplicates, boolean throwException, ArrayList<String> list1, ArrayList<String> list2, String...args)166 public static boolean testInputFilesALA(boolean allowDuplicates, boolean throwException, ArrayList<String> list1, ArrayList<String> list2, String...args){ 167 ArrayList<String> list3=new ArrayList<String>(); 168 if(list1!=null){list3.addAll(list1);} 169 if(list2!=null){list3.addAll(list2);} 170 if(args!=null){for(String s : args){list3.add(s);}} 171 return testInputFiles(allowDuplicates, throwException, list3.toArray(new String[0])); 172 } 173 174 /** Checks for permission to read files, and input name collisions. */ testInputFiles(boolean allowDuplicates, boolean throwException, String...args)175 public static boolean testInputFiles(boolean allowDuplicates, boolean throwException, String...args){ 176 if(args==null || args.length==0){return true;} 177 HashSet<String> set=new HashSet<String>(args.length*2); 178 int terms=0; 179 for(String s : args){ 180 if(s!=null){ 181 String s2=s.toLowerCase(); 182 if(canRead(s)){ 183 terms++; 184 }else{ 185 if(throwException){throw new RuntimeException("Can't read file '"+s+"'");} 186 return false; 187 } 188 189 if(!allowDuplicates && set.contains(s2)){ 190 if(throwException){throw new RuntimeException("Duplicate file "+s+" was specified for multiple input streams.");} 191 return false; 192 } 193 194 set.add(s2); 195 } 196 } 197 return true; 198 } 199 200 /** Checks for permission to overwrite files, and output name collisions. */ testForDuplicateFilesALA(boolean throwException, ArrayList<String> list1, ArrayList<String> list2, String...args)201 public static boolean testForDuplicateFilesALA(boolean throwException, ArrayList<String> list1, ArrayList<String> list2, String...args){ 202 ArrayList<String> list3=new ArrayList<String>(); 203 if(list1!=null){list3.addAll(list1);} 204 if(list2!=null){list3.addAll(list2);} 205 if(args!=null){for(String s : args){list3.add(s);}} 206 return testForDuplicateFiles(throwException, list3.toArray(new String[0])); 207 } 208 209 /** Checks for permission to overwrite files, and output name collisions. 210 * @return True if no problems are detected */ testForDuplicateFiles(boolean throwException, String...args)211 public static boolean testForDuplicateFiles(boolean throwException, String...args){ 212 if(args==null || args.length==0){return true;} 213 HashSet<String> set=new HashSet<String>(args.length*2); 214 int terms=0; 215 for(String s0 : args){ 216 if(s0!=null){ 217 String s=s0.toLowerCase(); 218 terms++; 219 if(set.contains(s) && !s.equals("stdout") && !s.startsWith("stdout.") && !s.equals("stderr") && !s.startsWith("stderr.")){ 220 if(throwException){throw new RuntimeException("File '"+s0+"' was specified multiple times.");} 221 return false; 222 } 223 set.add(s); 224 } 225 } 226 return true; 227 } 228 canWrite(String s, boolean overwrite)229 public static final boolean canWrite(String s, boolean overwrite){ 230 if(isNullFileName(s) || isSpecialOutputName(s)){return true;} 231 File f=new File(s); 232 if(f.exists()){return overwrite && f.canWrite();} 233 return true; 234 } 235 236 // public static final boolean outputDestinationExists(String s){ 237 // if(isNullFileName(s)){return false;} 238 // if(isSpecialOutputName(s)){return false;} 239 // File f=new File(s); 240 // return f.exists(); 241 // } 242 isOutputFileName(String s)243 public static final boolean isOutputFileName(String s){ 244 return !(isNullFileName(s) || isSpecialOutputName(s)); 245 } 246 isNullFileName(String s)247 public static final boolean isNullFileName(String s){ 248 if(s==null || s.equalsIgnoreCase("null") || s.equalsIgnoreCase("none")){return true;} 249 for(int i=0; i<s.length(); i++){ 250 if(!Character.isWhitespace(s.charAt(i))){return false;} 251 } 252 return true; 253 } 254 isSpecialOutputName(String s)255 public static final boolean isSpecialOutputName(String s){ 256 if(s==null){return false;} 257 s=s.toLowerCase(); 258 return s.equals("stdout") || s.equals("stderr") || s.equals("standardout") || s.equals("standarderr") 259 || s.equals("/dev/null") || s.startsWith("stdout.") || s.startsWith("stderr."); 260 } 261 isSpecialInputName(String s)262 public static final boolean isSpecialInputName(String s){ 263 if(s==null){return false;} 264 s=s.toLowerCase(); 265 return s.equals("stdin") || s.equals("standardin") || s.startsWith("stdin.") || s.startsWith("jar:"); 266 } 267 canRead(String s)268 public static final boolean canRead(String s){ 269 if(s==null){return false;} 270 if(isSpecialInputName(s)){return true;} 271 File f=new File(s); 272 return f.canRead(); 273 } 274 addFiles(String b, ArrayList<String> list)275 public static boolean addFiles(String b, ArrayList<String> list){ 276 if(b==null){ 277 list.clear(); 278 return true; 279 } 280 281 boolean added=false; 282 boolean failed=false; 283 if(b.indexOf(',')>0){ 284 for(String s : b.split(",")){ 285 boolean x=addFiles(s, list); 286 added|=x; 287 failed|=!x; 288 } 289 }else{ 290 String s=b; 291 if(b.indexOf('@')>0 && !new File(b).exists()){s=b.split("@")[0];} 292 if(new File(s).exists() || s.equalsIgnoreCase("stdin") || s.toLowerCase().startsWith("stdin.")){ 293 list.add(b); 294 return true; 295 }else{ 296 return false; 297 } 298 } 299 return added&!failed; 300 } 301 fill(int[] target, int[] source)302 public static void fill(int[] target, int[] source) { 303 for(int i=0; i<target.length; i++){ 304 target[i]=source[i]; 305 } 306 } 307 isSorted(final int[] array)308 public static boolean isSorted(final int[] array) { 309 if(array==null || array.length<2){return true;} 310 for(int i=1; i<array.length; i++){ 311 if(array[i]<array[i-1]){return false;} 312 } 313 return true; 314 } 315 fixExtension(String[] fnames)316 public static String[] fixExtension(String[] fnames){ 317 if(!Shared.FIX_EXTENSIONS){return fnames;} 318 if(fnames==null){return fnames;} 319 for(int i=0; i<fnames.length; i++){ 320 fnames[i]=fixExtension(fnames[i]); 321 } 322 return fnames; 323 } 324 fixExtension(ArrayList<String> fnames)325 public static ArrayList<String> fixExtension(ArrayList<String> fnames){ 326 if(!Shared.FIX_EXTENSIONS){return fnames;} 327 if(fnames==null){return fnames;} 328 for(int i=0; i<fnames.size(); i++){ 329 fnames.set(i,fixExtension(fnames.get(i))); 330 } 331 return fnames; 332 } 333 fixExtension(String fname)334 public static String fixExtension(String fname){ 335 if(!Shared.FIX_EXTENSIONS){return fname;} 336 if(fname==null || fname.startsWith("stdin") || new File(fname).exists()){return fname;} 337 final String[] suffixes=new String[] {".gz", ".bz2"}; 338 for(String suffix : suffixes){ 339 if(fname.endsWith(suffix)){ 340 String sub=fname.substring(0, fname.length()-suffix.length()); 341 if(new File(sub).exists()){return sub;} 342 else{return fname;} 343 } 344 } 345 for(String suffix : suffixes){ 346 String s=fname+suffix; 347 if(new File(s).exists()){return s;} 348 } 349 return fname; 350 } 351 padRight(long number, int pad)352 public static String padRight(long number, int pad) { 353 String s=number+""; 354 while(s.length()<pad){s=s+" ";} 355 return s; 356 } 357 padRight(String s, int pad)358 public static String padRight(String s, int pad) { 359 while(s.length()<pad){s=s+" ";} 360 return s; 361 } 362 padLeft(long number, int pad)363 public static String padLeft(long number, int pad) { 364 String s=number+""; 365 while(s.length()<pad){s=" "+s;} 366 return s; 367 } 368 padLeft(String s, int pad)369 public static String padLeft(String s, int pad) { 370 while(s.length()<pad){s=" "+s;} 371 return s; 372 } 373 padKM(long number, int pad)374 public static String padKM(long number, int pad) { 375 String s; 376 if(Shared.OUTPUT_KMG){ 377 s=(number<100000 ? ""+number : number<100000000 ? (number/1000)+"k" : (number/1000000)+"m"); 378 }else{ 379 s=""+number; 380 } 381 while(s.length()<pad){s=" "+s;} 382 return s; 383 } 384 timeReadsBasesProcessed(Timer t, long reads, long bases, int pad)385 public static String timeReadsBasesProcessed(Timer t, long reads, long bases, int pad){ 386 return time(t, pad)+"\n"+readsBasesProcessed(t.elapsed, reads, bases, pad); 387 } 388 timeZMWsReadsBasesProcessed(Timer t, long ZMWs, long reads, long bases, int pad)389 public static String timeZMWsReadsBasesProcessed(Timer t, long ZMWs, long reads, long bases, int pad){ 390 return time(t, pad)+"\n"+ZMWsReadsBasesProcessed(t.elapsed, ZMWs, reads, bases, pad); 391 } 392 timeQueriesComparisonsProcessed(Timer t, long x, long y, int pad)393 public static String timeQueriesComparisonsProcessed(Timer t, long x, long y, int pad){ 394 return time(t, pad)+"\n"+queriesComparisonsProcessed(t.elapsed, x, y, pad); 395 } 396 time(Timer t, int pad)397 public static String time(Timer t, int pad){ 398 return ("Time: \t"+t); 399 } 400 readsBasesProcessed(long elapsed, long reads, long bases, int pad)401 public static String readsBasesProcessed(long elapsed, long reads, long bases, int pad){ 402 double rpnano=reads/(double)elapsed; 403 double bpnano=bases/(double)elapsed; 404 405 String rstring=padKM(reads, pad); 406 String bstring=padKM(bases, pad); 407 StringBuilder sb=new StringBuilder(); 408 sb.append("Reads Processed: ").append(rstring).append(String.format(Locale.ROOT, " \t%.2fk reads/sec", rpnano*1000000)).append('\n'); 409 sb.append("Bases Processed: ").append(bstring).append(String.format(Locale.ROOT, " \t%.2fm bases/sec", bpnano*1000)); 410 return sb.toString(); 411 } 412 things(String things, long amt, int pad)413 public static String things(String things, long amt, int pad){ 414 String rstring=padKM(amt, pad); 415 StringBuilder sb=new StringBuilder(); 416 sb.append(things).append(": "); 417 while(sb.length()<"Reads Processed: ".length()){sb.append(' ');}//dif could be added to pad instead 418 return sb.append(rstring).toString(); 419 } 420 ZMWsReadsBasesProcessed(long elapsed, long ZMWs, long reads, long bases, int pad)421 public static String ZMWsReadsBasesProcessed(long elapsed, long ZMWs, long reads, long bases, int pad){ 422 double zpnano=ZMWs/(double)elapsed; 423 double rpnano=reads/(double)elapsed; 424 double bpnano=bases/(double)elapsed; 425 426 String zstring=padKM(ZMWs, pad); 427 String rstring=padKM(reads, pad); 428 String bstring=padKM(bases, pad); 429 StringBuilder sb=new StringBuilder(); 430 sb.append("ZMWs Processed: ").append(zstring).append(String.format(Locale.ROOT, " \t%.2fk ZMWs/sec", zpnano*1000000)).append('\n'); 431 sb.append("Reads Processed: ").append(rstring).append(String.format(Locale.ROOT, " \t%.2fk reads/sec", rpnano*1000000)).append('\n'); 432 sb.append("Bases Processed: ").append(bstring).append(String.format(Locale.ROOT, " \t%.2fm bases/sec", bpnano*1000)); 433 return sb.toString(); 434 } 435 queriesComparisonsProcessed(long elapsed, long queries, long comparisons, int pad)436 public static String queriesComparisonsProcessed(long elapsed, long queries, long comparisons, int pad){ 437 double rpnano=queries/(double)elapsed; 438 double bpnano=comparisons/(double)elapsed; 439 440 String rstring=padKM(queries, pad); 441 String bstring=padKM(comparisons, pad); 442 StringBuilder sb=new StringBuilder(); 443 sb.append("Queries: ").append(rstring).append(String.format(Locale.ROOT, " \t%.2f queries/sec", rpnano*1000000000)).append('\n'); 444 sb.append("Comparisons: ").append(bstring).append(String.format(Locale.ROOT, " \t%.2f comparisons/sec", bpnano*1000000000)); 445 return sb.toString(); 446 } 447 timeSketchesKeysProcessed(Timer t, long sketchesProcessed, long keysProcessed, int pad)448 public static String timeSketchesKeysProcessed(Timer t, long sketchesProcessed, long keysProcessed, int pad){ 449 return time(t, pad)+"\n"+sketchesKeysProcessed(t.elapsed, sketchesProcessed, keysProcessed, pad); 450 } 451 sketchesKeysProcessed(long elapsed, long sketches, long keys, int pad)452 public static String sketchesKeysProcessed(long elapsed, long sketches, long keys, int pad){ 453 double rpnano=sketches/(double)elapsed; 454 double bpnano=keys/(double)elapsed; 455 456 String rstring=padKM(sketches, pad); 457 String bstring=padKM(keys, pad); 458 StringBuilder sb=new StringBuilder(); 459 sb.append("Sketches Processed: ").append(rstring).append(String.format(Locale.ROOT, " \t%.2fk sketches/sec", rpnano*1000000)).append('\n'); 460 sb.append("Keys Processed: ").append(bstring).append(String.format(Locale.ROOT, " \t%.2fm keys/sec", bpnano*1000)); 461 return sb.toString(); 462 } 463 readsBasesOut(long readsIn, long basesIn, long readsOut, long basesOut, int pad, boolean percent)464 public static String readsBasesOut(long readsIn, long basesIn, long readsOut, long basesOut, int pad, boolean percent){ 465 double rpct=readsOut*100.0/readsIn; 466 double bpct=basesOut*100.0/basesIn; 467 String rstring=padKM(readsOut, pad); 468 String bstring=padKM(basesOut, pad); 469 StringBuilder sb=new StringBuilder(); 470 sb.append("Reads Out: ").append(rstring).append(percent ? String.format(Locale.ROOT, " \t%.2f%%", rpct) : "").append('\n'); 471 sb.append("Bases Out: ").append(bstring).append(percent ? String.format(Locale.ROOT, " \t%.2f%%", bpct) : ""); 472 return sb.toString(); 473 } 474 ZMWsReadsBasesOut(long ZMWsIn, long readsIn, long basesIn, long ZMWsOut, long readsOut, long basesOut, int pad, boolean percent)475 public static String ZMWsReadsBasesOut(long ZMWsIn, long readsIn, long basesIn, long ZMWsOut, long readsOut, long basesOut, int pad, boolean percent){ 476 double zpct=ZMWsOut*100.0/ZMWsIn; 477 double rpct=readsOut*100.0/readsIn; 478 double bpct=basesOut*100.0/basesIn; 479 String zstring=padKM(ZMWsOut, pad); 480 String rstring=padKM(readsOut, pad); 481 String bstring=padKM(basesOut, pad); 482 StringBuilder sb=new StringBuilder(); 483 sb.append("ZMWs Out: ").append(zstring).append(percent ? String.format(Locale.ROOT, " \t%.2f%%", zpct) : "").append('\n'); 484 sb.append("Reads Out: ").append(rstring).append(percent ? String.format(Locale.ROOT, " \t%.2f%%", rpct) : "").append('\n'); 485 sb.append("Bases Out: ").append(bstring).append(percent ? String.format(Locale.ROOT, " \t%.2f%%", bpct) : ""); 486 return sb.toString(); 487 } 488 numberPercent(String text, long number, double percent, int decimals, int pad)489 public static String numberPercent(String text, long number, double percent, int decimals, int pad){ 490 String rstring=padLeft(number, pad); 491 StringBuilder sb=new StringBuilder(); 492 while(text.length()<20){text+=" ";} 493 sb.append(text).append(rstring).append(String.format(Locale.ROOT, " \t%."+decimals+"f%%", percent)); 494 return sb.toString(); 495 } 496 number(String text, double number, int decimals, int pad)497 public static String number(String text, double number, int decimals, int pad){ 498 String rstring=padLeft(String.format(Locale.ROOT, "%."+decimals+"f", number), pad); 499 StringBuilder sb=new StringBuilder(); 500 while(text.length()<20){text+=" ";} 501 sb.append(text).append(rstring); 502 return sb.toString(); 503 } 504 number(String text, long number, int pad)505 public static String number(String text, long number, int pad){ 506 String rstring=padLeft(""+number, pad); 507 StringBuilder sb=new StringBuilder(); 508 while(text.length()<20){text+=" ";} 509 sb.append(text).append(rstring); 510 return sb.toString(); 511 } 512 string(String text, String value, int pad)513 public static String string(String text, String value, int pad){ 514 String rstring=padLeft(value, pad); 515 StringBuilder sb=new StringBuilder(); 516 while(text.length()<20){text+=" ";} 517 sb.append(text).append(rstring); 518 return sb.toString(); 519 } 520 timeLinesBytesProcessed(Timer t, long linesProcessed, long bytesProcessed, int pad)521 public static String timeLinesBytesProcessed(Timer t, long linesProcessed, long bytesProcessed, int pad){ 522 return ("Time: \t"+t+"\n"+linesBytesProcessed(t.elapsed, linesProcessed, bytesProcessed, pad)); 523 } 524 linesBytesProcessed(long elapsed, long lines, long bytes, int pad)525 public static String linesBytesProcessed(long elapsed, long lines, long bytes, int pad){ 526 double rpnano=lines/(double)elapsed; 527 double bpnano=bytes/(double)elapsed; 528 529 String rstring=padKM(lines, pad); 530 String bstring=padKM(bytes, pad); 531 StringBuilder sb=new StringBuilder(); 532 sb.append("Lines Processed: ").append(rstring).append(String.format(Locale.ROOT, " \t%.2fk lines/sec", rpnano*1000000)).append('\n'); 533 sb.append("Bytes Processed: ").append(bstring).append(String.format(Locale.ROOT, " \t%.2fm bytes/sec", bpnano*1000)); 534 return sb.toString(); 535 } 536 linesBytesOut(long linesIn, long bytesIn, long linesOut, long bytesOut, int pad, boolean percent)537 public static String linesBytesOut(long linesIn, long bytesIn, long linesOut, long bytesOut, int pad, boolean percent){ 538 double rpct=linesOut*100.0/linesIn; 539 double bpct=bytesOut*100.0/bytesIn; 540 String rstring=padKM(linesOut, pad); 541 String bstring=padKM(bytesOut, pad); 542 StringBuilder sb=new StringBuilder(); 543 sb.append("Lines Out: ").append(rstring).append(percent ? String.format(Locale.ROOT, " \t%.2f%%", rpct) : "").append('\n'); 544 sb.append("Bytes Out: ").append(bstring).append(percent ? String.format(Locale.ROOT, " \t%.2f%%", bpct) : ""); 545 return sb.toString(); 546 } 547 split(byte[] line, int start, byte delimiter)548 public static ArrayList<byte[]> split(byte[] line, int start, byte delimiter) { 549 if(line.length<start){return null;} 550 int a=start-1, b=start; 551 ArrayList<byte[]> list=new ArrayList<byte[]>(8); 552 while(b<line.length){ 553 byte c=line[b]; 554 if(c==delimiter){ 555 list.add(Arrays.copyOfRange(line, a+1, b)); 556 a=b; 557 } 558 b++; 559 } 560 list.add(Arrays.copyOfRange(line, a+1, b)); 561 return list; 562 } 563 breakReads(ArrayList<Read> list, final int max, int min, final PrintStream outstream)564 public static void breakReads(ArrayList<Read> list, final int max, int min, final PrintStream outstream){ 565 if(!containsReadsOutsideSizeRange(list, min, max)){return;} 566 assert(max>0 || min>0) : "min or max read length must be positive."; 567 assert(max<1 || max>=min) : "max read length must be at least min read length: "+max+"<"+min; 568 min=Tools.max(0, min); 569 570 ArrayList<Read> temp=new ArrayList<Read>(list.size()*2); 571 for(Read r : list){ 572 if(r==null || r.bases==null){ 573 temp.add(r); 574 }else if(r.length()<min){ 575 temp.add(null); 576 }else if(max<1 || r.length()<=max){ 577 temp.add(r); 578 }else{ 579 final byte[] bases=r.bases; 580 final byte[] quals=r.quality; 581 final String name=r.id; 582 final int limit=bases.length-min; 583 for(int num=1, start=0, stop=max; start<limit; num++, start+=max, stop+=max){ 584 if(outstream!=null){ 585 outstream.println(bases.length+", "+start+", "+stop); 586 if(quals!=null){outstream.println(quals.length+", "+start+", "+stop);} 587 } 588 stop=Tools.min(stop, bases.length); 589 byte[] b2=KillSwitch.copyOfRange(bases, start, stop); 590 byte[] q2=(quals==null ? null : KillSwitch.copyOfRange(quals, start, stop)); 591 String n2=name+"_"+num; 592 Read r2=new Read(b2, q2, n2, r.numericID, r.flags); 593 r2.setMapped(false); 594 temp.add(r2); 595 } 596 } 597 } 598 list.clear(); 599 list.ensureCapacity(temp.size()); 600 // list.addAll(temp); 601 for(Read r : temp){ 602 if(r!=null){list.add(r);} 603 } 604 } 605 containsReadsAboveSize(ArrayList<Read> list, int size)606 private static boolean containsReadsAboveSize(ArrayList<Read> list, int size){ 607 for(Read r : list){ 608 if(r!=null && r.bases!=null){ 609 if(r.length()>size){ 610 assert(r.mate==null) : "Read of length "+r.length()+">"+size+". Paired input is incompatible with 'breaklength'"; 611 return true; 612 } 613 } 614 } 615 return false; 616 } 617 containsReadsOutsideSizeRange(ArrayList<Read> list, int min, int max)618 private static boolean containsReadsOutsideSizeRange(ArrayList<Read> list, int min, int max){ 619 for(Read r : list){ 620 if(r!=null && r.bases!=null){ 621 if((max>0 && r.length()>max) || r.length()<min){ 622 assert(r.mate==null) : "Read of length "+r.length()+" outside of range "+min+"-"+max+". Paired input is incompatible with 'breaklength'"; 623 return true; 624 } 625 } 626 } 627 return false; 628 } 629 shiftRight(final byte[] array, final int amt)630 public static void shiftRight(final byte[] array, final int amt){ 631 for(int i=array.length-1-amt, j=array.length-1; i>=0; i--, j--){ 632 array[j]=array[i]; 633 } 634 } 635 shiftLeft(final byte[] array, final int amt)636 public static void shiftLeft(final byte[] array, final int amt){ 637 for(int i=amt, j=0; i<array.length; i++, j++){ 638 array[j]=array[i]; 639 } 640 } 641 startsWithIgnoreCase(String s, String prefix)642 public static boolean startsWithIgnoreCase(String s, String prefix){ 643 if(s==null || s.length()<prefix.length()){return false;} 644 for(int i=0; i<prefix.length(); i++){ 645 if(Tools.toLowerCase(s.charAt(i))!=Tools.toLowerCase(prefix.charAt(i))){ 646 return false; 647 } 648 } 649 return true; 650 } 651 652 /** Iterative guess-and-check using a one-way formula */ observedToActualCoverage_iterative(double y, double error)653 public static double observedToActualCoverage_iterative(double y, double error){ 654 double guess=y-0.95/Math.pow(y, 1.4); 655 double y2=actualToObservedCoverage(guess); 656 double dif=y-y2; 657 for(int i=0; i<20 && Math.abs(dif)>error; i++){ 658 guess=guess+dif*0.9; 659 y2=actualToObservedCoverage(guess); 660 dif=y-y2; 661 } 662 return guess; 663 } 664 665 /** Derived from curve-fitting simulated data. 666 * Yields actual kmer coverage from observed kmer coverage. 667 * Not perfectly accurate but the deviation is typically under 5%. */ observedToActualCoverage(double y)668 public static double observedToActualCoverage(double y){ 669 return Tools.max(0, y-Math.exp(-0.885*(y-1))); 670 } 671 672 /** Derived from curve-fitting simulated data. 673 * Yields observed kmer coverage from actual kmer coverage. 674 * Not perfectly accurate but the deviation is typically under 10%. */ actualToObservedCoverage(double x)675 private static double actualToObservedCoverage(double x){ 676 return x+Math.exp(-0.597*x); 677 } 678 kmerToReadCoverage(double cov, double readlen, int k)679 public static double kmerToReadCoverage(double cov, double readlen, int k){ 680 return readlen<=k ? 0 : cov*readlen/(readlen-k+1); 681 } 682 readToKmerCoverage(double cov, double readlen, int k)683 public static double readToKmerCoverage(double cov, double readlen, int k){ 684 return readlen<=k ? 0 : cov*(readlen-k+1)/readlen; 685 } 686 isNumeric(String s)687 public static boolean isNumeric(String s) { 688 if(s==null || s.length()<1){return false;} 689 char first=s.charAt(0); 690 int dots=0, signs=0, nums=0; 691 if(first=='-'){signs++;} 692 else if(first>='0' && first<='9'){nums++;} 693 else if(first=='.'){dots++;} 694 else{return false;} 695 696 for(int i=1; i<s.length(); i++){ 697 char c=s.charAt(i); 698 if(c>='0' && c<='9'){nums++;} 699 else if(c=='.'){dots++;} 700 else{return false;} 701 } 702 return nums>0 && dots<=1; 703 } 704 isDigitOrSign(int c)705 public static boolean isDigitOrSign(int c) {return c<0 ? false : signOrDigitMap[c];} isNumeric(int c)706 public static boolean isNumeric(int c) {return c<0 ? false : numericMap[c];} isDigit(int c)707 public static boolean isDigit(int c) {return c>='0' && c<='9';} isLetter(int c)708 public static boolean isLetter(int c) {return c<0 ? false : letterMap[c];} isLetterOrDigit(int c)709 public static boolean isLetterOrDigit(int c) {return c<0 ? false : isDigit(c) || letterMap[c];} isUpperCase(int c)710 public static boolean isUpperCase(int c) {return c>='A' && c<='Z';} isLowerCase(int c)711 public static boolean isLowerCase(int c) {return c>='a' && c<='z';} toUpperCase(int c)712 public static int toUpperCase(int c) {return c<'a' || c>'z' ? c : c-32;} toLowerCase(int c)713 public static int toLowerCase(int c) {return c<'A' || c>'Z' ? c : c+32;} 714 isDigitOrSign(byte c)715 public static boolean isDigitOrSign(byte c) {return c<0 ? false : signOrDigitMap[c];} isNumeric(byte c)716 public static boolean isNumeric(byte c) {return c<0 ? false : numericMap[c];} isDigit(byte c)717 public static boolean isDigit(byte c) {return c>='0' && c<='9';} isLetter(byte c)718 public static boolean isLetter(byte c) {return c<0 ? false : letterMap[c];} isLetterOrDigit(byte c)719 public static boolean isLetterOrDigit(byte c) {return c<0 ? false : isDigit(c) || letterMap[c];} isUpperCase(byte c)720 public static boolean isUpperCase(byte c) {return c>='A' && c<='Z';} isLowerCase(byte c)721 public static boolean isLowerCase(byte c) {return c>='a' && c<='z';} toUpperCase(byte c)722 public static byte toUpperCase(byte c) {return c<'a' || c>'z' ? c : (byte)(c-32);} toLowerCase(byte c)723 public static byte toLowerCase(byte c) {return c<'A' || c>'Z' ? c : (byte)(c+32);} 724 isDigitOrSign(char c)725 public static boolean isDigitOrSign(char c) {return c>127 ? false : signOrDigitMap[c];} isNumeric(char c)726 public static boolean isNumeric(char c) {return c>127 ? false : numericMap[c];} isDigit(char c)727 public static boolean isDigit(char c) {return c>='0' && c<='9';} isLetter(char c)728 public static boolean isLetter(char c) {return c>127 ? false : letterMap[c];} isLetterOrDigit(char c)729 public static boolean isLetterOrDigit(char c) {return c<0 ? false : isDigit(c) || letterMap[c];} isUpperCase(char c)730 public static boolean isUpperCase(char c) {return c>='A' && c<='Z';} isLowerCase(char c)731 public static boolean isLowerCase(char c) {return c>='a' && c<='z';} toUpperCase(char c)732 public static char toUpperCase(char c) {return c<'a' || c>'z' ? c : (char)(c-32);} toLowerCase(char c)733 public static char toLowerCase(char c) {return c<'A' || c>'Z' ? c : (char)(c+32);} 734 735 //Taken from https://stackoverflow.com/questions/1149703/how-can-i-convert-a-stack-trace-to-a-string toString(Throwable t)736 public static String toString(Throwable t){ 737 StringWriter sw = new StringWriter(); 738 PrintWriter pw = new PrintWriter(sw); 739 t.printStackTrace(pw); 740 String sStackTrace = sw.toString(); 741 return sStackTrace; 742 } countKmers(byte[] bases, int k)743 public static int countKmers(byte[] bases, int k){ 744 if(bases==null || bases.length<k || k<1){return 0;} 745 int len=0; 746 int kmers=0; 747 for(byte b : bases){ 748 if(AminoAcid.isFullyDefined(b)){len++;} 749 else{ 750 if(len>=k){kmers=kmers+len-k+1;} 751 len=0; 752 } 753 } 754 if(len>=k){kmers=kmers+len-k+1;} 755 return kmers; 756 } 757 countCorrectKmers(byte[] quals, int k)758 public static double countCorrectKmers(byte[] quals, int k){ 759 if(quals==null || quals.length<k || k<1){return 0;} 760 int len=0; 761 double kmers=0; 762 double prob=1; 763 for(int i=0; i<quals.length; i++){ 764 final byte q=quals[i]; 765 if(q>0){ 766 len++; 767 prob=prob*align2.QualityTools.PROB_CORRECT[q]; 768 if(len>k){ 769 byte oldq=quals[i-k]; 770 prob=prob*align2.QualityTools.PROB_CORRECT_INVERSE[oldq]; 771 } 772 if(len>=k){kmers+=prob;} 773 }else{ 774 len=0; 775 prob=1; 776 } 777 } 778 return kmers; 779 } 780 781 estimateFileSize(String fname)782 public static long estimateFileSize(String fname){ 783 FileFormat ff=FileFormat.testInput(fname, FileFormat.FASTQ, null, false, false); 784 if(ff==null || ff.stdio()){return -1;} 785 786 double mult=1; 787 if(ff.compressed()){ 788 if(ff.bam()){mult=6;} 789 if(ff.fasta()){mult=5;} 790 else{mult=4;} 791 } 792 793 File f=new File(fname); 794 long size=f.length(); 795 double diskEstimate=size*mult; 796 return (long)diskEstimate; 797 } 798 799 /** 800 * @param fname 801 * @param readsToFetch 802 * @param extraOverheadPerRead 803 * @param earlyExit 804 * @return {memEstimate, diskEstimate, memRatio, diskRatio, numReadsEstimate}; 805 */ estimateFileMemory(String fname, int readsToFetch, double extraOverheadPerRead, boolean earlyExit, boolean lowComplexity)806 public static double[] estimateFileMemory(String fname, int readsToFetch, double extraOverheadPerRead, boolean earlyExit, boolean lowComplexity){ 807 808 FileFormat ff=FileFormat.testInput(fname, FileFormat.FASTQ, null, false, false); 809 if(ff==null || ff.stdio()){return null;} 810 811 File f=new File(fname); 812 final long size=f.length(); 813 814 if(earlyExit){ 815 816 long available=Shared.memFree(); 817 818 819 double memRatio, diskRatio, readRatio; 820 if(ff.compressed()){ 821 memRatio=40; 822 diskRatio=5; 823 }else{ 824 memRatio=8; 825 diskRatio=1; 826 } 827 readRatio=diskRatio/100; 828 829 long memEstimate=(long)(memRatio*size); 830 long diskEstimate=(long)(diskRatio*size); 831 long readsEstimate=(long)(readRatio*size); 832 833 // System.err.println(memEstimate+", "+available+", "+(memEstimate*1.5)+", "+readsEstimate+", "+(memEstimate*2.1<available)+", "+(readsEstimate*5<Integer.MAX_VALUE)); 834 835 if(memEstimate*2.1<available && readsEstimate*4<Integer.MAX_VALUE){ 836 return new double[] {memEstimate, diskEstimate, memRatio, diskRatio, readsEstimate}; 837 } 838 } 839 // assert(false) : earlyExit; 840 readsToFetch=Tools.max(readsToFetch, 200); 841 ff=FileFormat.testInput(fname, FileFormat.FASTQ, null, ff.compressed() && !ff.gzip(), true); 842 ArrayList<Read> reads=ReadInputStream.toReads(ff, readsToFetch); 843 long minBytes=Integer.MAX_VALUE; 844 long maxBytes=1; 845 long sumBytes=0; 846 long minMem=Integer.MAX_VALUE; 847 long maxMem=1; 848 long sumMem=0; 849 long minLen=Integer.MAX_VALUE; 850 long maxLen=1; 851 long sumLen=0; 852 long minQLen=Integer.MAX_VALUE; 853 long maxQLen=1; 854 long sumQLen=0; 855 long minHdr=Integer.MAX_VALUE; 856 long maxHdr=1; 857 long sumHdr=0; 858 long readCount=0; 859 860 BitSet qualities=new BitSet(); 861 862 if(reads==null || reads.size()<1){ 863 minBytes=maxBytes=minLen=maxLen=minMem=maxMem=minHdr=maxHdr=1; 864 }else{ 865 for(Read r1 : reads){ 866 long x; 867 readCount++; 868 869 x=r1.length(); 870 minLen=min(minLen, x); 871 maxLen=max(maxLen, x); 872 sumLen+=x; 873 874 x=r1.qlength(); 875 minQLen=min(minQLen, x); 876 maxQLen=max(maxQLen, x); 877 sumQLen+=x; 878 879 if(x>0){ 880 for(byte q : r1.quality){qualities.set(q);} 881 } 882 883 x=r1.countBytes(); 884 minMem=min(minMem, x); 885 maxMem=max(maxMem, x); 886 sumMem+=x; 887 888 x=r1.id.length(); 889 minHdr=min(minHdr, x); 890 maxHdr=max(maxHdr, x); 891 sumHdr+=x; 892 893 x=r1.countFastqBytes(); 894 minBytes=min(minBytes, x); 895 maxBytes=max(maxBytes, x); 896 sumBytes+=x; 897 898 Read r2=r1.mate; 899 if(r2!=null){ 900 readCount++; 901 902 x=r2.length(); 903 minLen=min(minLen, x); 904 maxLen=max(maxLen, x); 905 sumLen+=x; 906 907 x=r2.qlength(); 908 minQLen=min(minQLen, x); 909 maxQLen=max(maxQLen, x); 910 sumQLen+=x; 911 912 if(x>0){ 913 for(byte q : r2.quality){qualities.set(q);} 914 } 915 916 x=r2.countBytes(); 917 minMem=min(minMem, x); 918 maxMem=max(maxMem, x); 919 sumMem+=x; 920 921 x=r2.id.length(); 922 minHdr=min(minHdr, x); 923 maxHdr=max(maxHdr, x); 924 sumHdr+=x; 925 926 x=r2.countFastqBytes(); 927 minBytes=min(minBytes, x); 928 maxBytes=max(maxBytes, x); 929 sumBytes+=x; 930 } 931 } 932 } 933 934 int numQualities=Tools.max(2, qualities.cardinality()); 935 double bitsPerQuality=(Math.log(numQualities)/Math.log(2)); 936 boolean binned=numQualities<=8; 937 938 double compressedSize; 939 if(ff.compressed()){ 940 compressedSize=0.125*( //bytes per bit 941 0.2*sumHdr //Repetitive header characters 942 +(lowComplexity ? 0.5 : 1.5)*sumLen 943 +(binned ? 0.5 : 2.5)*sumQLen 944 +(4*6*readCount) //@, +, and newline 945 +(8*2*readCount) //Actual information content of a typical header 946 ); 947 if(ff.bz2() || ff.fqz()){ 948 compressedSize*=0.83; 949 } 950 }else{ 951 compressedSize=sumBytes; 952 } 953 double memRatio=(sumMem+readCount*extraOverheadPerRead)/compressedSize; 954 double diskRatio=sumBytes/compressedSize; 955 956 long memEstimate=(long)(memRatio*size); 957 long diskEstimate=(long)(diskRatio*size); 958 double readRatio=readCount/(double)(Tools.max(1, sumBytes)); 959 long readEstimate=(long)(readRatio*diskEstimate); 960 // assert(false) : readCount+", "+sumBytes+", "+readRatio+", "+size; 961 962 // System.err.println("compressedSize="+compressedSize); 963 // System.err.println("memRatio="+memRatio); 964 // System.err.println("diskRatio="+diskRatio); 965 966 // assert(false) : diskEstimate+", "+minBytes+", "+ 967 // double worstCase=estimate*1.75; 968 return new double[] {memEstimate, diskEstimate, memRatio, diskRatio, readEstimate}; 969 } 970 nextBoolean(Random randy)971 public static final boolean nextBoolean(Random randy){ 972 return randy.nextBoolean(); 973 // int r=randy.nextInt()&0x7FFFFFFF; 974 // return r%294439>=147219; 975 } 976 inverse(float[] array)977 public static float[] inverse(float[] array) { 978 float[] out=new float[array.length]; 979 for(int i=0; i<array.length; i++){ 980 // out[i]=1/max(array[i], 1000000000f); //What was this line for? Changed to to below line. 981 out[i]=1/array[i]; 982 } 983 return out; 984 } 985 inverse(double[] array)986 public static double[] inverse(double[] array) { 987 double[] out=new double[array.length]; 988 for(int i=0; i<array.length; i++){ 989 out[i]=1/array[i]; 990 } 991 return out; 992 } 993 994 /** Ensures headers consist of printable ASCII characters. */ checkHeader(String s)995 public static boolean checkHeader(String s){ 996 if(s==null || s.length()<1){return false;} 997 boolean ok=true; 998 for(int i=0; i<s.length() && ok; i++){ 999 char c=s.charAt(i); 1000 ok=(c>=32 && c<=126); 1001 } 1002 return ok; 1003 } 1004 1005 /** Changes headers to consist of printable ASCII characters. */ fixHeader(String s, boolean allowNull, boolean processAssertions)1006 public static String fixHeader(String s, boolean allowNull, boolean processAssertions){ 1007 // assert(false) : new String(specialChars); 1008 if(checkHeader(s)){return s;} 1009 if(s==null || s.length()==0){ 1010 if(processAssertions && !allowNull){KillSwitch.kill("Sequence found with null header (unfixable). To bypass, set allownullheader=true.");} 1011 return ""; 1012 } 1013 StringBuilder sb=new StringBuilder(s.length()); 1014 for(int i=0; i<s.length(); i++){ 1015 final char c=s.charAt(i), d; 1016 1017 if(c>=0 && c<=255){ 1018 d=specialChars[c]; 1019 }else{ 1020 d='X'; 1021 } 1022 // System.err.println(c+"="+(int)c); 1023 sb.append(d); 1024 } 1025 return sb.toString(); 1026 } 1027 secondHighestPosition(int[] array)1028 public static int secondHighestPosition(int[] array) { 1029 int maxP, maxP2; 1030 if(array[0]>=array[1]){ 1031 maxP=0; 1032 maxP2=1; 1033 }else{ 1034 maxP=1; 1035 maxP2=0; 1036 } 1037 for(int i=2; i<array.length; i++){ 1038 int x=array[i]; 1039 if(x>array[maxP2]){ 1040 if(x>=array[maxP]){ 1041 maxP2=maxP; 1042 maxP=i; 1043 }else{ 1044 maxP2=i; 1045 } 1046 } 1047 } 1048 return maxP2; 1049 } 1050 1051 1052 /** 1053 * Returns this file name if it is a file, or all the files in the directory if it is a directory. 1054 * @param b 1055 * @param fasta 1056 * @param fastq 1057 * @param sam 1058 * @param any 1059 * @return A list of files 1060 */ getFileOrFiles(String b, ArrayList<String> list, boolean fasta, boolean fastq, boolean sam, boolean any)1061 public static ArrayList<String> getFileOrFiles(String b, ArrayList<String> list, boolean fasta, boolean fastq, boolean sam, boolean any){ 1062 if(list==null){list=new ArrayList<String>();} 1063 String[] split=b.split(","); 1064 for(String s : split){ 1065 File f=new File(s); 1066 if(f.isDirectory()){ 1067 for(File f2 : f.listFiles()){ 1068 if(f2.isFile()){ 1069 String name=f2.getName().toLowerCase(); 1070 String ext=ReadWrite.rawExtension(name); 1071 1072 boolean pass=any || (fasta && FileFormat.isFasta(ext)) || (fastq && FileFormat.isFastq(ext)) || (sam && FileFormat.isSamOrBam(ext)); 1073 1074 if(pass){ 1075 String s2=f2.getAbsolutePath(); 1076 list.add(s2); 1077 } 1078 } 1079 } 1080 }else{ 1081 list.add(s); 1082 } 1083 } 1084 return list; 1085 } 1086 toAdapterList(String name, int maxLength)1087 public static ArrayList<byte[]> toAdapterList(String name, int maxLength){ 1088 if(maxLength<1){maxLength=Integer.MAX_VALUE;} 1089 if(name==null){return null;} 1090 String[] split; 1091 1092 LinkedHashSet<String> set=new LinkedHashSet<String>(); //Prevents duplicates 1093 if(new File(name).exists()){ 1094 split=new String[] {name}; 1095 }else{ 1096 split=name.split(","); 1097 } 1098 for(String s : split){ 1099 if(new File(s).exists()){ 1100 ArrayList<Read> reads=ReadInputStream.toReads(s, FileFormat.FASTA, -1); 1101 for(Read r : reads){ 1102 if(r!=null && r.length()>0){ 1103 byte[] array=checkAdapter(r.bases, maxLength); 1104 if(array.length>0){set.add(new String(array));} 1105 } 1106 } 1107 }else{ 1108 byte[] array=checkAdapter(s.getBytes(), maxLength); 1109 if(array.length>0){set.add(new String(array));} 1110 } 1111 } 1112 1113 if(set.isEmpty()){return null;} 1114 ArrayList<byte[]> list=new ArrayList<byte[]>(set.size()); 1115 for(String s : set){ 1116 list.add(s.getBytes()); 1117 } 1118 return list; 1119 } 1120 checkAdapter(byte[] array, int maxLength)1121 private static byte[] checkAdapter(byte[] array, int maxLength){ 1122 if(array.length>maxLength){array=Arrays.copyOf(array, maxLength);} 1123 1124 for(int i=0; i<array.length; i++){ 1125 byte b=array[i]; 1126 int x=AminoAcid.baseToNumberExtended[b]; 1127 if(x<0 || !Tools.isLetter(b) || !Tools.isUpperCase(b)){ 1128 throw new RuntimeException("Invalid nucleotide "+(char)b+" in literal sequence "+new String(array)+"\n" 1129 + "If this was supposed to be a filename, the file was not found."); 1130 } 1131 if(AminoAcid.baseToNumber[b]<0){array[i]='N';}//Degenerate symbols become N 1132 } 1133 1134 int trailingNs=0; 1135 for(int i=array.length-1; i>=0; i--){ 1136 if(array[i]=='N'){trailingNs++;} 1137 } 1138 if(trailingNs>0){ 1139 array=Arrays.copyOf(array, array.length-trailingNs); 1140 } 1141 return array; 1142 } 1143 toAdapters(String name, final int maxLength)1144 public static byte[][] toAdapters(String name, final int maxLength){ 1145 ArrayList<byte[]> list=toAdapterList(name, maxLength); 1146 return list==null ? null : list.toArray(new byte[list.size()][]); 1147 } 1148 1149 /** Add names to a collection. 1150 * This can be a literal name, or a text file with one name per line, 1151 * or a fastq, fasta, or sam file, in which case the read names will be added. 1152 * @param s 1153 * @param names 1154 * @return Number added 1155 */ addNames(String s, Collection<String> names, boolean allowSubprocess)1156 public static final int addNames(String s, Collection<String> names, boolean allowSubprocess){ 1157 int added=0; 1158 if(new File(s).exists()){ 1159 1160 int[] vector=FileFormat.testFormat(s, false, false); 1161 final int type=vector[0]; 1162 ByteFile bf=ByteFile.makeByteFile(s, allowSubprocess); 1163 1164 if(type==FileFormat.FASTQ){ 1165 int num=0; 1166 for(byte[] line=bf.nextLine(); line!=null; line=bf.nextLine(), num++){ 1167 if((num&3)==0 && line.length>0){ 1168 names.add(new String(line, 1, line.length-1)); 1169 } 1170 } 1171 }else if(type==FileFormat.FASTA){ 1172 for(byte[] line=bf.nextLine(); line!=null; line=bf.nextLine()){ 1173 if(line.length>0 && line[0]=='>'){ 1174 names.add(new String(line, 1, line.length-1)); 1175 } 1176 } 1177 }else if(type==FileFormat.SAM){ 1178 for(byte[] line=bf.nextLine(); line!=null; line=bf.nextLine()){ 1179 if(line.length>0 && line[0]!='@'){ 1180 String name=SamLine.parseNameOnly(line); 1181 if(name!=null && name.length()>0){names.add(name);} 1182 } 1183 } 1184 }else{ 1185 for(byte[] line=bf.nextLine(); line!=null; line=bf.nextLine()){ 1186 if(line.length>0){ 1187 names.add(new String(line)); 1188 } 1189 } 1190 } 1191 bf.close(); 1192 }else{ 1193 added++; 1194 names.add(s); 1195 } 1196 return added; 1197 } 1198 1199 /** 1200 * Make copies of any read with ambiguous bases to represent all possible non-ambiguous representations. 1201 * @param reads A list of reads 1202 * @param minlen minimum length of reads to replicate 1203 * @return A list of reads with no ambiguity codes. 1204 */ replicateAmbiguous(ArrayList<Read> reads, int minlen)1205 public static ArrayList<Read> replicateAmbiguous(ArrayList<Read> reads, int minlen) { 1206 ArrayList<Read> out=new ArrayList<Read>(); 1207 for(Read r1 : reads){ 1208 final Read r2=r1.mate; 1209 r1.mate=null; 1210 1211 if(r1.containsUndefined() && r1.length()>=minlen){ 1212 ArrayList<Read> temp=makeReplicates(r1); 1213 out.addAll(temp); 1214 }else{ 1215 out.add(r1); 1216 } 1217 if(r2!=null){ 1218 r2.mate=null; 1219 if(r2.containsUndefined() && r2.length()>=minlen){ 1220 ArrayList<Read> temp=makeReplicates(r2); 1221 out.addAll(temp); 1222 }else{ 1223 out.add(r2); 1224 } 1225 } 1226 } 1227 return out; 1228 } 1229 1230 /** 1231 * Make copies of this read to represent all possible non-ambiguous representations. 1232 * Return a list of all fully-defined versions. 1233 * @param r A read to replicate 1234 * @return A list of reads with no ambiguity codes. 1235 */ makeReplicates(final Read r)1236 public static ArrayList<Read> makeReplicates(final Read r) { 1237 // System.err.println("\n***Called makeReplicates("+new String(r.bases)+")"); 1238 ArrayList<Read> temp=null; 1239 if(!r.containsUndefined()){ 1240 temp=new ArrayList<Read>(); 1241 temp.add(r); 1242 return temp; 1243 } 1244 final byte[] bases=r.bases; 1245 for(int i=0; i<r.bases.length; i++){ 1246 byte b=bases[i]; 1247 if(!AminoAcid.isFullyDefined(b)){ 1248 temp=replicateAtPosition(r, i); 1249 break; 1250 } 1251 } 1252 assert(temp!=null); 1253 final ArrayList<Read> out; 1254 if(temp.get(0).containsUndefined()){ 1255 out=new ArrayList<Read>(); 1256 for(Read rr : temp){ 1257 out.addAll(makeReplicates(rr)); 1258 } 1259 }else{ 1260 out=temp; 1261 } 1262 return out; 1263 } 1264 1265 /** 1266 * @param r A read 1267 * @param pos The position of an ambiguous base 1268 * @param out A list of replicates 1269 */ replicateAtPosition(final Read r, final int pos)1270 private static ArrayList<Read> replicateAtPosition(final Read r, final int pos) { 1271 // System.err.println("Called replicateAtPosition("+new String(r.bases)+", "+pos+")"); 1272 if(r.quality!=null){ 1273 r.quality[pos]=Shared.FAKE_QUAL; 1274 } 1275 final byte[] bases=r.bases; 1276 final byte b=bases[pos]; 1277 final int num=AminoAcid.baseToNumberExtended[b]&0xF; 1278 assert(num>0 && Integer.bitCount(num)>1 && Integer.bitCount(num)<=4) : b+", "+num; 1279 ArrayList<Read> out=new ArrayList<Read>(4); 1280 for(int i=0; i<4; i++){ 1281 int mask=(1<<i); 1282 if((num&mask)==mask){ 1283 Read rr=r.clone(); 1284 rr.bases=rr.bases.clone(); 1285 rr.bases[pos]=AminoAcid.numberToBase[i]; 1286 // System.err.println("Added clone ("+new String(rr.bases)+")"); 1287 out.add(rr); 1288 } 1289 } 1290 return out; 1291 } 1292 1293 /** Returns index of first matching location */ locationOf(final byte[] big, final byte[] small, final int maxMismatches)1294 public static final int locationOf(final byte[] big, final byte[] small, final int maxMismatches){ 1295 int x=containsForward(big, small, maxMismatches); 1296 return x>=0 ? x : containsReverse(big, small, maxMismatches); 1297 } 1298 1299 /** Returns index of first matching location */ containsForward(final byte[] big, final byte[] small, final int maxMismatches)1300 public static final int containsForward(final byte[] big, final byte[] small, final int maxMismatches){ 1301 final int ilimit=big.length-small.length; 1302 // System.err.println("Entering: ilimit="+ilimit+", maxMismatches="+maxMismatches+", small.length="+small.length); 1303 for(int i=0; i<=ilimit; i++){ 1304 int mismatches=0; 1305 for(int j=0; j<small.length && mismatches<=maxMismatches; j++){ 1306 final byte b=big[i+j]; 1307 final byte s=small[j]; 1308 if(b!=s){mismatches++;} 1309 } 1310 if(mismatches<=maxMismatches){ 1311 // System.err.println("Returning "+i+", mismatches="+mismatches); 1312 return i; 1313 } 1314 } 1315 return -1; 1316 } 1317 1318 /** Returns index of first matching location */ containsReverse(final byte[] big, final byte[] small, final int maxMismatches)1319 public static final int containsReverse(final byte[] big, final byte[] small, final int maxMismatches){ 1320 final int ilimit=big.length-small.length; 1321 for(int i=0; i<=ilimit; i++){ 1322 int mismatches=0; 1323 for(int j=0, k=small.length-1; j<small.length && mismatches<=maxMismatches; j++, k--){ 1324 final byte b=big[i+j]; 1325 final byte s=AminoAcid.baseToComplementExtended[small[k]]; 1326 if(b!=s){mismatches++;} 1327 } 1328 if(mismatches<=maxMismatches){return i;} 1329 } 1330 return -1; 1331 } 1332 1333 /** Removes null elements by shrinking the list. May change list order. */ condense(ArrayList<X> list)1334 public static final <X> int condense(ArrayList<X> list){ 1335 if(list==null || list.size()==0){return 0;} 1336 int removed=0; 1337 1338 for(int i=list.size()-1; i>0; i--){ 1339 if(list.get(i)==null){ 1340 removed++; 1341 X last=list.get(list.size()-1); 1342 list.set(i, last); 1343 list.remove(list.size()-1); 1344 } 1345 } 1346 return removed; 1347 } 1348 1349 /** Removes null elements by shrinking the list. Will not change list order. */ condenseStrict(ArrayList<X> list)1350 public static final <X> int condenseStrict(ArrayList<X> list){ 1351 if(list==null || list.size()==0){return 0;} 1352 int removed=0; 1353 1354 int insertPos=0; 1355 for(int i=0; i<list.size(); i++){ 1356 X x=list.get(i); 1357 if(x!=null){ 1358 if(insertPos!=i){ 1359 assert(insertPos<i); 1360 while(list.get(insertPos)!=null){insertPos++;} 1361 assert(insertPos<i && list.get(insertPos)==null) : insertPos+", "+i; //slow, temporary 1362 list.set(i, null); 1363 list.set(insertPos, x); 1364 } 1365 insertPos++; 1366 }else{ 1367 removed++; 1368 } 1369 } 1370 for(int i=0; i<removed; i++){ 1371 X x=list.remove(list.size()-1); 1372 assert(x==null); 1373 } 1374 return removed; 1375 } 1376 1377 /** Removes null elements by shrinking the array. Will not change array order. */ 1378 public static final <X> X[] condenseStrict(X[] array){ 1379 if(array==null){return array;} 1380 int nulls=0; 1381 for(X x : array){if(x==null){nulls++;}} 1382 if(nulls==0){return array;} 1383 X[] array2=Arrays.copyOf(array, array.length-nulls); 1384 1385 int j=0; 1386 for(X x : array){ 1387 if(x!=null){ 1388 array2[j]=x; 1389 j++; 1390 } 1391 } 1392 return array2; 1393 } 1394 1395 /** Creates a new list without null elements. */ 1396 public static final <X> ArrayList<X> condenseNew(ArrayList<X> list){ 1397 ArrayList<X> temp=new ArrayList<X>(list.size()); 1398 for(X x : list){ 1399 if(x!=null){temp.add(x);} 1400 } 1401 return temp; 1402 } 1403 1404 //This should also be correct. I'm not sure which is faster. 1405 // /** Removes null elements by shrinking the list. Will not change list order. */ 1406 // public static final <X> int condenseStrict(ArrayList<X> list){ 1407 // if(list==null || list.size()==0){return 0;} 1408 // int removed=0; 1409 // int last=0; 1410 // 1411 // for(int i=0; i<list.size(); i++){ 1412 // X x=list.get(i); 1413 // if(x==null){ 1414 // removed++; 1415 // }else{ 1416 // while(last<i && list.get(last)!=null){last++;} 1417 // assert(last==i || list.get(last)==null); 1418 // if(last!=i){ 1419 // assert(last<i); 1420 // list.set(last, x); 1421 // list.set(i, null); 1422 // } 1423 // } 1424 // } 1425 // for(int i=0; i<removed; i++){ 1426 // X x=list.remove(list.size()-1); 1427 // assert(x==null); 1428 // } 1429 // return removed; 1430 // } 1431 1432 1433 // public static final int trimSiteList(ArrayList<SiteScore> ssl, float fractionOfMax, boolean retainPaired){ 1434 //// assert(false); 1435 // if(ssl==null || ssl.size()==0){return -999999;} 1436 // if(ssl.size()==1){return ssl.get(0).score;} 1437 // int maxScore=-999999; 1438 // for(SiteScore ss : ssl){ 1439 // maxScore=Tools.max(maxScore, ss.score); 1440 // } 1441 // 1442 // int cutoff=(int) (maxScore*fractionOfMax); 1443 // trimSitesBelowCutoff(ssl, cutoff, retainPaired); 1444 //// trimSitesBelowCutoffInplace(ssl, cutoff); 1445 // return maxScore; 1446 // } 1447 1448 /** minSitesToRetain should be set to 1 if the list is not sorted by score (for efficiency of removal). Otherwise, it can be higher. */ 1449 public static final int trimSiteList(ArrayList<SiteScore> ssl, float fractionOfMax, boolean retainPaired, boolean retainSemiperfect, 1450 int minSitesToRetain, int maxSitesToRetain){ 1451 // assert(false); 1452 if(ssl==null || ssl.size()==0){return -999999;} 1453 if(ssl.size()==1){return ssl.get(0).score;} 1454 int maxScore=-999999; 1455 1456 if(minSitesToRetain>1 && minSitesToRetain<ssl.size()){ 1457 assert(inOrder(ssl)); 1458 maxScore=ssl.get(0).score; 1459 }else{ 1460 for(SiteScore ss : ssl){ 1461 maxScore=Tools.max(maxScore, ss.score); 1462 } 1463 } 1464 1465 int cutoff=(int) (maxScore*fractionOfMax); 1466 trimSitesBelowCutoff(ssl, cutoff, retainPaired, retainSemiperfect, minSitesToRetain, maxSitesToRetain); 1467 return maxScore; 1468 } 1469 1470 /** minSitesToRetain should be set to 1 if the list is not sorted by score. Otherwise, it can be higher. */ 1471 public static final void trimSiteListByMax(ArrayList<SiteScore> ssl, int cutoff, boolean retainPaired, boolean retainSemiperfect, 1472 int minSitesToRetain, int maxSitesToRetain){ 1473 // assert(false); 1474 if(ssl==null || ssl.size()==0){return;} 1475 if(ssl.size()==1){return;} 1476 1477 trimSitesBelowCutoff(ssl, cutoff, retainPaired, retainSemiperfect, minSitesToRetain, maxSitesToRetain); 1478 } 1479 1480 public static final <X extends Comparable<? super X>> boolean inOrder(ArrayList<X> list){ 1481 if(list==null || list.size()<2){return true;} 1482 for(int i=1; i<list.size(); i++){ 1483 X xa=list.get(i-1); 1484 X xb=list.get(i); 1485 if(xa.compareTo(xb)>0){return false;} 1486 } 1487 return true; 1488 } 1489 1490 1491 1492 public static final int mergeDuplicateSites(ArrayList<SiteScore> list, boolean doAssertions, boolean mergeDifferentGaps){ 1493 if(list==null || list.size()<2){return 0;} 1494 Shared.sort(list, SiteScore.PCOMP); 1495 1496 int removed=0; 1497 1498 SiteScore a=list.get(0); 1499 for(int i=1; i<list.size(); i++){ 1500 SiteScore b=list.get(i); 1501 if(a.positionalMatch(b, true)){ 1502 1503 if(doAssertions){ 1504 if(!(a.perfect==b.perfect || 1505 (a.perfect && (a.score>b.score || a.slowScore>b.slowScore)))){ 1506 throw new RuntimeException("\n"+SiteScore.header()+"\n"+a.toText()+"\n"+b.toText()+"\n"); 1507 } 1508 1509 assert(a.perfect==b.perfect || 1510 (a.perfect && (a.score>b.score || a.slowScore>b.slowScore))) : 1511 "\n"+SiteScore.header()+"\n"+a.toText()+"\n"+b.toText()+"\n"; 1512 } 1513 1514 a.setSlowScore(max(a.slowScore, b.slowScore)); 1515 // a.setPairedScore(a.pairedScore<=0 && b.pairedScore<=0 ? 0 : max(a.slowScore+1, a.pairedScore, b.pairedScore)); 1516 a.setPairedScore((a.pairedScore<=a.slowScore && b.pairedScore<=a.slowScore) ? 0 : max(0, a.pairedScore, b.pairedScore)); 1517 a.setScore(max(a.score, b.score)); 1518 a.perfect=(a.perfect || b.perfect); 1519 a.semiperfect=(a.semiperfect || b.semiperfect); 1520 1521 removed++; 1522 list.set(i, null); 1523 }else if(mergeDifferentGaps && a.positionalMatch(b, false)){ //Same outermost boundaries, different gaps 1524 1525 SiteScore better=null; 1526 if(a.score!=b.score){ 1527 better=(a.score>b.score ? a : b); 1528 }else if(a.slowScore!=b.slowScore){ 1529 better=(a.slowScore>b.slowScore ? a : b); 1530 }else if(a.pairedScore!=b.pairedScore){ 1531 better=(a.pairedScore>b.pairedScore ? a : b); 1532 }else{ 1533 better=a; 1534 } 1535 1536 a.setSlowScore(max(a.slowScore, b.slowScore)); 1537 a.setPairedScore((a.pairedScore<=a.slowScore && b.pairedScore<=a.slowScore) ? 0 : max(0, a.pairedScore, b.pairedScore)); 1538 a.setScore(max(a.score, b.score)); 1539 a.perfect=(a.perfect || b.perfect);//TODO: This is not correct. And perfect sites should not have gaps anyway. 1540 a.semiperfect=(a.semiperfect || b.semiperfect); 1541 a.gaps=better.gaps; 1542 1543 removed++; 1544 list.set(i, null); 1545 } 1546 else{ 1547 a=b; 1548 } 1549 } 1550 1551 // if(removed>0){condense(list);} 1552 if(removed>0){condenseStrict(list);} 1553 return removed; 1554 } 1555 1556 1557 1558 public static final int subsumeOverlappingSites(ArrayList<SiteScore> list, boolean subsumeIfOnlyStartMatches, boolean subsumeInexact){ 1559 if(list==null || list.size()<2){return 0;} 1560 Shared.sort(list, SiteScore.PCOMP); 1561 1562 int removed=0; 1563 1564 1565 for(int i=0; i<list.size(); i++){ 1566 SiteScore a=list.get(i); 1567 1568 assert(a==null || !a.perfect || a.semiperfect); 1569 1570 boolean overlappingA=true; 1571 if(a!=null){ 1572 for(int j=i+1; overlappingA && j<list.size(); j++){ 1573 SiteScore b=list.get(j); 1574 assert(b==null || !b.perfect || b.semiperfect); 1575 if(b!=null){ 1576 overlappingA=(a.chrom==b.chrom && b.start<a.stop && b.stop>a.start); 1577 if(overlappingA && a.strand==b.strand){ 1578 1579 SiteScore better=null; 1580 if(a.perfect!=b.perfect){ 1581 better=a.perfect ? a : b; 1582 }else if(a.semiperfect!=b.semiperfect){ 1583 better=a.semiperfect ? a : b; 1584 }else if(a.score!=b.score){ 1585 better=(a.score>b.score ? a : b); 1586 }else if(a.slowScore!=b.slowScore){ 1587 better=(a.slowScore>b.slowScore ? a : b); 1588 }else if(a.pairedScore!=b.pairedScore){ 1589 better=(a.pairedScore>b.pairedScore ? a : b); 1590 }else if(a.pairedScore!=b.pairedScore){ 1591 better=(a.quickScore>b.quickScore ? a : b); 1592 }else{ 1593 better=a; 1594 } 1595 1596 // if((a.perfect && b.perfect) || (a.semiperfect && b.semiperfect)){ 1597 if(a.semiperfect && b.semiperfect){ 1598 if(a.start==b.start || a.stop==b.stop){ 1599 list.set(i, better); 1600 list.set(j, null); 1601 removed++; 1602 a=better; 1603 }else{ 1604 //retain both of them 1605 } 1606 }else if(a.perfect || b.perfect){ 1607 list.set(i, better); 1608 list.set(j, null); 1609 removed++; 1610 a=better; 1611 }else if(a.semiperfect || b.semiperfect){ 1612 if(a.start==b.start && a.stop==b.stop){ 1613 list.set(i, better); 1614 list.set(j, null); 1615 removed++; 1616 a=better; 1617 }else{ 1618 //retain both of them 1619 } 1620 }else if(subsumeInexact || (a.start==b.start && (subsumeIfOnlyStartMatches || a.stop==b.stop))){ 1621 assert(!a.semiperfect && !a.perfect && !b.semiperfect && !b.perfect); 1622 a.setLimits(min(a.start, b.start), max(a.stop, b.stop)); 1623 a.setSlowScore(max(a.slowScore, b.slowScore)); 1624 a.setPairedScore(a.pairedScore<=0 && b.pairedScore<=0 ? 0 : max(a.slowScore+1, a.pairedScore, b.pairedScore)); 1625 a.quickScore=max(a.quickScore, b.quickScore); 1626 a.setScore(max(a.score, b.score, a.pairedScore)); 1627 a.gaps=better.gaps;//Warning! Merging gaps would be better; this could cause out-of-bounds. 1628 //TODO: Test for a subsumption length limit. 1629 list.set(j, null); 1630 removed++; 1631 } 1632 } 1633 } 1634 } 1635 } 1636 } 1637 1638 // if(removed>0){condense(list);} 1639 if(removed>0){condenseStrict(list);} 1640 return removed; 1641 } 1642 1643 1644 1645 public static final int removeOverlappingSites(ArrayList<SiteScore> list, boolean requireAMatchingEnd){ 1646 if(list==null || list.size()<2){return 0;} 1647 Shared.sort(list, SiteScore.PCOMP); 1648 1649 int removed=0; 1650 1651 1652 for(int i=0; i<list.size(); i++){ 1653 SiteScore a=list.get(i); 1654 boolean overlappingA=true; 1655 if(a!=null){ 1656 for(int j=i+1; overlappingA && j<list.size(); j++){ 1657 SiteScore b=list.get(j); 1658 if(b!=null){ 1659 overlappingA=(a.chrom==b.chrom && b.start<a.stop && b.stop>a.start); 1660 if(overlappingA && a.strand==b.strand){ 1661 1662 SiteScore better=null; 1663 if(a.perfect!=b.perfect){ 1664 better=a.perfect ? a : b; 1665 }else if(a.score!=b.score){ 1666 better=(a.score>b.score ? a : b); 1667 }else if(a.slowScore!=b.slowScore){ 1668 better=(a.slowScore>b.slowScore ? a : b); 1669 }else if(a.pairedScore!=b.pairedScore){ 1670 better=(a.pairedScore>b.pairedScore ? a : b); 1671 }else if(a.pairedScore!=b.pairedScore){ 1672 better=(a.quickScore>b.quickScore ? a : b); 1673 }else{ 1674 better=a; 1675 } 1676 1677 if(a.start==b.start && a.stop==b.stop){ 1678 list.set(i, better); 1679 list.set(j, null); 1680 a=better; 1681 removed++; 1682 }else if(a.start==b.start || a.stop==b.stop){ //In this case they cannot both be perfect 1683 list.set(i, better); 1684 list.set(j, null); 1685 a=better; 1686 removed++; 1687 }else if(!requireAMatchingEnd && a.score!=b.score){ 1688 list.set(i, better); 1689 list.set(j, null); 1690 a=better; 1691 removed++; 1692 } 1693 } 1694 } 1695 } 1696 } 1697 } 1698 1699 // if(removed>0){condense(list);} 1700 if(removed>0){condenseStrict(list);} 1701 return removed; 1702 } 1703 1704 1705 1706 /** Returns the number of sitescores in the list within "thresh" of the top score. Assumes list is sorted descending. 1707 * This is used to determine whether a mapping is ambiguous. */ countTopScores(ArrayList<SiteScore> list, int thresh)1708 public static final int countTopScores(ArrayList<SiteScore> list, int thresh){ 1709 assert(thresh>=0) : thresh; 1710 if(list==null || list.isEmpty()){return 0;} 1711 int count=1; 1712 final SiteScore ss=list.get(0); 1713 final int limit=ss.score-thresh; 1714 1715 for(int i=1; i<list.size(); i++){ 1716 SiteScore ss2=list.get(i); 1717 if(ss2.score<limit){break;} 1718 if(ss.start!=ss2.start && ss.stop!=ss2.stop){ //Don't count mappings to the same location 1719 count++; 1720 } 1721 } 1722 return count; 1723 } 1724 1725 1726 1727 /** Assumes list is sorted by NON-PAIRED score. 1728 * Returns number removed. */ removeLowQualitySitesPaired(ArrayList<SiteScore> list, int maxSwScore, float multSingle, float multPaired)1729 public static final int removeLowQualitySitesPaired(ArrayList<SiteScore> list, int maxSwScore, float multSingle, float multPaired){ 1730 if(list==null || list.size()==0){return 0;} 1731 1732 assert(multSingle>=multPaired); 1733 1734 int initialSize=list.size(); 1735 final int swScoreThresh=(int)(maxSwScore*multSingle); //Change low-quality alignments to no-hits. 1736 final int swScoreThreshPaired=(int)(maxSwScore*multPaired); 1737 if(list.get(0).score<swScoreThreshPaired){list.clear(); return initialSize;} 1738 1739 for(int i=list.size()-1; i>=0; i--){ 1740 SiteScore ss=list.get(i); 1741 assert(ss.score==ss.slowScore) : ss.quickScore+", "+ss.slowScore+", "+ss.pairedScore+", "+ss.score+"\n"+ss; 1742 assert(i==0 || ss.slowScore<=list.get(i-1).slowScore) : "List is not sorted by singleton score!"; 1743 if(ss.pairedScore>0){ 1744 assert(ss.pairedScore>ss.quickScore || ss.pairedScore>ss.slowScore) : ss; 1745 if(ss.slowScore<swScoreThreshPaired){list.remove(i);} 1746 }else{ 1747 assert(ss.pairedScore<=0) : ss.toText(); 1748 if(ss.slowScore<swScoreThresh){list.remove(i);} 1749 } 1750 } 1751 1752 return initialSize-list.size(); 1753 } 1754 1755 1756 1757 // /** Assumes list is sorted by NON-PAIRED score. 1758 // * Returns number removed. */ 1759 // public static final int removeLowQualitySitesUnpaired(ArrayList<SiteScore> list, int maxSwScore, float multSingle){ 1760 // if(list==null || list.size()==0){return 0;} 1761 // 1762 // int initialSize=list.size(); 1763 // final int swScoreThresh=(int)(maxSwScore*multSingle); //Change low-quality alignments to no-hits. 1764 // if(list.get(0).score<swScoreThresh){list.clear(); return initialSize;} 1765 // 1766 //// for(int i=list.size()-1; i>=0; i--){ 1767 // for(int i=list.size()-1; i>1; i--){ 1768 // SiteScore ss=list.get(i); 1769 // assert(ss.score==ss.slowScore); 1770 // assert(i==0 || ss.slowScore<=list.get(i-1).slowScore) : "List is not sorted by singleton score!"; 1771 // assert(ss.pairedScore==0) : ss.toText(); 1772 // if(ss.slowScore<swScoreThresh){list.remove(i);} 1773 // } 1774 // 1775 // return initialSize-list.size(); 1776 // } 1777 1778 1779 /** Assumes list is sorted by NON-PAIRED score. 1780 * Returns number removed. */ removeLowQualitySitesUnpaired(ArrayList<SiteScore> list, int thresh)1781 public static final int removeLowQualitySitesUnpaired(ArrayList<SiteScore> list, int thresh){ 1782 if(list==null || list.size()==0){return 0;} 1783 1784 int initialSize=list.size(); 1785 if(list.get(0).score<thresh){list.clear(); return initialSize;} 1786 1787 // for(int i=list.size()-1; i>=0; i--){ 1788 for(int i=list.size()-1; i>1; i--){ 1789 SiteScore ss=list.get(i); 1790 assert(ss.score==ss.slowScore || (ss.score<=0 && ss.slowScore<=0)) : ss; 1791 assert(i==0 || ss.slowScore<=list.get(i-1).slowScore) : "List is not sorted by singleton score!"; 1792 assert(ss.pairedScore<=0) : ss.toText(); 1793 if(ss.slowScore<thresh){list.remove(i);} 1794 } 1795 1796 return initialSize-list.size(); 1797 } 1798 1799 1800 1801 /** Assumes list is sorted by NON-PAIRED score. 1802 * Returns number removed. */ removeLowQualitySitesPaired2(ArrayList<SiteScore> list, int maxSwScore, float multSingle, float multPaired, int expectedSites)1803 public static final int removeLowQualitySitesPaired2(ArrayList<SiteScore> list, int maxSwScore, float multSingle, float multPaired, int expectedSites){ 1804 if(list==null || list.size()==0){return 0;} 1805 1806 assert(multSingle>=multPaired); 1807 1808 int initialSize=list.size(); 1809 final int swScoreThresh=(int)(maxSwScore*multSingle); //Change low-quality alignments to no-hits. 1810 final int swScoreThreshPaired=(int)(maxSwScore*multPaired); 1811 final int swScoreThresh2=(int)(maxSwScore*multSingle*1.2f); 1812 final int swScoreThreshPaired2=(int)(maxSwScore*multPaired*1.1f); 1813 if(list.get(0).score<swScoreThreshPaired){list.clear(); return initialSize;} 1814 final int nthBest=list.get(Tools.min(list.size(), expectedSites)-1).score-maxSwScore/64; 1815 1816 for(int i=list.size()-1, min=expectedSites*2; i>min; i--){ 1817 if(list.get(i).slowScore>=nthBest){break;} 1818 list.remove(i); 1819 } 1820 1821 for(int i=list.size()-1; i>=0; i--){ 1822 SiteScore ss=list.get(i); 1823 assert(ss.score==ss.slowScore); 1824 assert(i==0 || ss.slowScore<=list.get(i-1).slowScore) : "List is not sorted by singleton score!"; 1825 if(ss.pairedScore>0){ 1826 int thresh=(i>=expectedSites ? swScoreThreshPaired2 : swScoreThreshPaired); 1827 assert(ss.pairedScore>ss.quickScore || ss.pairedScore>ss.slowScore) : ss; 1828 if(ss.slowScore<thresh){list.remove(i);} 1829 }else{ 1830 int thresh=(i>=expectedSites ? swScoreThresh2 : swScoreThresh); 1831 // assert(ss.pairedScore==0) : ss.toText(); //Disable in case of negative values 1832 if(ss.slowScore<thresh){list.remove(i);} 1833 } 1834 } 1835 1836 return initialSize-list.size(); 1837 } 1838 1839 1840 1841 /** Assumes list is sorted by NON-PAIRED score. 1842 * Returns number removed. 1843 * This has a couple of changes (like potentially removing the second-best site) that make it applicable to SKIMMER not MAPPER. 1844 * */ removeLowQualitySitesUnpaired2(ArrayList<SiteScore> list, int maxSwScore, float multSingle, int expectedSites)1845 public static final int removeLowQualitySitesUnpaired2(ArrayList<SiteScore> list, int maxSwScore, float multSingle, int expectedSites){ 1846 if(list==null || list.size()==0){return 0;} 1847 1848 for(int i=expectedSites/2; i<list.size(); i++){ 1849 if(list.get(i).perfect){expectedSites++;} 1850 } 1851 1852 int initialSize=list.size(); 1853 final int swScoreThresh=(int)(maxSwScore*multSingle); //Change low-quality alignments to no-hits. 1854 final int swScoreThresh2=(int)(maxSwScore*multSingle*1.2f); //Change low-quality alignments to no-hits. 1855 if(list.get(0).score<swScoreThresh){list.clear(); return initialSize;} 1856 final int nthBest=list.get(Tools.min(list.size(), expectedSites)-1).score-maxSwScore/64; 1857 1858 for(int i=list.size()-1, min=expectedSites*2; i>min; i--){ 1859 if(list.get(i).slowScore>=nthBest){break;} 1860 list.remove(i); 1861 } 1862 1863 // for(int i=list.size()-1; i>=0; i--){ 1864 for(int i=list.size()-1; i>=1; i--){ 1865 SiteScore ss=list.get(i); 1866 assert(ss.score==ss.slowScore); 1867 assert(i==0 || ss.slowScore<=list.get(i-1).slowScore) : "List is not sorted by singleton score!"; 1868 assert(ss.pairedScore<=0) : ss.toText(); //This was "==0" but that makes the assertion fire for negative values. 1869 int thresh=(i>=expectedSites ? swScoreThresh2 : swScoreThresh); 1870 if(ss.slowScore<thresh){list.remove(i);} 1871 } 1872 1873 return initialSize-list.size(); 1874 } 1875 1876 1877 // public static final void trimSitesBelowCutoff(ArrayList<SiteScore> ssl, int cutoff, boolean retainPaired){ 1878 // trimSitesBelowCutoff(ssl, cutoff, retainPaired, 1); 1879 // } 1880 1881 1882 // public static final void trimSitesBelowCutoff(ArrayList<SiteScore> ssl, int cutoff, boolean retainPaired, int minSitesToRetain){ 1883 //// assert(false); 1884 // assert(minSitesToRetain>=1); 1885 // if(ssl==null || ssl.size()<minSitesToRetain){return;} 1886 // 1887 // ArrayList<SiteScore> ssl2=new ArrayList<SiteScore>(ssl.size()); 1888 // for(SiteScore ss : ssl){ 1889 // if(ss.score>=cutoff || (retainPaired && ss.pairedScore>0)){ 1890 // ssl2.add(ss); 1891 // } 1892 // } 1893 // 1894 //// Shared.sort(ssl2); 1895 //// System.err.println("Cutoff: "+cutoff); 1896 //// for(SiteScore ss : ssl2){ 1897 //// System.err.print("("+ss.chrom+", "+ss.score+"), "); 1898 //// } 1899 //// System.err.println(); 1900 // 1901 // if(ssl2.size()==ssl.size()){return;} 1902 //// System.err.println("cutoff: "+cutoff+",\tsize: "+ssl.size()+" -> "+ssl2.size()); 1903 // ssl.clear(); 1904 // ssl.addAll(ssl2); 1905 // } 1906 1907 trimSitesBelowCutoff(ArrayList<SiteScore> ssl, int cutoff, boolean retainPaired, boolean retainSemiperfect, int minSitesToRetain, int maxSitesToRetain)1908 public static final void trimSitesBelowCutoff(ArrayList<SiteScore> ssl, int cutoff, boolean retainPaired, boolean retainSemiperfect, 1909 int minSitesToRetain, int maxSitesToRetain){ 1910 // assert(false); 1911 assert(minSitesToRetain>=1); 1912 assert(maxSitesToRetain>minSitesToRetain) : maxSitesToRetain+", "+minSitesToRetain+"\nError - maxsites2 must be greater than "+minSitesToRetain+"!"; 1913 if(ssl==null || ssl.size()<=minSitesToRetain){return;} 1914 while(ssl.size()>maxSitesToRetain){ssl.remove(ssl.size()-1);} 1915 1916 int removed=0; 1917 final int maxToRemove=ssl.size()-minSitesToRetain; 1918 1919 assert(minSitesToRetain==1 || inOrder(ssl)); 1920 1921 if(retainPaired){ 1922 for(int i=ssl.size()-1; i>=0; i--){ 1923 SiteScore ss=ssl.get(i); 1924 if(!retainSemiperfect || !ss.semiperfect){ 1925 if(ss.score<cutoff && ss.pairedScore<=0){ 1926 ssl.set(i, null); 1927 removed++; 1928 if(removed>=maxToRemove){ 1929 assert(removed==maxToRemove); 1930 break; 1931 } 1932 } 1933 } 1934 } 1935 }else{ 1936 for(int i=ssl.size()-1; i>=0; i--){ 1937 SiteScore ss=ssl.get(i); 1938 if(!retainSemiperfect || !ss.semiperfect){ 1939 if(ss.score<cutoff){ 1940 ssl.set(i, null); 1941 removed++; 1942 if(removed>=maxToRemove){ 1943 assert(removed==maxToRemove); 1944 break; 1945 } 1946 } 1947 } 1948 } 1949 } 1950 1951 if(removed>0){ 1952 condenseStrict(ssl); 1953 } 1954 assert(ssl.size()>=minSitesToRetain); 1955 } 1956 1957 //Messes up order 1958 // public static final void trimSitesBelowCutoffInplace(ArrayList<SiteScore> ssl, int cutoff, boolean retainPaired){ 1959 //// assert(false); 1960 // if(ssl==null || ssl.size()<2){return;} 1961 // 1962 // for(int i=0; i<ssl.size(); i++){ 1963 // SiteScore ss=ssl.get(i); 1964 // if(ss.score<cutoff && (!retainPaired || ss.pairedScore==0)){ 1965 // SiteScore temp=ssl.remove(ssl.size()-1); 1966 // if(i<ssl.size()){ 1967 // ssl.set(i, temp); 1968 // i--; 1969 // } 1970 // } 1971 // } 1972 // } 1973 toStringSafe(byte[] array)1974 public static CharSequence toStringSafe(byte[] array){ 1975 if(array==null){return "null";} 1976 StringBuilder sb=new StringBuilder(); 1977 sb.append(Arrays.toString(array)); 1978 if(array.length<1 || array[0]<32 || array[0]>126){return sb;} 1979 sb.append('\n'); 1980 for(int i=0; i<array.length; i++){ 1981 byte b=array[i]; 1982 if(b<32 || b>126){break;} 1983 sb.append((char)b); 1984 } 1985 return sb; 1986 } 1987 equals(long[] a, long[] b)1988 public static boolean equals(long[] a, long[] b){ 1989 if(a==b){return true;} 1990 if(a==null || b==null){return false;} 1991 if(a.length!=b.length){return false;} 1992 for(int i=0; i<a.length; i++){ 1993 if(a[i]!=b[i]){return false;} 1994 } 1995 return true; 1996 } 1997 equals(int[] a, int[] b)1998 public static boolean equals(int[] a, int[] b){ 1999 if(a==b){return true;} 2000 if(a==null || b==null){return false;} 2001 if(a.length!=b.length){return false;} 2002 for(int i=0; i<a.length; i++){ 2003 if(a[i]!=b[i]){return false;} 2004 } 2005 return true; 2006 } 2007 equals(byte[] a, byte[] b)2008 public static boolean equals(byte[] a, byte[] b){ 2009 if(a==b){return true;} 2010 if(a==null || b==null){return false;} 2011 if(a.length!=b.length){return false;} 2012 for(int i=0; i<a.length; i++){ 2013 if(a[i]!=b[i]){return false;} 2014 } 2015 return true; 2016 } 2017 equals(String a, byte[] b)2018 public static boolean equals(String a, byte[] b){ 2019 if(a==null || b==null){ 2020 return (a==null && b==null); 2021 } 2022 if(a.length()!=b.length){return false;} 2023 for(int i=0; i<b.length; i++){ 2024 if(a.charAt(i)!=b[i]){return false;} 2025 } 2026 return true; 2027 } 2028 2029 /** 2030 * @param a 2031 * @param b 2032 * @param start 2033 * @return True if a contains b starting at start. 2034 */ contains(byte[] a, byte[] b, int start)2035 public static boolean contains(byte[] a, byte[] b, int start){ 2036 if(a==null || b==null){ 2037 return (a==null && b==null); 2038 } 2039 if(a.length<b.length+start){return false;} 2040 for(int i=start, j=0; j<b.length; i++, j++){ 2041 if(a[i]!=b[j]){return false;} 2042 } 2043 return true; 2044 } 2045 2046 /** 2047 * @param a 2048 * @param b 2049 * @param start 2050 * @return True if a contains b starting at start. 2051 */ contains(String a, String b, int start)2052 public static boolean contains(String a, String b, int start){ 2053 if(a==null || b==null){ 2054 return (a==null && b==null); 2055 } 2056 if(a.length()<b.length()+start){return false;} 2057 for(int i=start, j=0; j<b.length(); i++, j++){ 2058 if(a.charAt(i)!=b.charAt(j)){return false;} 2059 } 2060 return true; 2061 } 2062 2063 /** 2064 * @param array 2065 * @param s 2066 * @return True if the array starts with the String. 2067 */ startsWith(byte[] array, String s)2068 public static boolean startsWith(byte[] array, String s) { 2069 return startsWith(array, s, 0); 2070 } 2071 equals(byte[] array, String s)2072 public static boolean equals(byte[] array, String s) { 2073 return array.length==s.length() && startsWith(array, s, 0); 2074 } 2075 2076 /** 2077 * @param array 2078 * @param s 2079 * @return True if the array starts with s. 2080 */ startsWith(byte[] array, byte[] s)2081 public static boolean startsWith(byte[] array, byte[] s) { 2082 return startsWith(array, s, 0); 2083 } 2084 2085 /** 2086 * @param array 2087 * @param s 2088 * @return True if the array starts with the String. 2089 */ endsWith(byte[] array, String s)2090 public static boolean endsWith(byte[] array, String s) { 2091 if(s==null || array==null){return false;} 2092 if(s.length()>array.length){return false;} 2093 for(int i=s.length()-1, j=array.length-1; i>=0 && j>=0; i--, j--){ 2094 if(s.charAt(i)!=array[j]){return false;} 2095 } 2096 return true; 2097 } 2098 2099 /** 2100 * @param array 2101 * @param s 2102 * @return True if the array starts with the String. 2103 */ startsWith(byte[] array, String s, int initialPos)2104 public static boolean startsWith(byte[] array, String s, int initialPos) { 2105 if(array==null || s==null || array.length+initialPos<s.length()){return false;} 2106 for(int i=initialPos; i<s.length(); i++){ 2107 if(array[i]!=s.charAt(i)){return false;} 2108 } 2109 return true; 2110 } 2111 2112 /** 2113 * @param array 2114 * @param s 2115 * @return True if the array starts with the String. 2116 */ startsWith(byte[] array, byte[] s, int initialPos)2117 public static boolean startsWith(byte[] array, byte[] s, int initialPos) { 2118 if(array==null || s==null || array.length+initialPos<s.length){return false;} 2119 for(int i=initialPos; i<s.length; i++){ 2120 if(array[i]!=s[i]){return false;} 2121 } 2122 return true; 2123 } 2124 compare(byte[] a, byte[] b)2125 public static int compare(byte[] a, byte[] b){ 2126 if(a==b){return 0;} 2127 if(a==null){return -1;} 2128 if(b==null){return 1;} 2129 int lim=min(a.length, b.length); 2130 for(int i=0; i<lim; i++){ 2131 if(a[i]!=b[i]){return a[i]-b[i];} 2132 } 2133 return a.length-b.length; 2134 } 2135 fill(long[][][] matrix, int x)2136 public static void fill(long[][][] matrix, int x) { 2137 for(long[][] sub : matrix){ 2138 fill(sub, x); 2139 } 2140 } 2141 fill(long[][] matrix, int x)2142 public static void fill(long[][] matrix, int x) { 2143 for(long[] sub : matrix){ 2144 Arrays.fill(sub, x); 2145 } 2146 } 2147 sumInt(byte[] array)2148 public static int sumInt(byte[] array){ 2149 long x=0; 2150 for(byte y : array){x+=y;} 2151 assert(x<=Integer.MAX_VALUE && x>=Integer.MIN_VALUE) : x; 2152 return (int)x; 2153 } 2154 multiplyBy(int[] array, double mult)2155 public static void multiplyBy(int[] array, double mult) { 2156 for(int i=0; i<array.length; i++){ 2157 array[i]=(int)Math.round(array[i]+mult); 2158 } 2159 } 2160 multiplyBy(long[] array, double mult)2161 public static void multiplyBy(long[] array, double mult) { 2162 for(int i=0; i<array.length; i++){ 2163 array[i]=Math.round(array[i]+mult); 2164 } 2165 } 2166 multiplyBy(long[][] matrix, double mult)2167 public static void multiplyBy(long[][] matrix, double mult) { 2168 for(long[] array : matrix){ 2169 multiplyBy(array, mult); 2170 } 2171 } 2172 multiplyBy(long[][][] matrix, double mult)2173 public static void multiplyBy(long[][][] matrix, double mult) { 2174 for(long[][] array : matrix){ 2175 multiplyBy(array, mult); 2176 } 2177 } 2178 add(int[] array, int[] incr)2179 public static void add(int[] array, int[] incr) { 2180 for(int i=0; i<array.length; i++){ 2181 array[i]+=incr[i]; 2182 } 2183 } 2184 add(long[] array, long[] incr)2185 public static void add(long[] array, long[] incr) { 2186 for(int i=0; i<array.length; i++){ 2187 array[i]+=incr[i]; 2188 } 2189 } 2190 add(long[][] array, long[][] incr)2191 public static void add(long[][] array, long[][] incr) { 2192 for(int i=0; i<array.length; i++){ 2193 add(array[i], incr[i]); 2194 } 2195 } 2196 add(long[][][] array, long[][][] incr)2197 public static void add(long[][][] array, long[][][] incr) { 2198 for(int i=0; i<array.length; i++){ 2199 add(array[i], incr[i]); 2200 } 2201 } 2202 add(double[] array, double[] incr)2203 public static void add(double[] array, double[] incr) { 2204 for(int i=0; i<array.length; i++){ 2205 array[i]+=incr[i]; 2206 } 2207 } 2208 sum(byte[] array)2209 public static long sum(byte[] array){ 2210 if(array==null){return 0;} 2211 long x=0; 2212 for(byte y : array){x+=y;} 2213 return x; 2214 } 2215 sum(char[] array)2216 public static long sum(char[] array){ 2217 long x=0; 2218 for(char y : array){x+=y;} 2219 return x; 2220 } 2221 sum(short[] array)2222 public static long sum(short[] array){ 2223 long x=0; 2224 for(short y : array){x+=y;} 2225 return x; 2226 } 2227 cardinality(short[] array)2228 public static int cardinality(short[] array){ 2229 int x=0; 2230 for(int y : array){if(y!=0){x++;}} 2231 return x; 2232 } 2233 sum(int[] array)2234 public static long sum(int[] array){ 2235 long x=0; 2236 for(int y : array){x+=y;} 2237 return x; 2238 } 2239 sum(double[] array)2240 public static double sum(double[] array){ 2241 double x=0; 2242 for(double y : array){x+=y;} 2243 return x; 2244 } 2245 mean(int[] array)2246 public static double mean(int[] array){ 2247 return sum(array)/(double)array.length; 2248 } 2249 mean(long[] array)2250 public static double mean(long[] array){ 2251 return sum(array)/(double)array.length; 2252 } 2253 harmonicMean(int[] array)2254 public static double harmonicMean(int[] array){ 2255 double sum=0; 2256 for(int x : array){ 2257 if(x>0){sum+=1.0/x;} 2258 } 2259 return array.length/sum; 2260 } 2261 cardinality(int[] array)2262 public static int cardinality(int[] array){ 2263 int x=0; 2264 for(int y : array){if(y!=0){x++;}} 2265 return x; 2266 } 2267 weightedAverage(long[] array)2268 public static double weightedAverage(long[] array){ 2269 if(array.length<2){ 2270 return array.length==1 ? array[0] : 0; 2271 } 2272 double wsum=0; 2273 long div=0; 2274 final int mid=array.length/2; 2275 for(int i=0; i<mid; i++){ 2276 wsum+=(i+1)*(array[i]+array[array.length-i-1]); 2277 div+=(i+1)*2; 2278 } 2279 if((array.length&1)==1){ 2280 wsum+=(mid+1)*array[mid]; 2281 div+=(mid+1); 2282 } 2283 return wsum/div; 2284 } 2285 sum(int[] array, int from, int to)2286 public static long sum(int[] array, int from, int to){ 2287 long x=0; 2288 for(int i=from; i<=to; i++){x+=array[i];} 2289 return x; 2290 } 2291 sum(long[] array)2292 public static long sum(long[] array){ 2293 long x=0; 2294 for(long y : array){x+=y;} 2295 return x; 2296 } 2297 sum(long[] array, int from, int to)2298 public static long sum(long[] array, int from, int to){ 2299 long x=0; 2300 for(int i=from; i<=to; i++){x+=array[i];} 2301 return x; 2302 } 2303 sumHistogram(long[] array)2304 public static long sumHistogram(long[] array){ 2305 long x=0; 2306 for(int i=1; i<array.length; i++){ 2307 x+=(i*array[i]); 2308 } 2309 return x; 2310 } 2311 minHistogram(long[] array)2312 public static long minHistogram(long[] array){ 2313 for(int i=0; i<array.length; i++){ 2314 if(array[i]>0){return i;} 2315 } 2316 return 0; 2317 } 2318 maxHistogram(long[] array)2319 public static long maxHistogram(long[] array){ 2320 for(int i=array.length-1; i>=0; i--){ 2321 if(array[i]>0){return i;} 2322 } 2323 return 0; 2324 } 2325 sum(AtomicIntegerArray array)2326 public static long sum(AtomicIntegerArray array){ 2327 long x=0; 2328 for(int i=0; i<array.length(); i++){x+=array.get(i);} 2329 return x; 2330 } 2331 sum(AtomicLongArray array)2332 public static long sum(AtomicLongArray array){ 2333 long x=0; 2334 for(int i=0; i<array.length(); i++){x+=array.get(i);} 2335 return x; 2336 } 2337 toArray(AtomicLongArray array)2338 public static long[] toArray(AtomicLongArray array){ 2339 long[] x=new long[array.length()]; 2340 for(int i=0; i<array.length(); i++){x[i]=array.get(i);} 2341 return x; 2342 } 2343 toArray(CoverageArray array)2344 public static long[] toArray(CoverageArray array){ 2345 long[] x=new long[array.maxIndex+1]; 2346 for(int i=0; i<=array.maxIndex; i++){x[i]=array.get(i);} 2347 return x; 2348 } 2349 min(int[] array)2350 public static int min(int[] array){ 2351 int min=Integer.MAX_VALUE; 2352 for(int y : array){if(y<min){min=y;}} 2353 return min; 2354 } 2355 min(byte[] array)2356 public static byte min(byte[] array){ 2357 byte min=Byte.MAX_VALUE; 2358 for(byte y : array){if(y<min){min=y;}} 2359 return min; 2360 } 2361 intSum(int[] array)2362 public static int intSum(int[] array){ 2363 int x=0; 2364 for(int y : array){x+=y;} 2365 return x; 2366 } 2367 reverseInPlace(final byte[] array)2368 public static void reverseInPlace(final byte[] array){ 2369 if(array==null){return;} 2370 final int max=array.length/2, last=array.length-1; 2371 for(int i=0; i<max; i++){ 2372 byte temp=array[i]; 2373 array[i]=array[last-i]; 2374 array[last-i]=temp; 2375 } 2376 } 2377 reverseInPlace(final char[] array)2378 public static void reverseInPlace(final char[] array){ 2379 if(array==null){return;} 2380 final int max=array.length/2, last=array.length-1; 2381 for(int i=0; i<max; i++){ 2382 char temp=array[i]; 2383 array[i]=array[last-i]; 2384 array[last-i]=temp; 2385 } 2386 } 2387 reverseInPlace(final int[] array)2388 public static void reverseInPlace(final int[] array){ 2389 if(array==null){return;} 2390 reverseInPlace(array, 0, array.length); 2391 } 2392 reverseInPlace(final long[] array)2393 public static void reverseInPlace(final long[] array){ 2394 if(array==null){return;} 2395 reverseInPlace(array, 0, array.length); 2396 } 2397 reverseInPlace(final float[] array)2398 public static void reverseInPlace(final float[] array){ 2399 if(array==null){return;} 2400 reverseInPlace(array, 0, array.length); 2401 } 2402 reverseInPlace(final AtomicIntegerArray array)2403 public static void reverseInPlace(final AtomicIntegerArray array){ 2404 if(array==null){return;} 2405 reverseInPlace(array, 0, array.length()); 2406 } 2407 reverseInPlace(final X[] array)2408 public static <X> void reverseInPlace(final X[] array){ 2409 if(array==null){return;} 2410 reverseInPlace(array, 0, array.length); 2411 } 2412 reverseInPlace(final X[] array, final int from, final int to)2413 public static <X> void reverseInPlace(final X[] array, final int from, final int to){ 2414 if(array==null){return;} 2415 final int len=to-from; 2416 final int max=from+len/2, last=to-1; 2417 for(int i=from; i<max; i++){ 2418 X temp=array[i]; 2419 array[i]=array[last-i]; 2420 array[last-i]=temp; 2421 } 2422 } 2423 reverseInPlace(final byte[] array, final int from, final int to)2424 public static void reverseInPlace(final byte[] array, final int from, final int to){ 2425 if(array==null){return;} 2426 final int len=to-from; 2427 final int max=from+len/2, last=to-1; 2428 for(int i=from; i<max; i++){ 2429 byte temp=array[i]; 2430 array[i]=array[last-i]; 2431 array[last-i]=temp; 2432 } 2433 } 2434 reverseInPlace(final int[] array, final int from, final int to)2435 public static void reverseInPlace(final int[] array, final int from, final int to){ 2436 if(array==null){return;} 2437 final int len=to-from; 2438 final int max=from+len/2, last=to-1; 2439 for(int i=from; i<max; i++){ 2440 int temp=array[i]; 2441 array[i]=array[last-i]; 2442 array[last-i]=temp; 2443 } 2444 } 2445 reverseInPlace(final long[] array, final int from, final int to)2446 public static void reverseInPlace(final long[] array, final int from, final int to){ 2447 if(array==null){return;} 2448 final int len=to-from; 2449 final int max=from+len/2, last=to-1; 2450 for(int i=from; i<max; i++){ 2451 long temp=array[i]; 2452 array[i]=array[last-i]; 2453 array[last-i]=temp; 2454 } 2455 } 2456 reverseInPlace(final float[] array, final int from, final int to)2457 public static void reverseInPlace(final float[] array, final int from, final int to){ 2458 if(array==null){return;} 2459 final int len=to-from; 2460 final int max=from+len/2, last=to-1; 2461 for(int i=from; i<max; i++){ 2462 float temp=array[i]; 2463 array[i]=array[last-i]; 2464 array[last-i]=temp; 2465 } 2466 } 2467 reverseInPlace(final double[] array, final int from, final int to)2468 public static void reverseInPlace(final double[] array, final int from, final int to){ 2469 if(array==null){return;} 2470 final int len=to-from; 2471 final int max=from+len/2, last=to-1; 2472 for(int i=from; i<max; i++){ 2473 double temp=array[i]; 2474 array[i]=array[last-i]; 2475 array[last-i]=temp; 2476 } 2477 } 2478 reverseInPlace(final AtomicIntegerArray array, final int from, final int to)2479 public static void reverseInPlace(final AtomicIntegerArray array, final int from, final int to){ 2480 if(array==null){return;} 2481 final int len=to-from; 2482 final int max=from+len/2, last=to-1; 2483 for(int i=from; i<max; i++){ 2484 int temp=array.get(i); 2485 array.set(i, array.get(last-i)); 2486 array.set(last-i, temp); 2487 } 2488 } 2489 reverseAndCopy(final byte[] array)2490 public static byte[] reverseAndCopy(final byte[] array){ 2491 // if(array==null){return null;} 2492 // byte[] copy=Arrays.copyOf(array, array.length); 2493 // reverseInPlace(copy); 2494 // return copy; 2495 return reverseAndCopy(array, null); 2496 } 2497 reverseAndCopy(final int[] array)2498 public static int[] reverseAndCopy(final int[] array){ 2499 // if(array==null){return null;} 2500 // int[] copy=Arrays.copyOf(array, array.length); 2501 // reverseInPlace(copy); 2502 // return copy; 2503 return reverseAndCopy(array, null); 2504 } 2505 reverseAndCopy(final byte[] array, byte[] out)2506 public static byte[] reverseAndCopy(final byte[] array, byte[] out){ 2507 if(array==null){ 2508 assert(out==null); 2509 return null; 2510 } 2511 if(out==null){out=new byte[array.length];} 2512 assert(array.length==out.length && array!=out); 2513 for(int i=0, last=array.length-1; i<array.length; i++){out[i]=array[last-i];} 2514 return out; 2515 } 2516 reverseAndCopy(final int[] array, int[] out)2517 public static int[] reverseAndCopy(final int[] array, int[] out){ 2518 if(array==null){ 2519 assert(out==null); 2520 return null; 2521 } 2522 if(out==null){out=new int[array.length];} 2523 assert(array.length==out.length && array!=out); 2524 for(int i=0, last=array.length-1; i<array.length; i++){out[i]=array[last-i];} 2525 return out; 2526 } 2527 cullHighFreqEntries(int[][] data, float fractionToExclude)2528 public static void cullHighFreqEntries(int[][] data, float fractionToExclude){ 2529 if(fractionToExclude<=0){return;} 2530 int[] count=new int[data.length]; 2531 2532 long numBases=0; 2533 2534 for(int i=0; i<data.length; i++){ 2535 count[i]=(data[i]==null ? 0 : data[i].length); 2536 numBases+=count[i]; 2537 } 2538 2539 int numIndicesToRemove=((int)(numBases*fractionToExclude)); 2540 2541 Arrays.sort(count); 2542 2543 for(int i=1; i<count.length; i++){ 2544 assert(count[i]>=count[i-1]) : "\n\ncount["+i+"]="+count[i]+"\ncount["+(i-1)+"]="+count[i-1]+"\n"; 2545 } 2546 2547 int pos=count.length-1; 2548 for(int sum=0; pos>1 && sum<numIndicesToRemove; pos--){ 2549 sum+=count[pos]; 2550 } 2551 int maxLengthToKeep2=count[pos]; 2552 2553 for(int i=0; i<data.length; i++){ 2554 if(data[i]!=null && data[i].length>maxLengthToKeep2){data[i]=null;} 2555 } 2556 } 2557 findLimitForHighFreqEntries(int[][] data, float fractionToExclude)2558 public static int findLimitForHighFreqEntries(int[][] data, float fractionToExclude){ 2559 if(fractionToExclude<=0){return Integer.MAX_VALUE;} 2560 int[] count=new int[data.length]; 2561 2562 long numBases=0; 2563 2564 for(int i=0; i<data.length; i++){ 2565 count[i]=(data[i]==null ? 0 : data[i].length); 2566 numBases+=count[i]; 2567 } 2568 2569 int numIndicesToRemove=((int)(numBases*fractionToExclude)); 2570 2571 Arrays.sort(count); 2572 2573 for(int i=1; i<count.length; i++){ 2574 assert(count[i]>=count[i-1]) : "\n\ncount["+i+"]="+count[i]+"\ncount["+(i-1)+"]="+count[i-1]+"\n"; 2575 } 2576 2577 int pos=count.length-1; 2578 for(int sum=0; pos>1 && sum<numIndicesToRemove; pos--){ 2579 sum+=count[pos]; 2580 } 2581 int maxLengthToKeep2=count[pos]; 2582 2583 return maxLengthToKeep2; 2584 } 2585 cullClumpyEntries(final int[][] data, final int maxDist, final int minLength, final float fraction)2586 public static void cullClumpyEntries(final int[][] data, final int maxDist, final int minLength, final float fraction){ 2587 2588 long total=0; 2589 long removedSites=0; 2590 long removedKeys=0; 2591 2592 if(maxDist<=0){return;} 2593 for(int i=0; i<data.length; i++){ 2594 int[] array=data[i]; 2595 total+=(array==null ? 0 : array.length); 2596 if(array!=null && array.length>=minLength){ 2597 if(isClumpy(array, maxDist, fraction)){ 2598 removedSites+=array.length; 2599 removedKeys++; 2600 data[i]=null; 2601 } 2602 } 2603 } 2604 2605 // System.err.println("Removed\t"+removedSites+"\t/ "+total+"\tsites," + 2606 // " or "+String.format(Locale.ROOT, "%.4f", (removedSites*100f/total))+"%"); 2607 // System.err.println("Removed\t"+removedKeys+"\t/ "+data.length+"\tkeys," + 2608 // " or "+String.format(Locale.ROOT, "%.4f", (removedKeys*100f/data.length))+"%"); 2609 2610 } 2611 banClumpyEntries(final int[][] data, final int maxDist, final int minLength, final float fraction)2612 public static HashSet<Integer> banClumpyEntries(final int[][] data, final int maxDist, final int minLength, final float fraction){ 2613 2614 HashSet<Integer> set=new HashSet<Integer>(128); 2615 2616 long total=0; 2617 long removedSites=0; 2618 long removedKeys=0; 2619 2620 if(maxDist<=0){return set;} 2621 2622 for(int i=0; i<data.length; i++){ 2623 int[] array=data[i]; 2624 total+=(array==null ? 0 : array.length); 2625 if(array!=null && array.length>=minLength){ 2626 if(isClumpy(array, maxDist, fraction)){ 2627 removedSites+=array.length; 2628 removedKeys++; 2629 set.add(i); 2630 } 2631 } 2632 } 2633 2634 // System.err.println("Banned\t"+removedSites+"\t/ "+total+"\tsites," + 2635 // " or "+String.format(Locale.ROOT, "%.4f", (removedSites*100f/total))+"%"); 2636 // System.err.println("Banned\t"+removedKeys+"\t/ "+data.length+"\tkeys," + 2637 // " or "+String.format(Locale.ROOT, "%.4f", (removedKeys*100f/data.length))+"%"); 2638 2639 return set; 2640 2641 } 2642 isClumpy(final int[] array, final int maxDist, final float fraction)2643 public static final boolean isClumpy(final int[] array, final int maxDist, final float fraction){ 2644 if(array==null){return false;} 2645 int count=0; 2646 for(int i=1; i<array.length; i++){ 2647 int dif=array[i]-array[i-1]; 2648 if(dif<=maxDist){count++;} 2649 } 2650 return count>=(array.length*fraction); 2651 } 2652 makeLengthHistogram(int[][] x, int buckets)2653 public static int[] makeLengthHistogram(int[][] x, int buckets) { 2654 int[] lengths=new int[x.length]; 2655 long total=0; 2656 for(int i=0; i<x.length; i++){ 2657 int[] list=x[i]; 2658 if(list!=null){ 2659 lengths[i]=list.length; 2660 total+=list.length; 2661 } 2662 } 2663 Arrays.sort(lengths); 2664 2665 int[] hist=new int[buckets+1]; 2666 2667 long sum=0; 2668 int ptr=0; 2669 for(int i=0; i<buckets; i++){ 2670 long nextLimit=((total*i)+buckets/2)/buckets; 2671 while(ptr<lengths.length && sum<nextLimit){ 2672 sum+=lengths[ptr]; 2673 ptr++; 2674 } 2675 2676 hist[i]=lengths[Tools.max(0, ptr-1)]; 2677 } 2678 hist[hist.length-1]=lengths[lengths.length-1]; 2679 2680 // System.out.println(Arrays.toString(hist)); 2681 // assert(false); 2682 return hist; 2683 } 2684 toKMG(long x)2685 public static String toKMG(long x){ 2686 double div=1; 2687 String ext=""; 2688 if(x>10000000000000L){ 2689 div=1000000000000L; 2690 ext="T"; 2691 }else if(x>10000000000L){ 2692 div=1000000000L; 2693 ext="B"; 2694 }else if(x>10000000){ 2695 div=1000000; 2696 ext="M"; 2697 }else if(x>100000){ 2698 div=1000; 2699 ext="K"; 2700 } 2701 return String.format(Locale.ROOT, "%.2f", x/div)+ext; 2702 } 2703 2704 /** Replace characters in the array with different characters according to the map */ remapAndCount(byte[] remap, byte[] array)2705 public static int remapAndCount(byte[] remap, byte[] array) { 2706 if(array==null){return 0;} 2707 assert(remap!=null); 2708 int swaps=0; 2709 for(int i=0; i<array.length; i++){ 2710 byte a=array[i]; 2711 byte b=remap[a]; 2712 if(a!=b){ 2713 array[i]=b; 2714 swaps++; 2715 } 2716 } 2717 return swaps; 2718 } 2719 2720 /** Replace characters in a string with different characters according to the map */ remap(byte[] remap, String in)2721 public static String remap(byte[] remap, String in) { 2722 if(in==null){return in;} 2723 byte[] array=in.getBytes(); 2724 int x=remapAndCount(remap, array); 2725 return (x>0 ? new String(array) : in); 2726 } 2727 makeHistogram(AtomicLongArray data, int buckets)2728 public static int[] makeHistogram(AtomicLongArray data, int buckets) { 2729 long total=sum(data); 2730 long increment=total/(buckets+1); 2731 2732 int[] hist=new int[buckets+1]; 2733 2734 long sum=0; 2735 long target=increment/2; 2736 int ptr=0; 2737 for(int i=0; i<hist.length; i++){ 2738 while(ptr<data.length() && sum<target){ 2739 sum+=data.get(ptr); 2740 ptr++; 2741 } 2742 hist[i]=ptr; 2743 target+=increment; 2744 } 2745 return hist; 2746 } 2747 2748 /** TODO: This (temporarily) uses a lot of memory. Could be reduced by making an array of length max(x) and counting occurrences. */ makeLengthHistogram2(int[] x, int buckets, boolean verbose)2749 public static int[] makeLengthHistogram2(int[] x, int buckets, boolean verbose) { 2750 int[] lengths=KillSwitch.copyOf(x, x.length); 2751 long total=sum(x); 2752 Shared.sort(lengths); 2753 2754 if(verbose){ 2755 System.out.println("Length array size:\t"+x.length); 2756 System.out.println("Min value: \t"+lengths[0]); 2757 System.out.println("Med value: \t"+lengths[lengths.length/2]); 2758 System.out.println("Max value: \t"+lengths[lengths.length-1]); 2759 System.out.println("Total: \t"+total); 2760 } 2761 2762 int[] hist=new int[buckets+1]; 2763 2764 long sum=0; 2765 int ptr=0; 2766 for(int i=0; i<buckets; i++){ 2767 long nextLimit=((total*i)+buckets/2)/buckets; 2768 while(ptr<lengths.length && sum<nextLimit){ 2769 sum+=lengths[ptr]; 2770 ptr++; 2771 } 2772 2773 hist[i]=lengths[Tools.max(0, ptr-1)]; 2774 } 2775 hist[hist.length-1]=lengths[lengths.length-1]; 2776 2777 // System.out.println(Arrays.toString(hist)); 2778 // assert(false); 2779 return hist; 2780 } 2781 makeLengthHistogram3(int[] x, int buckets, boolean verbose)2782 public static int[] makeLengthHistogram3(int[] x, int buckets, boolean verbose) { 2783 int max=max(x); 2784 if(max>x.length){ 2785 Data.sysout.println("Reverted to old histogram mode."); 2786 return makeLengthHistogram2(x, buckets, verbose); 2787 } 2788 2789 int[] counts=new int[max+1]; 2790 long total=0; 2791 for(int i=0; i<x.length; i++){ 2792 int a=x[i]; 2793 if(a>=0){ 2794 counts[a]++; 2795 total+=a; 2796 } 2797 } 2798 2799 return makeLengthHistogram4(counts, buckets, total, verbose); 2800 } 2801 2802 /** Uses counts of occurrences of lengths rather than raw lengths */ makeLengthHistogram4(int[] counts, int buckets, long total, boolean verbose)2803 public static int[] makeLengthHistogram4(int[] counts, int buckets, long total, boolean verbose) { 2804 if(total<=0){ 2805 total=0; 2806 for(int i=1; i<counts.length; i++){ 2807 total+=(i*counts[i]); 2808 } 2809 } 2810 2811 if(verbose){ 2812 // System.out.println("Length array size:\t"+x.length); 2813 // System.out.println("Min value: \t"+lengths[0]); 2814 // System.out.println("Med value: \t"+lengths[lengths.length/2]); 2815 // System.out.println("Max value: \t"+lengths[lengths.length-1]); 2816 System.err.println("Total: \t"+total); 2817 } 2818 2819 int[] hist=new int[buckets+1]; 2820 2821 long sum=0; 2822 int ptr=0; 2823 for(int i=0; i<buckets; i++){ 2824 long nextLimit=((total*i)+buckets/2)/buckets; 2825 while(ptr<counts.length && sum<nextLimit){ 2826 sum+=counts[ptr]*ptr; 2827 ptr++; 2828 } 2829 2830 hist[i]=Tools.max(0, ptr-1); 2831 } 2832 hist[hist.length-1]=counts.length-1; 2833 2834 // System.out.println(Arrays.toString(hist)); 2835 // assert(false); 2836 return hist; 2837 } 2838 2839 /** 2840 * @param array 2841 * @return Array integer average 2842 */ averageInt(short[] array)2843 public static int averageInt(short[] array) { 2844 return (int)(array==null || array.length==0 ? 0 : sum(array)/array.length); 2845 } 2846 2847 /** 2848 * @param array 2849 * @return Array integer average 2850 */ averageInt(int[] array)2851 public static int averageInt(int[] array) { 2852 return (int)(array==null || array.length==0 ? 0 : sum(array)/array.length); 2853 } 2854 averageDouble(int[] array)2855 public static double averageDouble(int[] array) { 2856 return (array==null || array.length==0 ? 0 : sum(array)/(double)array.length); 2857 } 2858 2859 /** Returns the median of a histogram */ medianHistogram(int[] array)2860 public static int medianHistogram(int[] array){return percentileHistogram(array, .5);} 2861 2862 /** Returns the median of a histogram */ medianHistogram(long[] array)2863 public static int medianHistogram(long[] array){return percentileHistogram(array, .5);} 2864 2865 /** Returns the percentile of a histogram */ percentileHistogram(int[] array, double fraction)2866 public static int percentileHistogram(int[] array, double fraction){ 2867 if(array==null || array.length<1){return 0;} 2868 long target=(long)(sum(array)*fraction); 2869 long sum=0; 2870 for(int i=0; i<array.length; i++){ 2871 sum+=array[i]; 2872 if(sum>=target){ 2873 return i; 2874 } 2875 } 2876 return array.length-1; 2877 } 2878 2879 /** Returns the percentile of a histogram */ percentileHistogram(long[] array, double fraction)2880 public static int percentileHistogram(long[] array, double fraction){ 2881 if(array==null || array.length<1){return 0;} 2882 long target=(long)(sum(array)*fraction); 2883 long sum=0; 2884 for(int i=0; i<array.length; i++){ 2885 sum+=array[i]; 2886 if(sum>=target){ 2887 return i; 2888 } 2889 } 2890 return array.length-1; 2891 } 2892 calcModeHistogram(long array[])2893 public static int calcModeHistogram(long array[]){ 2894 if(array==null || array.length<1){return 0;} 2895 int median=percentileHistogram(array, 0.5); 2896 int mode=0; 2897 long modeCount=array[mode]; 2898 for(int i=1; i<array.length; i++){ 2899 long count=array[i]; 2900 if(count>modeCount || (count==modeCount && absdif(i, median)<absdif(mode, median))){ 2901 mode=i; 2902 modeCount=count; 2903 } 2904 } 2905 return mode; 2906 } 2907 absdif(int a, int b)2908 public static int absdif(int a, int b) { 2909 return a>b ? a-b : b-a; 2910 } 2911 absdif(float a, float b)2912 public static float absdif(float a, float b) { 2913 return a>b ? a-b : b-a; 2914 } 2915 absdif(double a, double b)2916 public static double absdif(double a, double b) { 2917 return a>b ? a-b : b-a; 2918 } 2919 2920 /** Uses unsigned math */ absdifUnsigned(int a, int b)2921 public static final int absdifUnsigned(int a, int b){ 2922 return (a<0 == b<0) ? a>b ? a-b : b-a : Integer.MAX_VALUE; 2923 } 2924 2925 /** True iff (a1,b1) overlaps (a2,b2) */ overlap(int a1, int b1, int a2, int b2)2926 public static final boolean overlap(int a1, int b1, int a2, int b2){ 2927 assert(a1<=b1 && a2<=b2) : a1+", "+b1+", "+a2+", "+b2; 2928 return a2<=b1 && b2>=a1; 2929 } 2930 overlapLength(int a1, int b1, int a2, int b2)2931 public static final int overlapLength(int a1, int b1, int a2, int b2){ 2932 if(!overlap(a1,b1,a2,b2)){return 0;} 2933 if(a1<=a2){ 2934 return b1>=b2 ? b2-a2+1 : b1-a2+1; 2935 }else{ 2936 return b2>=b1 ? b1-a1+1 : b2-a1+1; 2937 } 2938 } 2939 2940 /** Is (a1, b1) within (a2, b2) ? */ isWithin(int a1, int b1, int a2, int b2)2941 public static final boolean isWithin(int a1, int b1, int a2, int b2){ 2942 assert(a1<=b1 && a2<=b2) : a1+", "+b1+", "+a2+", "+b2; 2943 return a1>=a2 && b1<=b2; 2944 } 2945 constrict(int point, int a, int b)2946 public static final int constrict(int point, int a, int b){ 2947 assert(a<=b); 2948 return(point<a ? a : point>b ? b : point); 2949 } 2950 indexOf(byte[] array, char b)2951 public static final int indexOf(byte[] array, char b){ 2952 return indexOf(array, (byte)b, 0); 2953 } 2954 indexOf(byte[] array, byte b)2955 public static final int indexOf(byte[] array, byte b){ 2956 return indexOf(array, b, 0); 2957 } 2958 indexOfNth(byte[] array, byte b, int n)2959 public static final int indexOfNth(byte[] array, byte b, int n){ 2960 return indexOfNth(array, b, n, 0); 2961 } 2962 indexOfNth(byte[] array, char b, int n)2963 public static final int indexOfNth(byte[] array, char b, int n){ 2964 return indexOfNth(array, (byte)b, n, 0); 2965 } 2966 indexOf(final byte[] array, final char b, final int start)2967 public static final int indexOf(final byte[] array, final char b, final int start){return indexOf(array, (byte)b, start);} indexOf(final byte[] array, final byte b, final int start)2968 public static final int indexOf(final byte[] array, final byte b, final int start){ 2969 int i=start; 2970 while(i<array.length && array[i]!=b){i++;} 2971 return (i==array.length ? -1 : i); 2972 } 2973 indexOfNth(final byte[] array, final char b, final int n, final int start)2974 public static final int indexOfNth(final byte[] array, final char b, final int n, final int start){ 2975 return indexOfNth(array, (byte)b, n, start); 2976 } 2977 indexOfNth(final byte[] array, final byte b, final int n, final int start)2978 public static final int indexOfNth(final byte[] array, final byte b, final int n, final int start){ 2979 int i=start, seen=0; 2980 while(i<array.length && seen<n){ 2981 if(array[i]==b){seen++;} 2982 i++; 2983 } 2984 return (i==array.length ? -1 : i-1); 2985 } 2986 indexOf(final byte[] ref, final String query, final int start)2987 public static final int indexOf(final byte[] ref, final String query, final int start){ 2988 int i=start; 2989 final int lim=ref.length-query.length(); 2990 final byte first=(byte)query.charAt(0); 2991 for(; i<=lim; i++){ 2992 if(ref[i]==first && matches(ref, query, i)){return i;} 2993 } 2994 return -1; 2995 } 2996 indexOfDelimited(final byte[] ref, final String query, final int start, final byte delimiter)2997 public static final int indexOfDelimited(final byte[] ref, final String query, final int start, final byte delimiter){ 2998 // assert(false) : query+", "+start+", "+new String(ref); 2999 final int lim=ref.length-query.length(); 3000 if(matches(ref, query, start)){return start;} 3001 for(int i=start+1; i<=lim; i++){ 3002 if(ref[i]==delimiter && matches(ref, query, i+1)){ 3003 // System.err.println("Returning "+(i+1)); 3004 return i+1; 3005 } 3006 } 3007 return -1; 3008 } 3009 matches(byte[] ref, String query, int loc)3010 private static boolean matches(byte[] ref, String query, int loc){ 3011 if(ref.length-query.length()<loc){return false;} 3012 final int max=loc+query.length(); 3013 // System.err.println("Checking "+new String(ref, loc, query.length())); 3014 for(int i=0; loc<max; i++, loc++){ 3015 if(ref[loc]!=query.charAt(i)){return false;} 3016 } 3017 return true; 3018 } 3019 trimToWhitespace(byte[] array)3020 public static final byte[] trimToWhitespace(byte[] array){ 3021 if(array!=null){ 3022 int index=indexOfWhitespace(array); 3023 if(index>=0){return Arrays.copyOf(array, index);} 3024 } 3025 return array; 3026 } 3027 indexOfWhitespace(byte[] array)3028 public static final int indexOfWhitespace(byte[] array){ 3029 int i=0; 3030 while(i<array.length && !Character.isWhitespace(array[i])){i++;} 3031 return (i==array.length ? -1 : i); 3032 } 3033 trimToWhitespace(String array)3034 public static final String trimToWhitespace(String array){ 3035 if(array!=null){ 3036 int index=indexOfWhitespace(array); 3037 if(index>=0){return array.substring(0, index);} 3038 } 3039 return array; 3040 } 3041 indexOfWhitespace(String array)3042 public static final int indexOfWhitespace(String array){ 3043 int i=0; 3044 while(i<array.length() && !Character.isWhitespace(array.charAt(i))){i++;} 3045 return (i==array.length() ? -1 : i); 3046 } 3047 indexOf(char[] array, char b)3048 public static final int indexOf(char[] array, char b){ 3049 int i=0; 3050 while(i<array.length && array[i]!=b){i++;} 3051 return (i==array.length ? -1 : i); 3052 } 3053 lastIndexOf(byte[] array, byte b)3054 public static final int lastIndexOf(byte[] array, byte b){ 3055 int i=array.length-1; 3056 while(i>=0 && array[i]!=b){i--;} 3057 return i; 3058 } 3059 stringLength(long x)3060 public static final int stringLength(long x){ 3061 if(x<0){ 3062 if(x==Integer.MIN_VALUE){return 11;} 3063 return lengthOf(-x)+1; 3064 } 3065 return lengthOf(x); 3066 } 3067 stringLength(int x)3068 public static final int stringLength(int x){ 3069 if(x<0){ 3070 if(x==Long.MIN_VALUE){return 20;} 3071 return lengthOf(-x)+1; 3072 } 3073 return lengthOf(x); 3074 } 3075 lengthOf(int x)3076 public static final int lengthOf(int x){ 3077 assert(x>=0); 3078 int i=1; 3079 while(x>ilens[i]){i++;} 3080 return i; 3081 } 3082 lengthOf(long x)3083 public static final int lengthOf(long x){ 3084 assert(x>=0); 3085 int i=1; 3086 while(x>llens[i]){i++;} 3087 return i; 3088 } 3089 max(byte[] array)3090 public static final int max(byte[] array){return array[maxIndex(array)];} 3091 maxIndex(byte[] array)3092 public static final int maxIndex(byte[] array){ 3093 byte max=array[0]; 3094 int maxIndex=0; 3095 for(int i=1; i<array.length; i++){ 3096 if(array[i]>max){max=array[i];maxIndex=i;} 3097 } 3098 return maxIndex; 3099 } 3100 max(short[] array)3101 public static final int max(short[] array){return array[maxIndex(array)];} 3102 maxIndex(short[] array)3103 public static final int maxIndex(short[] array){ 3104 short max=array[0]; 3105 int maxIndex=0; 3106 for(int i=1; i<array.length; i++){ 3107 if(array[i]>max){max=array[i];maxIndex=i;} 3108 } 3109 return maxIndex; 3110 } 3111 max(int[] array)3112 public static final int max(int[] array){return array[maxIndex(array)];} 3113 maxIndex(int[] array)3114 public static final int maxIndex(int[] array){ 3115 int max=array[0], maxIndex=0; 3116 for(int i=1; i<array.length; i++){ 3117 if(array[i]>max){max=array[i];maxIndex=i;} 3118 } 3119 return maxIndex; 3120 } 3121 max(long[] array)3122 public static final long max(long[] array){return array[maxIndex(array)];} 3123 maxIndex(long[] array)3124 public static final int maxIndex(long[] array){ 3125 long max=array[0]; 3126 int maxIndex=0; 3127 for(int i=1; i<array.length; i++){ 3128 if(array[i]>max){max=array[i];maxIndex=i;} 3129 } 3130 return maxIndex; 3131 } 3132 max(double[] array)3133 public static final double max(double[] array){return array[maxIndex(array)];} 3134 maxIndex(double[] array)3135 public static final int maxIndex(double[] array){ 3136 double max=array[0]; 3137 int maxIndex=0; 3138 for(int i=1; i<array.length; i++){ 3139 if(array[i]>max){max=array[i];maxIndex=i;} 3140 } 3141 return maxIndex; 3142 } 3143 standardDeviation(long[] numbers)3144 public static final double standardDeviation(long[] numbers){ 3145 if(numbers==null || numbers.length<2){return 0;} 3146 long sum=sum(numbers); 3147 double avg=sum/(double)numbers.length; 3148 double sumdev2=0; 3149 for(int i=0; i<numbers.length; i++){ 3150 long x=numbers[i]; 3151 double dev=avg-x; 3152 sumdev2+=(dev*dev); 3153 } 3154 return Math.sqrt(sumdev2/numbers.length); 3155 } 3156 standardDeviation(double[] numbers)3157 public static final double standardDeviation(double[] numbers){ 3158 if(numbers==null || numbers.length<2){return 0;} 3159 double sum=sum(numbers); 3160 double avg=sum/(double)numbers.length; 3161 double sumdev2=0; 3162 for(int i=0; i<numbers.length; i++){ 3163 double x=numbers[i]; 3164 double dev=avg-x; 3165 sumdev2+=(dev*dev); 3166 } 3167 return Math.sqrt(sumdev2/numbers.length); 3168 } 3169 standardDeviation(int[] numbers)3170 public static final double standardDeviation(int[] numbers){ 3171 if(numbers==null || numbers.length<2){return 0;} 3172 long sum=sum(numbers); 3173 double avg=sum/(double)numbers.length; 3174 double sumdev2=0; 3175 for(int i=0; i<numbers.length; i++){ 3176 long x=numbers[i]; 3177 double dev=avg-x; 3178 sumdev2+=(dev*dev); 3179 } 3180 return Math.sqrt(sumdev2/numbers.length); 3181 } 3182 standardDeviation(char[] numbers)3183 public static final double standardDeviation(char[] numbers){ 3184 if(numbers==null || numbers.length<2){return 0;} 3185 long sum=sum(numbers); 3186 double avg=sum/(double)numbers.length; 3187 double sumdev2=0; 3188 for(int i=0; i<numbers.length; i++){ 3189 long x=numbers[i]; 3190 double dev=avg-x; 3191 sumdev2+=(dev*dev); 3192 } 3193 return Math.sqrt(sumdev2/numbers.length); 3194 } 3195 standardDeviation(short[] numbers)3196 public static final double standardDeviation(short[] numbers){ 3197 if(numbers==null || numbers.length<2){return 0;} 3198 long sum=sum(numbers); 3199 double avg=sum/(double)numbers.length; 3200 double sumdev2=0; 3201 for(int i=0; i<numbers.length; i++){ 3202 long x=numbers[i]; 3203 double dev=avg-x; 3204 sumdev2+=(dev*dev); 3205 } 3206 return Math.sqrt(sumdev2/numbers.length); 3207 } 3208 averageHistogram(long[] histogram)3209 public static final double averageHistogram(long[] histogram){ 3210 long sum=max(1, sum(histogram)); 3211 long sum2=0; 3212 for(int i=0; i<histogram.length; i++){ 3213 sum2+=(histogram[i]*i); 3214 } 3215 double avg=sum2/(double)sum; 3216 return avg; 3217 } 3218 standardDeviationHistogram(char[] histogram)3219 public static final double standardDeviationHistogram(char[] histogram){ 3220 long sum=max(1, sum(histogram)); 3221 long sum2=0; 3222 for(int i=0; i<histogram.length; i++){ 3223 sum2+=(histogram[i]*i); 3224 } 3225 double avg=sum2/(double)sum; 3226 double sumdev2=0; 3227 for(int i=0; i<histogram.length; i++){ 3228 double dev=avg-i; 3229 double dev2=dev*dev; 3230 sumdev2+=(histogram[i]*dev2); 3231 } 3232 return Math.sqrt(sumdev2/sum); 3233 } 3234 standardDeviationHistogram(int[] histogram)3235 public static final double standardDeviationHistogram(int[] histogram){ 3236 long sum=max(1, sum(histogram)); 3237 long sum2=0; 3238 for(int i=0; i<histogram.length; i++){ 3239 sum2+=(histogram[i]*i); 3240 } 3241 double avg=sum2/(double)sum; 3242 double sumdev2=0; 3243 for(int i=0; i<histogram.length; i++){ 3244 double dev=avg-i; 3245 double dev2=dev*dev; 3246 sumdev2+=(histogram[i]*dev2); 3247 } 3248 return Math.sqrt(sumdev2/sum); 3249 } 3250 standardDeviationHistogram(long[] histogram)3251 public static final double standardDeviationHistogram(long[] histogram){ 3252 long sum=max(1, sum(histogram)); 3253 long sum2=0; 3254 for(int i=0; i<histogram.length; i++){ 3255 sum2+=(histogram[i]*i); 3256 } 3257 double avg=sum2/(double)sum; 3258 double sumdev2=0; 3259 for(int i=0; i<histogram.length; i++){ 3260 double dev=avg-i; 3261 double dev2=dev*dev; 3262 sumdev2+=(histogram[i]*dev2); 3263 } 3264 return Math.sqrt(sumdev2/sum); 3265 } 3266 3267 /** Special version that calculates standard deviation based on unique kmers rather than overall events */ standardDeviationHistogramKmer(long[] histogram)3268 public static final double standardDeviationHistogramKmer(long[] histogram){ 3269 final long sum=sum(histogram); 3270 double sumU=0; 3271 for(int i=0; i<histogram.length; i++){ 3272 long x=histogram[i]; 3273 sumU+=(x/(double)max(i, 1)); 3274 } 3275 double avg=sum/max(sumU, 1); 3276 double sumdev2=0; 3277 for(int i=1; i<histogram.length; i++){ 3278 double dev=avg-i; 3279 double dev2=dev*dev; 3280 long x=histogram[i]; 3281 sumdev2+=((x/(double)max(i, 1))*dev2); 3282 } 3283 return Math.sqrt(sumdev2/sumU); 3284 } 3285 standardDeviationHistogram(AtomicLongArray histogram)3286 public static final double standardDeviationHistogram(AtomicLongArray histogram){ 3287 long sum=max(1, sum(histogram)); 3288 long sum2=0; 3289 for(int i=0; i<histogram.length(); i++){ 3290 sum2+=(histogram.get(i)*i); 3291 } 3292 double avg=sum2/(double)sum; 3293 double sumdev2=0; 3294 for(int i=0; i<histogram.length(); i++){ 3295 double dev=avg-i; 3296 double dev2=dev*dev; 3297 sumdev2+=(histogram.get(i)*dev2); 3298 } 3299 return Math.sqrt(sumdev2/sum); 3300 } 3301 3302 /** Special version that calculates standard deviation based on unique kmers rather than overall events */ standardDeviationHistogramKmer(AtomicLongArray histogram)3303 public static final double standardDeviationHistogramKmer(AtomicLongArray histogram){ 3304 final long sum=sum(histogram); 3305 double sumU=0; 3306 for(int i=0; i<histogram.length(); i++){ 3307 long x=histogram.get(i); 3308 sumU+=(x/(double)max(i, 1)); 3309 } 3310 double avg=sum/max(sumU, 1); 3311 double sumdev2=0; 3312 for(int i=1; i<histogram.length(); i++){ 3313 double dev=avg-i; 3314 double dev2=dev*dev; 3315 long x=histogram.get(i); 3316 sumdev2+=((x/(double)max(i, 1))*dev2); 3317 } 3318 return Math.sqrt(sumdev2/sumU); 3319 } 3320 downsample(long[] array, int bins)3321 public static final long[] downsample(long[] array, int bins){ 3322 if(array==null || array.length==bins){return array;} 3323 assert(bins<=array.length); 3324 assert(bins>=0); 3325 long[] r=new long[bins]; 3326 if(bins==0){return r;} 3327 double mult=bins/(double)array.length; 3328 for(int i=0; i<array.length; i++){ 3329 int j=(int)(mult*i); 3330 r[j]+=array[i]; 3331 // if(array[i]>0){System.err.println(i+"->"+j+": "+array[i]);} 3332 } 3333 return r; 3334 } 3335 3336 pause(int millis)3337 public static final void pause(int millis){ 3338 try { 3339 Thread.sleep(millis); 3340 } catch (InterruptedException e) { 3341 // TODO Auto-generated catch block 3342 e.printStackTrace(); 3343 } 3344 } 3345 min(int x, int y)3346 public static final int min(int x, int y){return x<y ? x : y;} max(int x, int y)3347 public static final int max(int x, int y){return x>y ? x : y;} min(int x, int y, int z)3348 public static final int min(int x, int y, int z){return x<y ? (x<z ? x : z) : (y<z ? y : z);} max(int x, int y, int z)3349 public static final int max(int x, int y, int z){return x>y ? (x>z ? x : z) : (y>z ? y : z);} min(int x, int y, int z, int z2)3350 public static final int min(int x, int y, int z, int z2){return min(min(x,y), min(z,z2));} max(int x, int y, int z, int z2)3351 public static final int max(int x, int y, int z, int z2){return max(max(x,y), max(z,z2));} 3352 3353 //Median of 3 mid(int x, int y, int z)3354 public static final int mid(int x, int y, int z){return x<y ? (x<z ? min(y, z) : x) : (y<z ? min(x, z) : y);} 3355 min(char x, char y)3356 public static final char min(char x, char y){return x<y ? x : y;} max(char x, char y)3357 public static final char max(char x, char y){return x>y ? x : y;} 3358 min(byte x, byte y)3359 public static final byte min(byte x, byte y){return x<y ? x : y;} max(byte x, byte y)3360 public static final byte max(byte x, byte y){return x>y ? x : y;} min(byte x, byte y, byte z)3361 public static final byte min(byte x, byte y, byte z){return x<y ? min(x, z) : min(y, z);} max(byte x, byte y, byte z)3362 public static final byte max(byte x, byte y, byte z){return x>y ? max(x, z) : max(y, z);} min(byte x, byte y, byte z, byte a)3363 public static final byte min(byte x, byte y, byte z, byte a){return min(min(x, y), min(z, a));} max(byte x, byte y, byte z, byte a)3364 public static final byte max(byte x, byte y, byte z, byte a){return max(max(x, y), max(z, a));} 3365 mid(byte x, byte y, byte z)3366 public static final byte mid(byte x, byte y, byte z){return x<y ? (x<z ? min(y, z) : x) : (y<z ? min(x, z) : y);} 3367 min(long x, long y)3368 public static final long min(long x, long y){return x<y ? x : y;} max(long x, long y)3369 public static final long max(long x, long y){return x>y ? x : y;} min(long x, long y, long z)3370 public static final long min(long x, long y, long z){return x<y ? (x<z ? x : z) : (y<z ? y : z);} max(long x, long y, long z)3371 public static final long max(long x, long y, long z){return x>y ? (x>z ? x : z) : (y>z ? y : z);} min(long x, long y, long z, long z2)3372 public static final long min(long x, long y, long z, long z2){return min(min(x,y), min(z,z2));} max(long x, long y, long z, long z2)3373 public static final long max(long x, long y, long z, long z2){return max(max(x,y), max(z,z2));} mid(long x, long y, long z)3374 public static final long mid(long x, long y, long z){return x<y ? (x<z ? min(y, z) : x) : (y<z ? min(x, z) : y);} longToInt(long x)3375 public static final int longToInt(long x){return x<Integer.MIN_VALUE ? Integer.MIN_VALUE : x>Integer.MAX_VALUE ? Integer.MAX_VALUE : (int)x;} 3376 min(double x, double y)3377 public static final double min(double x, double y){return x<y ? x : y;} max(double x, double y)3378 public static final double max(double x, double y){return x>y ? x : y;} min(double x, double y, double z)3379 public static final double min(double x, double y, double z){return x<y ? (x<z ? x : z) : (y<z ? y : z);} max(double x, double y, double z)3380 public static final double max(double x, double y, double z){return x>y ? (x>z ? x : z) : (y>z ? y : z);} mid(double x, double y, double z)3381 public static final double mid(double x, double y, double z){return x<y ? (x<z ? min(y, z) : x) : (y<z ? min(x, z) : y);} 3382 min(float x, float y)3383 public static final float min(float x, float y){return x<y ? x : y;} max(float x, float y)3384 public static final float max(float x, float y){return x>y ? x : y;} min(float x, float y, float z)3385 public static final float min(float x, float y, float z){return x<y ? (x<z ? x : z) : (y<z ? y : z);} max(float x, float y, float z)3386 public static final float max(float x, float y, float z){return x>y ? (x>z ? x : z) : (y>z ? y : z);} min(float x, float y, float z, float z2)3387 public static final float min(float x, float y, float z, float z2){return min(min(x, y), min(z, z2));} max(float x, float y, float z, float z2)3388 public static final float max(float x, float y, float z, float z2){return max(max(x, y), max(z, z2));} mid(float x, float y, float z)3389 public static final float mid(float x, float y, float z){return x<y ? (x<z ? min(y, z) : x) : (y<z ? min(x, z) : y);} 3390 min(int[] array, int fromIndex, int toIndex)3391 public static final int min(int[] array, int fromIndex, int toIndex){ 3392 int min=array[fromIndex]; 3393 for(int i=fromIndex+1; i<=toIndex; i++){ 3394 min=min(min, array[i]); 3395 } 3396 return min; 3397 } 3398 max(int[] array, int fromIndex, int toIndex)3399 public static final int max(int[] array, int fromIndex, int toIndex){ 3400 int max=array[fromIndex]; 3401 for(int i=fromIndex+1; i<=toIndex; i++){ 3402 max=max(max, array[i]); 3403 } 3404 return max; 3405 } 3406 minIndex(int[] array)3407 public static int minIndex(int[] array) { 3408 if(array==null || array.length<1){return -1;} 3409 float min=array[0]; 3410 int index=0; 3411 for(int i=1; i<array.length; i++){ 3412 if(array[i]<min){ 3413 min=array[i]; 3414 index=i; 3415 } 3416 } 3417 return index; 3418 } 3419 trimWhitespace(String s)3420 public static String trimWhitespace(String s){ 3421 for(int i=0; i<s.length(); i++){ 3422 if(Character.isWhitespace(s.charAt(i))){ 3423 String s2=s.substring(0, i); 3424 return s2; 3425 } 3426 } 3427 return s; 3428 } 3429 exponential(Random randy, double lamda)3430 public static double exponential(Random randy, double lamda){ 3431 // for(int i=0; i<20; i++){ 3432 // double p=randy.nextDouble(); 3433 // double r=-Math.log(1-p)/lamda; 3434 // System.err.println(p+", "+lamda+"->"+"\n"+r); 3435 // } 3436 // assert(false); 3437 double p=randy.nextDouble(); 3438 return -Math.log(1-p)/lamda; 3439 } 3440 log2(double d)3441 public static double log2(double d){ 3442 return Math.log(d)*invlog2; 3443 } 3444 logRoot2(double d)3445 public static double logRoot2(double d){ 3446 return Math.log(d)*invlogRoot2; 3447 } 3448 log1point2(double d)3449 public static double log1point2(double d){ 3450 return Math.log(d)*invlog1point2; 3451 } 3452 3453 private static final double log2=Math.log(2); 3454 private static final double invlog2=1/log2; 3455 private static final double logRoot2=Math.log(Math.sqrt(2)); 3456 private static final double invlogRoot2=1/logRoot2; 3457 private static final double log1point2=Math.log(1.2); 3458 private static final double invlog1point2=1/log1point2; 3459 3460 public static final boolean[] digitMap; 3461 public static final boolean[] signOrDigitMap; 3462 public static final boolean[] numericMap; 3463 public static final boolean[] letterMap; 3464 3465 /** ASCII equivalents for extended-ASCII characters */ 3466 public static final char[] specialChars; 3467 3468 public static final int[] ilens; 3469 public static final long[] llens; 3470 3471 /* Precompiled regular expressions */ 3472 3473 /** A single whitespace */ 3474 public static final Pattern whitespace = Pattern.compile("\\s"); 3475 /** Multiple whitespace */ 3476 public static final Pattern whitespacePlus = Pattern.compile("\\s+"); 3477 /** Comma */ 3478 public static final Pattern commaPattern = Pattern.compile(","); 3479 /** Tab */ 3480 public static final Pattern tabPattern = Pattern.compile("\t"); 3481 /** Colon */ 3482 public static final Pattern colonPattern = Pattern.compile(":"); 3483 /** Semicolon */ 3484 public static final Pattern semiPattern = Pattern.compile(";"); 3485 /** Pipe */ 3486 public static final Pattern pipePattern = Pattern.compile("\\|"); 3487 /** Space */ 3488 public static final Pattern spacePattern = Pattern.compile(" "); 3489 /** Equals */ 3490 public static final Pattern equalsPattern = Pattern.compile("="); 3491 /** Equals */ 3492 public static final Pattern underscorePattern = Pattern.compile("_"); 3493 3494 public static boolean FORCE_JAVA_PARSE_DOUBLE=false; 3495 3496 static{ 3497 digitMap=new boolean[128]; 3498 signOrDigitMap=new boolean[128]; 3499 numericMap=new boolean[128]; 3500 letterMap=new boolean[128]; 3501 for(int i='a'; i<='z'; i++){letterMap[i]=true;} 3502 for(int i='A'; i<='Z'; i++){letterMap[i]=true;} 3503 for(int i='0'; i<='9'; i++){digitMap[i]=numericMap[i]=signOrDigitMap[i]=true;} 3504 numericMap['-']=signOrDigitMap['-']=true; 3505 numericMap['.']=true; 3506 3507 ilens=new int[Integer.toString(Integer.MAX_VALUE).length()+1]; 3508 llens=new long[Long.toString(Long.MAX_VALUE).length()+1]; 3509 for(int i=1, x=9; i<ilens.length; i++){ 3510 ilens[i]=x; 3511 x=(x*10)+9; 3512 } 3513 ilens[ilens.length-1]=Integer.MAX_VALUE; 3514 for(long i=1, x=9; i<llens.length; i++){ 3515 llens[(int)i]=x; 3516 x=(x*10)+9; 3517 } 3518 llens[llens.length-1]=Long.MAX_VALUE; 3519 3520 specialChars=new char[256]; 3521 Arrays.fill(specialChars, 'X'); 3522 for(int i=0; i<32; i++){ 3523 specialChars[i]=' '; 3524 } 3525 for(int i=32; i<127; i++){ 3526 specialChars[i]=(char)i; 3527 } 3528 specialChars[127]=' '; 3529 specialChars[128]='C'; 3530 specialChars[129]='u'; 3531 specialChars[130]='e'; 3532 specialChars[131]='a'; 3533 specialChars[132]='a'; 3534 specialChars[133]='a'; 3535 specialChars[134]='a'; 3536 specialChars[135]='c'; 3537 specialChars[136]='e'; 3538 specialChars[137]='e'; 3539 specialChars[138]='e'; 3540 specialChars[139]='i'; 3541 specialChars[140]='i'; 3542 specialChars[141]='i'; 3543 specialChars[142]='S'; 3544 specialChars[143]='S'; 3545 specialChars[144]='E'; 3546 specialChars[145]='a'; 3547 specialChars[146]='a'; 3548 specialChars[147]='o'; 3549 specialChars[148]='o'; 3550 specialChars[149]='o'; 3551 specialChars[150]='u'; 3552 specialChars[151]='u'; 3553 specialChars[152]='y'; 3554 specialChars[153]='O'; 3555 specialChars[154]='U'; 3556 specialChars[155]='c'; 3557 specialChars[156]='L'; 3558 specialChars[157]='Y'; 3559 specialChars[158]='P'; 3560 specialChars[159]='f'; 3561 specialChars[160]='a'; 3562 specialChars[161]='i'; 3563 specialChars[162]='o'; 3564 specialChars[163]='u'; 3565 specialChars[164]='n'; 3566 specialChars[165]='N'; 3567 specialChars[166]='a'; 3568 specialChars[167]='o'; 3569 specialChars[168]='?'; 3570 specialChars[224]='a'; 3571 specialChars[224]='B'; 3572 specialChars[230]='u'; 3573 specialChars[252]='n'; 3574 specialChars[253]='2'; 3575 } 3576 3577 /** 3578 * Find a String in an array. 3579 * @param a String to find. 3580 * @param array Array of Strings. 3581 * @return Index of element, or -1 if not found. 3582 */ find(String a, String[] array)3583 public static final int find(String a, String[] array){ 3584 for(int i=0; i<array.length; i++){ 3585 if(a.equals(array[i])){return i;} 3586 } 3587 return -1; 3588 } 3589 3590 /** 3591 * Find a String in an array. 3592 * @param a String to find. 3593 * @param array Array of Strings. 3594 * @return Index of element, or last index if not found. 3595 */ find2(String a, String[] array)3596 public static final int find2(String a, String[] array){ 3597 for(int i=0; i<array.length; i++){ 3598 if(a.equals(array[i])){return i;} 3599 } 3600 return array.length-1; //No assertion 3601 } 3602 getLast(ArrayList<X> list)3603 public static final <X> X getLast(ArrayList<X> list) { 3604 return (list.size()>0 ? list.get(list.size()-1) : null); 3605 } 3606 3607 } 3608