1 /* 2 * Copyright (c) 2005, The Regents of the University of California, through 3 * Lawrence Berkeley National Laboratory (subject to receipt of any required 4 * approvals from the U.S. Dept. of Energy). All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions are met: 8 * 9 * (1) Redistributions of source code must retain the above copyright notice, 10 * this list of conditions and the following disclaimer. 11 * 12 * (2) Redistributions in binary form must reproduce the above copyright notice, 13 * this list of conditions and the following disclaimer in the documentation 14 * and/or other materials provided with the distribution. 15 * 16 * (3) Neither the name of the University of California, Lawrence Berkeley 17 * National Laboratory, U.S. Dept. of Energy nor the names of its contributors 18 * may be used to endorse or promote products derived from this software without 19 * specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 * 33 * You are under no obligation whatsoever to provide any bug fixes, patches, or 34 * upgrades to the features, functionality or performance of the source code 35 * ("Enhancements") to anyone; however, if you choose to make your Enhancements 36 * available either publicly, or directly to Lawrence Berkeley National 37 * Laboratory, without imposing a separate written license agreement for such 38 * Enhancements, then you hereby grant the following license: a non-exclusive, 39 * royalty-free perpetual license to install, use, modify, prepare derivative 40 * works, incorporate into other computer software, distribute, and sublicense 41 * such enhancements or derivative works thereof, in binary and source code 42 * form. 43 */ 44 package nux.xom.binary; 45 46 import java.io.IOException; 47 import java.io.InputStream; 48 import java.io.OutputStream; 49 import java.util.zip.DataFormatException; 50 import java.util.zip.Deflater; 51 import java.util.zip.Inflater; 52 53 54 /** 55 * Efficient resizable auto-expanding list holding <code>byte</code> elements; 56 * implemented with arrays; more practical and efficient than NIO ByteBuffer. 57 * 58 * @author whoschek.AT.lbl.DOT.gov 59 * @author $Author: hoschek $ 60 * @version $Revision: 1.27 $, $Date: 2006/06/18 21:25:02 $ 61 */ 62 final class ArrayByteList { 63 64 /** 65 * The array into which the elements of the list are stored. The 66 * capacity of the list is the length of this array. 67 */ 68 private transient byte[] elements; 69 70 /** 71 * The current number of elements contained in this list. 72 */ 73 private int size; 74 75 /** 76 * The current position for relative (cursor based) getXXX methods. 77 */ 78 private transient int position = 0; 79 80 private static final boolean DEBUG = false; 81 82 83 /** 84 * Constructs an empty list. 85 */ ArrayByteList()86 public ArrayByteList() { 87 this(64); 88 } 89 90 /** 91 * Constructs an empty list with the specified initial capacity. 92 * 93 * @param initialCapacity 94 * the number of elements the receiver can hold without 95 * auto-expanding itself by allocating new internal memory. 96 */ ArrayByteList(int initialCapacity)97 public ArrayByteList(int initialCapacity) { 98 elements = new byte[initialCapacity]; 99 size = 0; 100 } 101 102 /** 103 * Constructs a list SHARING the specified elements. The initial size and 104 * capacity of the list is the length of the backing array. 105 * <p> 106 * <b>WARNING: </b> For efficiency reasons and to keep memory usage low, 107 * <b>the array is SHARED, not copied </b>. So if subsequently you modify 108 * the specified array directly via the [] operator, be sure you know what 109 * you're doing. 110 * <p> 111 * If you rather need copying behaviour, use 112 * <code>copy = new ArrayByteList(byte[] elems).copy()</code> or similar. 113 * <p> 114 * If you need a list containing a copy of <code>elems[from..to)</code>, use 115 * <code>list = new ArrayByteList(to-from).add(elems, from, to-from)</code> 116 * or <code>list = new ArrayByteList(ByteBuffer.wrap(elems, from, to-from))</code> 117 * or similar. 118 * 119 * @param elems 120 * the array backing the constructed list 121 */ ArrayByteList(byte[] elems)122 public ArrayByteList(byte[] elems) { 123 elements = elems; 124 size = elems.length; 125 } 126 127 /** 128 * Appends the specified element to the end of this list. 129 * 130 * @param elem 131 * element to be appended to this list. 132 */ add(byte elem)133 public void add(byte elem) { 134 if (size == elements.length) ensureCapacity(size + 1); 135 elements[size++] = elem; 136 } 137 138 /** 139 * Appends the elements in the range <code>[offset..offset+length)</code> 140 * to the end of this list. 141 * 142 * @param elems 143 * the elements to be appended 144 * @param offset 145 * the offset of the first element to add (inclusive) 146 * @param length 147 * the number of elements to add 148 * @throws IndexOutOfBoundsException 149 * if indexes are out of range. 150 */ add(byte[] elems, int offset, int length)151 public void add(byte[] elems, int offset, int length) { 152 if (offset < 0 || length < 0 || offset + length > elems.length) 153 throw new IndexOutOfBoundsException("offset: " + offset + 154 ", length: " + length + ", elems.length: " + elems.length); 155 156 ensureCapacity(size + length); 157 System.arraycopy(elems, offset, this.elements, size, length); 158 size += length; 159 } 160 161 /** 162 * Returns the elements currently stored, including invalid elements between 163 * size and capacity, if any. 164 * <p> 165 * <b>WARNING: </b> For efficiency reasons and to keep memory usage low, 166 * <b>the array is SHARED, not copied </b>. So if subsequently you modify the 167 * returned array directly via the [] operator, be sure you know what you're 168 * doing. 169 * 170 * @return the elements currently stored. 171 */ asArray()172 public byte[] asArray() { 173 return elements; 174 } 175 176 /** 177 * Removes all elements but keeps the current capacity; Afterwards 178 * <code>size()</code> will yield zero. 179 */ clear()180 public void clear() { 181 size = 0; 182 position = 0; 183 } 184 185 /** 186 * Ensures that the receiver can hold at least the specified number of 187 * elements without needing to allocate new internal memory. If necessary, 188 * allocates new internal memory and increases the capacity of the receiver. 189 * 190 * @param minCapacity 191 * the desired minimum capacity. 192 */ ensureCapacity(int minCapacity)193 public void ensureCapacity(int minCapacity) { 194 if (minCapacity > elements.length) { 195 int newCapacity = Math.max(minCapacity, 2 * elements.length + 1); 196 elements = subArray(0, size, newCapacity); 197 } 198 } 199 200 /** 201 * Ensures that there are at least <code>need</code> bytes remaining to be 202 * read. If there are currenty less bytes remaining, this method reads from 203 * the given input stream until there exactly holds 204 * <code>remaining() == need</code>. 205 * 206 * @param input 207 * the stream to read from 208 * @param need 209 * the number of bytes to make available 210 * @return false if a premature end of stream was encountered, and not 211 * enough bytes could be read as a result. Returns true otherwise. 212 * @throws IOException 213 * if the underlying output stream encounters an I/O error 214 */ ensureRemaining(InputStream input, int need)215 public boolean ensureRemaining(InputStream input, int need) throws IOException { 216 int remaining = remaining(); 217 need -= remaining; 218 if (need <= 0) return true; 219 220 int free = elements.length - size; 221 if (free < need) { // not enough room available 222 if (free + position >= need) { // compaction yields enough room 223 System.arraycopy(elements, position, elements, 0, remaining); 224 } else { // expand and compact to make room 225 int newCapacity = Math.max(2*elements.length, remaining + need); 226 byte[] tmp = new byte[newCapacity]; 227 System.arraycopy(elements, position, tmp, 0, remaining); 228 elements = tmp; 229 } 230 size = remaining; 231 position = 0; 232 } 233 234 // read nomore bytes than necessary (message framing protocol) 235 return read(input, need) <= 0; 236 } 237 238 /** 239 * Reads <code>length</code> bytes from the given input stream, and appends 240 * them. 241 * <p> 242 * Assertion: There is enough room available, i.e. there holds 243 * <code>elements.length - size >= length</code>. 244 * 245 * @return the number of bytes missing in case of premature end-of-stream 246 */ read(InputStream input, int length)247 private int read(InputStream input, int length) throws IOException { 248 int n; 249 while (length > 0 && (n = input.read(elements, size, length)) >= 0) { 250 size += n; 251 length -= n; 252 } 253 return length; 254 } 255 256 /** 257 * Writes all contained elements onto the given output stream. 258 * 259 * @param out 260 * the output stream to write to 261 * @throws IOException 262 * if an I/O error occurs. In particular, an 263 * <code>IOException</code> is thrown if the output stream is 264 * closed. 265 */ write(OutputStream out)266 public void write(OutputStream out) throws IOException { 267 if (size > 0) { // minimize I/O calls 268 out.write(elements, 0, size); 269 } 270 } 271 272 /** 273 * Returns the element at the specified index. 274 * 275 * @param index 276 * index of element to return. 277 * @throws IndexOutOfBoundsException if index is out of range. 278 */ get(int index)279 public byte get(int index) { 280 if (DEBUG && index >= size) throwIndex(index); 281 return elements[index]; 282 } 283 284 /** 285 * Replaces the element at the specified index with the specified 286 * element. 287 * 288 * @param index 289 * index of element to replace. 290 * @param element 291 * element to be stored at the specified index. 292 * @throws IndexOutOfBoundsException 293 * if index is out of range. 294 */ set(int index, byte element)295 public void set(int index, byte element) { 296 if (DEBUG && index >= size) throwIndex(index); 297 elements[index] = element; 298 } 299 300 /** 301 * Returns the number of contained elements. 302 * 303 * @return the number of elements contained in the receiver. 304 */ size()305 public int size() { 306 return size; 307 } 308 309 /** Small helper method eliminating redundancy. */ subArray(int from, int length, int capacity)310 private byte[] subArray(int from, int length, int capacity) { 311 byte[] subArray = new byte[capacity]; 312 System.arraycopy(elements, from, subArray, 0, length); 313 return subArray; 314 } 315 316 /** 317 * Returns a copied array of bytes containing all elements; the returned 318 * array has length = this.size(). 319 */ toArray()320 public byte[] toArray() { 321 return subArray(0, size, size); 322 } 323 324 /** 325 * Returns a string representation, containing the numeric String 326 * representation of each element. 327 */ toString()328 public String toString() { 329 StringBuffer buf = new StringBuffer(4*size); 330 buf.append("["); 331 for (int i = 0; i < size; i++) { 332 buf.append(elements[i]); 333 if (i < size-1) buf.append(", "); 334 } 335 buf.append("]"); 336 return buf.toString(); 337 } 338 339 /** 340 * Checks if the given index is in range. 341 */ throwIndex(int index)342 private void throwIndex(int index) { 343 throw new IndexOutOfBoundsException("index: " + index 344 + ", size: " + size); 345 } 346 347 /** Returns the current position offset for relative gets. */ position()348 public int position() { 349 return position; 350 } 351 352 /** Sets the current position offset for relative gets. */ position(int position)353 public void position(int position) { 354 if (DEBUG && position > size) throwIndex(position); 355 this.position = position; 356 } 357 358 /** Returns the number of elements that are left to be read. */ remaining()359 public int remaining() { 360 return size - position; 361 } 362 363 /** Reads and returns the byte value at the current position offset. */ get()364 public byte get() { 365 if (DEBUG && position >= size) throwIndex(position); 366 return elements[position++]; 367 } 368 369 /** Reads and returns the 4 byte big endian value at the current position offset. */ getInt()370 public int getInt() { 371 if (DEBUG && position + 4 > size) throwIndex(position); 372 byte b3 = elements[position+0]; 373 byte b2 = elements[position+1]; 374 byte b1 = elements[position+2]; 375 byte b0 = elements[position+3]; 376 position += 4; 377 return ((((b3 & 0xff) << 24) | ((b2 & 0xff) << 16) | 378 ((b1 & 0xff) << 8) | ((b0 & 0xff) << 0))); 379 } 380 381 /** Reads and returns the 2 byte big endian value at the current position offset. */ getShort()382 public short getShort() { 383 if (DEBUG && position + 2 > size) throwIndex(position); 384 byte b1 = elements[position+0]; 385 byte b0 = elements[position+1]; 386 position += 2; 387 return (short) ((b1 << 8) | (b0 & 0xff)); 388 } 389 390 /** Writes the given value at the given index in 4 byte big endian. */ setInt(int index, int v)391 public void setInt(int index, int v) { 392 if (DEBUG && index + 4 > size) throwIndex(index); 393 elements[index+0] = (byte) (v >> 24); 394 elements[index+1] = (byte) (v >> 16); 395 elements[index+2] = (byte) (v >> 8); 396 elements[index+3] = (byte) (v >> 0); 397 } 398 399 /** Appends the given value in 4 byte big endian. */ addInt(int v)400 public void addInt(int v) { 401 if (size + 4 > elements.length) ensureCapacity(size + 4); 402 elements[size+0] = (byte) (v >> 24); 403 elements[size+1] = (byte) (v >> 16); 404 elements[size+2] = (byte) (v >> 8); 405 elements[size+3] = (byte) (v >> 0); 406 size += 4; 407 } 408 409 /** Appends the given value in 2 byte big endian. */ addShort(short v)410 public void addShort(short v) { 411 if (size + 2 > elements.length) ensureCapacity(size + 2); 412 elements[size+0] = (byte) (v >> 8); 413 elements[size+1] = (byte) (v >> 0); 414 size += 2; 415 } 416 417 /** 418 * Removes the elements in the range <code>[from..to)</code>. Shifts any 419 * subsequent elements to the left. Keeps the current capacity. 420 * Note: To remove a single element use <code>remove(index, index+1)</code>. 421 * 422 * @param from 423 * the index of the first element to removed (inclusive). 424 * @param to 425 * the index of the last element to removed (exclusive). 426 * @throws IndexOutOfBoundsException 427 * if indexes are out of range 428 */ remove(int from, int to)429 public void remove(int from, int to) { 430 shrinkOrExpand(from, to, 0); 431 } 432 433 /** 434 * The powerful work horse for all add/insert/replace/remove methods. 435 * One powerful efficient method does it all :-) 436 */ shrinkOrExpand(int from, int to, int replacementSize)437 private void shrinkOrExpand(int from, int to, int replacementSize) { 438 checkRange(from, to); 439 int diff = replacementSize - (to - from); 440 if (diff != 0) { 441 ensureCapacity(size + diff); 442 if (size - to > 0) { // check is for performance only (arraycopy is native method) 443 // diff > 0 shifts right, diff < 0 shifts left 444 System.arraycopy(elements, to, elements, to + diff, size - to); 445 } 446 size += diff; 447 } 448 } 449 450 /** 451 * Checks if the given range is within the contained array's bounds. 452 */ checkRange(int from, int to)453 private void checkRange(int from, int to) { 454 if (from < 0 || from > to || to > size) 455 throw new IndexOutOfBoundsException("from: " + from + ", to: " 456 + to + ", size: " + size); 457 } 458 459 /** Exchanges the internal state of the two lists. */ swap(ArrayByteList dst)460 public void swap(ArrayByteList dst) { 461 byte[] e = elements; elements = dst.elements; dst.elements = e; 462 int s = size; size = dst.size; dst.size = s; 463 int p = position; position = dst.position; dst.position = p; 464 } 465 466 /** 467 * Compresses the elements in the range 468 * <code>src[src.position()..src.size())</code>, appending the compressed 469 * data to the receiver. <code>src</code> is not modified in any way in 470 * the process. 471 */ add(Deflater compressor, ArrayByteList src)472 public void add(Deflater compressor, ArrayByteList src) { 473 compressor.reset(); 474 compressor.setInput(src.asArray(), src.position(), src.remaining()); 475 int minBufSize = 256; 476 ensureCapacity(size + Math.max(minBufSize, src.remaining() / 3)); 477 478 do { 479 ensureCapacity(size + minBufSize); 480 size += compressor.deflate(elements, size, elements.length - size); 481 } while (!compressor.needsInput()); 482 483 if (!compressor.finished()) { 484 compressor.finish(); 485 while (!compressor.finished()) { 486 ensureCapacity(size + minBufSize); 487 size += compressor.deflate(elements, size, elements.length - size); 488 } 489 } 490 491 compressor.reset(); 492 } 493 494 /** 495 * Decompresses the elements in the range 496 * <code>src[src.position()..src.size())</code>, appending the decompressed 497 * data to the receiver. <code>src</code> is not modified in any way in 498 * the process. 499 * <p> 500 * If ZLIB decompression finishes before reaching the end of the range, the 501 * remaining trailing bytes are added to the receiver as well. 502 * 503 * @param decompressor 504 * the ZLIB decompressor to use 505 * @param src 506 * the input data to decompress 507 * @return the number of trailing bytes that were found not to be part of 508 * the ZLIB data. 509 * 510 * @throws DataFormatException 511 * if the compressed data format is invalid 512 */ add(Inflater decompressor, ArrayByteList src)513 public int add(Inflater decompressor, ArrayByteList src) throws DataFormatException { 514 decompressor.reset(); 515 decompressor.setInput(src.asArray(), src.position(), src.remaining()); 516 517 int minBufSize = 256; 518 ensureCapacity(size + Math.max(minBufSize, src.remaining() * 3)); 519 520 do { 521 ensureCapacity(size + minBufSize); 522 size += decompressor.inflate(elements, size, elements.length - size); 523 } while (!(decompressor.finished() || decompressor.needsDictionary())); 524 525 int remaining = decompressor.getRemaining(); 526 if (remaining > 0) { // copy trailer that wasn't compressed to begin with 527 ensureCapacity(size + remaining); 528 int off = src.size() - remaining; 529 System.arraycopy(src.asArray(), off, elements, size, remaining); 530 size += remaining; 531 } 532 decompressor.reset(); 533 return remaining; 534 } 535 addUTF16String(String prefix, String localName)536 private void addUTF16String(String prefix, String localName) { 537 // length prefixed string: 538 // bit 0..6 [0..127] 539 // bit 7: is4ByteInt. if so, then bit 0..6 are ignored and next 4 bytes are signed int big endian length 540 int len = localName.length(); 541 if (prefix.length() != 0) len += prefix.length() + 1; 542 543 if (len <= 127) { 544 add((byte)len); 545 } else { 546 add((byte)-1); 547 addInt(len); 548 } 549 550 if (prefix.length() != 0) { 551 addUTF16String(prefix); 552 addUTF16String(":"); 553 // add((byte)0); 554 // add((byte)':'); // ASCII 58 555 // elements[size++] = (byte)':'; 556 } 557 addUTF16String(localName); 558 } 559 addUTF16String(String str)560 private void addUTF16String(String str) { 561 int len = str.length(); 562 for (int i=0; i < len; i++) { 563 char c = str.charAt(i); 564 add((byte)(c >> 8)); 565 add((byte)(c & 0xFF)); 566 } 567 } 568 getUTF16String(ArrayCharList buf)569 private String getUTF16String(ArrayCharList buf) { 570 buf.clear(); 571 int len = get(); 572 if (len < 0) len = getInt(); 573 574 for (int i=0; i < len; i++) { 575 int b1 = get() & 0xFF; 576 int b2 = get() & 0xFF; 577 char c = (char)((b1 << 8) | b2); 578 buf.add(c); 579 } 580 return buf.toString(); 581 } 582 getUTF16Strings(int count)583 public String[] getUTF16Strings(int count) { 584 final String[] dst = new String[count]; 585 final ArrayCharList buffer = new ArrayCharList(32); 586 final byte[] elems = elements; 587 int off = position; 588 589 for (int k = 0; k < count; k++) { 590 dst[k] = getUTF16String(buffer); 591 } 592 593 return dst; 594 } 595 596 /** Appends the UTF-8 encoding of the given string, followed by a zero byte terminator. */ addUTF8String(String prefix, String localName)597 public void addUTF8String(String prefix, String localName) { 598 // assert: array capacity is large enough, so there's no need to expand 599 if (prefix.length() != 0) { 600 addUTF8String(prefix); 601 // add((byte)':'); // ASCII 58 602 elements[size++] = (byte)':'; 603 } 604 addUTF8String(localName); 605 606 /* 607 * We exploit the facts that 1) the UTF-8 spec guarantees that a zero 608 * byte is never introduced as a side effect of UTF-8 encoding (unless a 609 * zero character 0x00 is part of the input), and that 2) in XML the 610 * zero character 0x00 is illegal, and hence can never occur (e.g. 611 * nu.xom.Verifier.java). It follows, that a zero byte can never occur 612 * anywhere in an UTF-8 encoded XML document. Thus, for compactness and 613 * simplicity we use zero terminated strings instead of length prefixed 614 * strings, without any ambiguity. See 615 * http://www.cl.cam.ac.uk/~mgk25/unicode.html and 616 * http://marc.theaimsgroup.com/?t=112253855300002&r=1&w=2&n=13 617 */ 618 // add((byte) 0); 619 elements[size++] = (byte) 0; 620 } 621 622 /** Appends the UTF-8 encoding of the given string. */ addUTF8String(String str)623 private void addUTF8String(String str) { 624 int len = str.length(); 625 // int max = 4*len; 626 // if (size + max > elements.length) ensureCapacity(size + max); 627 byte[] elems = elements; 628 int s = size; 629 for (int i = 0; i < len; i++) { 630 char c = str.charAt(i); 631 if (c < 0x80) { // Have at most seven bits (7 bit ASCII) 632 // add((byte) c); 633 elems[s++] = (byte) c; 634 continue; 635 } 636 else if (!UnicodeUtil.isSurrogate(c)) { 637 if (c < 0x800) { // 2 bytes, 11 bits 638 // add((byte) (0xc0 | ((c >> 06)))); 639 // add((byte) (0x80 | ((c >> 00) & 0x3f))); 640 elems[s++] = (byte) (0xc0 | ((c >> 06))); 641 elems[s++] = (byte) (0x80 | ((c >> 00) & 0x3f)); 642 continue; 643 } 644 if (c <= '\uFFFF') { // 3 bytes, 16 bits 645 // add((byte) (0xe0 | ((c >> 12)))); 646 // add((byte) (0x80 | ((c >> 06) & 0x3f))); 647 // add((byte) (0x80 | ((c >> 00) & 0x3f))); 648 elems[s++] = (byte) (0xe0 | ((c >> 12))); 649 elems[s++] = (byte) (0x80 | ((c >> 06) & 0x3f)); 650 elems[s++] = (byte) (0x80 | ((c >> 00) & 0x3f)); 651 continue; 652 } 653 } 654 655 size = s; 656 i = addUTFSurrogate(c, str, i); 657 s = size; 658 } 659 660 size = s; 661 } 662 addUTFSurrogate(char c, String str, int i)663 private int addUTFSurrogate(char c, String str, int i) { 664 int uc = 0; 665 if (UnicodeUtil.isHighSurrogate(c)) { 666 if (str.length() - i < 2) charCodingException("underflow", str); 667 char d = str.charAt(++i); 668 if (!UnicodeUtil.isLowSurrogate(d)) charCodingException("malformedForLength(1)", str); 669 uc = UnicodeUtil.toUCS4(c, d); 670 } 671 else { 672 if (UnicodeUtil.isLowSurrogate(c)) charCodingException("malformedForLength(1)", str); 673 uc = c; 674 } 675 676 if (uc < 0x200000) { 677 add((byte) (0xf0 | ((uc >> 18)))); 678 add((byte) (0x80 | ((uc >> 12) & 0x3f))); 679 add((byte) (0x80 | ((uc >> 06) & 0x3f))); 680 add((byte) (0x80 | ((uc >> 00) & 0x3f))); 681 } 682 return i; 683 } 684 charCodingException(String msg, String str)685 private static void charCodingException(String msg, String str) { 686 throw new RuntimeException("CharacterCodingException: " + msg 687 + " for string '" + str + "'"); 688 } 689 charCodingException(String msg, ArrayCharList str)690 private static void charCodingException(String msg, ArrayCharList str) { 691 throw new RuntimeException("CharacterCodingException: " + msg 692 + " for string '" + str + "'"); 693 } 694 695 /** 696 * Reads and returns the next "count" (zero terminated) 7 bit ASCII encoded 697 * strings, starting at the current position offset. Note that 7 bit ASCII 698 * is a proper subset of UTF-8. 699 */ getASCIIStrings(int count)700 public String[] getASCIIStrings(int count) { 701 final String[] dst = new String[count]; 702 final byte[] elems = elements; 703 int off = position; 704 for (int k = 0; k < count; k++) { 705 int start = off; 706 while (elems[off] != 0) { // until bnux string terminator 707 off++; 708 } 709 dst[k] = new String(elems, 0, start, off-start); // intentional and safe! 710 off++; // read past zero byte string terminator 711 } 712 713 position = off; 714 if (DEBUG && position > size) throwIndex(position); 715 return dst; 716 } 717 718 /** 719 * Reads and returns the next "count" (zero terminated) UTF-8 encoded 720 * strings, starting at the current position offset. 721 */ getUTF8Strings(int count)722 public String[] getUTF8Strings(int count) { 723 final String[] dst = new String[count]; 724 final ArrayCharList buffer = new ArrayCharList(32); 725 final byte[] elems = elements; 726 int off = position; 727 728 for (int k = 0; k < count; k++) { 729 int start = off; 730 int b; 731 while ((b = elems[off]) > 0) { // scan to see if string is pure 7 bit ASCII 732 off++; 733 } 734 if (b == 0) { // fast path: pure 7 bit ASCII (safe) 735 dst[k] = new String(elems, 0, start, off-start); // intentional and safe! 736 } else { // slow path: arbitrary UTF-8 737 dst[k] = getUTF8String(start, off, buffer); 738 off = position; 739 } 740 off++; // read past zero byte string terminator 741 } 742 743 position = off; 744 if (DEBUG && position > size) throwIndex(position); 745 return dst; 746 } 747 getUTF8String(int i, int asciiEnd, ArrayCharList buffer)748 private String getUTF8String(int i, int asciiEnd, ArrayCharList buffer) { 749 buffer.clear(); 750 final byte[] src = elements; 751 752 // Decode remaining UTF-8 bytes and add corresponding chars. 753 // We use doubly nested loop so we can use buf[s++] = ... instead of buf.add(char) 754 while (true) { 755 int s = buffer.size(); 756 final int max = i + buffer.capacity() - s - 6; // better safe than sorry 757 final char[] dst = buffer.asArray(); 758 759 int b1; 760 while (i < max && ((b1 = src[i]) != 0)) { // until bnux string terminator 761 int b2, b3; 762 switch ((b1 >> 4) & 0x0f) { 763 764 case 0: 765 case 1: 766 case 2: 767 case 3: 768 case 4: 769 case 5: 770 case 6: 771 case 7: { // 1 byte, 7 bits: 0xxxxxxx 772 // buffer.add((char) b1); 773 // i++; 774 while (i < max && (b1 = src[i]) > 0) { // fast path: pure 7 bit ASCII 775 // buffer.add((char)b1); 776 dst[s++] = (char) b1; 777 i++; 778 } 779 continue; 780 } 781 case 12: 782 case 13: { // 2 bytes, 11 bits: 110xxxxx 10xxxxxx 783 // if (size - i < 2) charCodingException("underflow", buffer); 784 b2 = src[i+1]; 785 if (isLast(b2)) charCodingException("malformedForLength(1)", buffer); 786 dst[s++] = (char) (((b1 & 0x1f) << 6) | ((b2 & 0x3f) << 0)); 787 // buffer.add((char) (((b1 & 0x1f) << 6) | ((b2 & 0x3f) << 0))); 788 i += 2; 789 continue; 790 } 791 case 14: { // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx 792 // if (size - i < 3) charCodingException("underflow", buffer); 793 b2 = src[i+1]; 794 if (isLast(b2)) charCodingException("malformedForLength(1)", buffer); 795 b3 = src[i+2]; 796 if (isLast(b3)) charCodingException("malformedForLength(2)", buffer); 797 dst[s++] = (char) (((b1 & 0x0f) << 12) 798 | ((b2 & 0x3f) << 06) | ((b3 & 0x3f) << 0)); 799 // buffer.add((char) (((b1 & 0x0f) << 12) 800 // | ((b2 & 0x3f) << 06) | ((b3 & 0x3f) << 0))); 801 i += 3; 802 continue; 803 } 804 case 15: { // 4, 5, or 6 bytes 805 buffer.setSize(s); 806 i = getUTF8String456(i, buffer); 807 s = buffer.size(); 808 continue; 809 } 810 default: 811 charCodingException("malformedForLength(1)", buffer); 812 } 813 814 } // end while 815 816 buffer.setSize(s); 817 if (i < max) break; // we're done 818 buffer.ensureCapacity(buffer.capacity() << 1); 819 } // end while(true) 820 821 position = i; 822 return buffer.toString(); 823 } 824 getUTF8String456(int i, ArrayCharList buffer)825 private int getUTF8String456(int i, ArrayCharList buffer) { 826 final byte[] elems = elements; 827 int b1 = elems[i]; 828 int b2, b3; 829 int b4; 830 int b5; 831 int b6; 832 int uc = 0; 833 int n = 0; 834 835 switch (b1 & 0x0f) { 836 837 case 0: 838 case 1: 839 case 2: 840 case 3: 841 case 4: 842 case 5: 843 case 6: 844 case 7: { // 4 bytes, 21 bits 845 // if (size - i < 4) charCodingException("underflow", buffer); 846 b2 = elems[i+1]; 847 if (isLast(b2)) charCodingException("malformedForLength(1)", buffer); 848 b3 = elems[i+2]; 849 if (isLast(b3)) charCodingException("malformedForLength(2)", buffer); 850 b4 = elems[i+3]; 851 if (isLast(b4)) charCodingException("malformedForLength(3)", buffer); 852 uc = (((b1 & 0x07) << 18) | ((b2 & 0x3f) << 12) 853 | ((b3 & 0x3f) << 06) | ((b4 & 0x3f) << 00)); 854 n = 4; 855 break; 856 } 857 case 8: 858 case 9: 859 case 10: 860 case 11: { // 5 bytes, 26 bits 861 // if (size - i < 5) charCodingException("underflow", buffer); 862 b2 = elems[i+1]; 863 if (isLast(b2)) charCodingException("malformedForLength(1)", buffer); 864 b3 = elems[i+2]; 865 if (isLast(b3)) charCodingException("malformedForLength(2)", buffer); 866 b4 = elems[i+3]; 867 if (isLast(b4)) charCodingException("malformedForLength(3)", buffer); 868 b5 = elems[i+4]; 869 if (isLast(b5)) charCodingException("malformedForLength(4)", buffer); 870 uc = (((b1 & 0x03) << 24) | ((b2 & 0x3f) << 18) 871 | ((b3 & 0x3f) << 12) | ((b4 & 0x3f) << 06) 872 | ((b5 & 0x3f) << 00)); 873 n = 5; 874 break; 875 } 876 case 12: 877 case 13: { // 6 bytes, 31 bits 878 // if (size - i < 6) charCodingException("underflow", buffer); 879 b2 = elems[i+1]; 880 if (isLast(b2)) charCodingException("malformedForLength(1)", buffer); 881 b3 = elems[i+2]; 882 if (isLast(b3)) charCodingException("malformedForLength(2)", buffer); 883 b4 = elems[i+3]; 884 if (isLast(b4)) charCodingException("malformedForLength(3)", buffer); 885 b5 = elems[i+4]; 886 if (isLast(b5)) charCodingException("malformedForLength(4)", buffer); 887 b6 = elems[i+5]; 888 if (isLast(b6)) charCodingException("malformedForLength(5)", buffer); 889 uc = (((b1 & 0x01) << 30) | ((b2 & 0x3f) << 24) 890 | ((b3 & 0x3f) << 18) | ((b4 & 0x3f) << 12) 891 | ((b5 & 0x3f) << 06) | ((b6 & 0x3f))); 892 n = 6; 893 break; 894 } 895 default: 896 charCodingException("malformedForLength(1)", buffer); 897 } 898 899 addSurrogate(uc, n, buffer); 900 i += n; 901 return i; 902 // continue; 903 } 904 isLast(int v)905 private static boolean isLast(int v) { 906 return DEBUG && (0x80 != (v & 0xc0)); 907 } 908 addSurrogate(int uc, int len, ArrayCharList dst)909 private static void addSurrogate(int uc, int len, ArrayCharList dst) { 910 if (uc <= 0xffff) { 911 if (UnicodeUtil.isSurrogate(uc)) charCodingException("malformedForLength(len)", dst); 912 dst.add((char) uc); 913 return; 914 } 915 if (uc < UnicodeUtil.UCS4_MIN) charCodingException("malformedForLength(len)", dst); 916 if (uc <= UnicodeUtil.UCS4_MAX) { 917 dst.add(UnicodeUtil.highSurrogate(uc)); 918 dst.add(UnicodeUtil.lowSurrogate(uc)); 919 return; 920 } 921 charCodingException("unmappableForLength(len)", dst); 922 } 923 924 }