1 /*
2  * Copyright (c) 2005, The Regents of the University of California, through
3  * Lawrence Berkeley National Laboratory (subject to receipt of any required
4  * approvals from the U.S. Dept. of Energy). All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * (1) Redistributions of source code must retain the above copyright notice,
10  * this list of conditions and the following disclaimer.
11  *
12  * (2) Redistributions in binary form must reproduce the above copyright notice,
13  * this list of conditions and the following disclaimer in the documentation
14  * and/or other materials provided with the distribution.
15  *
16  * (3) Neither the name of the University of California, Lawrence Berkeley
17  * National Laboratory, U.S. Dept. of Energy nor the names of its contributors
18  * may be used to endorse or promote products derived from this software without
19  * specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  * You are under no obligation whatsoever to provide any bug fixes, patches, or
34  * upgrades to the features, functionality or performance of the source code
35  * ("Enhancements") to anyone; however, if you choose to make your Enhancements
36  * available either publicly, or directly to Lawrence Berkeley National
37  * Laboratory, without imposing a separate written license agreement for such
38  * Enhancements, then you hereby grant the following license: a non-exclusive,
39  * royalty-free perpetual license to install, use, modify, prepare derivative
40  * works, incorporate into other computer software, distribute, and sublicense
41  * such enhancements or derivative works thereof, in binary and source code
42  * form.
43  */
44 package nux.xom.binary;
45 
46 import java.io.IOException;
47 import java.io.InputStream;
48 import java.io.OutputStream;
49 import java.util.zip.DataFormatException;
50 import java.util.zip.Deflater;
51 import java.util.zip.Inflater;
52 
53 
54 /**
55  * Efficient resizable auto-expanding list holding <code>byte</code> elements;
56  * implemented with arrays; more practical and efficient than NIO ByteBuffer.
57  *
58  * @author whoschek.AT.lbl.DOT.gov
59  * @author $Author: hoschek $
60  * @version $Revision: 1.27 $, $Date: 2006/06/18 21:25:02 $
61  */
62 final class ArrayByteList {
63 
64 	/**
65 	 * The array into which the elements of the list are stored. The
66 	 * capacity of the list is the length of this array.
67 	 */
68 	private transient byte[] elements;
69 
70 	/**
71 	 * The current number of elements contained in this list.
72 	 */
73 	private int size;
74 
75 	/**
76 	 * The current position for relative (cursor based) getXXX methods.
77 	 */
78 	private transient int position = 0;
79 
80 	private static final boolean DEBUG = false;
81 
82 
83 	/**
84 	 * Constructs an empty list.
85 	 */
ArrayByteList()86 	public ArrayByteList() {
87 		this(64);
88 	}
89 
90 	/**
91 	 * Constructs an empty list with the specified initial capacity.
92 	 *
93 	 * @param initialCapacity
94 	 *            the number of elements the receiver can hold without
95 	 *            auto-expanding itself by allocating new internal memory.
96 	 */
ArrayByteList(int initialCapacity)97 	public ArrayByteList(int initialCapacity) {
98 		elements = new byte[initialCapacity];
99 		size = 0;
100 	}
101 
102 	/**
103 	 * Constructs a list SHARING the specified elements. The initial size and
104 	 * capacity of the list is the length of the backing array.
105 	 * <p>
106 	 * <b>WARNING: </b> For efficiency reasons and to keep memory usage low,
107 	 * <b>the array is SHARED, not copied </b>. So if subsequently you modify
108 	 * the specified array directly via the [] operator, be sure you know what
109 	 * you're doing.
110 	 * <p>
111 	 * If you rather need copying behaviour, use
112 	 * <code>copy = new ArrayByteList(byte[] elems).copy()</code> or similar.
113 	 * <p>
114 	 * If you need a list containing a copy of <code>elems[from..to)</code>, use
115 	 * <code>list = new ArrayByteList(to-from).add(elems, from, to-from)</code>
116 	 * or <code>list = new ArrayByteList(ByteBuffer.wrap(elems, from, to-from))</code>
117 	 * or similar.
118 	 *
119 	 * @param elems
120 	 *            the array backing the constructed list
121 	 */
ArrayByteList(byte[] elems)122 	public ArrayByteList(byte[] elems) {
123 		elements = elems;
124 		size = elems.length;
125 	}
126 
127 	/**
128 	 * Appends the specified element to the end of this list.
129 	 *
130 	 * @param elem
131 	 *            element to be appended to this list.
132 	 */
add(byte elem)133 	public void add(byte elem) {
134 		if (size == elements.length) ensureCapacity(size + 1);
135 		elements[size++] = elem;
136 	}
137 
138 	/**
139 	 * Appends the elements in the range <code>[offset..offset+length)</code>
140 	 * to the end of this list.
141 	 *
142 	 * @param elems
143 	 *            the elements to be appended
144 	 * @param offset
145 	 *            the offset of the first element to add (inclusive)
146 	 * @param length
147 	 *            the number of elements to add
148 	 * @throws IndexOutOfBoundsException
149 	 *             if indexes are out of range.
150 	 */
add(byte[] elems, int offset, int length)151 	public void add(byte[] elems, int offset, int length) {
152 		if (offset < 0 || length < 0 || offset + length > elems.length)
153 			throw new IndexOutOfBoundsException("offset: " + offset +
154 				", length: " + length + ", elems.length: " + elems.length);
155 
156 		ensureCapacity(size + length);
157 		System.arraycopy(elems, offset, this.elements, size, length);
158 		size += length;
159 	}
160 
161 	/**
162 	 * Returns the elements currently stored, including invalid elements between
163 	 * size and capacity, if any.
164 	 * <p>
165 	 * <b>WARNING: </b> For efficiency reasons and to keep memory usage low,
166 	 * <b>the array is SHARED, not copied </b>. So if subsequently you modify the
167 	 * returned array directly via the [] operator, be sure you know what you're
168 	 * doing.
169 	 *
170 	 * @return the elements currently stored.
171 	 */
asArray()172 	public byte[] asArray() {
173 		return elements;
174 	}
175 
176     /**
177 	 * Removes all elements but keeps the current capacity; Afterwards
178 	 * <code>size()</code> will yield zero.
179 	 */
clear()180 	public void clear() {
181 		size = 0;
182 		position = 0;
183 	}
184 
185 	/**
186 	 * Ensures that the receiver can hold at least the specified number of
187 	 * elements without needing to allocate new internal memory. If necessary,
188 	 * allocates new internal memory and increases the capacity of the receiver.
189 	 *
190 	 * @param minCapacity
191 	 *            the desired minimum capacity.
192 	 */
ensureCapacity(int minCapacity)193 	public void ensureCapacity(int minCapacity) {
194 		if (minCapacity > elements.length) {
195 			int newCapacity = Math.max(minCapacity, 2 * elements.length + 1);
196 			elements = subArray(0, size, newCapacity);
197 		}
198 	}
199 
200 	/**
201 	 * Ensures that there are at least <code>need</code> bytes remaining to be
202 	 * read. If there are currenty less bytes remaining, this method reads from
203 	 * the given input stream until there exactly holds
204 	 * <code>remaining() == need</code>.
205 	 *
206 	 * @param input
207 	 *            the stream to read from
208 	 * @param need
209 	 *            the number of bytes to make available
210 	 * @return false if a premature end of stream was encountered, and not
211 	 *         enough bytes could be read as a result. Returns true otherwise.
212 	 * @throws IOException
213 	 *             if the underlying output stream encounters an I/O error
214 	 */
ensureRemaining(InputStream input, int need)215 	public boolean ensureRemaining(InputStream input, int need) throws IOException {
216 		int remaining = remaining();
217 		need -= remaining;
218 		if (need <= 0) return true;
219 
220 		int free = elements.length - size;
221 		if (free < need) { // not enough room available
222 			if (free + position >= need) { // compaction yields enough room
223 				System.arraycopy(elements, position, elements, 0, remaining);
224 			} else { // expand and compact to make room
225 				int newCapacity = Math.max(2*elements.length, remaining + need);
226 				byte[] tmp = new byte[newCapacity];
227 				System.arraycopy(elements, position, tmp, 0, remaining);
228 				elements = tmp;
229 			}
230 			size = remaining;
231 			position = 0;
232 		}
233 
234 		// read nomore bytes than necessary (message framing protocol)
235 		return read(input, need) <= 0;
236 	}
237 
238 	/**
239 	 * Reads <code>length</code> bytes from the given input stream, and appends
240 	 * them.
241 	 * <p>
242 	 * Assertion: There is enough room available, i.e. there holds
243 	 * <code>elements.length - size >= length</code>.
244 	 *
245 	 * @return the number of bytes missing in case of premature end-of-stream
246 	 */
read(InputStream input, int length)247 	private int read(InputStream input, int length) throws IOException {
248 		int n;
249 		while (length > 0 && (n = input.read(elements, size, length)) >= 0) {
250 			size += n;
251 			length -= n;
252 		}
253 		return length;
254 	}
255 
256 	/**
257 	 * Writes all contained elements onto the given output stream.
258 	 *
259 	 * @param out
260 	 *            the output stream to write to
261 	 * @throws IOException
262 	 *             if an I/O error occurs. In particular, an
263 	 *             <code>IOException</code> is thrown if the output stream is
264 	 *             closed.
265 	 */
write(OutputStream out)266 	public void write(OutputStream out) throws IOException {
267 		if (size > 0) { // minimize I/O calls
268 			out.write(elements, 0, size);
269 		}
270 	}
271 
272 	/**
273 	 * Returns the element at the specified index.
274 	 *
275 	 * @param index
276 	 *            index of element to return.
277 	 * @throws IndexOutOfBoundsException if index is out of range.
278 	 */
get(int index)279 	public byte get(int index) {
280 		if (DEBUG && index >= size) throwIndex(index);
281 		return elements[index];
282 	}
283 
284 	/**
285 	 * Replaces the element at the specified index with the specified
286 	 * element.
287 	 *
288 	 * @param index
289 	 *            index of element to replace.
290 	 * @param element
291 	 *            element to be stored at the specified index.
292 	 * @throws IndexOutOfBoundsException
293 	 *             if index is out of range.
294 	 */
set(int index, byte element)295 	public void set(int index, byte element) {
296 		if (DEBUG && index >= size) throwIndex(index);
297 		elements[index] = element;
298 	}
299 
300 	/**
301 	 * Returns the number of contained elements.
302 	 *
303 	 * @return  the number of elements contained in the receiver.
304 	 */
size()305 	public int size() {
306 		return size;
307 	}
308 
309 	/** Small helper method eliminating redundancy. */
subArray(int from, int length, int capacity)310 	private byte[] subArray(int from, int length, int capacity) {
311 		byte[] subArray = new byte[capacity];
312 		System.arraycopy(elements, from, subArray, 0, length);
313 		return subArray;
314 	}
315 
316 	/**
317 	 * Returns a copied array of bytes containing all elements; the returned
318 	 * array has length = this.size().
319 	 */
toArray()320 	public byte[] toArray() {
321 		return subArray(0, size, size);
322 	}
323 
324 	/**
325 	 * Returns a string representation, containing the numeric String
326 	 * representation of each element.
327 	 */
toString()328 	public String toString() {
329 		StringBuffer buf = new StringBuffer(4*size);
330 		buf.append("[");
331 		for (int i = 0; i < size; i++) {
332 			buf.append(elements[i]);
333 			if (i < size-1) buf.append(", ");
334 		}
335 		buf.append("]");
336 		return buf.toString();
337 	}
338 
339 	/**
340 	 * Checks if the given index is in range.
341 	 */
throwIndex(int index)342 	private void throwIndex(int index) {
343 		throw new IndexOutOfBoundsException("index: " + index
344 					+ ", size: " + size);
345 	}
346 
347 	/** Returns the current position offset for relative gets. */
position()348 	public int position() {
349 		return position;
350 	}
351 
352 	/** Sets the current position offset for relative gets. */
position(int position)353 	public void position(int position) {
354 		if (DEBUG && position > size) throwIndex(position);
355 		this.position = position;
356 	}
357 
358 	/** Returns the number of elements that are left to be read. */
remaining()359 	public int remaining() {
360 		return size - position;
361 	}
362 
363 	/** Reads and returns the byte value at the current position offset. */
get()364 	public byte get() {
365 		if (DEBUG && position >= size) throwIndex(position);
366 		return elements[position++];
367 	}
368 
369 	/** Reads and returns the 4 byte big endian value at the current position offset. */
getInt()370 	public int getInt() {
371 		if (DEBUG && position + 4 > size) throwIndex(position);
372 		byte b3 = elements[position+0];
373 		byte b2 = elements[position+1];
374 		byte b1 = elements[position+2];
375 		byte b0 = elements[position+3];
376 		position += 4;
377 		return ((((b3 & 0xff) << 24) | ((b2 & 0xff) << 16) |
378 				((b1 & 0xff) << 8) | ((b0 & 0xff) << 0)));
379 	}
380 
381 	/** Reads and returns the 2 byte big endian value at the current position offset. */
getShort()382 	public short getShort() {
383 		if (DEBUG && position + 2 > size) throwIndex(position);
384 		byte b1 = elements[position+0];
385 		byte b0 = elements[position+1];
386 		position += 2;
387 		return (short) ((b1 << 8) | (b0 & 0xff));
388 	}
389 
390 	/** Writes the given value at the given index in 4 byte big endian.  */
setInt(int index, int v)391 	public void setInt(int index, int v) {
392 		if (DEBUG && index + 4 > size) throwIndex(index);
393 		elements[index+0] = (byte) (v >> 24);
394 		elements[index+1] = (byte) (v >> 16);
395 		elements[index+2] = (byte) (v >> 8);
396 		elements[index+3] = (byte) (v >> 0);
397 	}
398 
399 	/** Appends the given value in 4 byte big endian. */
addInt(int v)400 	public void addInt(int v) {
401 		if (size + 4 > elements.length) ensureCapacity(size + 4);
402 		elements[size+0] = (byte) (v >> 24);
403 		elements[size+1] = (byte) (v >> 16);
404 		elements[size+2] = (byte) (v >> 8);
405 		elements[size+3] = (byte) (v >> 0);
406 		size += 4;
407 	}
408 
409 	/** Appends the given value in 2 byte big endian. */
addShort(short v)410 	public void addShort(short v) {
411 		if (size + 2 > elements.length) ensureCapacity(size + 2);
412 		elements[size+0] = (byte) (v >> 8);
413 		elements[size+1] = (byte) (v >> 0);
414 		size += 2;
415 	}
416 
417 	/**
418 	 * Removes the elements in the range <code>[from..to)</code>. Shifts any
419 	 * subsequent elements to the left.  Keeps the current capacity.
420 	 * Note: To remove a single element use <code>remove(index, index+1)</code>.
421 	 *
422 	 * @param from
423 	 *            the index of the first element to removed (inclusive).
424 	 * @param to
425 	 *            the index of the last element to removed (exclusive).
426 	 * @throws IndexOutOfBoundsException
427 	 *             if indexes are out of range
428 	 */
remove(int from, int to)429 	public void remove(int from, int to) {
430 		shrinkOrExpand(from, to, 0);
431 	}
432 
433 	/**
434 	 * The powerful work horse for all add/insert/replace/remove methods.
435 	 * One powerful efficient method does it all :-)
436 	 */
shrinkOrExpand(int from, int to, int replacementSize)437 	private void shrinkOrExpand(int from, int to, int replacementSize) {
438 		checkRange(from, to);
439 		int diff = replacementSize - (to - from);
440 		if (diff != 0) {
441 			ensureCapacity(size + diff);
442 			if (size - to > 0) { // check is for performance only (arraycopy is native method)
443 				// diff > 0 shifts right, diff < 0 shifts left
444 				System.arraycopy(elements, to, elements, to + diff, size - to);
445 			}
446 			size += diff;
447 		}
448 	}
449 
450 	/**
451 	 * Checks if the given range is within the contained array's bounds.
452 	 */
checkRange(int from, int to)453 	private void checkRange(int from, int to) {
454 		if (from < 0 || from > to || to > size)
455 			throw new IndexOutOfBoundsException("from: " + from + ", to: "
456 						+ to + ", size: " + size);
457 	}
458 
459 	/** Exchanges the internal state of the two lists. */
swap(ArrayByteList dst)460 	public void swap(ArrayByteList dst) {
461 		byte[] e = elements; elements = dst.elements; dst.elements = e;
462 		int s = size; size = dst.size; dst.size = s;
463 		int p = position; position = dst.position; dst.position = p;
464 	}
465 
466 	/**
467 	 * Compresses the elements in the range
468 	 * <code>src[src.position()..src.size())</code>, appending the compressed
469 	 * data to the receiver. <code>src</code> is not modified in any way in
470 	 * the process.
471 	 */
add(Deflater compressor, ArrayByteList src)472 	public void add(Deflater compressor, ArrayByteList src) {
473 		compressor.reset();
474 		compressor.setInput(src.asArray(), src.position(), src.remaining());
475 		int minBufSize = 256;
476 		ensureCapacity(size + Math.max(minBufSize, src.remaining() / 3));
477 
478 		do {
479 			ensureCapacity(size + minBufSize);
480 			size += compressor.deflate(elements, size, elements.length - size);
481 		} while (!compressor.needsInput());
482 
483 		if (!compressor.finished()) {
484 		    compressor.finish();
485 		    while (!compressor.finished()) {
486 		    		ensureCapacity(size + minBufSize);
487 				size += compressor.deflate(elements, size, elements.length - size);
488 		    }
489 		}
490 
491 		compressor.reset();
492 	}
493 
494 	/**
495 	 * Decompresses the elements in the range
496 	 * <code>src[src.position()..src.size())</code>, appending the decompressed
497 	 * data to the receiver. <code>src</code> is not modified in any way in
498 	 * the process.
499 	 * <p>
500 	 * If ZLIB decompression finishes before reaching the end of the range, the
501 	 * remaining trailing bytes are added to the receiver as well.
502 	 *
503 	 * @param decompressor
504 	 *            the ZLIB decompressor to use
505 	 * @param src
506 	 *            the input data to decompress
507 	 * @return the number of trailing bytes that were found not to be part of
508 	 *         the ZLIB data.
509 	 *
510 	 * @throws DataFormatException
511 	 *             if the compressed data format is invalid
512 	 */
add(Inflater decompressor, ArrayByteList src)513 	public int add(Inflater decompressor, ArrayByteList src) throws DataFormatException {
514 		decompressor.reset();
515 		decompressor.setInput(src.asArray(), src.position(), src.remaining());
516 
517 		int minBufSize = 256;
518 		ensureCapacity(size + Math.max(minBufSize, src.remaining() * 3));
519 
520 		do {
521 			ensureCapacity(size + minBufSize);
522 			size += decompressor.inflate(elements, size, elements.length - size);
523 		} while (!(decompressor.finished() || decompressor.needsDictionary()));
524 
525 		int remaining = decompressor.getRemaining();
526 		if (remaining > 0) { // copy trailer that wasn't compressed to begin with
527 			ensureCapacity(size + remaining);
528 			int off = src.size() - remaining;
529 			System.arraycopy(src.asArray(), off, elements, size, remaining);
530 			size += remaining;
531 		}
532 		decompressor.reset();
533 		return remaining;
534 	}
535 
addUTF16String(String prefix, String localName)536 	private void addUTF16String(String prefix, String localName) {
537 //		length prefixed string:
538 //		bit 0..6 [0..127]
539 //		bit 7: is4ByteInt. if so, then bit 0..6 are ignored and next 4 bytes are signed int big endian length
540 		int len = localName.length();
541 		if (prefix.length() != 0) len += prefix.length() + 1;
542 
543 		if (len <= 127) {
544 			add((byte)len);
545 		} else {
546 			add((byte)-1);
547 			addInt(len);
548 		}
549 
550 		if (prefix.length() != 0) {
551 			addUTF16String(prefix);
552 			addUTF16String(":");
553 //			add((byte)0);
554 //			add((byte)':'); // ASCII 58
555 //			elements[size++] = (byte)':';
556 		}
557 		addUTF16String(localName);
558 	}
559 
addUTF16String(String str)560 	private void addUTF16String(String str) {
561 		int len = str.length();
562 		for (int i=0; i < len; i++) {
563 			char c = str.charAt(i);
564 			add((byte)(c >> 8));
565 			add((byte)(c & 0xFF));
566 		}
567 	}
568 
getUTF16String(ArrayCharList buf)569 	private String getUTF16String(ArrayCharList buf) {
570 		buf.clear();
571 		int len = get();
572 		if (len < 0) len = getInt();
573 
574 		for (int i=0; i < len; i++) {
575 			int b1 = get() & 0xFF;
576 			int b2 = get() & 0xFF;
577 			char c = (char)((b1 << 8) | b2);
578 			buf.add(c);
579 		}
580 		return buf.toString();
581 	}
582 
getUTF16Strings(int count)583 	public String[] getUTF16Strings(int count) {
584 		final String[] dst = new String[count];
585 		final ArrayCharList buffer = new ArrayCharList(32);
586 		final byte[] elems = elements;
587 		int off = position;
588 
589 		for (int k = 0; k < count; k++) {
590 			dst[k] = getUTF16String(buffer);
591 		}
592 
593 		return dst;
594 	}
595 
596 	/** Appends the UTF-8 encoding of the given string, followed by a zero byte terminator. */
addUTF8String(String prefix, String localName)597 	public void addUTF8String(String prefix, String localName) {
598 		// assert: array capacity is large enough, so there's no need to expand
599 		if (prefix.length() != 0) {
600 			addUTF8String(prefix);
601 //			add((byte)':'); // ASCII 58
602 			elements[size++] = (byte)':';
603 		}
604 		addUTF8String(localName);
605 
606 		/*
607 		 * We exploit the facts that 1) the UTF-8 spec guarantees that a zero
608 		 * byte is never introduced as a side effect of UTF-8 encoding (unless a
609 		 * zero character 0x00 is part of the input), and that 2) in XML the
610 		 * zero character 0x00 is illegal, and hence can never occur (e.g.
611 		 * nu.xom.Verifier.java). It follows, that a zero byte can never occur
612 		 * anywhere in an UTF-8 encoded XML document. Thus, for compactness and
613 		 * simplicity we use zero terminated strings instead of length prefixed
614 		 * strings, without any ambiguity. See
615 		 * http://www.cl.cam.ac.uk/~mgk25/unicode.html and
616 		 * http://marc.theaimsgroup.com/?t=112253855300002&r=1&w=2&n=13
617 		 */
618 //		add((byte) 0);
619 		elements[size++] = (byte) 0;
620 	}
621 
622 	/** Appends the UTF-8 encoding of the given string. */
addUTF8String(String str)623 	private void addUTF8String(String str) {
624 		int len = str.length();
625 //		int max = 4*len;
626 //		if (size + max > elements.length) ensureCapacity(size + max);
627 		byte[] elems = elements;
628 		int s = size;
629 		for (int i = 0; i < len; i++) {
630 			char c = str.charAt(i);
631 			if (c < 0x80) { // Have at most seven bits (7 bit ASCII)
632 //				add((byte) c);
633 				elems[s++] = (byte) c;
634 				continue;
635 			}
636 			else if (!UnicodeUtil.isSurrogate(c)) {
637 				if (c < 0x800) { // 2 bytes, 11 bits
638 //					add((byte) (0xc0 | ((c >> 06))));
639 //					add((byte) (0x80 | ((c >> 00) & 0x3f)));
640 					elems[s++] = (byte) (0xc0 | ((c >> 06)));
641 					elems[s++] = (byte) (0x80 | ((c >> 00) & 0x3f));
642 					continue;
643 				}
644 				if (c <= '\uFFFF') { // 3 bytes, 16 bits
645 //					add((byte) (0xe0 | ((c >> 12))));
646 //					add((byte) (0x80 | ((c >> 06) & 0x3f)));
647 //					add((byte) (0x80 | ((c >> 00) & 0x3f)));
648 					elems[s++] = (byte) (0xe0 | ((c >> 12)));
649 					elems[s++] = (byte) (0x80 | ((c >> 06) & 0x3f));
650 					elems[s++] = (byte) (0x80 | ((c >> 00) & 0x3f));
651 					continue;
652 				}
653 			}
654 
655 			size = s;
656 			i = addUTFSurrogate(c, str, i);
657 			s = size;
658 		}
659 
660 		size = s;
661 	}
662 
addUTFSurrogate(char c, String str, int i)663 	private int addUTFSurrogate(char c, String str, int i) {
664 		int uc = 0;
665 		if (UnicodeUtil.isHighSurrogate(c)) {
666 			if (str.length() - i < 2) charCodingException("underflow", str);
667 			char d = str.charAt(++i);
668 			if (!UnicodeUtil.isLowSurrogate(d)) charCodingException("malformedForLength(1)", str);
669 			uc = UnicodeUtil.toUCS4(c, d);
670 		}
671 		else {
672 			if (UnicodeUtil.isLowSurrogate(c)) charCodingException("malformedForLength(1)", str);
673 			uc = c;
674 		}
675 
676 		if (uc < 0x200000) {
677 			add((byte) (0xf0 | ((uc >> 18))));
678 			add((byte) (0x80 | ((uc >> 12) & 0x3f)));
679 			add((byte) (0x80 | ((uc >> 06) & 0x3f)));
680 			add((byte) (0x80 | ((uc >> 00) & 0x3f)));
681 		}
682 		return i;
683 	}
684 
charCodingException(String msg, String str)685 	private static void charCodingException(String msg, String str) {
686 		throw new RuntimeException("CharacterCodingException: " + msg
687 				+ " for string '" + str + "'");
688 	}
689 
charCodingException(String msg, ArrayCharList str)690 	private static void charCodingException(String msg, ArrayCharList str) {
691 		throw new RuntimeException("CharacterCodingException: " + msg
692 				+ " for string '" + str + "'");
693 	}
694 
695 	/**
696 	 * Reads and returns the next "count" (zero terminated) 7 bit ASCII encoded
697 	 * strings, starting at the current position offset. Note that 7 bit ASCII
698 	 * is a proper subset of UTF-8.
699 	 */
getASCIIStrings(int count)700 	public String[] getASCIIStrings(int count) {
701 		final String[] dst = new String[count];
702 		final byte[] elems = elements;
703 		int off = position;
704 		for (int k = 0; k < count; k++) {
705 			int start = off;
706 			while (elems[off] != 0) { // until bnux string terminator
707 				off++;
708 			}
709 			dst[k] = new String(elems, 0, start, off-start); // intentional and safe!
710 			off++; // read past zero byte string terminator
711 		}
712 
713 		position = off;
714 		if (DEBUG && position > size) throwIndex(position);
715 		return dst;
716 	}
717 
718 	/**
719 	 * Reads and returns the next "count" (zero terminated) UTF-8 encoded
720 	 * strings, starting at the current position offset.
721 	 */
getUTF8Strings(int count)722 	public String[] getUTF8Strings(int count) {
723 		final String[] dst = new String[count];
724 		final ArrayCharList buffer = new ArrayCharList(32);
725 		final byte[] elems = elements;
726 		int off = position;
727 
728 		for (int k = 0; k < count; k++) {
729 			int start = off;
730 			int b;
731 			while ((b = elems[off]) > 0) { // scan to see if string is pure 7 bit ASCII
732 				off++;
733 			}
734 			if (b == 0) { // fast path: pure 7 bit ASCII (safe)
735 				dst[k] = new String(elems, 0, start, off-start); // intentional and safe!
736 			} else { // slow path: arbitrary UTF-8
737 				dst[k] = getUTF8String(start, off, buffer);
738 				off = position;
739 			}
740 			off++; // read past zero byte string terminator
741 		}
742 
743 		position = off;
744 		if (DEBUG && position > size) throwIndex(position);
745 		return dst;
746 	}
747 
getUTF8String(int i, int asciiEnd, ArrayCharList buffer)748 	private String getUTF8String(int i, int asciiEnd, ArrayCharList buffer) {
749 		buffer.clear();
750 		final byte[] src = elements;
751 
752 		// Decode remaining UTF-8 bytes and add corresponding chars.
753 		// We use doubly nested loop so we can use buf[s++] = ... instead of buf.add(char)
754 		while (true) {
755 			int s = buffer.size();
756 			final int max = i + buffer.capacity() - s - 6; // better safe than sorry
757 			final char[] dst = buffer.asArray();
758 
759 			int b1;
760 			while (i < max && ((b1 = src[i]) != 0)) { // until bnux string terminator
761 				int b2, b3;
762 				switch ((b1 >> 4) & 0x0f) {
763 
764 				case 0:
765 				case 1:
766 				case 2:
767 				case 3:
768 				case 4:
769 				case 5:
770 				case 6:
771 				case 7: { // 1 byte, 7 bits: 0xxxxxxx
772 //					buffer.add((char) b1);
773 //					i++;
774 					while (i < max && (b1 = src[i]) > 0) { // fast path: pure 7 bit ASCII
775 //						buffer.add((char)b1);
776 						dst[s++] = (char) b1;
777 						i++;
778 					}
779 					continue;
780 				}
781 				case 12:
782 				case 13: { // 2 bytes, 11 bits: 110xxxxx 10xxxxxx
783 //					if (size - i < 2) charCodingException("underflow", buffer);
784 					b2 = src[i+1];
785 					if (isLast(b2)) charCodingException("malformedForLength(1)", buffer);
786 					dst[s++] = (char) (((b1 & 0x1f) << 6) | ((b2 & 0x3f) << 0));
787 //					buffer.add((char) (((b1 & 0x1f) << 6) | ((b2 & 0x3f) << 0)));
788 					i += 2;
789 					continue;
790 				}
791 				case 14: { // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
792 //					if (size - i < 3) charCodingException("underflow", buffer);
793 					b2 = src[i+1];
794 					if (isLast(b2)) charCodingException("malformedForLength(1)", buffer);
795 					b3 = src[i+2];
796 					if (isLast(b3)) charCodingException("malformedForLength(2)", buffer);
797 					dst[s++] = (char) (((b1 & 0x0f) << 12)
798 							| ((b2 & 0x3f) << 06) | ((b3 & 0x3f) << 0));
799 //					buffer.add((char) (((b1 & 0x0f) << 12)
800 //							| ((b2 & 0x3f) << 06) | ((b3 & 0x3f) << 0)));
801 					i += 3;
802 					continue;
803 				}
804 				case 15: { // 4, 5, or 6 bytes
805 					buffer.setSize(s);
806 					i = getUTF8String456(i, buffer);
807 					s = buffer.size();
808 					continue;
809 				}
810 				default:
811 					charCodingException("malformedForLength(1)", buffer);
812 				}
813 
814 			} // end while
815 
816 			buffer.setSize(s);
817 			if (i < max) break; // we're done
818 			buffer.ensureCapacity(buffer.capacity() << 1);
819 		} // end while(true)
820 
821 		position = i;
822 		return buffer.toString();
823 	}
824 
getUTF8String456(int i, ArrayCharList buffer)825 	private int getUTF8String456(int i, ArrayCharList buffer) {
826 		final byte[] elems = elements;
827 		int b1 = elems[i];
828 		int b2, b3;
829 		int b4;
830 		int b5;
831 		int b6;
832 		int uc = 0;
833 		int n = 0;
834 
835 		switch (b1 & 0x0f) {
836 
837 		case 0:
838 		case 1:
839 		case 2:
840 		case 3:
841 		case 4:
842 		case 5:
843 		case 6:
844 		case 7: { // 4 bytes, 21 bits
845 //			if (size - i < 4) charCodingException("underflow", buffer);
846 			b2 = elems[i+1];
847 			if (isLast(b2)) charCodingException("malformedForLength(1)", buffer);
848 			b3 = elems[i+2];
849 			if (isLast(b3)) charCodingException("malformedForLength(2)", buffer);
850 			b4 = elems[i+3];
851 			if (isLast(b4)) charCodingException("malformedForLength(3)", buffer);
852 			uc = (((b1 & 0x07) << 18) | ((b2 & 0x3f) << 12)
853 					| ((b3 & 0x3f) << 06) | ((b4 & 0x3f) << 00));
854 			n = 4;
855 			break;
856 		}
857 		case 8:
858 		case 9:
859 		case 10:
860 		case 11: { // 5 bytes, 26 bits
861 //			if (size - i < 5) charCodingException("underflow", buffer);
862 			b2 = elems[i+1];
863 			if (isLast(b2)) charCodingException("malformedForLength(1)", buffer);
864 			b3 = elems[i+2];
865 			if (isLast(b3)) charCodingException("malformedForLength(2)", buffer);
866 			b4 = elems[i+3];
867 			if (isLast(b4)) charCodingException("malformedForLength(3)", buffer);
868 			b5 = elems[i+4];
869 			if (isLast(b5)) charCodingException("malformedForLength(4)", buffer);
870 			uc = (((b1 & 0x03) << 24) | ((b2 & 0x3f) << 18)
871 				| ((b3 & 0x3f) << 12) | ((b4 & 0x3f) << 06)
872 				| ((b5 & 0x3f) << 00));
873 			n = 5;
874 			break;
875 		}
876 		case 12:
877 		case 13: { // 6 bytes, 31 bits
878 //			if (size - i < 6) charCodingException("underflow", buffer);
879 			b2 = elems[i+1];
880 			if (isLast(b2)) charCodingException("malformedForLength(1)", buffer);
881 			b3 = elems[i+2];
882 			if (isLast(b3)) charCodingException("malformedForLength(2)", buffer);
883 			b4 = elems[i+3];
884 			if (isLast(b4)) charCodingException("malformedForLength(3)", buffer);
885 			b5 = elems[i+4];
886 			if (isLast(b5)) charCodingException("malformedForLength(4)", buffer);
887 			b6 = elems[i+5];
888 			if (isLast(b6)) charCodingException("malformedForLength(5)", buffer);
889 			uc = (((b1 & 0x01) << 30) | ((b2 & 0x3f) << 24)
890 					| ((b3 & 0x3f) << 18) | ((b4 & 0x3f) << 12)
891 					| ((b5 & 0x3f) << 06) | ((b6 & 0x3f)));
892 			n = 6;
893 			break;
894 		}
895 		default:
896 			charCodingException("malformedForLength(1)", buffer);
897 		}
898 
899 		addSurrogate(uc, n, buffer);
900 		i += n;
901 		return i;
902 //		continue;
903 	}
904 
isLast(int v)905 	private static boolean isLast(int v) {
906 	    return DEBUG && (0x80 != (v & 0xc0));
907 	}
908 
addSurrogate(int uc, int len, ArrayCharList dst)909 	private static void addSurrogate(int uc, int len, ArrayCharList dst) {
910 		if (uc <= 0xffff) {
911 			if (UnicodeUtil.isSurrogate(uc)) charCodingException("malformedForLength(len)", dst);
912 			dst.add((char) uc);
913 			return;
914 		}
915 		if (uc < UnicodeUtil.UCS4_MIN) charCodingException("malformedForLength(len)", dst);
916 		if (uc <= UnicodeUtil.UCS4_MAX) {
917 			dst.add(UnicodeUtil.highSurrogate(uc));
918 			dst.add(UnicodeUtil.lowSurrogate(uc));
919 			return;
920 		}
921 		charCodingException("unmappableForLength(len)", dst);
922 	}
923 
924 }