1 /* 2 * Copyright (c) 2005, The Regents of the University of California, through 3 * Lawrence Berkeley National Laboratory (subject to receipt of any required 4 * approvals from the U.S. Dept. of Energy). All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions are met: 8 * 9 * (1) Redistributions of source code must retain the above copyright notice, 10 * this list of conditions and the following disclaimer. 11 * 12 * (2) Redistributions in binary form must reproduce the above copyright notice, 13 * this list of conditions and the following disclaimer in the documentation 14 * and/or other materials provided with the distribution. 15 * 16 * (3) Neither the name of the University of California, Lawrence Berkeley 17 * National Laboratory, U.S. Dept. of Energy nor the names of its contributors 18 * may be used to endorse or promote products derived from this software without 19 * specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 
32 * 33 * You are under no obligation whatsoever to provide any bug fixes, patches, or 34 * upgrades to the features, functionality or performance of the source code 35 * ("Enhancements") to anyone; however, if you choose to make your Enhancements 36 * available either publicly, or directly to Lawrence Berkeley National 37 * Laboratory, without imposing a separate written license agreement for such 38 * Enhancements, then you hereby grant the following license: a non-exclusive, 39 * royalty-free perpetual license to install, use, modify, prepare derivative 40 * works, incorporate into other computer software, distribute, and sublicense 41 * such enhancements or derivative works thereof, in binary and source code 42 * form. 43 */ 44 package nux.xom.binary; 45 46 import java.io.ByteArrayInputStream; 47 import java.io.ByteArrayOutputStream; 48 import java.io.IOException; 49 import java.io.InputStream; 50 import java.io.OutputStream; 51 import java.util.ArrayList; 52 import java.util.Arrays; 53 import java.util.Comparator; 54 import java.util.zip.DataFormatException; 55 import java.util.zip.Deflater; 56 import java.util.zip.Inflater; 57 58 import nu.xom.Attribute; 59 import nu.xom.Comment; 60 import nu.xom.DocType; 61 import nu.xom.Document; 62 import nu.xom.Element; 63 import nu.xom.IllegalAddException; 64 import nu.xom.Node; 65 import nu.xom.NodeFactory; 66 import nu.xom.Nodes; 67 import nu.xom.ParentNode; 68 import nu.xom.ProcessingInstruction; 69 import nu.xom.Text; 70 import nu.xom.WellformednessException; 71 import nu.xom.XMLException; 72 import nux.xom.io.StreamingSerializer; 73 74 /** 75 * Serializes (encodes) and deserializes (decodes) XOM XML documents to and from 76 * an efficient and compact custom binary XML data format (termed <i>bnux </i> 77 * format), without loss or change of any information. 
Serialization and 78 * deserialization is much faster than with the standard textual XML format, and 79 * the resulting binary data is more compressed than textual XML. 80 * 81 * <h4>Applicability</h4> 82 * 83 * The overall goal of the <i>bnux algorithm</i> is to maximize serialization 84 * and deserialization (parsing) performance without requiring any schema 85 * description. Serialization and deserialization speed are roughly balanced 86 * against each other; neither side is particularly favoured over the other. 87 * Another benefitial effect of the algorithm is that a considerable degree of 88 * XML data redundancy is eliminated, but compression is more a welcome 89 * side-effect than a primary goal in itself. The algorithm is primarily 90 * intended for tightly coupled high-performance systems exchanging large 91 * volumes of XML data over networks, as well as for compact main memory caches 92 * and for <i>short-term </i> storage as BLOBs in backend databases or files 93 * (e.g. "session" data with limited duration). In the case of BLOB storage, 94 * selecting matching BLOBs can be sped up by maintaining a simple metaindex 95 * side table for the most frequent access patterns. See the <a 96 * href="#performance">performance results</a> below. 97 * <p> 98 * While the Java API is considered stable, the bnux data format should be 99 * considered a black box: Its internals are under-documented and may change 100 * without notice from release to release in backwards-incompatible manners. It 101 * is unlikely that support for reading data written with older Nux versions 102 * will ever be available. bnux is an exchange format but not an 103 * interoperability format. Having said that, the data format is machine 104 * architecture/platform independent. For example a bnux file can be moved back 105 * and forth between a 32 bit Intel little-endian machine and a 64 bit PowerPC 106 * big-endian machine; it remains parseable no matter where. 
 * <p>
 * This approach is expressly <b>not intended </b>as a replacement for standard
 * textual XML in loosely coupled systems where maximum long-term
 * interoperability is the overarching concern. It is also expressly <b>not
 * intended </b>for long-term data storage. If you store data in bnux format
 * there's every chance you won't be able to read it back a year or two from
 * now, or even earlier. Finally, it is probably unwise to use this class if
 * your application's performance requirements are not particularly stringent,
 * or profiling indicates that the bottleneck is not related to XML
 * serialization/deserialization anyway.
 * <p>
 * The bnux serialization algorithm is a fully streaming block-oriented
 * algorithm, ideal for large numbers of very small to arbitrarily large
 * XML documents.
 * <p>
 * The bnux deserialization algorithm is a fully streaming algorithm and can
 * optionally be pushed through a {@link nu.xom.NodeFactory}. This enables
 * efficient filtering and can avoid the need to build a main memory tree, which
 * is particularly useful for arbitrarily large documents. For example, streaming
 * XQueries over binary XML can be expressed via the NodeFactory generated by a
 * {@link nux.xom.xquery.StreamingPathFilter}. In streaming mode, the binary
 * codec exactly mimics the NodeFactory based behaviour of the XOM
 * {@link nu.xom.Builder}.
 *
 * <h4>Faithfully Preserving XML</h4>
 *
 * Any and all arbitrary XOM XML documents are supported, and no schema is
 * required. A XOM document that is serialized and subsequently deserialized by
 * this class is <i>exactly the same </i> as the original document, preserving
 * "as is" all names and data for elements, namespaces, additional namespace
 * declarations, attributes, texts, document type, comments, processing
 * instructions, whitespace, Unicode characters including surrogates, etc.
As a 139 * result, the W3C XML Infoset and the W3C Canonical XML representation is 140 * guaranteed to be preserved. In particular there always holds: 141 * 142 * <pre> 143 * java.util.Arrays.equals(XOMUtil.toCanonicalXML(doc), XOMUtil 144 * .toCanonicalXML(deserialize(serialize(doc)))); 145 * </pre> 146 * 147 * <h4>Optional ZLIB Compression</h4> 148 * 149 * The bnux algorithm considerably compresses XML data with little CPU 150 * consumption, by its very design. However, bnux also has an option to further 151 * compress/decompress its output/input with the ZLIB compression algorithm. 152 * ZLIB is based on Huffman coding and also used by the popular 153 * <code>gzip</code> (e.g. {@link java.util.zip.Deflater}). ZLIB compression 154 * is rather CPU intensive, but it typically yields strong compression factors, 155 * in particular for documents containing mostly narrative text (e.g. the 156 * bible). For example, strong compression may be desirable over low-bandwith 157 * networks or when bnux data is known to be accessed rather infrequently. On 158 * the other hand, ZLIB compression probably kills performance in the presence 159 * of high-bandwidth networks such as ESnet, Internet2/Abilene or 10 Gigabit 160 * Ethernet/InfiniBand LANs, even with high-end CPUs. CPU drain is also a 161 * scalability problem in the presence of large amounts of concurrent 162 * connections. An option ranging from 0 (no ZLIB compression; best performance) 163 * to 1 (little ZLIB compression; reduced performance) to 9 (strongest ZLIB 164 * compression; worst performance) allows one to configure the CPU/memory 165 * consumption trade-off. 166 * 167 * <h4>Reliability</h4> 168 * 169 * This class has been successfully tested against some 50000 extremely 170 * weird and unique test documents, including the W3C XML conformance test 171 * suite, and no bugs are known. 
172 * <p> 173 * Serialization employs no error checking at all, since malformed XOM input 174 * documents are impossible to produce given XOM's design: XOM strictly enforces 175 * wellformedness anyway. Deserialization employs some limited error checking, 176 * throwing exceptions for any improper API usage, non-bnux input data, data 177 * format version mismatch, or general binary data corruption. Beyond this, 178 * deserialization relies on XOM's hard-wired wellformedness checks, just like 179 * serialization does. Barring one of the above catastrophic situations, the 180 * bnux algorithm will always correctly and faithfully reconstruct the exact 181 * same well-formed XOM document. 182 * 183 * <h4>Example Usage:</h4> 184 * 185 * <pre> 186 * // parse standard textual XML, convert to binary format, round-trip it and compare results 187 * Document doc = new Builder().build(new File("samples/data/periodic.xml")); 188 * BinaryXMLCodec codec = new BinaryXMLCodec(); 189 * byte[] bnuxDoc = codec.serialize(doc, 0); 190 * Document doc2 = codec.deserialize(bnuxDoc); 191 * boolean isEqual = java.util.Arrays.equals( 192 * XOMUtil.toCanonicalXML(doc), XOMUtil.toCanonicalXML(doc2)); 193 * System.out.println("isEqual = " + isEqual); 194 * System.out.println(doc2.toXML()); 195 * 196 * // write binary XML document to file 197 * OutputStream out = new FileOutputStream("/tmp/periodic.xml.bnux"); 198 * out.write(bnuxDoc); 199 * out.close(); 200 * 201 * // read binary XML document from file 202 * bnuxDoc = FileUtil.toByteArray(new FileInputStream("/tmp/periodic.xml.bnux")); 203 * Document doc3 = codec.deserialize(bnuxDoc); 204 * System.out.println(doc3.toXML()); 205 * </pre> 206 * 207 * <a name="performance"/> 208 * <h4>Performance</h4> 209 * 210 * This class has been carefully profiled and optimized. Preliminary performance 211 * results over a wide range of real-world documents are given below. 
A more 212 * detailed presentation can be found at the Global Grid Forum <a 213 * target="_blank" 214 * href="http://www.ggf.org/GGF15/ggf_events_schedule_WSPerform.htm">Web 215 * Services Performance Workshop</a>. 216 * <p> 217 * Contrasting bnux BinaryXMLCodec with the XOM Builder and Serializer: 218 * <ul> 219 * <li>Tree Deserialization speedup: 40-100 MB/s vs. 3-30 MB/s</li> 220 * <li>Streaming Deserialization speedup: 60-500 MB/s vs. 3-30 MB/s</li> 221 * <li>Tree Serialization speedup: 50-150 MB/s vs. 5-20 MB/s</li> 222 * <li>Data compression factor: 1.0 - 4</li> 223 * </ul> 224 * For meaningful comparison, MB/s and compression factors are always given 225 * normalized in relation to the original standard textual XML file size. 226 * <ul> 227 * <li>Benchmark test data: A wide variety of small to medium large XML 228 * documents is used, including SOAP documents heavily using namespaces ( <a 229 * target="_blank" href="http://xbis.sourceforge.net/">XBIS </a>), simple XML 230 * formatted web server logs using no namespaces, RDF documents with lots of 231 * attributes and namespaces, the periodic table, documents consisting of large 232 * narrative text ( <a target="_blank" 233 * href="http://www.oasis-open.org/cover/bosakShakespeare200.html">Shakespeare 234 * </a>), publication citations ( <a target="_blank" 235 * href="http://dblp.uni-trier.de/xml/">DBLP </a>), music title databases ( <a 236 * target="_blank" href="http://www.freedb.org/">FreeDB </a>), Japanese 237 * documents (XML conformance test suite), SVG image files, etc.</li> 238 * 239 * <li>Benchmark configuration: no ZLIB compression, xom-1.2, non-validating 240 * XOM Builder using xerces-2.8.0, no DTD or schema, Sun JDK 1.5.0 server VM, 241 * commodity PC 2004, Dual Pentium 4, 3.4 GHz, Redhat 9</li> 242 * </ul> 243 * 244 * Example Interpretation: 245 * <ul> 246 * <li>Small documents: Results translate, for example, to ping-pong 247 * round-tripping of typical 500 byte SOAP request/response message 
documents at
 * a rate of 25000 msg/s, compared to 2500 msg/s with XOM (excluding network
 * latency). More pronounced, 500 (150) byte documents with few namespaces
 * translate to 35000 (120000) msg/s, compared to 3500 (5000) msg/s with XOM.
 * Consequently, XML serialization and deserialization are probably no more your
 * application's bottleneck, leaving, say, 95% CPU headroom free for other
 * application modules.</li>
 *
 * <li>Medium documents: When having a main-memory cache of several thousand 1
 * MB documents, each containing highly structured complex data, one can
 * deserialize, XQuery and serve from the cache at a rate of 50 documents/s,
 * compared to 5 documents/s with XOM.</li>
 *
 * </ul>
 * Note that in contrast to other algorithms, these measurements include XOM
 * tree building and walking, hence measures delivering data to and from actual
 * XML applications, rather than merely to and from a low-level SAX event stream
 * (which is considerably cheaper and deemed less useful).
 * <p>
 * The deserialization speedup is further multiplied when DTDs or schema
 * validation is used while parsing standard textual XML.
 * <p>
 * This class relies on advanced Java compiler optimizations, which take
 * considerable time to warm up. Hence, for comparative benchmarks, use a
 * server-class VM and make sure to repeat runs for at least 30 seconds.
 * <p>
 * Further, you will probably want to eliminate drastic XOM hotspots by
 * compiling XOM with "ant -Dfat=true jar" to maintain an internal String
 * instead of a UTF-8 encoded byte array in {@link nu.xom.Text}, which
 * eliminates the expensive character conversions implied for each access to a
 * Text object. This increases performance at the expense of memory footprint.
 * The measurements above report numbers using these patches, both for xom and
 * bnux.
If you're curious about the whereabouts of bottlenecks, run java with 280 * the non-perturbing '-server -agentlib:hprof=cpu=samples,depth=10' flags, then 281 * study the trace log and correlate its hotspot trailer with its call stack 282 * headers (see <a target="_blank" 283 * href="http://java.sun.com/developer/technicalArticles/Programming/HPROF.html"> 284 * hprof tracing </a>). 285 * <p> 286 * Use class {@link nux.xom.sandbox.BinaryXMLTest} to reproduce results, verify 287 * correctness or to evaluate performance for your own datasets. 288 * 289 * @author whoschek.AT.lbl.DOT.gov 290 * @author $Author: hoschek $ 291 * @version $Revision: 1.179 $, $Date: 2006/06/18 21:25:02 $ 292 */ 293 public class BinaryXMLCodec { 294 295 /* 296 * TODO: add a StAX interface on top of bnux? 297 * e.g. createXMLStreamReader(byte[]), similar for XMLStreamWriter 298 * TODO: add coalescing of adjacent Text nodes on deserialization? 299 */ 300 301 /* 302 * TODO: My impression is that there is remaining potential for some speedup, 303 * both for serialization and deserialization performance. Ideas towards this 304 * end include: 305 * 306 * - Add option to always use UTF16 (level=-1) 307 * - Micro caches may become obsolete once XOM has its own internal QName LRU cache 308 * - Use a better low level namespace iteration 309 * - Split unified symbolTable into several smaller symbolTables for Text, Attributes, and other. 310 * - other? 311 */ 312 313 // for deserialization: factory to stream (push) into 314 private NodeFactory factory; 315 316 // for deserialization: unique symbols from deserialized symbolTable 317 private String[] symbols; 318 319 // for deserialization: are pages ZLIB compressed? 320 private boolean isCompressed; 321 322 // for deserialization and serialization: page buffer 323 private ArrayByteList page; // multi-byte integers are ALWAYS in big-endian 324 325 // for serialization: 326 // holds unique strings found in document (qnames, texts, uris, etc.) 
327 private SymbolTable symbolTable; 328 329 // for serialization: 330 // byte-level node type tokens in XML document order; also length of next index(es) 331 private ArrayByteList nodeTokens; 332 333 // for serialization: indexes into symbolTable/entries 334 private ArrayIntList indexData; 335 336 // for serialization: has first page of current document already been written? 337 private boolean isFirstPage = true; 338 339 // ZLIB 340 private Inflater decompressor; // ZLIB 341 private Deflater compressor; // ZLIB 342 private int compressionLevel = -1; // initialize to "undefined" 343 344 // for deserialization: avoids reverification of PCDATA 345 private Text[] textCache; 346 347 // for deserialization: avoids reverification of namespace URIs 348 private String[] nameCache; 349 private LRUHashMap1 internedNames; 350 351 // for deserialization: 352 // avoids reverification of qname and URI, as well as indexOf() calls, and saves string memory 353 private NodeBuilder nodeBuilder; 354 355 // for streaming serialization 356 private OutputStream out; 357 358 /** 359 * For serialization: (approximate) maximum number of bytes per page. 360 * <p> 361 * To enable true streaming, a serialized document consists of one or more 362 * independent consecutive pages. Each page contains a portion of the XML 363 * document, in document order. More specifically, each page consists of a 364 * tokenized byte array and corresponding symbols. Once a page has been 365 * read/written related (heavy) state can be discarded, freeing memory. No 366 * more than one page needs to be held in memory at any given time. For very 367 * large documents this page design reduces memory consumption, increases 368 * throughput and reduces latency. For small to medium sized documents it 369 * makes next to no difference. 370 * <p> 371 * A small page capacity (e.g. 128 bytes) leads to lower latency per page 372 * but also lower throughput and lower compression overall. 
Conversely, a 373 * large page capacity (e.g. 1 MB) leads to higher throughput and higher 374 * compression, at the expense of higher latency. However, a very large page 375 * capacity (e.g. 10 MB) leads to memory subsystem pressure on streaming. 376 * Thus, here we use a happy medium, small enough to generate little memory 377 * subsystem pressure, and large enough to gain high throughput, retain 378 * almost all compression stemming from redundancy eliminating tokenization, 379 * with near zero overhead for small to medium sized documents, and 380 * outstanding performance for very large documents. 381 */ 382 private static final int MAX_PAGE_CAPACITY = 64 * 1024; 383 // private static final int MAX_PAGE_CAPACITY = 1; // DEBUG only 384 385 // low 3 bits for node token types 386 // high 4 bits typically hold number of bytes of the next two indexes 387 private static final int TEXT = 0; 388 private static final int ATTRIBUTE = 1; 389 private static final int BEGIN_ELEMENT = 2; 390 private static final int END_ELEMENT = 3; 391 private static final int COMMENT = 4; 392 private static final int NAMESPACE_DECLARATION = 5; 393 private static final int PROCESSING_INSTRUCTION = 6; 394 private static final int DOC_TYPE = 7; 395 396 private static final int BNUX_MAGIC = createMagicNumber(); // for sanity checks 397 private static final byte VERSION = 7; // version of bnux data format 398 private static final int DOCUMENT_HEADER_SIZE = 4 + 1; // in bytes 399 private static final int PAGE_HEADER_SIZE = 4 + 4 + 4 + 4 + 4; // in bytes 400 401 private static final boolean IS_EXTENDED_XOM = hasXOMExtensions(); 402 403 /** 404 * Marker for non-existant systemID or publicID in DocType. XML and the 405 * nu.xom.Verifier regard " " as an illegal ID, so it can never occur in 406 * practice. Thus we can use it as an unambigous marker identifying a 407 * serialized null value. 
408 */ 409 private static final String DOCTYPE_NULL_ID = " "; 410 411 private static final boolean DEBUG = false; // VM does dead code elimination 412 413 /** Reinitializes instance variables to virgin state. */ reset()414 private void reset() { 415 // deserialization state: 416 internedNames = null; 417 nodeBuilder = null; 418 factory = null; 419 420 // serialization state: 421 symbolTable = null; 422 page = null; 423 nodeTokens = null; 424 indexData = null; 425 isFirstPage = true; 426 out = null; 427 428 // better safe than sorry: 429 try { 430 if (decompressor != null) decompressor.end(); // free resources 431 } finally { 432 decompressor = null; 433 try { 434 if (compressor != null) compressor.end(); // free resources 435 } finally { 436 compressor = null; 437 } 438 } 439 } 440 441 /** 442 * Constructs an instance; An instance can be reused serially, but is not 443 * thread-safe, just like a {@link nu.xom.Builder}. 444 */ BinaryXMLCodec()445 public BinaryXMLCodec() { 446 } 447 448 /** 449 * Constructs a new streaming serializer that serializes bnux binary XML to 450 * the given underlying output stream, using the given ZLIB compression 451 * level. 452 * <p> 453 * An optional zlib compression level ranging from 0 (no ZLIB compression; 454 * best performance) to 1 (little ZLIB compression; reduced performance) to 455 * 9 (strongest ZLIB compression; worst performance) allows one to configure 456 * the CPU/memory consumption trade-off. 457 * <p> 458 * Unless there is a good reason to the contrary, you should always use 459 * level 0: the bnux algorithm typically already precompresses considerably. 
460 * 461 * @param out 462 * the underlying output stream to write to 463 * @param zlibCompressionLevel 464 * a number in the range 0..9 465 * @return a streaming serializer 466 */ createStreamingSerializer(OutputStream out, int zlibCompressionLevel)467 public StreamingSerializer createStreamingSerializer(OutputStream out, int zlibCompressionLevel) { 468 return new StreamingBinaryXMLSerializer(this, out, zlibCompressionLevel); 469 } 470 471 /** 472 * Returns whether or not the given input stream contains a bnux document. 473 * <p> 474 * A peek into the first 4 bytes is sufficient for unambigous detection, as 475 * standard textual XML cannot start with any arbitrary four byte 476 * combination. 477 * <p> 478 * Finally, the read bytes are put back onto the stream, so they can be 479 * reread as part of subsequent parsing attempts. Therefore, the input 480 * stream must support <code>input.mark()</code> and 481 * <code>input.reset()</code>. For example, a 482 * {@link java.io.BufferedInputStream} is a good choice. 483 * 484 * @param input 485 * the stream to read from 486 * @return true if the stream contains a bnux document 487 * @throws IllegalArgumentException 488 * if the underlying stream does not support 489 * <code>input.mark()</code> and <code>input.reset()</code>. 
490 * @throws IOException 491 * if the underlying input stream encounters an I/O error 492 * @see InputStream#mark(int) 493 */ isBnuxDocument(InputStream input)494 public boolean isBnuxDocument(InputStream input) throws IOException { 495 if (input == null) 496 throw new IllegalArgumentException("input stream must not be null"); 497 if (!input.markSupported()) 498 throw new IllegalArgumentException("markSupported() must be true"); 499 500 int magicBytes = 4; 501 input.mark(magicBytes); 502 try { 503 ArrayByteList list = new ArrayByteList(magicBytes); 504 if (!list.ensureRemaining(input, magicBytes)) { 505 return false; // stream contains less than 4 bytes 506 } 507 return list.getInt() == BNUX_MAGIC; 508 } finally { 509 input.reset(); // unread the header 510 } 511 } 512 513 /** 514 * Equivalent to 515 * <code>deserialize(new ByteArrayInputStream(input), new NodeFactory())</code>. 516 * 517 * @param bnuxDocument 518 * the bnux document to deserialize. 519 * @return the new XOM document obtained from deserialization. 520 * @throws BinaryParsingException 521 * if the bnux document is unreadable or corrupt for some reason 522 */ deserialize(byte[] bnuxDocument)523 public Document deserialize(byte[] bnuxDocument) throws BinaryParsingException { 524 if (bnuxDocument == null) 525 throw new IllegalArgumentException("bnuxDocument must not be null"); 526 527 try { 528 return deserialize(new ByteArrayInputStream(bnuxDocument), null); 529 } catch (IOException e) { 530 throw new BinaryParsingException(e); // can never happen 531 } 532 } 533 534 /** 535 * Returns the XOM document obtained by deserializing the next binary XML 536 * document from the given input stream. 537 * <p> 538 * If the document is in ZLIB compressed bnux format, it will be 539 * auto-detected and auto-decompressed as part of deserialization. 540 * <p> 541 * This method exactly mimics the NodeFactory based behaviour of the XOM 542 * {@link nu.xom.Builder}. 
A NodeFactory enables efficient filtering and 543 * can avoid the need to build a main memory tree, which is particularly 544 * useful for large documents. For example, streaming XQueries over binary 545 * XML can be expressed via the NodeFactory generated by a 546 * {@link nux.xom.xquery.StreamingPathFilter}. Binary XML files can be 547 * converted to and from standard textual XML files via a 548 * {@link nux.xom.pool.XOMUtil#getRedirectingNodeFactory(StreamingSerializer)}. For 549 * other example factories, see {@link nux.xom.pool.XOMUtil}. 550 * <p> 551 * Bnux is a self-framing data format: It knows where the end of a document 552 * occurs. An input stream can contain any number of independent documents, 553 * one after another. Thus, this method reads from the stream as many bytes 554 * as required for the current document, but no more than that. Unlike SAX 555 * XML parsers and unlike a {@link nu.xom.Builder}, it does not read until 556 * end-of-stream (EOS), and it does not auto-close the input stream. If this 557 * method returns successfully, the input stream has been positioned one 558 * byte past the current bnux document, ready to deserialize the following 559 * document, if any. It is the responsibility of the caller to ensure the 560 * input stream gets properly closed when deemed appropriate. 561 * 562 * @param input 563 * the stream to read and deserialize from 564 * @param factory 565 * the node factory to stream into. May be <code>null</code> in 566 * which case the default XOM NodeFactory is used, building the 567 * complete XML document tree. 568 * @return the new XOM document obtained from deserialization. 
569 * @throws BinaryParsingException 570 * if the bnux document is unreadable or corrupt for some reason 571 * @throws IOException 572 * if the underlying input stream encounters an I/O error 573 */ deserialize(InputStream input, NodeFactory factory)574 public Document deserialize(InputStream input, NodeFactory factory) 575 throws BinaryParsingException, IOException { 576 577 if (input == null) 578 throw new IllegalArgumentException("input stream must not be null"); 579 if (factory == null) factory = new NodeFactory(); 580 581 // read document header 582 if (page == null) page = new ArrayByteList(256); 583 page.clear(); 584 if (!page.ensureRemaining(input, 4 + 1 + 1 + 4)) 585 throw new BinaryParsingException("Missing bnux document header"); 586 587 int magic = page.getInt(); 588 if (magic != BNUX_MAGIC) throw new BinaryParsingException( 589 "Bnux magic number mismatch: " + magic + ", must be: " + BNUX_MAGIC); 590 591 int version = page.get(); 592 isCompressed = version < 0; 593 if (isCompressed) version = -version; 594 if (version != VERSION) throw new BinaryParsingException( 595 "Bnux data format version mismatch: " + version + ", must be: " + VERSION); 596 if (isCompressed) { 597 if (decompressor == null) decompressor = new Inflater(); 598 } 599 600 if (page.get() != DOC_TYPE) // surrogate hack to identify BEGIN_PAGE 601 throw new BinaryParsingException("Illegal bnux page header marker"); 602 603 // prepare 604 if (internedNames == null) internedNames = new LRUHashMap1(128); 605 if (nodeBuilder == null) nodeBuilder = new NodeBuilder(); 606 607 // parse node token data and packed indexes, building the XOM tree 608 this.factory = factory; 609 try { 610 return readDocument(page, input); 611 } catch (Throwable t) { 612 reset(); // better safe than sorry 613 if (t instanceof Error) { 614 throw (Error) t; 615 } else if (t instanceof BinaryParsingException) { 616 throw (BinaryParsingException) t; 617 } else if (t instanceof IOException) { 618 throw (IOException) t; 619 
} else { 620 throw new BinaryParsingException(t); 621 } 622 } finally { 623 this.symbols = null; // help gc 624 this.textCache = null; // help gc 625 this.nameCache = null; // help gc 626 this.factory = null; // help gc 627 // if (decompressor != null) decompressor.end(); 628 // decompressor = null; 629 } 630 } 631 632 /** 633 * Returns the bnux binary XML document obtained by serializing the given 634 * XOM document. 635 * <p> 636 * An optional zlib compression level ranging from 0 (no ZLIB compression; 637 * best performance) to 1 (little ZLIB compression; reduced performance) to 638 * 9 (strongest ZLIB compression; worst performance) allows one to configure 639 * the CPU/memory consumption trade-off. 640 * <p> 641 * Unless there is a good reason to the contrary, you should always use 642 * level 0: the bnux algorithm typically already precompresses considerably. 643 * 644 * @param document 645 * the XOM document to serialize 646 * @param zlibCompressionLevel 647 * a number in the range 0..9 648 * @return the bnux document obtained from serialization. 649 * @throws IllegalArgumentException 650 * if the compression level is out of range. 651 */ 652 public byte[] serialize(Document document, int zlibCompressionLevel) 653 throws IllegalArgumentException { 654 655 ByteArrayOutputStream result = new ByteArrayOutputStream(256); 656 try { 657 serialize(document, zlibCompressionLevel, result); 658 } catch (IOException e) { 659 throw new RuntimeException(e); // can never happen 660 } 661 return result.toByteArray(); 662 } 663 664 /** 665 * Serializes the given XOM document as a bnux binary XML document onto 666 * the given output stream. 667 * <p> 668 * An optional zlib compression level ranging from 0 (no ZLIB compression; 669 * best performance) to 1 (little ZLIB compression; reduced performance) to 670 * 9 (strongest ZLIB compression; worst performance) allows one to configure 671 * the CPU/memory consumption trade-off. 
672 * <p> 673 * Unless there is a good reason to the contrary, you should always use 674 * level 0: the bnux algorithm typically already precompresses considerably. 675 * 676 * @param document 677 * the XOM document to serialize 678 * @param zlibCompressionLevel 679 * a number in the range 0..9 680 * @param out 681 * the output stream to write to 682 * @throws IllegalArgumentException 683 * if the compression level is out of range. 684 * @throws IOException 685 * if the underlying output stream encounters an I/O error 686 */ 687 public void serialize(Document document, int zlibCompressionLevel, 688 OutputStream out) throws IllegalArgumentException, IOException { 689 690 if (document == null) 691 throw new IllegalArgumentException("XOM document must not be null"); 692 if (zlibCompressionLevel < 0 || zlibCompressionLevel > 9) 693 throw new IllegalArgumentException("Compression level must be 0..9"); 694 if (out == null) 695 throw new IllegalArgumentException("Output stream must not be null"); 696 697 try { 698 setOutputStream(zlibCompressionLevel, out); 699 writeDocument(document); // generate output 700 } catch (Throwable t) { 701 reset(); // better safe than sorry 702 if (t instanceof Error) { 703 throw (Error) t; 704 } else if (t instanceof RuntimeException) { 705 throw (RuntimeException) t; 706 } else if (t instanceof IOException) { 707 throw (IOException) t; 708 } else { 709 throw new RuntimeException(t); 710 } 711 } finally { 712 this.symbolTable = null; // help gc 713 this.out = null; 714 } 715 } 716 717 final void setOutputStream(int zlibCompressionLevel, OutputStream out) { 718 if (zlibCompressionLevel > 0) { 719 if (compressor == null || zlibCompressionLevel != compressionLevel) { 720 if (compressor != null) compressor.end(); // free resources 721 compressor = new Deflater(zlibCompressionLevel); 722 } 723 } 724 compressionLevel = zlibCompressionLevel; 725 this.out = out; 726 } 727 728 /** Prepares reading from the next page. 
     */
    private void readPage(ArrayByteList src, InputStream input)
            throws BinaryParsingException, IOException {

        if (DEBUG) System.err.println("reading page");
        // next 4 bytes hold the (signed) page body size
        if (!src.ensureRemaining(input, 4))
            throw new BinaryParsingException("Missing remaining bnux page size");
        int pageSize = src.getInt();
        if (src.remaining() != 0)
            throw new IllegalStateException("Internal codec bug");

        // a negative size marks the last page of the document
        boolean isLastPage = pageSize < 0;
        if (isLastPage) pageSize = -pageSize;
        // if (DEBUG) System.err.println("pageSize = " + pageSize);
        if (!isLastPage) {
            pageSize++; // read one byte past page, fetching PAGE_BEGIN marker
        }
        if (!src.ensureRemaining(input, pageSize))
            throw new BinaryParsingException("Missing remaining bnux page body");

        if (isCompressed) decompress(src);

        // page header: symbol table size, decoded and encoded byte counts
        int symbolTableSize = src.getInt();
        if (symbolTableSize < 0)
            throw new BinaryParsingException("Negative symbol table size");
        int decodedSize = src.getInt();
        if (decodedSize < 0)
            throw new BinaryParsingException("Negative decodedSize");
        int encodedSize = src.getInt();
        if (encodedSize < 0)
            throw new BinaryParsingException("Negative encodedSize");

        // read symbolTable
        this.symbols = null; // help gc
        if (decodedSize == encodedSize) { // safe trick, faster
            // Note that 7 bit ASCII is a proper subset of UTF-8
            this.symbols = src.getASCIIStrings(symbolTableSize);
        } else {
            this.symbols = src.getUTF8Strings(symbolTableSize);
        }
        // this.symbols = src.getUTF16Strings(symbolTableSize);
        if (DEBUG) System.err.println("read symbols = " + Arrays.asList(symbols));

        int magic = src.getInt();
        if (magic != BNUX_MAGIC) throw new BinaryParsingException(
            "Bnux magic number mismatch: " + magic + ", must be: " + BNUX_MAGIC);

        // reset caches in preparation for XML token decoding
        if (this.nameCache == null) {
            nameCache = new String[Math.min(64, symbolTableSize)];
        } else {
            for (int i=nameCache.length; --i >= 0; ) nameCache[i] = null;
        }

        if (factory.getClass() == NodeFactory.class) { // fast path
            if (this.textCache == null) {
                textCache = new Text[Math.min(256, symbolTableSize)];
            } else {
                for (int i=textCache.length; --i >= 0; ) textCache[i] = null;
            }
        }
        // this.nameCache = null; // help gc
        // this.nameCache = new String[Math.min(64, symbolTableSize)];
        // this.textCache = null; // help gc
        // if (factory.getClass() == NodeFactory.class) { // fast path
        //     this.textCache = new Text[Math.min(128, symbolTableSize)];
        // }
    }

    /** Inflates the ZLIB compressed page body held in src, replacing it in place. */
    private void decompress(ArrayByteList src) throws BinaryParsingException {
        if (nodeTokens == null) nodeTokens = new ArrayByteList();
        nodeTokens.clear();

        try {
            nodeTokens.add(decompressor, src);
        } catch (DataFormatException e) {
            // preserve the root cause for diagnosis
            String s = e.getMessage();
            throw new BinaryParsingException(
                s != null ? s : "Invalid ZLIB data format", e);
        }

        src.swap(nodeTokens); // replace src with nodeTokens
        nodeTokens.clear();
    }

    /** Parses document from encoded src buffer; tokens appear in document order.
     */
    private Document readDocument(ArrayByteList src, InputStream input)
            throws BinaryParsingException, IOException {

        if (DEBUG) System.err.println("reading document");
        readPage(src, input);
        Document doc = factory.startMakingDocument();
        // doc.setBaseURI(symbols[src.getInt()]);
        doc.setBaseURI(getInternedName(src.getInt()));
        boolean hasRootElement = false;
        int i = 0; // next insertion position for non-element children

        // add children of document, retaining the exact same order found in input
        while (src.remaining() > 0) {
            Nodes nodes;
            int type = src.get(); // look ahead
            if (DEBUG) System.err.println("reading type = " + toString(type));
            switch (type & 0x07) { // three low bits indicate node type
                case TEXT: {
                    throw new BinaryParsingException("Unreachable text");
                }
                case ATTRIBUTE: {
                    throw new BinaryParsingException("Unreachable attribute");
                }
                case BEGIN_ELEMENT: {
                    if (factory.getClass() == NodeFactory.class) { // fast path
                        Element root = readStartTag(src, type);
                        readElement(src, root, input); // reads entire subtree
                        nodes = new Nodes(root);
                    } else { // slow path
                        Element root = readStartTagF(src, type, true);
                        if (root == null) {
                            throw new NullPointerException("Factory failed to create root element.");
                        }
                        doc.setRootElement(root);
                        readElementF(src, root, input);
                        nodes = factory.finishMakingElement(root);
                    }
                    break;
                }
                case END_ELEMENT: {
                    throw new BinaryParsingException("Unreachable end of element");
                }
                case COMMENT: {
                    nodes = readCommentF(src, type);
                    break;
                }
                case NAMESPACE_DECLARATION: {
                    throw new BinaryParsingException("Unreachable namespace declaration");
                }
                case PROCESSING_INSTRUCTION: {
                    nodes = readProcessingInstructionF(src);
                    break;
                }
                case DOC_TYPE: {
                    nodes = readDocTypeF(src);
                    break;
                }
                default: {
                    throw new BinaryParsingException("Illegal node type code=" + type);
                }
            }

            // append nodes:
            for (int j=0; j < nodes.size(); j++) {
                Node node = nodes.get(j);
                if (node instanceof Element) { // replace fake root with real root
                    if (hasRootElement) {
                        throw new IllegalAddException(
                            "Factory returned multiple root elements");
                    }
                    doc.setRootElement((Element) node);
                    hasRootElement = true;
                } else {
                    doc.insertChild(node, i);
                }
                i++;
            }
        }

        if (!hasRootElement) throw new WellformednessException(
            "Factory attempted to remove the root element");
        factory.finishMakingDocument(doc);
        if (DEBUG) System.err.println("finished reading document");
        return doc;
    }

    /** Reads start tag and returns a corresponding empty element */
    private Element readStartTag(ArrayByteList src, int type) {
        String qname = readString(src, 4, type);
        String namespaceURI = readName(src, 6, type);
        return this.nodeBuilder.createElement(qname, namespaceURI);
        // return new Element(qname, namespaceURI);
    }

    /** Reads start tag via the user-supplied NodeFactory (slow path). */
    private Element readStartTagF(ArrayByteList src, int type, boolean isRoot) {
        String qname = readString(src, 4, type);
        String namespaceURI = readName(src, 6, type);
        return isRoot ?
            factory.makeRootElement(qname, namespaceURI) :
            factory.startMakingElement(qname, namespaceURI);
    }

    /** Iterative pull parser reading an entire element subtree.
*/ 917 private void readElement(ArrayByteList src, Element current, InputStream input) 918 throws BinaryParsingException, IOException { 919 920 while (true) { 921 Node node = null; 922 Element down = null; 923 int type = src.get(); // look ahead 924 // if (DEBUG) System.err.println("reading type = " + toString(type)); 925 switch (type & 0x07) { // three low bits indicate node type 926 case TEXT: { 927 node = readText(src, type); 928 break; 929 } 930 case ATTRIBUTE: { 931 readAttribute(src, current, type); 932 continue; 933 } 934 case BEGIN_ELEMENT: { 935 down = readStartTag(src, type); 936 node = down; 937 break; 938 } 939 case END_ELEMENT: { 940 current = (Element) current.getParent(); 941 if (current == null) return; // we're done with the root element 942 continue; 943 } 944 case COMMENT: { 945 node = readComment(src, type); 946 break; 947 } 948 case NAMESPACE_DECLARATION: { 949 readNamespaceDeclaration(src, current, type); 950 continue; 951 } 952 case PROCESSING_INSTRUCTION: { 953 node = readProcessingInstruction(src); 954 break; 955 } 956 case DOC_TYPE: { // surrogate hack to identify BEGIN_PAGE 957 readPage(src, input); 958 continue; 959 } 960 } 961 962 // if (DEBUG) System.err.println("read node=" + node.toXML()); 963 964 // assert node != null 965 if (IS_EXTENDED_XOM) { // xom-1.1 + patch 966 current.fastInsertChild(node, current.getChildCount()); 967 } else { 968 current.insertChild(node, current.getChildCount()); 969 } 970 971 if (down != null) current = down; // recurse down 972 } 973 } 974 975 /** Iterative pull parser reading an entire element subtree (using NodeFactory). 
     */
    private void readElementF(ArrayByteList src, Element current, InputStream input)
            throws BinaryParsingException, IOException {

        // final ArrayList stack = new ArrayList();
        final FastStack stack = new FastStack();
        stack.push(current);
        // false while inside an element the factory chose to skip (returned null)
        boolean addAttributesAndNamespaces = true;

        while (true) {
            Nodes nodes = null;
            int type = src.get(); // look ahead
            // if (DEBUG) System.err.println("reading type = " + toString(type));

            switch (type & 0x07) { // three low bits indicate node type
                case TEXT: {
                    nodes = readTextF(src, type);
                    break;
                }
                case ATTRIBUTE: {
                    Element elem = addAttributesAndNamespaces ? current : null;
                    nodes = readAttributeF(src, elem, type);
                    break;
                }
                case BEGIN_ELEMENT: {
                    Element elem = readStartTagF(src, type, false);
                    stack.push(elem); // even if it's null
                    if (elem != null) {
                        current.insertChild(elem, current.getChildCount());
                        current = elem; // recurse down
                    }
                    addAttributesAndNamespaces = elem != null;
                    continue;
                }
                case END_ELEMENT: {
                    Element elem = stack.pop();
                    if (elem == null) {
                        continue; // skip element
                    }
                    ParentNode parent = elem.getParent();
                    if (parent == null) throwTamperedWithParent();
                    if (parent instanceof Document) {
                        return; // we're done with the root element
                    }

                    current = (Element) parent; // recurse up
                    nodes = factory.finishMakingElement(elem);

                    if (nodes.size()==1 && nodes.get(0)==elem) { // same node? (common case)
                        continue; // optimization: no need to remove and then readd same element
                    }

                    if (current.getChildCount()-1 < 0) throwTamperedWithParent();
                    current.removeChild(current.getChildCount()-1);
                    break;
                }
                case COMMENT: {
                    nodes = readCommentF(src, type);
                    break;
                }
                case NAMESPACE_DECLARATION: {
                    Element elem = addAttributesAndNamespaces ? current : null;
                    readNamespaceDeclaration(src, elem, type);
                    continue;
                }
                case PROCESSING_INSTRUCTION: {
                    nodes = readProcessingInstructionF(src);
                    break;
                }
                case DOC_TYPE: { // surrogate hack for BEGIN_PAGE
                    readPage(src, input);
                    continue;
                }
            }

            appendNodes(current, nodes);
        }
    }

    /** Attaches factory-produced nodes to elem (attributes vs. children). */
    private static void appendNodes(Element elem, Nodes nodes) {
        if (nodes != null) {
            int size = nodes.size();
            for (int i=0; i < size; i++) {
                Node node = nodes.get(i);
                if (node instanceof Attribute) {
                    elem.addAttribute((Attribute) node);
                } else {
                    elem.insertChild(node, elem.getChildCount());
                }
            }
        }
    }

    private static void throwTamperedWithParent() {
        throw new XMLException("Factory has tampered with a parent pointer " +
            "of ancestor-or-self in finishMakingElement()");
    }

    private void readAttribute(ArrayByteList src, Element dst, int type) throws BinaryParsingException {
        String qname = readString(src, 4, type);
        String namespaceURI = readName(src, 6, type);
        String value = readString(src, 4, src.get());
        Attribute.Type attrType = Util.getAttributeType(src.get());
        Attribute attr = this.nodeBuilder.createAttribute(qname, namespaceURI, value, attrType);
        // Attribute attr = new Attribute(qname, namespaceURI, value, attrType);
        dst.addAttribute(attr);
    }

    private Nodes readAttributeF(ArrayByteList src, Element dst, int type) throws BinaryParsingException {
        // always consume the attribute tokens, even when the element is skipped
        String qname = readString(src, 4, type);
        String namespaceURI = readName(src, 6, type);
        String value = readString(src, 4, src.get());
        Attribute.Type attrType = Util.getAttributeType(src.get());
        if (dst == null) return null; // NONE;
        return factory.makeAttribute(qname, namespaceURI, value, attrType);
    }

    private Comment readComment(ArrayByteList src, int type) {
        return new Comment(readString(src, 4, type));
    }

    private Nodes readCommentF(ArrayByteList src, int type) {
        return factory.makeComment(readString(src, 4, type));
    }

    private void readNamespaceDeclaration(ArrayByteList src, Element dst, int type) {
        String prefix = readString(src, 4, type);
        String uri = readName(src, 6, type);
        if (dst != null) dst.addNamespaceDeclaration(prefix, uri);
    }

    private ProcessingInstruction readProcessingInstruction(ArrayByteList src) {
        int type = src.get(src.position() - 1); // reread the look-ahead type byte
        String target = readString(src, 4, type);
        String value = readString(src, 6, type);
        return new ProcessingInstruction(target, value);
    }

    private Nodes readProcessingInstructionF(ArrayByteList src) {
        int type = src.get(src.position() - 1); // reread the look-ahead type byte
        String target = readString(src, 4, type);
        String value = readString(src, 6, type);
        return factory.makeProcessingInstruction(target, value);
    }

    /** Does not pack indexes of doctype (infrequent anyway) */
    private Nodes readDocTypeF(ArrayByteList src) {
        String rootElementName = symbols[src.getInt()];
        String publicID = symbols[src.getInt()];
        if (DOCTYPE_NULL_ID.equals(publicID)) publicID = null;
        String systemID = symbols[src.getInt()];
        if (DOCTYPE_NULL_ID.equals(systemID)) systemID = null;
        String internalDTDSubset = symbols[src.getInt()];
        if (internalDTDSubset.length() == 0) internalDTDSubset = null;

        Nodes nodes = factory.makeDocType(rootElementName, publicID, systemID);
        for (int i=0; i < nodes.size(); i++) {
            if (nodes.get(i) instanceof DocType) {
                DocType docType = (DocType) nodes.get(i);
                if (docType.getInternalDTDSubset().length() == 0) {
                    try {
                        docType.setInternalDTDSubset(internalDTDSubset);
                    } catch (IllegalAccessError e) {
                        ; // ignore; setInternalDTDSubset() is private in xom < 1.1
                    }
                }
            }
        }
        return nodes;
    }

    // try to avoid XML reverification by caching repetitive Texts
    private Text readText(ArrayByteList src, int type) {
        int i = readSymbol(src, 4, type);
        Text text;
        if (i < textCache.length) {
            text = textCache[i];
            if (text != null) return new Text(text); // cache hit: copy, skip reverification
        }
        text = new Text(symbols[i]);
        if (i < textCache.length) textCache[i] = text;
        return text;
    }

    private Nodes readTextF(ArrayByteList src, int type) {
        return factory.makeText(readString(src, 4, type));
    }

    /** Reads string via packed index from symbolTable; "" if no index is present. */
    private String readString(ArrayByteList src, int shift, int type) {
        int i = readSymbol(src, shift, type);
        if (i < 0) return "";
        return symbols[i];
    }

    /** Reads and unpacks a symbol table index; -1 if no index is present. */
    private static int readSymbol(ArrayByteList src, int shift, int type) {
        // assert shift == 4 || shift == 6
        if (Util.isInlinedIndex(type)) {
            if (shift == 6) return -1;
            return Util.getInlinedIndex(type);
        }

        switch ((type >>> shift) & 0x03) { // look at two bits indicating index length
            case 0 : return Util.getUnsignedByte(src.get());
            case 1 : return Util.getUnsignedShort(src.getShort());
            case 2 : return -1; // no index stored
            default: return src.getInt();
        }
    }

    /** Like readString, but additionally memoizes interned names in nameCache. */
    private String readName(ArrayByteList src, int shift, int type) {
        int i = readSymbol(src, shift, type);
        if (i < 0) return "";
        if (i < nameCache.length) {
            String name = nameCache[i];
            if (name == null) { // cache miss
                name = getInternedName(i);
                nameCache[i] = name;
            }
            return name;
        }
        return symbols[i];
    }

    /** Returns a canonical representation of symbols[i] via the internedNames map. */
    private String getInternedName(int i) {
        String name = symbols[i];
        if (name.length() == 0) {
            name = ""; // canonical empty string
        } else {
            name = (String) internedNames.get(name);
            if (name == null) { // first occurrence; register as canonical
                name = symbols[i];
                internedNames.put(name, name);
            }
        }
        return name;
    }

    /** Writes nodeTokens, indexData, symbolTable to output stream */
    private void writePage(boolean isLastPage) throws IOException {
        if (DEBUG) System.err.println("writing page");
        Entry[] entries = symbolTable.getEntries();
        int numChars = symbolTable.numCharacters();

        // reorder entries and update indexData accordingly.
        packSort(entries, indexData);
        // if (DEBUG) printStatistics(entries);

        // add header to page
        page.ensureCapacity(page.size() + 1 + 4 + 4 + 4 + 4 + 4 + numChars*4 + entries.length +
            nodeTokens.size() + indexData.size() + numChars/100); // an educated guess
        page.add((byte) DOC_TYPE); // BEGIN_PAGE marker surrogate
        page.addInt(0); // pageSize dummy placeholder
        int pageOffset = page.size();
        page.addInt(entries.length);
        page.addInt(numChars + entries.length); // decodedSize (+zero terminator)
        page.addInt(0); // encodedSize dummy placeholder
        int encodedOffset = page.size();

        // add symbolTable to page
        encodeSymbols(entries, page); // assert: no need to expand underlying array
        int encodedSize = page.size() - encodedOffset;
        page.setInt(encodedOffset-4, encodedSize); // replace dummy placeholder
        entries = null; // help gc

        // add node tokens and packed symbol indexes to page
        page.addInt(BNUX_MAGIC);
        encodeTokens(nodeTokens, indexData.asArray(), page);

        int pageSize;
        nodeTokens.clear();
        if (compressionLevel > 0) { // compress page body
            page.position(pageOffset);
            nodeTokens.add(compressor, page);
            page.remove(pageOffset, page.size());
            pageSize = nodeTokens.size();
        } else {
            pageSize = page.size() - pageOffset;
        }
        if (isLastPage) pageSize = -pageSize; // negative size marks the last page
        page.setInt(pageOffset-4, pageSize); // replace pageSize dummy placeholder

        // having filled the buffers, flush them onto underlying output stream
        page.write(out);
        nodeTokens.write(out);

        // reset
        if (!isLastPage) symbolTable.clear();
        page.clear();
        nodeTokens.clear();
        indexData.clear();
        if (DEBUG) System.err.println("finished writing page");
    }

    /** Serializes the document's children in order, then flushes the final page. */
    private void writeDocument(Document doc) throws IOException {
        if (DEBUG) System.err.println("writing document");
        writeXMLDeclaration(doc.getBaseURI());
        for (int i = 0; i < doc.getChildCount(); i++) {
            Node node = doc.getChild(i);
            if (node instanceof Element) {
                writeElement((Element) node);
            } else if (node instanceof Comment) {
                writeComment((Comment) node);
            } else if (node instanceof ProcessingInstruction) {
                writeProcessingInstruction((ProcessingInstruction) node);
            } else if (node instanceof DocType) {
                writeDocType((DocType) node);
            } else {
                throw new IllegalAddException("Cannot write node type: " + node);
            }
        }
        writeEndDocument();
        if (DEBUG) System.err.println("finished writing document");
    }

    /** Sets up buffers and writes the bnux document header. */
    final void writeXMLDeclaration(String baseURI) {
        if (baseURI == null) baseURI = "";

        // setup
        symbolTable = new SymbolTable();
        if (nodeTokens == null) nodeTokens = new ArrayByteList();
        nodeTokens.clear();
        if (indexData == null) indexData = new ArrayIntList();
        indexData.clear();

        // write bnux document header
        if (page == null) page = new ArrayByteList(256);
        page.clear();
        page.ensureCapacity(DOCUMENT_HEADER_SIZE + PAGE_HEADER_SIZE + 1);
        page.addInt(BNUX_MAGIC);
        int version = VERSION;
        if (compressionLevel > 0) version = -version; // negative version marks compression
        page.add((byte)version);

        isFirstPage = true;
        writeIndex(baseURI);
    }

    final void writeEndDocument() throws IOException {
        flush(true);
    }

    // assertion: must not be called with !isLastPage
    // if we're not at a safe point, i.e. at nesting depth == 0
    final void flush(boolean isLastPage) throws IOException {
        try {
            if (nodeTokens.size() > 0) { // anything remaining to be written?
                writePage(isLastPage);
            }
            out.flush();
        } finally {
            if (isLastPage) {
                this.symbolTable = null; // help gc
                this.out = null;
            }
            nodeTokens.clear();
        }
    }

    /** Encodes a node into the intermediate unpacked binary form */
    private void writeChild(Node node) throws IOException {
        if (node instanceof Element) {
            writeElement((Element) node);
        } else if (node instanceof Text) {
            writeText((Text) node);
        } else if (node instanceof Comment) {
            writeComment((Comment) node);
        } else if (node instanceof ProcessingInstruction) {
            writeProcessingInstruction((ProcessingInstruction) node);
        } else {
            throw new IllegalAddException("Cannot write node type: " + node);
        }
    }

    final void writeElement(Element elem) throws IOException {
        writeStartTag(elem);

        for (int i = 0; i < elem.getChildCount(); i++) {
            writeChild(elem.getChild(i));
        }

        writeEndTag();
    }

    final void writeStartTag(Element elem) {
        writeIndex(elem.getNamespacePrefix(), elem.getLocalName());

        int type = BEGIN_ELEMENT;
        if (elem.getNamespaceURI().length() == 0) {
            type = Util.noNamespace(type);
        } else {
            writeIndex(elem.getNamespaceURI());
        }
        nodeTokens.add((byte)type);

        for (int i = 0; i < elem.getAttributeCount(); i++) {
            writeAttribute(elem.getAttribute(i));
        }

        if (IS_EXTENDED_XOM) {
            writeNamespaceDeclarationsFast(elem);
        } else {
            writeNamespaceDeclarations(elem);
        }
    }

    final void writeEndTag() throws IOException {
        // element boundary is a safe point: flush a page once buffers are full
        if (nodeTokens.size() + indexData.size() + symbolTable.numCharacters()
                + symbolTable.size() >= MAX_PAGE_CAPACITY) {
            writePage(false); // write nodeTokens, indexData, symbolTable to output stream
        }

        nodeTokens.add((byte)END_ELEMENT);
    }

    private void writeAttribute(Attribute attr) {
        writeIndex(attr.getNamespacePrefix(), attr.getLocalName());

        int type = ATTRIBUTE;
        if (attr.getNamespaceURI().length() == 0) {
            type = Util.noNamespace(type);
        } else {
            writeIndex(attr.getNamespaceURI());
        }

        writeIndex(attr.getValue());
        nodeTokens.add((byte)type);
        nodeTokens.add(Util.getAttributeTypeCode(attr));
    }

    final void writeComment(Comment comment) {
        nodeTokens.add((byte)COMMENT);
        writeIndex(comment.getValue());
    }

    /** Does not pack indexes of doctype (infrequent anyway) */
    final void writeDocType(DocType docType) {
        nodeTokens.add((byte)DOC_TYPE);
        writeIndex(docType.getRootElementName());
        writeIndex(docType.getPublicID() == null ? DOCTYPE_NULL_ID : docType.getPublicID());
        writeIndex(docType.getSystemID() == null ? DOCTYPE_NULL_ID : docType.getSystemID());
        writeIndex(docType.getInternalDTDSubset() == null ?
            "" : docType.getInternalDTDSubset());
    }

    // requires xom-1.1 + patches
    private void writeNamespaceDeclarationsFast(Element elem) {
        // decls is a flattened sequence of (prefix, URI) pairs
        String[] decls = elem.getAdditionalNamespaceDeclarations();
        for (int i=0; i < decls.length; ) {
            nodeTokens.add((byte)NAMESPACE_DECLARATION);
            writeIndex(decls[i++]); // prefix
            writeIndex(decls[i++]); // URI
        }
    }

    private void writeNamespaceDeclarations(Element elem) {
        int count = elem.getNamespaceDeclarationCount();
        if (count == 1)
            return; // elem.getNamespaceURI() has already been written

        for (int i = 0; i < count; i++) {
            String prefix = elem.getNamespacePrefix(i);
            String uri = elem.getNamespaceURI(prefix);
            // skip the element's own namespace; writeStartTag already emitted it
            if (prefix.equals(elem.getNamespacePrefix()) && uri.equals(elem.getNamespaceURI())) {
                // if (DEBUG) System.err.println("********** NAMESPACE IGNORED ON WRITE ***************\n");
                continue;
            }
            nodeTokens.add((byte)NAMESPACE_DECLARATION);
            writeIndex(prefix);
            writeIndex(uri);
        }
    }

    final void writeProcessingInstruction(ProcessingInstruction pi) {
        nodeTokens.add((byte)PROCESSING_INSTRUCTION);
        writeIndex(pi.getTarget());
        writeIndex(pi.getValue());
    }

    final void writeText(Text text) {
        nodeTokens.add((byte)TEXT);
        writeIndex(text.getValue());
    }

    /** Puts symbol into symbolTable and appends the string's index to indexData */
    private final void writeIndex(String symbol) {
        writeIndex("", symbol);
    }

    /** Puts symbol into symbolTable and appends the string's index to indexData */
    private final void writeIndex(String prefix, String localName) {
        int index = symbolTable.addSymbol(prefix, localName);
        indexData.add(index);
    }

    /** Converts strings of symbolTable into UTF-8 bytes */
    private void encodeSymbols(Entry[] entries, ArrayByteList dst) {
        /*
         * As an optimization, one could use dst.ensureCapacity(dst.size + 4 *
         * sum(entries.key.length) + entries.length) then use plain array
         * accesses via dst.addUTF8Entries(entries) instead of many small
         * dst.add(byte) calls. Update: I benchmarked various variants of this
         * idea; none of them turned out to be worthwhile, so, for the time
         * being, we'll keep the simple version below.
         */
        // if (DEBUG) System.err.println("encoding symbols = " + toString(entries));
        int len = entries.length;
        for (int i=0; i < len; i++) {
            Entry entry = entries[i];
            dst.addUTF8String(entry.getKey1(), entry.getKey2());
            //dst.addUTF16String(entry.getKey1(), entry.getKey2());
        }
    }

    /**
     * Sorts entries descending by symbol frequency (# of occurrences) and
     * updates indexData accordingly. This allows to compress frequent 4 byte
     * indexes into a single byte. FAQ: this sort is *not* the bottleneck.
     */
    private void packSort(Entry[] entries, ArrayIntList indexData) {
        if (!DEBUG && entries.length <= 256) { // 0, 1, ... , 255
            return; // no need to sort indexes - all fit into one unsigned byte anyway
        }

        // Swap entries with frequency == 1 to end of array (typically Text nodes).
        // There's no need to sort those with O(N log N).
        int head = entries.length;
        for (int i=entries.length; --i >= 0; ) {
            Entry e = entries[i];
            if (e.getFrequency() == 1) {
                head--;
                entries[i] = entries[head];
                entries[head] = e;
            }
        }
        // if (DEBUG) System.err.println("len=" + entries.length + ", #f>1=" + (100.0f * head / entries.length));

        // sort remaining entries descending by frequency.
        Arrays.sort(entries, 0, head,
            new Comparator() {
                public final int compare(Object e1, Object e2) {
                    int f1 = ((Entry) e1).getFrequency();
                    int f2 = ((Entry) e2).getFrequency();
                    return f2 - f1; // descending (frequencies are non-negative counts)
                }
            }
        );

        // reorder indexData with sorted indexes
        // since sort has moved entries[indexData[k]] to entries[i]
        int[] indexes = new int[entries.length];
        for (int i=entries.length; --i >= 0; ) {
            indexes[entries[i].getIndex()] = i;
        }

        int[] ix = indexData.asArray();
        for (int i=indexData.size(); --i >= 0; ) {
            ix[i] = indexes[ix[i]];
        }
        // post-condition: entries[indexData[k]] corresponds to entries[i]
    }

    /**
     * Writes nodeTokens in document order; in the process stitches in packed
     * indexes referring to symbols in the symbolTable.
     */
    private void encodeTokens(ArrayByteList tokenList, int[] indexes, ArrayByteList dst) {
        byte[] tokens = tokenList.asArray();
        int size = tokenList.size();
        int i = 0; // next symbol index to consume
        int j = 0; // next token byte to consume
        if (isFirstPage) dst.addInt(indexes[i++]); // document baseURI
        isFirstPage = false;

        while (j < size) {
            int type = tokens[j++];
            dst.add((byte)type);

            // if (DEBUG) System.err.println("encoding type = " + toString(type));
            switch (type & 0x07) {
                case TEXT: {
                    Util.packOneIndex(dst, indexes[i++], type); // value
                    break;
                }
                case ATTRIBUTE: {
                    if (Util.hasNoNamespace(type)) { // qname
                        Util.packOneIndex(dst, indexes[i++], type);
                    } else { // qname, URI
                        Util.packTwoIndexes(dst, indexes[i++], indexes[i++], type);
                    }
                    dst.add((byte)0);
                    Util.packOneIndex(dst, indexes[i++], 0); // value
                    dst.add(tokens[j++]); // attrType
                    break;
                }
                case BEGIN_ELEMENT: {
                    if (Util.hasNoNamespace(type)) {
                        Util.packOneIndex(dst, indexes[i++], type); // qname
                    } else {
                        Util.packTwoIndexes(dst, indexes[i++], indexes[i++], type); // qname, URI
                    }
                    break;
                }
                case END_ELEMENT: {
                    break; // nothing to do
                }
                case COMMENT: {
                    Util.packOneIndex(dst, indexes[i++], type); // value
                    break;
                }
                case NAMESPACE_DECLARATION: { // prefix, URI
                    Util.packTwoIndexes(dst, indexes[i++], indexes[i++], type);
                    break;
                }
                case PROCESSING_INSTRUCTION: { // target, value
                    Util.packTwoIndexes(dst, indexes[i++], indexes[i++], type);
                    break;
                }
                case DOC_TYPE: { // infrequent; no need to pack indexes
                    dst.addInt(indexes[i++]); // rootElementName
                    dst.addInt(indexes[i++]); // publicID
                    dst.addInt(indexes[i++]); // systemID
                    dst.addInt(indexes[i++]); // internalDTDSubset
                    break;
                }
                default: {
                    throw new IllegalArgumentException("illegal node type");
                }
            }
        }
    }

    private static int createMagicNumber() {
        // 0xE0, 0x01, 0xDF, 0xFE = -32, 1, -33, -2 = -536748034
        // Thanks to Paul.Sandoz@Sun.COM
        ArrayByteList magic = new ArrayByteList(4);
        magic.add((byte)0xE0);
        magic.add((byte)0x01);
        magic.add((byte)0xDF);
        magic.add((byte)0xFE);
        return magic.getInt();
    }

    /** does xom.jar contain our performance extensions?
*/
private static boolean hasXOMExtensions() {
    // Probe by reflection for methods that only exist in a patched xom.jar;
    // any failure (missing method, linkage error, security) means "no".
    try {
        ParentNode.class.getMethod("fastInsertChild", new Class[] {Node.class, Integer.TYPE});
        Element.class.getMethod("getAdditionalNamespaceDeclarations", null);
        return true;
    } catch (Throwable t) {
        return false;
    }
}

/** Returns a human readable name for the given node type code. */
private static String toString(int type) { // DEBUG only
    switch (type & 0x07) {
        case TEXT: return "TEXT";
        case ATTRIBUTE: return "ATTRIBUTE";
        case BEGIN_ELEMENT: return "BEGIN_ELEMENT";
        case END_ELEMENT: return "END_ELEMENT";
        case COMMENT: return "COMMENT";
        case NAMESPACE_DECLARATION: return "NAMESPACE_DECLARATION";
        case PROCESSING_INSTRUCTION: return "PROCESSING_INSTRUCTION";
        case DOC_TYPE: return "DOC_TYPE";
        default: {
            throw new IllegalArgumentException(
                "Illegal node type code=" + (type & 0x07));
        }
    }
}

/** Returns the qualified names of all entries, for diagnostics. */
private static String toString(Entry[] entries) { // DEBUG only
    ArrayList list = new ArrayList();
    for (int i=0; i < entries.length; i++) {
        list.add(entries[i].getQualifiedName());
    }
    return list.toString();
}


///////////////////////////////////////////////////////////////////////////////
// Nested classes:
///////////////////////////////////////////////////////////////////////////////

/**
 * Table of unique symbols for tokenization on XML serialization. This is a
 * classic text book hash algorithm, adapted to meet our specific
 * performance needs. It's close to the one used by the JDK HashMap.
 * Maintains a map of (String, String) ==> (index, frequency) associations.
 */
private static final class SymbolTable { // not a public class!

    private static final float LOAD_FACTOR = 0.75f;
    private static final int INITIAL_CAPACITY = 16;
    private Entry[] entries = new Entry[INITIAL_CAPACITY]; // hash slots; length is always a power of 2
    private int threshold = (int) (INITIAL_CAPACITY * LOAD_FACTOR); // resize when size reaches this
    private int size = 0;     // number of distinct symbols; also the next index to assign
    private int numChars = 0; // running total of symbol string lengths (incl. ':' separators)

    /** Constructs and returns a table with default parameters. */
    public SymbolTable() {
    }

    /** Removes all entries from this table, retaining the current capacity. */
    public void clear() {
        size = 0;
        numChars = 0;
        Entry[] src = entries;
        for (int i=src.length; --i >= 0; ) src[i] = null;
    }

    /** Returns the total number of characters occupied by all symbol strings. */
    public int numCharacters() {
        return numChars;
    }

    /** Returns the number of symbols. */
    public int size() {
        return size;
    }

    /**
     * Adds the given symbol to the table if not already present. Otherwise
     * increments its frequency counter.
     *
     * A symbol is structured like a lexical XML QName.
     * symbol: key1 + ":" + key2 (if key1 is non-empty string)
     * symbol: key2 (if key1 is empty string)
     *
     * @return a sequence number N >= 0 indicating that the symbol was added
     *         to this table as the N-th entry, in order; for an existing
     *         symbol, the sequence number it was originally assigned.
     */
    public int addSymbol(String key1, String key2) {
        // assert: key1 and key2 are non-null
        int hash = hash(key1, key2);
        int i = hash & (entries.length - 1); // fast modulo; requires power-of-2 capacity
        Entry entry = findEntry(key1, key2, entries[i], hash);
        if (entry != null) {
            entry.frequency++;
            return entry.index;
        }

        // not found; add entry for key --> (index=size, freq=1) mapping
        // new entry is inserted at head of chain
        // if (DEBUG) checkNULChar(key1);
        // if (DEBUG) checkNULChar(key2);
        numChars += key1.length() + key2.length();
        if (key1.length() != 0) numChars++; // account for the ':' separator
        entries[i] = new Entry(key1, key2, hash, entries[i], size);
        if (size >= threshold) rehash();
        return size++;
    }

    /** Scans the collision chain starting at cursor for the given symbol; null if absent. */
    private static Entry findEntry(String key1, String key2, Entry cursor, int hash) {
        while (cursor != null) { // scan collision chain
            if (hash == cursor.hash && eq(key2, cursor.key2) && eq(key1, cursor.key1)) {
                cursor.key1 = key1; // speeds up future lookups: equals() vs. ==
                cursor.key2 = key2; // speeds up future lookups: equals() vs. ==
                return cursor;
            }
            cursor = cursor.next;
        }
        return null;
    }

    /**
     * Expands the capacity of this table, rehashing all entries into
     * corresponding new slots. Note: head-insertion reverses the relative
     * order within a chain, which is harmless since chains are unordered.
     */
    private void rehash() {
        Entry[] src = entries;
        int capacity = 2 * src.length; // stays a power of 2
        Entry[] dst = new Entry[capacity];

        for (int i = src.length; --i >= 0; ) {
            Entry e = src[i];
            while (e != null) { // walk collision chain
                int j = e.hash & (capacity - 1);
                Entry next = e.next;
                e.next = dst[j];
                dst[j] = e; // insert e at head of chain
                e = next;
            }
        }
        entries = dst;
        threshold = (int) (capacity * LOAD_FACTOR);
    }

    /**
     * Returns all table entries, sorted ascending by entry.index. The
     * result can subsequently be used to sort by symbol frequency, or
     * similar. Much faster than an entrySet().iterator().next() loop would
     * be. Works because the indexes 0..size-1 form a permutation.
     */
    public Entry[] getEntries() {
        Entry[] dst = new Entry[size];
        Entry[] src = entries;
        for (int i = src.length; --i >= 0; ) {
            Entry e = src[i];
            while (e != null) { // walk collision chain
                dst[e.index] = e;
                e = e.next;
            }
        }
        return dst;
    }

    /** Combines both key hashes; result is independent of how "" is passed
     *  since "".hashCode() == 0 makes the XOR a no-op (the reference
     *  comparison below is merely a fast path that skips the call). */
    private static int hash(String key1, String key2) {
        int h = key2.hashCode();
        if (key1 != "") h = key1.hashCode() ^ h;
        return auxiliaryHash(h);
        // return auxiliaryHash(key1.hashCode() ^ key2.hashCode());
    }

    /**
     * Auxiliary hash function that defends against poor base hash
     * functions. Ensures more uniform hash distribution, hence reducing the
     * probability of pathologically long collision chains, in particular
     * for short key symbols that are quite similar to each other, or XML
     * boundary whitespace (worst case scenario).
     */
    private static int auxiliaryHash(int h) {
        h += ~(h << 9);
        h ^= (h >>> 14);
        h += (h << 4);
        h ^= (h >>> 10);
        return h;
    }

    /** Equality with an identity fast path (works with findEntry's interning). */
    private static boolean eq(String x, String y) {
        return x == y || x.equals(y);
    }

    /** Sanity check; Unnecessary since NULs have already been checked by nu.xom.Verifier. */
    private static void checkNULChar(String key) {
        int i = key.indexOf((char)0);
        if (i >= 0) {
            throw new IllegalArgumentException(
                "Symbol must not contain C0 control character NUL (char 0x00) [index:" + i
                + " within '" + key + "']");
        }
    }

}

/**
 * A value in the SymbolTable.
1822 */ 1823 private static final class Entry { 1824 1825 String key1; // prefix or "" 1826 String key2; // localName or Text or Comment or similar 1827 final int hash; // cache for symbol's hash code 1828 final int index; // index to correlate Entry with indexList on XML serialization 1829 int frequency = 1;// number of occurances of symbol within current XML document 1830 Entry next; // successor in collision chain, mapping to the same hash slot 1831 1832 public Entry(String key1, String key2, int hash, Entry next, int index) { 1833 this.key1 = key1; 1834 this.key2 = key2; 1835 this.hash = hash; 1836 this.next = next; 1837 this.index = index; 1838 } 1839 1840 public String getKey1() { return key1; } 1841 public String getKey2() { return key2; } 1842 public int getIndex() { return index; } 1843 public int getFrequency() { return frequency; } 1844 1845 public String getQualifiedName() { // DEBUG only 1846 if (key1.length() == 0) return key2; 1847 return key1 + ':' + key2; 1848 } 1849 public String toString() { // DEBUG only 1850 return "[key1=" + key1 + ", key2=" + key2 + ", freq=" + frequency + "]"; 1851 } 1852 1853 } 1854 1855 1856 /////////////////////////////////////////////////////////////////////////////// 1857 // Nested classes: 1858 /////////////////////////////////////////////////////////////////////////////// 1859 1860 /** 1861 * Fast replacement for ArrayList and java.util.Stack. Possibly premature 1862 * and unnecessary? 
*/
private static final class FastStack {

    private Element[] elements = new Element[10]; // backing array, grown on demand
    private int size = 0; // number of elements currently on the stack

    /** Removes and returns the most recently pushed element. */
    public Element pop() {
        size--;
        Element top = elements[size];
        elements[size] = null; // clear slot so gc can reclaim the element
        return top;
    }

    /** Pushes the given element onto the top of the stack. */
    public void push(Element elem) {
        ensureCapacity(size + 1);
        elements[size++] = elem;
    }

    /** Grows the backing array, if necessary, to hold at least minCapacity elements. */
    private void ensureCapacity(int minCapacity) {
        if (minCapacity > elements.length) {
            // at least double (plus one) to keep pushes amortized O(1)
            Element[] grown = new Element[Math.max(minCapacity, 2 * elements.length + 1)];
            System.arraycopy(elements, 0, grown, 0, size);
            elements = grown;
        }
    }

}

}