1 /*
2  * reserved comment block
3  * DO NOT REMOVE OR ALTER!
4  */
5 /*
6  * Copyright 1999-2004 The Apache Software Foundation.
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *     http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  */
20 /*
21  * $Id: FastStringBuffer.java,v 1.2.4.1 2005/09/15 08:15:44 suresh_emailid Exp $
22  */
23 package com.sun.org.apache.xml.internal.utils;
24 
25 /**
26  * Bare-bones, unsafe, fast string buffer. No thread-safety, no
27  * parameter range checking, exposed fields. Note that in typical
28  * applications, thread-safety of a StringBuffer is a somewhat
29  * dubious concept in any case.
30  * <p>
31  * Note that Stree and DTM used a single FastStringBuffer as a string pool,
32  * by recording start and length indices within this single buffer. This
33  * minimizes heap overhead, but of course requires more work when retrieving
34  * the data.
35  * <p>
36  * FastStringBuffer operates as a "chunked buffer". Doing so
37  * reduces the need to recopy existing information when an append
38  * exceeds the space available; we just allocate another chunk and
39  * flow across to it. (The array of chunks may need to grow,
40  * admittedly, but that's a much smaller object.) Some excess
41  * recopying may arise when we extract Strings which cross chunk
42  * boundaries; larger chunks make that less frequent.
43  * <p>
44  * The size values are parameterized, to allow tuning this code. In
45  * theory, Result Tree Fragments might want to be tuned differently
46  * from the main document's text.
47  * <p>
48  * %REVIEW% An experiment in self-tuning is
49  * included in the code (using nested FastStringBuffers to achieve
50  * variation in chunk sizes), but this implementation has proven to
51  * be problematic when data may be being copied from the FSB into itself.
52  * We should either re-architect that to make this safe (if possible)
53  * or remove that code and clean up for performance/maintainability reasons.
54  * <p>
55  */
56 public class FastStringBuffer
57 {
58   // If nonzero, forces the inial chunk size.
59   /**/static final int DEBUG_FORCE_INIT_BITS=0;
60 
61         // %BUG% %REVIEW% *****PROBLEM SUSPECTED: If data from an FSB is being copied
62         // back into the same FSB (variable set from previous variable, for example)
63         // and blocksize changes in mid-copy... there's risk of severe malfunction in
64         // the read process, due to how the resizing code re-jiggers storage. Arggh.
65         // If we want to retain the variable-size-block feature, we need to reconsider
66         // that issue. For now, I have forced us into fixed-size mode.
67     static final boolean DEBUG_FORCE_FIXED_CHUNKSIZE=true;
68 
69         /** Manifest constant: Suppress leading whitespace.
70          * This should be used when normalize-to-SAX is called for the first chunk of a
71          * multi-chunk output, or one following unsuppressed whitespace in a previous
72          * chunk.
73          * @see #sendNormalizedSAXcharacters(org.xml.sax.ContentHandler,int,int)
74          */
75         public static final int SUPPRESS_LEADING_WS=0x01;
76 
77         /** Manifest constant: Suppress trailing whitespace.
78          * This should be used when normalize-to-SAX is called for the last chunk of a
79          * multi-chunk output; it may have to be or'ed with SUPPRESS_LEADING_WS.
80          */
81         public static final int SUPPRESS_TRAILING_WS=0x02;
82 
83         /** Manifest constant: Suppress both leading and trailing whitespace.
84          * This should be used when normalize-to-SAX is called for a complete string.
85          * (I'm not wild about the name of this one. Ideas welcome.)
86          * @see #sendNormalizedSAXcharacters(org.xml.sax.ContentHandler,int,int)
87          */
88         public static final int SUPPRESS_BOTH
89                 = SUPPRESS_LEADING_WS | SUPPRESS_TRAILING_WS;
90 
91         /** Manifest constant: Carry trailing whitespace of one chunk as leading
92          * whitespace of the next chunk. Used internally; I don't see any reason
93          * to make it public right now.
94          */
95         private static final int CARRY_WS=0x04;
96 
97         /**
98    * Field m_chunkBits sets our chunking strategy, by saying how many
99    * bits of index can be used within a single chunk before flowing over
100    * to the next chunk. For example, if m_chunkbits is set to 15, each
101    * chunk can contain up to 2^15 (32K) characters
102    */
103   int m_chunkBits = 15;
104 
105   /**
106    * Field m_maxChunkBits affects our chunk-growth strategy, by saying what
107    * the largest permissible chunk size is in this particular FastStringBuffer
108    * hierarchy.
109    */
110   int m_maxChunkBits = 15;
111 
112   /**
113    * Field m_rechunkBits affects our chunk-growth strategy, by saying how
114    * many chunks should be allocated at one size before we encapsulate them
115    * into the first chunk of the next size up. For example, if m_rechunkBits
116    * is set to 3, then after 8 chunks at a given size we will rebundle
117    * them as the first element of a FastStringBuffer using a chunk size
118    * 8 times larger (chunkBits shifted left three bits).
119    */
120   int m_rebundleBits = 2;
121 
122   /**
123    * Field m_chunkSize establishes the maximum size of one chunk of the array
124    * as 2**chunkbits characters.
125    * (Which may also be the minimum size if we aren't tuning for storage)
126    */
127   int m_chunkSize;  // =1<<(m_chunkBits-1);
128 
129   /**
130    * Field m_chunkMask is m_chunkSize-1 -- in other words, m_chunkBits
131    * worth of low-order '1' bits, useful for shift-and-mask addressing
132    * within the chunks.
133    */
134   int m_chunkMask;  // =m_chunkSize-1;
135 
136   /**
137    * Field m_array holds the string buffer's text contents, using an
138    * array-of-arrays. Note that this array, and the arrays it contains, may be
139    * reallocated when necessary in order to allow the buffer to grow;
140    * references to them should be considered to be invalidated after any
141    * append. However, the only time these arrays are directly exposed
142    * is in the sendSAXcharacters call.
143    */
144   char[][] m_array;
145 
146   /**
147    * Field m_lastChunk is an index into m_array[], pointing to the last
148    * chunk of the Chunked Array currently in use. Note that additional
149    * chunks may actually be allocated, eg if the FastStringBuffer had
150    * previously been truncated or if someone issued an ensureSpace request.
151    * <p>
152    * The insertion point for append operations is addressed by the combination
153    * of m_lastChunk and m_firstFree.
154    */
155   int m_lastChunk = 0;
156 
157   /**
158    * Field m_firstFree is an index into m_array[m_lastChunk][], pointing to
159    * the first character in the Chunked Array which is not part of the
160    * FastStringBuffer's current content. Since m_array[][] is zero-based,
161    * the length of that content can be calculated as
162    * (m_lastChunk<<m_chunkBits) + m_firstFree
163    */
164   int m_firstFree = 0;
165 
166   /**
167    * Field m_innerFSB, when non-null, is a FastStringBuffer whose total
168    * length equals m_chunkSize, and which replaces m_array[0]. This allows
169    * building a hierarchy of FastStringBuffers, where early appends use
170    * a smaller chunkSize (for less wasted memory overhead) but later
171    * ones use a larger chunkSize (for less heap activity overhead).
172    */
173   FastStringBuffer m_innerFSB = null;
174 
175   /**
176    * Construct a FastStringBuffer, with allocation policy as per parameters.
177    * <p>
178    * For coding convenience, I've expressed both allocation sizes in terms of
179    * a number of bits. That's needed for the final size of a chunk,
180    * to permit fast and efficient shift-and-mask addressing. It's less critical
181    * for the inital size, and may be reconsidered.
182    * <p>
183    * An alternative would be to accept integer sizes and round to powers of two;
184    * that really doesn't seem to buy us much, if anything.
185    *
186    * @param initChunkBits Length in characters of the initial allocation
187    * of a chunk, expressed in log-base-2. (That is, 10 means allocate 1024
188    * characters.) Later chunks will use larger allocation units, to trade off
189    * allocation speed of large document against storage efficiency of small
190    * ones.
191    * @param maxChunkBits Number of character-offset bits that should be used for
192    * addressing within a chunk. Maximum length of a chunk is 2^chunkBits
193    * characters.
194    * @param rebundleBits Number of character-offset bits that addressing should
195    * advance before we attempt to take a step from initChunkBits to maxChunkBits
196    */
FastStringBuffer(int initChunkBits, int maxChunkBits, int rebundleBits)197   public FastStringBuffer(int initChunkBits, int maxChunkBits,
198                           int rebundleBits)
199   {
200     if(DEBUG_FORCE_INIT_BITS!=0) initChunkBits=DEBUG_FORCE_INIT_BITS;
201 
202     // %REVIEW%
203     // Should this force to larger value, or smaller? Smaller less efficient, but if
204     // someone requested variable mode it's because they care about storage space.
205     // On the other hand, given the other changes I'm making, odds are that we should
206     // adopt the larger size. Dither, dither, dither... This is just stopgap workaround
207     // anyway; we need a permanant solution.
208     //
209     if(DEBUG_FORCE_FIXED_CHUNKSIZE) maxChunkBits=initChunkBits;
210     //if(DEBUG_FORCE_FIXED_CHUNKSIZE) initChunkBits=maxChunkBits;
211 
212     m_array = new char[16][];
213 
214     // Don't bite off more than we're prepared to swallow!
215     if (initChunkBits > maxChunkBits)
216       initChunkBits = maxChunkBits;
217 
218     m_chunkBits = initChunkBits;
219     m_maxChunkBits = maxChunkBits;
220     m_rebundleBits = rebundleBits;
221     m_chunkSize = 1 << (initChunkBits);
222     m_chunkMask = m_chunkSize - 1;
223     m_array[0] = new char[m_chunkSize];
224   }
225 
226   /**
227    * Construct a FastStringBuffer, using a default rebundleBits value.
228    *
229    * NEEDSDOC @param initChunkBits
230    * NEEDSDOC @param maxChunkBits
231    */
FastStringBuffer(int initChunkBits, int maxChunkBits)232   public FastStringBuffer(int initChunkBits, int maxChunkBits)
233   {
234     this(initChunkBits, maxChunkBits, 2);
235   }
236 
237   /**
238    * Construct a FastStringBuffer, using default maxChunkBits and
239    * rebundleBits values.
240    * <p>
241    * ISSUE: Should this call assert initial size, or fixed size?
242    * Now configured as initial, with a default for fixed.
243    *
244    * NEEDSDOC @param initChunkBits
245    */
FastStringBuffer(int initChunkBits)246   public FastStringBuffer(int initChunkBits)
247   {
248     this(initChunkBits, 15, 2);
249   }
250 
251   /**
252    * Construct a FastStringBuffer, using a default allocation policy.
253    */
FastStringBuffer()254   public FastStringBuffer()
255   {
256 
257     // 10 bits is 1K. 15 bits is 32K. Remember that these are character
258     // counts, so actual memory allocation unit is doubled for UTF-16 chars.
259     //
260     // For reference: In the original FastStringBuffer, we simply
261     // overallocated by blocksize (default 1KB) on each buffer-growth.
262     this(10, 15, 2);
263   }
264 
265   /**
266    * Get the length of the list. Synonym for length().
267    *
268    * @return the number of characters in the FastStringBuffer's content.
269    */
size()270   public final int size()
271   {
272     return (m_lastChunk << m_chunkBits) + m_firstFree;
273   }
274 
275   /**
276    * Get the length of the list. Synonym for size().
277    *
278    * @return the number of characters in the FastStringBuffer's content.
279    */
length()280   public final int length()
281   {
282     return (m_lastChunk << m_chunkBits) + m_firstFree;
283   }
284 
285   /**
286    * Discard the content of the FastStringBuffer, and most of the memory
287    * that was allocated by it, restoring the initial state. Note that this
288    * may eventually be different from setLength(0), which see.
289    */
reset()290   public final void reset()
291   {
292 
293     m_lastChunk = 0;
294     m_firstFree = 0;
295 
296     // Recover the original chunk size
297     FastStringBuffer innermost = this;
298 
299     while (innermost.m_innerFSB != null)
300     {
301       innermost = innermost.m_innerFSB;
302     }
303 
304     m_chunkBits = innermost.m_chunkBits;
305     m_chunkSize = innermost.m_chunkSize;
306     m_chunkMask = innermost.m_chunkMask;
307 
308     // Discard the hierarchy
309     m_innerFSB = null;
310     m_array = new char[16][0];
311     m_array[0] = new char[m_chunkSize];
312   }
313 
314   /**
315    * Directly set how much of the FastStringBuffer's storage is to be
316    * considered part of its content. This is a fast but hazardous
317    * operation. It is not protected against negative values, or values
318    * greater than the amount of storage currently available... and even
319    * if additional storage does exist, its contents are unpredictable.
320    * The only safe use for our setLength() is to truncate the FastStringBuffer
321    * to a shorter string.
322    *
323    * @param l New length. If l<0 or l>=getLength(), this operation will
324    * not report an error but future operations will almost certainly fail.
325    */
setLength(int l)326   public final void setLength(int l)
327   {
328     m_lastChunk = l >>> m_chunkBits;
329 
330     if (m_lastChunk == 0 && m_innerFSB != null)
331     {
332       // Replace this FSB with the appropriate inner FSB, truncated
333       m_innerFSB.setLength(l, this);
334     }
335     else
336     {
337       m_firstFree = l & m_chunkMask;
338 
339           // There's an edge case if l is an exact multiple of m_chunkBits, which risks leaving
340           // us pointing at the start of a chunk which has not yet been allocated. Rather than
341           // pay the cost of dealing with that in the append loops (more scattered and more
342           // inner-loop), we correct it here by moving to the safe side of that
343           // line -- as we would have left the indexes had we appended up to that point.
344       if(m_firstFree==0 && m_lastChunk>0)
345       {
346         --m_lastChunk;
347         m_firstFree=m_chunkSize;
348       }
349     }
350   }
351 
352   /**
353    * Subroutine for the public setLength() method. Deals with the fact
354    * that truncation may require restoring one of the innerFSBs
355    *
356    * NEEDSDOC @param l
357    * NEEDSDOC @param rootFSB
358    */
setLength(int l, FastStringBuffer rootFSB)359   private final void setLength(int l, FastStringBuffer rootFSB)
360   {
361 
362     m_lastChunk = l >>> m_chunkBits;
363 
364     if (m_lastChunk == 0 && m_innerFSB != null)
365     {
366       m_innerFSB.setLength(l, rootFSB);
367     }
368     else
369     {
370 
371       // Undo encapsulation -- pop the innerFSB data back up to root.
372       // Inefficient, but attempts to keep the code simple.
373       rootFSB.m_chunkBits = m_chunkBits;
374       rootFSB.m_maxChunkBits = m_maxChunkBits;
375       rootFSB.m_rebundleBits = m_rebundleBits;
376       rootFSB.m_chunkSize = m_chunkSize;
377       rootFSB.m_chunkMask = m_chunkMask;
378       rootFSB.m_array = m_array;
379       rootFSB.m_innerFSB = m_innerFSB;
380       rootFSB.m_lastChunk = m_lastChunk;
381 
382       // Finally, truncate this sucker.
383       rootFSB.m_firstFree = l & m_chunkMask;
384     }
385   }
386 
387   /**
388    * Note that this operation has been somewhat deoptimized by the shift to a
389    * chunked array, as there is no factory method to produce a String object
390    * directly from an array of arrays and hence a double copy is needed.
391    * By using ensureCapacity we hope to minimize the heap overhead of building
392    * the intermediate StringBuffer.
393    * <p>
394    * (It really is a pity that Java didn't design String as a final subclass
395    * of MutableString, rather than having StringBuffer be a separate hierarchy.
396    * We'd avoid a <strong>lot</strong> of double-buffering.)
397    *
398    * @return the contents of the FastStringBuffer as a standard Java string.
399    */
toString()400   public final String toString()
401   {
402 
403     int length = (m_lastChunk << m_chunkBits) + m_firstFree;
404 
405     return getString(new StringBuffer(length), 0, 0, length).toString();
406   }
407 
408   /**
409    * Append a single character onto the FastStringBuffer, growing the
410    * storage if necessary.
411    * <p>
412    * NOTE THAT after calling append(), previously obtained
413    * references to m_array[][] may no longer be valid....
414    * though in fact they should be in this instance.
415    *
416    * @param value character to be appended.
417    */
append(char value)418   public final void append(char value)
419   {
420 
421     char[] chunk;
422 
423     // We may have preallocated chunks. If so, all but last should
424     // be at full size.
425     boolean lastchunk = (m_lastChunk + 1 == m_array.length);
426 
427     if (m_firstFree < m_chunkSize)  // Simplified test single-character-fits
428       chunk = m_array[m_lastChunk];
429     else
430     {
431 
432       // Extend array?
433       int i = m_array.length;
434 
435       if (m_lastChunk + 1 == i)
436       {
437         char[][] newarray = new char[i + 16][];
438 
439         System.arraycopy(m_array, 0, newarray, 0, i);
440 
441         m_array = newarray;
442       }
443 
444       // Advance one chunk
445       chunk = m_array[++m_lastChunk];
446 
447       if (chunk == null)
448       {
449 
450         // Hierarchical encapsulation
451         if (m_lastChunk == 1 << m_rebundleBits
452                 && m_chunkBits < m_maxChunkBits)
453         {
454 
455           // Should do all the work of both encapsulating
456           // existing data and establishing new sizes/offsets
457           m_innerFSB = new FastStringBuffer(this);
458         }
459 
460         // Add a chunk.
461         chunk = m_array[m_lastChunk] = new char[m_chunkSize];
462       }
463 
464       m_firstFree = 0;
465     }
466 
467     // Space exists in the chunk. Append the character.
468     chunk[m_firstFree++] = value;
469   }
470 
471   /**
472    * Append the contents of a String onto the FastStringBuffer,
473    * growing the storage if necessary.
474    * <p>
475    * NOTE THAT after calling append(), previously obtained
476    * references to m_array[] may no longer be valid.
477    *
478    * @param value String whose contents are to be appended.
479    */
append(String value)480   public final void append(String value)
481   {
482 
483     if (value == null)
484       return;
485     int strlen = value.length();
486 
487     if (0 == strlen)
488       return;
489 
490     int copyfrom = 0;
491     char[] chunk = m_array[m_lastChunk];
492     int available = m_chunkSize - m_firstFree;
493 
494     // Repeat while data remains to be copied
495     while (strlen > 0)
496     {
497 
498       // Copy what fits
499       if (available > strlen)
500         available = strlen;
501 
502       value.getChars(copyfrom, copyfrom + available, m_array[m_lastChunk],
503                      m_firstFree);
504 
505       strlen -= available;
506       copyfrom += available;
507 
508       // If there's more left, allocate another chunk and continue
509       if (strlen > 0)
510       {
511 
512         // Extend array?
513         int i = m_array.length;
514 
515         if (m_lastChunk + 1 == i)
516         {
517           char[][] newarray = new char[i + 16][];
518 
519           System.arraycopy(m_array, 0, newarray, 0, i);
520 
521           m_array = newarray;
522         }
523 
524         // Advance one chunk
525         chunk = m_array[++m_lastChunk];
526 
527         if (chunk == null)
528         {
529 
530           // Hierarchical encapsulation
531           if (m_lastChunk == 1 << m_rebundleBits
532                   && m_chunkBits < m_maxChunkBits)
533           {
534 
535             // Should do all the work of both encapsulating
536             // existing data and establishing new sizes/offsets
537             m_innerFSB = new FastStringBuffer(this);
538           }
539 
540           // Add a chunk.
541           chunk = m_array[m_lastChunk] = new char[m_chunkSize];
542         }
543 
544         available = m_chunkSize;
545         m_firstFree = 0;
546       }
547     }
548 
549     // Adjust the insert point in the last chunk, when we've reached it.
550     m_firstFree += available;
551   }
552 
553   /**
554    * Append the contents of a StringBuffer onto the FastStringBuffer,
555    * growing the storage if necessary.
556    * <p>
557    * NOTE THAT after calling append(), previously obtained
558    * references to m_array[] may no longer be valid.
559    *
560    * @param value StringBuffer whose contents are to be appended.
561    */
append(StringBuffer value)562   public final void append(StringBuffer value)
563   {
564 
565     if (value == null)
566       return;
567     int strlen = value.length();
568 
569     if (0 == strlen)
570       return;
571 
572     int copyfrom = 0;
573     char[] chunk = m_array[m_lastChunk];
574     int available = m_chunkSize - m_firstFree;
575 
576     // Repeat while data remains to be copied
577     while (strlen > 0)
578     {
579 
580       // Copy what fits
581       if (available > strlen)
582         available = strlen;
583 
584       value.getChars(copyfrom, copyfrom + available, m_array[m_lastChunk],
585                      m_firstFree);
586 
587       strlen -= available;
588       copyfrom += available;
589 
590       // If there's more left, allocate another chunk and continue
591       if (strlen > 0)
592       {
593 
594         // Extend array?
595         int i = m_array.length;
596 
597         if (m_lastChunk + 1 == i)
598         {
599           char[][] newarray = new char[i + 16][];
600 
601           System.arraycopy(m_array, 0, newarray, 0, i);
602 
603           m_array = newarray;
604         }
605 
606         // Advance one chunk
607         chunk = m_array[++m_lastChunk];
608 
609         if (chunk == null)
610         {
611 
612           // Hierarchical encapsulation
613           if (m_lastChunk == 1 << m_rebundleBits
614                   && m_chunkBits < m_maxChunkBits)
615           {
616 
617             // Should do all the work of both encapsulating
618             // existing data and establishing new sizes/offsets
619             m_innerFSB = new FastStringBuffer(this);
620           }
621 
622           // Add a chunk.
623           chunk = m_array[m_lastChunk] = new char[m_chunkSize];
624         }
625 
626         available = m_chunkSize;
627         m_firstFree = 0;
628       }
629     }
630 
631     // Adjust the insert point in the last chunk, when we've reached it.
632     m_firstFree += available;
633   }
634 
635   /**
636    * Append part of the contents of a Character Array onto the
637    * FastStringBuffer,  growing the storage if necessary.
638    * <p>
639    * NOTE THAT after calling append(), previously obtained
640    * references to m_array[] may no longer be valid.
641    *
642    * @param chars character array from which data is to be copied
643    * @param start offset in chars of first character to be copied,
644    * zero-based.
645    * @param length number of characters to be copied
646    */
append(char[] chars, int start, int length)647   public final void append(char[] chars, int start, int length)
648   {
649 
650     int strlen = length;
651 
652     if (0 == strlen)
653       return;
654 
655     int copyfrom = start;
656     char[] chunk = m_array[m_lastChunk];
657     int available = m_chunkSize - m_firstFree;
658 
659     // Repeat while data remains to be copied
660     while (strlen > 0)
661     {
662 
663       // Copy what fits
664       if (available > strlen)
665         available = strlen;
666 
667       System.arraycopy(chars, copyfrom, m_array[m_lastChunk], m_firstFree,
668                        available);
669 
670       strlen -= available;
671       copyfrom += available;
672 
673       // If there's more left, allocate another chunk and continue
674       if (strlen > 0)
675       {
676 
677         // Extend array?
678         int i = m_array.length;
679 
680         if (m_lastChunk + 1 == i)
681         {
682           char[][] newarray = new char[i + 16][];
683 
684           System.arraycopy(m_array, 0, newarray, 0, i);
685 
686           m_array = newarray;
687         }
688 
689         // Advance one chunk
690         chunk = m_array[++m_lastChunk];
691 
692         if (chunk == null)
693         {
694 
695           // Hierarchical encapsulation
696           if (m_lastChunk == 1 << m_rebundleBits
697                   && m_chunkBits < m_maxChunkBits)
698           {
699 
700             // Should do all the work of both encapsulating
701             // existing data and establishing new sizes/offsets
702             m_innerFSB = new FastStringBuffer(this);
703           }
704 
705           // Add a chunk.
706           chunk = m_array[m_lastChunk] = new char[m_chunkSize];
707         }
708 
709         available = m_chunkSize;
710         m_firstFree = 0;
711       }
712     }
713 
714     // Adjust the insert point in the last chunk, when we've reached it.
715     m_firstFree += available;
716   }
717 
718   /**
719    * Append the contents of another FastStringBuffer onto
720    * this FastStringBuffer, growing the storage if necessary.
721    * <p>
722    * NOTE THAT after calling append(), previously obtained
723    * references to m_array[] may no longer be valid.
724    *
725    * @param value FastStringBuffer whose contents are
726    * to be appended.
727    */
append(FastStringBuffer value)728   public final void append(FastStringBuffer value)
729   {
730 
731     // Complicating factor here is that the two buffers may use
732     // different chunk sizes, and even if they're the same we're
733     // probably on a different alignment due to previously appended
734     // data. We have to work through the source in bite-sized chunks.
735     if (value == null)
736       return;
737     int strlen = value.length();
738 
739     if (0 == strlen)
740       return;
741 
742     int copyfrom = 0;
743     char[] chunk = m_array[m_lastChunk];
744     int available = m_chunkSize - m_firstFree;
745 
746     // Repeat while data remains to be copied
747     while (strlen > 0)
748     {
749 
750       // Copy what fits
751       if (available > strlen)
752         available = strlen;
753 
754       int sourcechunk = (copyfrom + value.m_chunkSize - 1)
755                         >>> value.m_chunkBits;
756       int sourcecolumn = copyfrom & value.m_chunkMask;
757       int runlength = value.m_chunkSize - sourcecolumn;
758 
759       if (runlength > available)
760         runlength = available;
761 
762       System.arraycopy(value.m_array[sourcechunk], sourcecolumn,
763                        m_array[m_lastChunk], m_firstFree, runlength);
764 
765       if (runlength != available)
766         System.arraycopy(value.m_array[sourcechunk + 1], 0,
767                          m_array[m_lastChunk], m_firstFree + runlength,
768                          available - runlength);
769 
770       strlen -= available;
771       copyfrom += available;
772 
773       // If there's more left, allocate another chunk and continue
774       if (strlen > 0)
775       {
776 
777         // Extend array?
778         int i = m_array.length;
779 
780         if (m_lastChunk + 1 == i)
781         {
782           char[][] newarray = new char[i + 16][];
783 
784           System.arraycopy(m_array, 0, newarray, 0, i);
785 
786           m_array = newarray;
787         }
788 
789         // Advance one chunk
790         chunk = m_array[++m_lastChunk];
791 
792         if (chunk == null)
793         {
794 
795           // Hierarchical encapsulation
796           if (m_lastChunk == 1 << m_rebundleBits
797                   && m_chunkBits < m_maxChunkBits)
798           {
799 
800             // Should do all the work of both encapsulating
801             // existing data and establishing new sizes/offsets
802             m_innerFSB = new FastStringBuffer(this);
803           }
804 
805           // Add a chunk.
806           chunk = m_array[m_lastChunk] = new char[m_chunkSize];
807         }
808 
809         available = m_chunkSize;
810         m_firstFree = 0;
811       }
812     }
813 
814     // Adjust the insert point in the last chunk, when we've reached it.
815     m_firstFree += available;
816   }
817 
818   /**
819    * @return true if the specified range of characters are all whitespace,
820    * as defined by XMLCharacterRecognizer.
821    * <p>
822    * CURRENTLY DOES NOT CHECK FOR OUT-OF-RANGE.
823    *
824    * @param start Offset of first character in the range.
825    * @param length Number of characters to send.
826    */
isWhitespace(int start, int length)827   public boolean isWhitespace(int start, int length)
828   {
829 
830     int sourcechunk = start >>> m_chunkBits;
831     int sourcecolumn = start & m_chunkMask;
832     int available = m_chunkSize - sourcecolumn;
833     boolean chunkOK;
834 
835     while (length > 0)
836     {
837       int runlength = (length <= available) ? length : available;
838 
839       if (sourcechunk == 0 && m_innerFSB != null)
840         chunkOK = m_innerFSB.isWhitespace(sourcecolumn, runlength);
841       else
842         chunkOK = com.sun.org.apache.xml.internal.utils.XMLCharacterRecognizer.isWhiteSpace(
843           m_array[sourcechunk], sourcecolumn, runlength);
844 
845       if (!chunkOK)
846         return false;
847 
848       length -= runlength;
849 
850       ++sourcechunk;
851 
852       sourcecolumn = 0;
853       available = m_chunkSize;
854     }
855 
856     return true;
857   }
858 
859   /**
860    * @param start Offset of first character in the range.
861    * @param length Number of characters to send.
862    * @return a new String object initialized from the specified range of
863    * characters.
864    */
getString(int start, int length)865   public String getString(int start, int length)
866   {
867     int startColumn = start & m_chunkMask;
868     int startChunk = start >>> m_chunkBits;
869     if (startColumn + length < m_chunkMask && m_innerFSB == null) {
870       return getOneChunkString(startChunk, startColumn, length);
871     }
872     return getString(new StringBuffer(length), startChunk, startColumn,
873                      length).toString();
874   }
875 
getOneChunkString(int startChunk, int startColumn, int length)876   protected String getOneChunkString(int startChunk, int startColumn,
877                                      int length) {
878     return new String(m_array[startChunk], startColumn, length);
879   }
880 
881   /**
882    * @param sb StringBuffer to be appended to
883    * @param start Offset of first character in the range.
884    * @param length Number of characters to send.
885    * @return sb with the requested text appended to it
886    */
getString(StringBuffer sb, int start, int length)887   StringBuffer getString(StringBuffer sb, int start, int length)
888   {
889     return getString(sb, start >>> m_chunkBits, start & m_chunkMask, length);
890   }
891 
892   /**
893    * Internal support for toString() and getString().
894    * PLEASE NOTE SIGNATURE CHANGE from earlier versions; it now appends into
895    * and returns a StringBuffer supplied by the caller. This simplifies
896    * m_innerFSB support.
897    * <p>
898    * Note that this operation has been somewhat deoptimized by the shift to a
899    * chunked array, as there is no factory method to produce a String object
900    * directly from an array of arrays and hence a double copy is needed.
901    * By presetting length we hope to minimize the heap overhead of building
902    * the intermediate StringBuffer.
903    * <p>
904    * (It really is a pity that Java didn't design String as a final subclass
905    * of MutableString, rather than having StringBuffer be a separate hierarchy.
906    * We'd avoid a <strong>lot</strong> of double-buffering.)
907    *
908    *
909    * @param sb
910    * @param startChunk
911    * @param startColumn
912    * @param length
913    *
914    * @return the contents of the FastStringBuffer as a standard Java string.
915    */
getString(StringBuffer sb, int startChunk, int startColumn, int length)916   StringBuffer getString(StringBuffer sb, int startChunk, int startColumn,
917                          int length)
918   {
919 
920     int stop = (startChunk << m_chunkBits) + startColumn + length;
921     int stopChunk = stop >>> m_chunkBits;
922     int stopColumn = stop & m_chunkMask;
923 
924     // Factored out
925     //StringBuffer sb=new StringBuffer(length);
926     for (int i = startChunk; i < stopChunk; ++i)
927     {
928       if (i == 0 && m_innerFSB != null)
929         m_innerFSB.getString(sb, startColumn, m_chunkSize - startColumn);
930       else
931         sb.append(m_array[i], startColumn, m_chunkSize - startColumn);
932 
933       startColumn = 0;  // after first chunk
934     }
935 
936     if (stopChunk == 0 && m_innerFSB != null)
937       m_innerFSB.getString(sb, startColumn, stopColumn - startColumn);
938     else if (stopColumn > startColumn)
939       sb.append(m_array[stopChunk], startColumn, stopColumn - startColumn);
940 
941     return sb;
942   }
943 
944   /**
945    * Get a single character from the string buffer.
946    *
947    *
948    * @param pos character position requested.
949    * @return A character from the requested position.
950    */
charAt(int pos)951   public char charAt(int pos)
952   {
953     int startChunk = pos >>> m_chunkBits;
954 
955     if (startChunk == 0 && m_innerFSB != null)
956       return m_innerFSB.charAt(pos & m_chunkMask);
957     else
958       return m_array[startChunk][pos & m_chunkMask];
959   }
960 
961   /**
962    * Sends the specified range of characters as one or more SAX characters()
963    * events.
964    * Note that the buffer reference passed to the ContentHandler may be
965    * invalidated if the FastStringBuffer is edited; it's the user's
966    * responsibility to manage access to the FastStringBuffer to prevent this
967    * problem from arising.
968    * <p>
969    * Note too that there is no promise that the output will be sent as a
970    * single call. As is always true in SAX, one logical string may be split
971    * across multiple blocks of memory and hence delivered as several
972    * successive events.
973    *
974    * @param ch SAX ContentHandler object to receive the event.
975    * @param start Offset of first character in the range.
976    * @param length Number of characters to send.
977    * @exception org.xml.sax.SAXException may be thrown by handler's
978    * characters() method.
979    */
sendSAXcharacters( org.xml.sax.ContentHandler ch, int start, int length)980   public void sendSAXcharacters(
981           org.xml.sax.ContentHandler ch, int start, int length)
982             throws org.xml.sax.SAXException
983   {
984 
985     int startChunk = start >>> m_chunkBits;
986     int startColumn = start & m_chunkMask;
987     if (startColumn + length < m_chunkMask && m_innerFSB == null) {
988         ch.characters(m_array[startChunk], startColumn, length);
989         return;
990     }
991 
992     int stop = start + length;
993     int stopChunk = stop >>> m_chunkBits;
994     int stopColumn = stop & m_chunkMask;
995 
996     for (int i = startChunk; i < stopChunk; ++i)
997     {
998       if (i == 0 && m_innerFSB != null)
999         m_innerFSB.sendSAXcharacters(ch, startColumn,
1000                                      m_chunkSize - startColumn);
1001       else
1002         ch.characters(m_array[i], startColumn, m_chunkSize - startColumn);
1003 
1004       startColumn = 0;  // after first chunk
1005     }
1006 
1007     // Last, or only, chunk
1008     if (stopChunk == 0 && m_innerFSB != null)
1009       m_innerFSB.sendSAXcharacters(ch, startColumn, stopColumn - startColumn);
1010     else if (stopColumn > startColumn)
1011     {
1012       ch.characters(m_array[stopChunk], startColumn,
1013                     stopColumn - startColumn);
1014     }
1015   }
1016 
1017   /**
1018    * Sends the specified range of characters as one or more SAX characters()
1019    * events, normalizing the characters according to XSLT rules.
1020    *
1021    * @param ch SAX ContentHandler object to receive the event.
1022    * @param start Offset of first character in the range.
1023    * @param length Number of characters to send.
1024    * @return normalization status to apply to next chunk (because we may
1025    * have been called recursively to process an inner FSB):
1026    * <dl>
1027    * <dt>0</dt>
1028    * <dd>if this output did not end in retained whitespace, and thus whitespace
1029    * at the start of the following chunk (if any) should be converted to a
1030    * single space.
1031    * <dt>SUPPRESS_LEADING_WS</dt>
1032    * <dd>if this output ended in retained whitespace, and thus whitespace
1033    * at the start of the following chunk (if any) should be completely
1034    * suppressed.</dd>
1035    * </dd>
1036    * </dl>
1037    * @exception org.xml.sax.SAXException may be thrown by handler's
1038    * characters() method.
1039    */
sendNormalizedSAXcharacters( org.xml.sax.ContentHandler ch, int start, int length)1040   public int sendNormalizedSAXcharacters(
1041           org.xml.sax.ContentHandler ch, int start, int length)
1042             throws org.xml.sax.SAXException
1043   {
1044         // This call always starts at the beginning of the
1045     // string being written out, either because it was called directly or
1046     // because it was an m_innerFSB recursion. This is important since
1047         // it gives us a well-known initial state for this flag:
1048         int stateForNextChunk=SUPPRESS_LEADING_WS;
1049 
1050     int stop = start + length;
1051     int startChunk = start >>> m_chunkBits;
1052     int startColumn = start & m_chunkMask;
1053     int stopChunk = stop >>> m_chunkBits;
1054     int stopColumn = stop & m_chunkMask;
1055 
1056     for (int i = startChunk; i < stopChunk; ++i)
1057     {
1058       if (i == 0 && m_innerFSB != null)
1059                                 stateForNextChunk=
1060         m_innerFSB.sendNormalizedSAXcharacters(ch, startColumn,
1061                                      m_chunkSize - startColumn);
1062       else
1063                                 stateForNextChunk=
1064         sendNormalizedSAXcharacters(m_array[i], startColumn,
1065                                     m_chunkSize - startColumn,
1066                                                                                                                                                 ch,stateForNextChunk);
1067 
1068       startColumn = 0;  // after first chunk
1069     }
1070 
1071     // Last, or only, chunk
1072     if (stopChunk == 0 && m_innerFSB != null)
1073                         stateForNextChunk= // %REVIEW% Is this update really needed?
1074       m_innerFSB.sendNormalizedSAXcharacters(ch, startColumn, stopColumn - startColumn);
1075     else if (stopColumn > startColumn)
1076     {
1077                         stateForNextChunk= // %REVIEW% Is this update really needed?
1078       sendNormalizedSAXcharacters(m_array[stopChunk],
1079                                                                                                                                         startColumn, stopColumn - startColumn,
1080                                                                                                                                         ch, stateForNextChunk | SUPPRESS_TRAILING_WS);
1081     }
1082                 return stateForNextChunk;
1083   }
1084 
1085   static final char[] SINGLE_SPACE = {' '};
1086 
1087   /**
1088    * Internal method to directly normalize and dispatch the character array.
1089    * This version is aware of the fact that it may be called several times
1090    * in succession if the data is made up of multiple "chunks", and thus
1091    * must actively manage the handling of leading and trailing whitespace.
1092    *
1093    * Note: The recursion is due to the possible recursion of inner FSBs.
1094    *
1095    * @param ch The characters from the XML document.
1096    * @param start The start position in the array.
1097    * @param length The number of characters to read from the array.
1098    * @param handler SAX ContentHandler object to receive the event.
1099    * @param edgeTreatmentFlags How leading/trailing spaces should be handled.
1100    * This is a bitfield contining two flags, bitwise-ORed together:
1101    * <dl>
1102    * <dt>SUPPRESS_LEADING_WS</dt>
1103    * <dd>When false, causes leading whitespace to be converted to a single
1104    * space; when true, causes it to be discarded entirely.
1105    * Should be set TRUE for the first chunk, and (in multi-chunk output)
1106    * whenever the previous chunk ended in retained whitespace.</dd>
1107    * <dt>SUPPRESS_TRAILING_WS</dt>
1108    * <dd>When false, causes trailing whitespace to be converted to a single
1109    * space; when true, causes it to be discarded entirely.
1110    * Should be set TRUE for the last or only chunk.
1111    * </dd>
1112    * </dl>
1113    * @return normalization status, as in the edgeTreatmentFlags parameter:
1114    * <dl>
1115    * <dt>0</dt>
1116    * <dd>if this output did not end in retained whitespace, and thus whitespace
1117    * at the start of the following chunk (if any) should be converted to a
1118    * single space.
1119    * <dt>SUPPRESS_LEADING_WS</dt>
1120    * <dd>if this output ended in retained whitespace, and thus whitespace
1121    * at the start of the following chunk (if any) should be completely
1122    * suppressed.</dd>
1123    * </dd>
1124    * </dl>
1125    *
1126    *
1127    * @exception org.xml.sax.SAXException Any SAX exception, possibly
1128    *            wrapping another exception.
1129    */
sendNormalizedSAXcharacters(char ch[], int start, int length, org.xml.sax.ContentHandler handler, int edgeTreatmentFlags)1130   static int sendNormalizedSAXcharacters(char ch[],
1131              int start, int length,
1132              org.xml.sax.ContentHandler handler,
1133                                                  int edgeTreatmentFlags)
1134           throws org.xml.sax.SAXException
1135   {
1136      boolean processingLeadingWhitespace =
1137                        ((edgeTreatmentFlags & SUPPRESS_LEADING_WS) != 0);
1138      boolean seenWhitespace = ((edgeTreatmentFlags & CARRY_WS) != 0);
1139      boolean suppressTrailingWhitespace =
1140                        ((edgeTreatmentFlags & SUPPRESS_TRAILING_WS) != 0);
1141      int currPos = start;
1142      int limit = start+length;
1143 
1144      // Strip any leading spaces first, if required
1145      if (processingLeadingWhitespace) {
1146          for (; currPos < limit
1147                 && XMLCharacterRecognizer.isWhiteSpace(ch[currPos]);
1148               currPos++) { }
1149 
1150          // If we've only encountered leading spaces, the
1151          // current state remains unchanged
1152          if (currPos == limit) {
1153              return edgeTreatmentFlags;
1154          }
1155      }
1156 
1157      // If we get here, there are no more leading spaces to strip
1158      while (currPos < limit) {
1159          int startNonWhitespace = currPos;
1160 
1161          // Grab a chunk of non-whitespace characters
1162          for (; currPos < limit
1163                 && !XMLCharacterRecognizer.isWhiteSpace(ch[currPos]);
1164               currPos++) { }
1165 
1166          // Non-whitespace seen - emit them, along with a single
1167          // space for any preceding whitespace characters
1168          if (startNonWhitespace != currPos) {
1169              if (seenWhitespace) {
1170                  handler.characters(SINGLE_SPACE, 0, 1);
1171                  seenWhitespace = false;
1172              }
1173              handler.characters(ch, startNonWhitespace,
1174                                 currPos - startNonWhitespace);
1175          }
1176 
1177          int startWhitespace = currPos;
1178 
1179          // Consume any whitespace characters
1180          for (; currPos < limit
1181                 && XMLCharacterRecognizer.isWhiteSpace(ch[currPos]);
1182               currPos++) { }
1183 
1184          if (startWhitespace != currPos) {
1185              seenWhitespace = true;
1186          }
1187      }
1188 
1189      return (seenWhitespace ? CARRY_WS : 0)
1190             | (edgeTreatmentFlags & SUPPRESS_TRAILING_WS);
1191   }
1192 
1193   /**
1194    * Directly normalize and dispatch the character array.
1195    *
1196    * @param ch The characters from the XML document.
1197    * @param start The start position in the array.
1198    * @param length The number of characters to read from the array.
1199    * @param handler SAX ContentHandler object to receive the event.
1200    * @exception org.xml.sax.SAXException Any SAX exception, possibly
1201    *            wrapping another exception.
1202    */
sendNormalizedSAXcharacters(char ch[], int start, int length, org.xml.sax.ContentHandler handler)1203   public static void sendNormalizedSAXcharacters(char ch[],
1204              int start, int length,
1205              org.xml.sax.ContentHandler handler)
1206           throws org.xml.sax.SAXException
1207   {
1208                 sendNormalizedSAXcharacters(ch, start, length,
1209              handler, SUPPRESS_BOTH);
1210         }
1211 
1212         /**
1213    * Sends the specified range of characters as sax Comment.
1214    * <p>
1215    * Note that, unlike sendSAXcharacters, this has to be done as a single
1216    * call to LexicalHandler#comment.
1217    *
1218    * @param ch SAX LexicalHandler object to receive the event.
1219    * @param start Offset of first character in the range.
1220    * @param length Number of characters to send.
1221    * @exception org.xml.sax.SAXException may be thrown by handler's
1222    * characters() method.
1223    */
sendSAXComment( org.xml.sax.ext.LexicalHandler ch, int start, int length)1224   public void sendSAXComment(
1225           org.xml.sax.ext.LexicalHandler ch, int start, int length)
1226             throws org.xml.sax.SAXException
1227   {
1228 
1229     // %OPT% Do it this way for now...
1230     String comment = getString(start, length);
1231     ch.comment(comment.toCharArray(), 0, length);
1232   }
1233 
1234   /**
1235    * Copies characters from this string into the destination character
1236    * array.
1237    *
1238    * @param      srcBegin   index of the first character in the string
1239    *                        to copy.
1240    * @param      srcEnd     index after the last character in the string
1241    *                        to copy.
1242    * @param      dst        the destination array.
1243    * @param      dstBegin   the start offset in the destination array.
1244    * @exception IndexOutOfBoundsException If any of the following
1245    *            is true:
1246    *            <ul><li><code>srcBegin</code> is negative.
1247    *            <li><code>srcBegin</code> is greater than <code>srcEnd</code>
1248    *            <li><code>srcEnd</code> is greater than the length of this
1249    *                string
1250    *            <li><code>dstBegin</code> is negative
1251    *            <li><code>dstBegin+(srcEnd-srcBegin)</code> is larger than
1252    *                <code>dst.length</code></ul>
1253    * @exception NullPointerException if <code>dst</code> is <code>null</code>
1254    */
getChars(int srcBegin, int srcEnd, char dst[], int dstBegin)1255   private void getChars(int srcBegin, int srcEnd, char dst[], int dstBegin)
1256   {
1257     // %TBD% Joe needs to write this function.  Make public when implemented.
1258   }
1259 
1260   /**
1261    * Encapsulation c'tor. After this is called, the source FastStringBuffer
1262    * will be reset to use the new object as its m_innerFSB, and will have
1263    * had its chunk size reset appropriately. IT SHOULD NEVER BE CALLED
1264    * EXCEPT WHEN source.length()==1<<(source.m_chunkBits+source.m_rebundleBits)
1265    *
1266    * NEEDSDOC @param source
1267    */
FastStringBuffer(FastStringBuffer source)1268   private FastStringBuffer(FastStringBuffer source)
1269   {
1270 
1271     // Copy existing information into new encapsulation
1272     m_chunkBits = source.m_chunkBits;
1273     m_maxChunkBits = source.m_maxChunkBits;
1274     m_rebundleBits = source.m_rebundleBits;
1275     m_chunkSize = source.m_chunkSize;
1276     m_chunkMask = source.m_chunkMask;
1277     m_array = source.m_array;
1278     m_innerFSB = source.m_innerFSB;
1279 
1280     // These have to be adjusted because we're calling just at the time
1281     // when we would be about to allocate another chunk
1282     m_lastChunk = source.m_lastChunk - 1;
1283     m_firstFree = source.m_chunkSize;
1284 
1285     // Establish capsule as the Inner FSB, reset chunk sizes/addressing
1286     source.m_array = new char[16][];
1287     source.m_innerFSB = this;
1288 
1289     // Since we encapsulated just as we were about to append another
1290     // chunk, return ready to create the chunk after the innerFSB
1291     // -- 1, not 0.
1292     source.m_lastChunk = 1;
1293     source.m_firstFree = 0;
1294     source.m_chunkBits += m_rebundleBits;
1295     source.m_chunkSize = 1 << (source.m_chunkBits);
1296     source.m_chunkMask = source.m_chunkSize - 1;
1297   }
1298 }
1299