1 /* 2 * reserved comment block 3 * DO NOT REMOVE OR ALTER! 4 */ 5 /* 6 * Copyright 1999-2004 The Apache Software Foundation. 7 * 8 * Licensed under the Apache License, Version 2.0 (the "License"); 9 * you may not use this file except in compliance with the License. 10 * You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 /* 21 * $Id: FastStringBuffer.java,v 1.2.4.1 2005/09/15 08:15:44 suresh_emailid Exp $ 22 */ 23 package com.sun.org.apache.xml.internal.utils; 24 25 /** 26 * Bare-bones, unsafe, fast string buffer. No thread-safety, no 27 * parameter range checking, exposed fields. Note that in typical 28 * applications, thread-safety of a StringBuffer is a somewhat 29 * dubious concept in any case. 30 * <p> 31 * Note that Stree and DTM used a single FastStringBuffer as a string pool, 32 * by recording start and length indices within this single buffer. This 33 * minimizes heap overhead, but of course requires more work when retrieving 34 * the data. 35 * <p> 36 * FastStringBuffer operates as a "chunked buffer". Doing so 37 * reduces the need to recopy existing information when an append 38 * exceeds the space available; we just allocate another chunk and 39 * flow across to it. (The array of chunks may need to grow, 40 * admittedly, but that's a much smaller object.) Some excess 41 * recopying may arise when we extract Strings which cross chunk 42 * boundaries; larger chunks make that less frequent. 43 * <p> 44 * The size values are parameterized, to allow tuning this code. In 45 * theory, Result Tree Fragments might want to be tuned differently 46 * from the main document's text. 47 * <p> 48 * %REVIEW% An experiment in self-tuning is 49 * included in the code (using nested FastStringBuffers to achieve 50 * variation in chunk sizes), but this implementation has proven to 51 * be problematic when data may be being copied from the FSB into itself. 52 * We should either re-architect that to make this safe (if possible) 53 * or remove that code and clean up for performance/maintainability reasons. 54 * <p> 55 */ 56 public class FastStringBuffer 57 { 58 // If nonzero, forces the inial chunk size. 59 /**/static final int DEBUG_FORCE_INIT_BITS=0; 60 61 // %BUG% %REVIEW% *****PROBLEM SUSPECTED: If data from an FSB is being copied 62 // back into the same FSB (variable set from previous variable, for example) 63 // and blocksize changes in mid-copy... there's risk of severe malfunction in 64 // the read process, due to how the resizing code re-jiggers storage. Arggh. 65 // If we want to retain the variable-size-block feature, we need to reconsider 66 // that issue. For now, I have forced us into fixed-size mode. 67 static final boolean DEBUG_FORCE_FIXED_CHUNKSIZE=true; 68 69 /** Manifest constant: Suppress leading whitespace. 70 * This should be used when normalize-to-SAX is called for the first chunk of a 71 * multi-chunk output, or one following unsuppressed whitespace in a previous 72 * chunk. 73 * @see #sendNormalizedSAXcharacters(org.xml.sax.ContentHandler,int,int) 74 */ 75 public static final int SUPPRESS_LEADING_WS=0x01; 76 77 /** Manifest constant: Suppress trailing whitespace. 78 * This should be used when normalize-to-SAX is called for the last chunk of a 79 * multi-chunk output; it may have to be or'ed with SUPPRESS_LEADING_WS. 80 */ 81 public static final int SUPPRESS_TRAILING_WS=0x02; 82 83 /** Manifest constant: Suppress both leading and trailing whitespace. 84 * This should be used when normalize-to-SAX is called for a complete string. 85 * (I'm not wild about the name of this one. Ideas welcome.) 86 * @see #sendNormalizedSAXcharacters(org.xml.sax.ContentHandler,int,int) 87 */ 88 public static final int SUPPRESS_BOTH 89 = SUPPRESS_LEADING_WS | SUPPRESS_TRAILING_WS; 90 91 /** Manifest constant: Carry trailing whitespace of one chunk as leading 92 * whitespace of the next chunk. Used internally; I don't see any reason 93 * to make it public right now. 94 */ 95 private static final int CARRY_WS=0x04; 96 97 /** 98 * Field m_chunkBits sets our chunking strategy, by saying how many 99 * bits of index can be used within a single chunk before flowing over 100 * to the next chunk. For example, if m_chunkbits is set to 15, each 101 * chunk can contain up to 2^15 (32K) characters 102 */ 103 int m_chunkBits = 15; 104 105 /** 106 * Field m_maxChunkBits affects our chunk-growth strategy, by saying what 107 * the largest permissible chunk size is in this particular FastStringBuffer 108 * hierarchy. 109 */ 110 int m_maxChunkBits = 15; 111 112 /** 113 * Field m_rechunkBits affects our chunk-growth strategy, by saying how 114 * many chunks should be allocated at one size before we encapsulate them 115 * into the first chunk of the next size up. For example, if m_rechunkBits 116 * is set to 3, then after 8 chunks at a given size we will rebundle 117 * them as the first element of a FastStringBuffer using a chunk size 118 * 8 times larger (chunkBits shifted left three bits). 119 */ 120 int m_rebundleBits = 2; 121 122 /** 123 * Field m_chunkSize establishes the maximum size of one chunk of the array 124 * as 2**chunkbits characters. 125 * (Which may also be the minimum size if we aren't tuning for storage) 126 */ 127 int m_chunkSize; // =1<<(m_chunkBits-1); 128 129 /** 130 * Field m_chunkMask is m_chunkSize-1 -- in other words, m_chunkBits 131 * worth of low-order '1' bits, useful for shift-and-mask addressing 132 * within the chunks. 133 */ 134 int m_chunkMask; // =m_chunkSize-1; 135 136 /** 137 * Field m_array holds the string buffer's text contents, using an 138 * array-of-arrays. Note that this array, and the arrays it contains, may be 139 * reallocated when necessary in order to allow the buffer to grow; 140 * references to them should be considered to be invalidated after any 141 * append. However, the only time these arrays are directly exposed 142 * is in the sendSAXcharacters call. 143 */ 144 char[][] m_array; 145 146 /** 147 * Field m_lastChunk is an index into m_array[], pointing to the last 148 * chunk of the Chunked Array currently in use. Note that additional 149 * chunks may actually be allocated, eg if the FastStringBuffer had 150 * previously been truncated or if someone issued an ensureSpace request. 151 * <p> 152 * The insertion point for append operations is addressed by the combination 153 * of m_lastChunk and m_firstFree. 154 */ 155 int m_lastChunk = 0; 156 157 /** 158 * Field m_firstFree is an index into m_array[m_lastChunk][], pointing to 159 * the first character in the Chunked Array which is not part of the 160 * FastStringBuffer's current content. Since m_array[][] is zero-based, 161 * the length of that content can be calculated as 162 * (m_lastChunk<<m_chunkBits) + m_firstFree 163 */ 164 int m_firstFree = 0; 165 166 /** 167 * Field m_innerFSB, when non-null, is a FastStringBuffer whose total 168 * length equals m_chunkSize, and which replaces m_array[0]. This allows 169 * building a hierarchy of FastStringBuffers, where early appends use 170 * a smaller chunkSize (for less wasted memory overhead) but later 171 * ones use a larger chunkSize (for less heap activity overhead). 172 */ 173 FastStringBuffer m_innerFSB = null; 174 175 /** 176 * Construct a FastStringBuffer, with allocation policy as per parameters. 177 * <p> 178 * For coding convenience, I've expressed both allocation sizes in terms of 179 * a number of bits. That's needed for the final size of a chunk, 180 * to permit fast and efficient shift-and-mask addressing. It's less critical 181 * for the inital size, and may be reconsidered. 182 * <p> 183 * An alternative would be to accept integer sizes and round to powers of two; 184 * that really doesn't seem to buy us much, if anything. 185 * 186 * @param initChunkBits Length in characters of the initial allocation 187 * of a chunk, expressed in log-base-2. (That is, 10 means allocate 1024 188 * characters.) Later chunks will use larger allocation units, to trade off 189 * allocation speed of large document against storage efficiency of small 190 * ones. 191 * @param maxChunkBits Number of character-offset bits that should be used for 192 * addressing within a chunk. Maximum length of a chunk is 2^chunkBits 193 * characters. 194 * @param rebundleBits Number of character-offset bits that addressing should 195 * advance before we attempt to take a step from initChunkBits to maxChunkBits 196 */ FastStringBuffer(int initChunkBits, int maxChunkBits, int rebundleBits)197 public FastStringBuffer(int initChunkBits, int maxChunkBits, 198 int rebundleBits) 199 { 200 if(DEBUG_FORCE_INIT_BITS!=0) initChunkBits=DEBUG_FORCE_INIT_BITS; 201 202 // %REVIEW% 203 // Should this force to larger value, or smaller? Smaller less efficient, but if 204 // someone requested variable mode it's because they care about storage space. 205 // On the other hand, given the other changes I'm making, odds are that we should 206 // adopt the larger size. Dither, dither, dither... This is just stopgap workaround 207 // anyway; we need a permanant solution. 208 // 209 if(DEBUG_FORCE_FIXED_CHUNKSIZE) maxChunkBits=initChunkBits; 210 //if(DEBUG_FORCE_FIXED_CHUNKSIZE) initChunkBits=maxChunkBits; 211 212 m_array = new char[16][]; 213 214 // Don't bite off more than we're prepared to swallow! 215 if (initChunkBits > maxChunkBits) 216 initChunkBits = maxChunkBits; 217 218 m_chunkBits = initChunkBits; 219 m_maxChunkBits = maxChunkBits; 220 m_rebundleBits = rebundleBits; 221 m_chunkSize = 1 << (initChunkBits); 222 m_chunkMask = m_chunkSize - 1; 223 m_array[0] = new char[m_chunkSize]; 224 } 225 226 /** 227 * Construct a FastStringBuffer, using a default rebundleBits value. 228 * 229 * NEEDSDOC @param initChunkBits 230 * NEEDSDOC @param maxChunkBits 231 */ FastStringBuffer(int initChunkBits, int maxChunkBits)232 public FastStringBuffer(int initChunkBits, int maxChunkBits) 233 { 234 this(initChunkBits, maxChunkBits, 2); 235 } 236 237 /** 238 * Construct a FastStringBuffer, using default maxChunkBits and 239 * rebundleBits values. 240 * <p> 241 * ISSUE: Should this call assert initial size, or fixed size? 242 * Now configured as initial, with a default for fixed. 243 * 244 * NEEDSDOC @param initChunkBits 245 */ FastStringBuffer(int initChunkBits)246 public FastStringBuffer(int initChunkBits) 247 { 248 this(initChunkBits, 15, 2); 249 } 250 251 /** 252 * Construct a FastStringBuffer, using a default allocation policy. 253 */ FastStringBuffer()254 public FastStringBuffer() 255 { 256 257 // 10 bits is 1K. 15 bits is 32K. Remember that these are character 258 // counts, so actual memory allocation unit is doubled for UTF-16 chars. 259 // 260 // For reference: In the original FastStringBuffer, we simply 261 // overallocated by blocksize (default 1KB) on each buffer-growth. 262 this(10, 15, 2); 263 } 264 265 /** 266 * Get the length of the list. Synonym for length(). 267 * 268 * @return the number of characters in the FastStringBuffer's content. 269 */ size()270 public final int size() 271 { 272 return (m_lastChunk << m_chunkBits) + m_firstFree; 273 } 274 275 /** 276 * Get the length of the list. Synonym for size(). 277 * 278 * @return the number of characters in the FastStringBuffer's content. 279 */ length()280 public final int length() 281 { 282 return (m_lastChunk << m_chunkBits) + m_firstFree; 283 } 284 285 /** 286 * Discard the content of the FastStringBuffer, and most of the memory 287 * that was allocated by it, restoring the initial state. Note that this 288 * may eventually be different from setLength(0), which see. 289 */ reset()290 public final void reset() 291 { 292 293 m_lastChunk = 0; 294 m_firstFree = 0; 295 296 // Recover the original chunk size 297 FastStringBuffer innermost = this; 298 299 while (innermost.m_innerFSB != null) 300 { 301 innermost = innermost.m_innerFSB; 302 } 303 304 m_chunkBits = innermost.m_chunkBits; 305 m_chunkSize = innermost.m_chunkSize; 306 m_chunkMask = innermost.m_chunkMask; 307 308 // Discard the hierarchy 309 m_innerFSB = null; 310 m_array = new char[16][0]; 311 m_array[0] = new char[m_chunkSize]; 312 } 313 314 /** 315 * Directly set how much of the FastStringBuffer's storage is to be 316 * considered part of its content. This is a fast but hazardous 317 * operation. It is not protected against negative values, or values 318 * greater than the amount of storage currently available... and even 319 * if additional storage does exist, its contents are unpredictable. 320 * The only safe use for our setLength() is to truncate the FastStringBuffer 321 * to a shorter string. 322 * 323 * @param l New length. If l<0 or l>=getLength(), this operation will 324 * not report an error but future operations will almost certainly fail. 325 */ setLength(int l)326 public final void setLength(int l) 327 { 328 m_lastChunk = l >>> m_chunkBits; 329 330 if (m_lastChunk == 0 && m_innerFSB != null) 331 { 332 // Replace this FSB with the appropriate inner FSB, truncated 333 m_innerFSB.setLength(l, this); 334 } 335 else 336 { 337 m_firstFree = l & m_chunkMask; 338 339 // There's an edge case if l is an exact multiple of m_chunkBits, which risks leaving 340 // us pointing at the start of a chunk which has not yet been allocated. Rather than 341 // pay the cost of dealing with that in the append loops (more scattered and more 342 // inner-loop), we correct it here by moving to the safe side of that 343 // line -- as we would have left the indexes had we appended up to that point. 344 if(m_firstFree==0 && m_lastChunk>0) 345 { 346 --m_lastChunk; 347 m_firstFree=m_chunkSize; 348 } 349 } 350 } 351 352 /** 353 * Subroutine for the public setLength() method. Deals with the fact 354 * that truncation may require restoring one of the innerFSBs 355 * 356 * NEEDSDOC @param l 357 * NEEDSDOC @param rootFSB 358 */ setLength(int l, FastStringBuffer rootFSB)359 private final void setLength(int l, FastStringBuffer rootFSB) 360 { 361 362 m_lastChunk = l >>> m_chunkBits; 363 364 if (m_lastChunk == 0 && m_innerFSB != null) 365 { 366 m_innerFSB.setLength(l, rootFSB); 367 } 368 else 369 { 370 371 // Undo encapsulation -- pop the innerFSB data back up to root. 372 // Inefficient, but attempts to keep the code simple. 373 rootFSB.m_chunkBits = m_chunkBits; 374 rootFSB.m_maxChunkBits = m_maxChunkBits; 375 rootFSB.m_rebundleBits = m_rebundleBits; 376 rootFSB.m_chunkSize = m_chunkSize; 377 rootFSB.m_chunkMask = m_chunkMask; 378 rootFSB.m_array = m_array; 379 rootFSB.m_innerFSB = m_innerFSB; 380 rootFSB.m_lastChunk = m_lastChunk; 381 382 // Finally, truncate this sucker. 383 rootFSB.m_firstFree = l & m_chunkMask; 384 } 385 } 386 387 /** 388 * Note that this operation has been somewhat deoptimized by the shift to a 389 * chunked array, as there is no factory method to produce a String object 390 * directly from an array of arrays and hence a double copy is needed. 391 * By using ensureCapacity we hope to minimize the heap overhead of building 392 * the intermediate StringBuffer. 393 * <p> 394 * (It really is a pity that Java didn't design String as a final subclass 395 * of MutableString, rather than having StringBuffer be a separate hierarchy. 396 * We'd avoid a <strong>lot</strong> of double-buffering.) 397 * 398 * @return the contents of the FastStringBuffer as a standard Java string. 399 */ toString()400 public final String toString() 401 { 402 403 int length = (m_lastChunk << m_chunkBits) + m_firstFree; 404 405 return getString(new StringBuffer(length), 0, 0, length).toString(); 406 } 407 408 /** 409 * Append a single character onto the FastStringBuffer, growing the 410 * storage if necessary. 411 * <p> 412 * NOTE THAT after calling append(), previously obtained 413 * references to m_array[][] may no longer be valid.... 414 * though in fact they should be in this instance. 415 * 416 * @param value character to be appended. 417 */ append(char value)418 public final void append(char value) 419 { 420 421 char[] chunk; 422 423 // We may have preallocated chunks. If so, all but last should 424 // be at full size. 425 boolean lastchunk = (m_lastChunk + 1 == m_array.length); 426 427 if (m_firstFree < m_chunkSize) // Simplified test single-character-fits 428 chunk = m_array[m_lastChunk]; 429 else 430 { 431 432 // Extend array? 433 int i = m_array.length; 434 435 if (m_lastChunk + 1 == i) 436 { 437 char[][] newarray = new char[i + 16][]; 438 439 System.arraycopy(m_array, 0, newarray, 0, i); 440 441 m_array = newarray; 442 } 443 444 // Advance one chunk 445 chunk = m_array[++m_lastChunk]; 446 447 if (chunk == null) 448 { 449 450 // Hierarchical encapsulation 451 if (m_lastChunk == 1 << m_rebundleBits 452 && m_chunkBits < m_maxChunkBits) 453 { 454 455 // Should do all the work of both encapsulating 456 // existing data and establishing new sizes/offsets 457 m_innerFSB = new FastStringBuffer(this); 458 } 459 460 // Add a chunk. 461 chunk = m_array[m_lastChunk] = new char[m_chunkSize]; 462 } 463 464 m_firstFree = 0; 465 } 466 467 // Space exists in the chunk. Append the character. 468 chunk[m_firstFree++] = value; 469 } 470 471 /** 472 * Append the contents of a String onto the FastStringBuffer, 473 * growing the storage if necessary. 474 * <p> 475 * NOTE THAT after calling append(), previously obtained 476 * references to m_array[] may no longer be valid. 477 * 478 * @param value String whose contents are to be appended. 479 */ append(String value)480 public final void append(String value) 481 { 482 483 if (value == null) 484 return; 485 int strlen = value.length(); 486 487 if (0 == strlen) 488 return; 489 490 int copyfrom = 0; 491 char[] chunk = m_array[m_lastChunk]; 492 int available = m_chunkSize - m_firstFree; 493 494 // Repeat while data remains to be copied 495 while (strlen > 0) 496 { 497 498 // Copy what fits 499 if (available > strlen) 500 available = strlen; 501 502 value.getChars(copyfrom, copyfrom + available, m_array[m_lastChunk], 503 m_firstFree); 504 505 strlen -= available; 506 copyfrom += available; 507 508 // If there's more left, allocate another chunk and continue 509 if (strlen > 0) 510 { 511 512 // Extend array? 513 int i = m_array.length; 514 515 if (m_lastChunk + 1 == i) 516 { 517 char[][] newarray = new char[i + 16][]; 518 519 System.arraycopy(m_array, 0, newarray, 0, i); 520 521 m_array = newarray; 522 } 523 524 // Advance one chunk 525 chunk = m_array[++m_lastChunk]; 526 527 if (chunk == null) 528 { 529 530 // Hierarchical encapsulation 531 if (m_lastChunk == 1 << m_rebundleBits 532 && m_chunkBits < m_maxChunkBits) 533 { 534 535 // Should do all the work of both encapsulating 536 // existing data and establishing new sizes/offsets 537 m_innerFSB = new FastStringBuffer(this); 538 } 539 540 // Add a chunk. 541 chunk = m_array[m_lastChunk] = new char[m_chunkSize]; 542 } 543 544 available = m_chunkSize; 545 m_firstFree = 0; 546 } 547 } 548 549 // Adjust the insert point in the last chunk, when we've reached it. 550 m_firstFree += available; 551 } 552 553 /** 554 * Append the contents of a StringBuffer onto the FastStringBuffer, 555 * growing the storage if necessary. 556 * <p> 557 * NOTE THAT after calling append(), previously obtained 558 * references to m_array[] may no longer be valid. 559 * 560 * @param value StringBuffer whose contents are to be appended. 561 */ append(StringBuffer value)562 public final void append(StringBuffer value) 563 { 564 565 if (value == null) 566 return; 567 int strlen = value.length(); 568 569 if (0 == strlen) 570 return; 571 572 int copyfrom = 0; 573 char[] chunk = m_array[m_lastChunk]; 574 int available = m_chunkSize - m_firstFree; 575 576 // Repeat while data remains to be copied 577 while (strlen > 0) 578 { 579 580 // Copy what fits 581 if (available > strlen) 582 available = strlen; 583 584 value.getChars(copyfrom, copyfrom + available, m_array[m_lastChunk], 585 m_firstFree); 586 587 strlen -= available; 588 copyfrom += available; 589 590 // If there's more left, allocate another chunk and continue 591 if (strlen > 0) 592 { 593 594 // Extend array? 595 int i = m_array.length; 596 597 if (m_lastChunk + 1 == i) 598 { 599 char[][] newarray = new char[i + 16][]; 600 601 System.arraycopy(m_array, 0, newarray, 0, i); 602 603 m_array = newarray; 604 } 605 606 // Advance one chunk 607 chunk = m_array[++m_lastChunk]; 608 609 if (chunk == null) 610 { 611 612 // Hierarchical encapsulation 613 if (m_lastChunk == 1 << m_rebundleBits 614 && m_chunkBits < m_maxChunkBits) 615 { 616 617 // Should do all the work of both encapsulating 618 // existing data and establishing new sizes/offsets 619 m_innerFSB = new FastStringBuffer(this); 620 } 621 622 // Add a chunk. 623 chunk = m_array[m_lastChunk] = new char[m_chunkSize]; 624 } 625 626 available = m_chunkSize; 627 m_firstFree = 0; 628 } 629 } 630 631 // Adjust the insert point in the last chunk, when we've reached it. 632 m_firstFree += available; 633 } 634 635 /** 636 * Append part of the contents of a Character Array onto the 637 * FastStringBuffer, growing the storage if necessary. 638 * <p> 639 * NOTE THAT after calling append(), previously obtained 640 * references to m_array[] may no longer be valid. 641 * 642 * @param chars character array from which data is to be copied 643 * @param start offset in chars of first character to be copied, 644 * zero-based. 645 * @param length number of characters to be copied 646 */ append(char[] chars, int start, int length)647 public final void append(char[] chars, int start, int length) 648 { 649 650 int strlen = length; 651 652 if (0 == strlen) 653 return; 654 655 int copyfrom = start; 656 char[] chunk = m_array[m_lastChunk]; 657 int available = m_chunkSize - m_firstFree; 658 659 // Repeat while data remains to be copied 660 while (strlen > 0) 661 { 662 663 // Copy what fits 664 if (available > strlen) 665 available = strlen; 666 667 System.arraycopy(chars, copyfrom, m_array[m_lastChunk], m_firstFree, 668 available); 669 670 strlen -= available; 671 copyfrom += available; 672 673 // If there's more left, allocate another chunk and continue 674 if (strlen > 0) 675 { 676 677 // Extend array? 678 int i = m_array.length; 679 680 if (m_lastChunk + 1 == i) 681 { 682 char[][] newarray = new char[i + 16][]; 683 684 System.arraycopy(m_array, 0, newarray, 0, i); 685 686 m_array = newarray; 687 } 688 689 // Advance one chunk 690 chunk = m_array[++m_lastChunk]; 691 692 if (chunk == null) 693 { 694 695 // Hierarchical encapsulation 696 if (m_lastChunk == 1 << m_rebundleBits 697 && m_chunkBits < m_maxChunkBits) 698 { 699 700 // Should do all the work of both encapsulating 701 // existing data and establishing new sizes/offsets 702 m_innerFSB = new FastStringBuffer(this); 703 } 704 705 // Add a chunk. 706 chunk = m_array[m_lastChunk] = new char[m_chunkSize]; 707 } 708 709 available = m_chunkSize; 710 m_firstFree = 0; 711 } 712 } 713 714 // Adjust the insert point in the last chunk, when we've reached it. 715 m_firstFree += available; 716 } 717 718 /** 719 * Append the contents of another FastStringBuffer onto 720 * this FastStringBuffer, growing the storage if necessary. 721 * <p> 722 * NOTE THAT after calling append(), previously obtained 723 * references to m_array[] may no longer be valid. 724 * 725 * @param value FastStringBuffer whose contents are 726 * to be appended. 727 */ append(FastStringBuffer value)728 public final void append(FastStringBuffer value) 729 { 730 731 // Complicating factor here is that the two buffers may use 732 // different chunk sizes, and even if they're the same we're 733 // probably on a different alignment due to previously appended 734 // data. We have to work through the source in bite-sized chunks. 735 if (value == null) 736 return; 737 int strlen = value.length(); 738 739 if (0 == strlen) 740 return; 741 742 int copyfrom = 0; 743 char[] chunk = m_array[m_lastChunk]; 744 int available = m_chunkSize - m_firstFree; 745 746 // Repeat while data remains to be copied 747 while (strlen > 0) 748 { 749 750 // Copy what fits 751 if (available > strlen) 752 available = strlen; 753 754 int sourcechunk = (copyfrom + value.m_chunkSize - 1) 755 >>> value.m_chunkBits; 756 int sourcecolumn = copyfrom & value.m_chunkMask; 757 int runlength = value.m_chunkSize - sourcecolumn; 758 759 if (runlength > available) 760 runlength = available; 761 762 System.arraycopy(value.m_array[sourcechunk], sourcecolumn, 763 m_array[m_lastChunk], m_firstFree, runlength); 764 765 if (runlength != available) 766 System.arraycopy(value.m_array[sourcechunk + 1], 0, 767 m_array[m_lastChunk], m_firstFree + runlength, 768 available - runlength); 769 770 strlen -= available; 771 copyfrom += available; 772 773 // If there's more left, allocate another chunk and continue 774 if (strlen > 0) 775 { 776 777 // Extend array? 778 int i = m_array.length; 779 780 if (m_lastChunk + 1 == i) 781 { 782 char[][] newarray = new char[i + 16][]; 783 784 System.arraycopy(m_array, 0, newarray, 0, i); 785 786 m_array = newarray; 787 } 788 789 // Advance one chunk 790 chunk = m_array[++m_lastChunk]; 791 792 if (chunk == null) 793 { 794 795 // Hierarchical encapsulation 796 if (m_lastChunk == 1 << m_rebundleBits 797 && m_chunkBits < m_maxChunkBits) 798 { 799 800 // Should do all the work of both encapsulating 801 // existing data and establishing new sizes/offsets 802 m_innerFSB = new FastStringBuffer(this); 803 } 804 805 // Add a chunk. 806 chunk = m_array[m_lastChunk] = new char[m_chunkSize]; 807 } 808 809 available = m_chunkSize; 810 m_firstFree = 0; 811 } 812 } 813 814 // Adjust the insert point in the last chunk, when we've reached it. 815 m_firstFree += available; 816 } 817 818 /** 819 * @return true if the specified range of characters are all whitespace, 820 * as defined by XMLCharacterRecognizer. 821 * <p> 822 * CURRENTLY DOES NOT CHECK FOR OUT-OF-RANGE. 823 * 824 * @param start Offset of first character in the range. 825 * @param length Number of characters to send. 826 */ isWhitespace(int start, int length)827 public boolean isWhitespace(int start, int length) 828 { 829 830 int sourcechunk = start >>> m_chunkBits; 831 int sourcecolumn = start & m_chunkMask; 832 int available = m_chunkSize - sourcecolumn; 833 boolean chunkOK; 834 835 while (length > 0) 836 { 837 int runlength = (length <= available) ? length : available; 838 839 if (sourcechunk == 0 && m_innerFSB != null) 840 chunkOK = m_innerFSB.isWhitespace(sourcecolumn, runlength); 841 else 842 chunkOK = com.sun.org.apache.xml.internal.utils.XMLCharacterRecognizer.isWhiteSpace( 843 m_array[sourcechunk], sourcecolumn, runlength); 844 845 if (!chunkOK) 846 return false; 847 848 length -= runlength; 849 850 ++sourcechunk; 851 852 sourcecolumn = 0; 853 available = m_chunkSize; 854 } 855 856 return true; 857 } 858 859 /** 860 * @param start Offset of first character in the range. 861 * @param length Number of characters to send. 862 * @return a new String object initialized from the specified range of 863 * characters. 864 */ getString(int start, int length)865 public String getString(int start, int length) 866 { 867 int startColumn = start & m_chunkMask; 868 int startChunk = start >>> m_chunkBits; 869 if (startColumn + length < m_chunkMask && m_innerFSB == null) { 870 return getOneChunkString(startChunk, startColumn, length); 871 } 872 return getString(new StringBuffer(length), startChunk, startColumn, 873 length).toString(); 874 } 875 getOneChunkString(int startChunk, int startColumn, int length)876 protected String getOneChunkString(int startChunk, int startColumn, 877 int length) { 878 return new String(m_array[startChunk], startColumn, length); 879 } 880 881 /** 882 * @param sb StringBuffer to be appended to 883 * @param start Offset of first character in the range. 884 * @param length Number of characters to send. 885 * @return sb with the requested text appended to it 886 */ getString(StringBuffer sb, int start, int length)887 StringBuffer getString(StringBuffer sb, int start, int length) 888 { 889 return getString(sb, start >>> m_chunkBits, start & m_chunkMask, length); 890 } 891 892 /** 893 * Internal support for toString() and getString(). 894 * PLEASE NOTE SIGNATURE CHANGE from earlier versions; it now appends into 895 * and returns a StringBuffer supplied by the caller. This simplifies 896 * m_innerFSB support. 897 * <p> 898 * Note that this operation has been somewhat deoptimized by the shift to a 899 * chunked array, as there is no factory method to produce a String object 900 * directly from an array of arrays and hence a double copy is needed. 901 * By presetting length we hope to minimize the heap overhead of building 902 * the intermediate StringBuffer. 903 * <p> 904 * (It really is a pity that Java didn't design String as a final subclass 905 * of MutableString, rather than having StringBuffer be a separate hierarchy. 906 * We'd avoid a <strong>lot</strong> of double-buffering.) 907 * 908 * 909 * @param sb 910 * @param startChunk 911 * @param startColumn 912 * @param length 913 * 914 * @return the contents of the FastStringBuffer as a standard Java string. 915 */ getString(StringBuffer sb, int startChunk, int startColumn, int length)916 StringBuffer getString(StringBuffer sb, int startChunk, int startColumn, 917 int length) 918 { 919 920 int stop = (startChunk << m_chunkBits) + startColumn + length; 921 int stopChunk = stop >>> m_chunkBits; 922 int stopColumn = stop & m_chunkMask; 923 924 // Factored out 925 //StringBuffer sb=new StringBuffer(length); 926 for (int i = startChunk; i < stopChunk; ++i) 927 { 928 if (i == 0 && m_innerFSB != null) 929 m_innerFSB.getString(sb, startColumn, m_chunkSize - startColumn); 930 else 931 sb.append(m_array[i], startColumn, m_chunkSize - startColumn); 932 933 startColumn = 0; // after first chunk 934 } 935 936 if (stopChunk == 0 && m_innerFSB != null) 937 m_innerFSB.getString(sb, startColumn, stopColumn - startColumn); 938 else if (stopColumn > startColumn) 939 sb.append(m_array[stopChunk], startColumn, stopColumn - startColumn); 940 941 return sb; 942 } 943 944 /** 945 * Get a single character from the string buffer. 946 * 947 * 948 * @param pos character position requested. 949 * @return A character from the requested position. 950 */ charAt(int pos)951 public char charAt(int pos) 952 { 953 int startChunk = pos >>> m_chunkBits; 954 955 if (startChunk == 0 && m_innerFSB != null) 956 return m_innerFSB.charAt(pos & m_chunkMask); 957 else 958 return m_array[startChunk][pos & m_chunkMask]; 959 } 960 961 /** 962 * Sends the specified range of characters as one or more SAX characters() 963 * events. 964 * Note that the buffer reference passed to the ContentHandler may be 965 * invalidated if the FastStringBuffer is edited; it's the user's 966 * responsibility to manage access to the FastStringBuffer to prevent this 967 * problem from arising. 968 * <p> 969 * Note too that there is no promise that the output will be sent as a 970 * single call. As is always true in SAX, one logical string may be split 971 * across multiple blocks of memory and hence delivered as several 972 * successive events. 973 * 974 * @param ch SAX ContentHandler object to receive the event. 975 * @param start Offset of first character in the range. 976 * @param length Number of characters to send. 977 * @exception org.xml.sax.SAXException may be thrown by handler's 978 * characters() method. 979 */ sendSAXcharacters( org.xml.sax.ContentHandler ch, int start, int length)980 public void sendSAXcharacters( 981 org.xml.sax.ContentHandler ch, int start, int length) 982 throws org.xml.sax.SAXException 983 { 984 985 int startChunk = start >>> m_chunkBits; 986 int startColumn = start & m_chunkMask; 987 if (startColumn + length < m_chunkMask && m_innerFSB == null) { 988 ch.characters(m_array[startChunk], startColumn, length); 989 return; 990 } 991 992 int stop = start + length; 993 int stopChunk = stop >>> m_chunkBits; 994 int stopColumn = stop & m_chunkMask; 995 996 for (int i = startChunk; i < stopChunk; ++i) 997 { 998 if (i == 0 && m_innerFSB != null) 999 m_innerFSB.sendSAXcharacters(ch, startColumn, 1000 m_chunkSize - startColumn); 1001 else 1002 ch.characters(m_array[i], startColumn, m_chunkSize - startColumn); 1003 1004 startColumn = 0; // after first chunk 1005 } 1006 1007 // Last, or only, chunk 1008 if (stopChunk == 0 && m_innerFSB != null) 1009 m_innerFSB.sendSAXcharacters(ch, startColumn, stopColumn - startColumn); 1010 else if (stopColumn > startColumn) 1011 { 1012 ch.characters(m_array[stopChunk], startColumn, 1013 stopColumn - startColumn); 1014 } 1015 } 1016 1017 /** 1018 * Sends the specified range of characters as one or more SAX characters() 1019 * events, normalizing the characters according to XSLT rules. 1020 * 1021 * @param ch SAX ContentHandler object to receive the event. 1022 * @param start Offset of first character in the range. 1023 * @param length Number of characters to send. 1024 * @return normalization status to apply to next chunk (because we may 1025 * have been called recursively to process an inner FSB): 1026 * <dl> 1027 * <dt>0</dt> 1028 * <dd>if this output did not end in retained whitespace, and thus whitespace 1029 * at the start of the following chunk (if any) should be converted to a 1030 * single space. 1031 * <dt>SUPPRESS_LEADING_WS</dt> 1032 * <dd>if this output ended in retained whitespace, and thus whitespace 1033 * at the start of the following chunk (if any) should be completely 1034 * suppressed.</dd> 1035 * </dd> 1036 * </dl> 1037 * @exception org.xml.sax.SAXException may be thrown by handler's 1038 * characters() method. 1039 */ sendNormalizedSAXcharacters( org.xml.sax.ContentHandler ch, int start, int length)1040 public int sendNormalizedSAXcharacters( 1041 org.xml.sax.ContentHandler ch, int start, int length) 1042 throws org.xml.sax.SAXException 1043 { 1044 // This call always starts at the beginning of the 1045 // string being written out, either because it was called directly or 1046 // because it was an m_innerFSB recursion. This is important since 1047 // it gives us a well-known initial state for this flag: 1048 int stateForNextChunk=SUPPRESS_LEADING_WS; 1049 1050 int stop = start + length; 1051 int startChunk = start >>> m_chunkBits; 1052 int startColumn = start & m_chunkMask; 1053 int stopChunk = stop >>> m_chunkBits; 1054 int stopColumn = stop & m_chunkMask; 1055 1056 for (int i = startChunk; i < stopChunk; ++i) 1057 { 1058 if (i == 0 && m_innerFSB != null) 1059 stateForNextChunk= 1060 m_innerFSB.sendNormalizedSAXcharacters(ch, startColumn, 1061 m_chunkSize - startColumn); 1062 else 1063 stateForNextChunk= 1064 sendNormalizedSAXcharacters(m_array[i], startColumn, 1065 m_chunkSize - startColumn, 1066 ch,stateForNextChunk); 1067 1068 startColumn = 0; // after first chunk 1069 } 1070 1071 // Last, or only, chunk 1072 if (stopChunk == 0 && m_innerFSB != null) 1073 stateForNextChunk= // %REVIEW% Is this update really needed? 1074 m_innerFSB.sendNormalizedSAXcharacters(ch, startColumn, stopColumn - startColumn); 1075 else if (stopColumn > startColumn) 1076 { 1077 stateForNextChunk= // %REVIEW% Is this update really needed? 1078 sendNormalizedSAXcharacters(m_array[stopChunk], 1079 startColumn, stopColumn - startColumn, 1080 ch, stateForNextChunk | SUPPRESS_TRAILING_WS); 1081 } 1082 return stateForNextChunk; 1083 } 1084 1085 static final char[] SINGLE_SPACE = {' '}; 1086 1087 /** 1088 * Internal method to directly normalize and dispatch the character array. 1089 * This version is aware of the fact that it may be called several times 1090 * in succession if the data is made up of multiple "chunks", and thus 1091 * must actively manage the handling of leading and trailing whitespace. 1092 * 1093 * Note: The recursion is due to the possible recursion of inner FSBs. 1094 * 1095 * @param ch The characters from the XML document. 1096 * @param start The start position in the array. 1097 * @param length The number of characters to read from the array. 1098 * @param handler SAX ContentHandler object to receive the event. 1099 * @param edgeTreatmentFlags How leading/trailing spaces should be handled. 1100 * This is a bitfield contining two flags, bitwise-ORed together: 1101 * <dl> 1102 * <dt>SUPPRESS_LEADING_WS</dt> 1103 * <dd>When false, causes leading whitespace to be converted to a single 1104 * space; when true, causes it to be discarded entirely. 1105 * Should be set TRUE for the first chunk, and (in multi-chunk output) 1106 * whenever the previous chunk ended in retained whitespace.</dd> 1107 * <dt>SUPPRESS_TRAILING_WS</dt> 1108 * <dd>When false, causes trailing whitespace to be converted to a single 1109 * space; when true, causes it to be discarded entirely. 1110 * Should be set TRUE for the last or only chunk. 1111 * </dd> 1112 * </dl> 1113 * @return normalization status, as in the edgeTreatmentFlags parameter: 1114 * <dl> 1115 * <dt>0</dt> 1116 * <dd>if this output did not end in retained whitespace, and thus whitespace 1117 * at the start of the following chunk (if any) should be converted to a 1118 * single space. 1119 * <dt>SUPPRESS_LEADING_WS</dt> 1120 * <dd>if this output ended in retained whitespace, and thus whitespace 1121 * at the start of the following chunk (if any) should be completely 1122 * suppressed.</dd> 1123 * </dd> 1124 * </dl> 1125 * 1126 * 1127 * @exception org.xml.sax.SAXException Any SAX exception, possibly 1128 * wrapping another exception. 1129 */ sendNormalizedSAXcharacters(char ch[], int start, int length, org.xml.sax.ContentHandler handler, int edgeTreatmentFlags)1130 static int sendNormalizedSAXcharacters(char ch[], 1131 int start, int length, 1132 org.xml.sax.ContentHandler handler, 1133 int edgeTreatmentFlags) 1134 throws org.xml.sax.SAXException 1135 { 1136 boolean processingLeadingWhitespace = 1137 ((edgeTreatmentFlags & SUPPRESS_LEADING_WS) != 0); 1138 boolean seenWhitespace = ((edgeTreatmentFlags & CARRY_WS) != 0); 1139 boolean suppressTrailingWhitespace = 1140 ((edgeTreatmentFlags & SUPPRESS_TRAILING_WS) != 0); 1141 int currPos = start; 1142 int limit = start+length; 1143 1144 // Strip any leading spaces first, if required 1145 if (processingLeadingWhitespace) { 1146 for (; currPos < limit 1147 && XMLCharacterRecognizer.isWhiteSpace(ch[currPos]); 1148 currPos++) { } 1149 1150 // If we've only encountered leading spaces, the 1151 // current state remains unchanged 1152 if (currPos == limit) { 1153 return edgeTreatmentFlags; 1154 } 1155 } 1156 1157 // If we get here, there are no more leading spaces to strip 1158 while (currPos < limit) { 1159 int startNonWhitespace = currPos; 1160 1161 // Grab a chunk of non-whitespace characters 1162 for (; currPos < limit 1163 && !XMLCharacterRecognizer.isWhiteSpace(ch[currPos]); 1164 currPos++) { } 1165 1166 // Non-whitespace seen - emit them, along with a single 1167 // space for any preceding whitespace characters 1168 if (startNonWhitespace != currPos) { 1169 if (seenWhitespace) { 1170 handler.characters(SINGLE_SPACE, 0, 1); 1171 seenWhitespace = false; 1172 } 1173 handler.characters(ch, startNonWhitespace, 1174 currPos - startNonWhitespace); 1175 } 1176 1177 int startWhitespace = currPos; 1178 1179 // Consume any whitespace characters 1180 for (; currPos < limit 1181 && XMLCharacterRecognizer.isWhiteSpace(ch[currPos]); 1182 currPos++) { } 1183 1184 if (startWhitespace != currPos) { 1185 seenWhitespace = true; 1186 } 1187 } 1188 1189 return (seenWhitespace ? CARRY_WS : 0) 1190 | (edgeTreatmentFlags & SUPPRESS_TRAILING_WS); 1191 } 1192 1193 /** 1194 * Directly normalize and dispatch the character array. 1195 * 1196 * @param ch The characters from the XML document. 1197 * @param start The start position in the array. 1198 * @param length The number of characters to read from the array. 1199 * @param handler SAX ContentHandler object to receive the event. 1200 * @exception org.xml.sax.SAXException Any SAX exception, possibly 1201 * wrapping another exception. 1202 */ sendNormalizedSAXcharacters(char ch[], int start, int length, org.xml.sax.ContentHandler handler)1203 public static void sendNormalizedSAXcharacters(char ch[], 1204 int start, int length, 1205 org.xml.sax.ContentHandler handler) 1206 throws org.xml.sax.SAXException 1207 { 1208 sendNormalizedSAXcharacters(ch, start, length, 1209 handler, SUPPRESS_BOTH); 1210 } 1211 1212 /** 1213 * Sends the specified range of characters as sax Comment. 1214 * <p> 1215 * Note that, unlike sendSAXcharacters, this has to be done as a single 1216 * call to LexicalHandler#comment. 1217 * 1218 * @param ch SAX LexicalHandler object to receive the event. 1219 * @param start Offset of first character in the range. 1220 * @param length Number of characters to send. 1221 * @exception org.xml.sax.SAXException may be thrown by handler's 1222 * characters() method. 1223 */ sendSAXComment( org.xml.sax.ext.LexicalHandler ch, int start, int length)1224 public void sendSAXComment( 1225 org.xml.sax.ext.LexicalHandler ch, int start, int length) 1226 throws org.xml.sax.SAXException 1227 { 1228 1229 // %OPT% Do it this way for now... 1230 String comment = getString(start, length); 1231 ch.comment(comment.toCharArray(), 0, length); 1232 } 1233 1234 /** 1235 * Copies characters from this string into the destination character 1236 * array. 1237 * 1238 * @param srcBegin index of the first character in the string 1239 * to copy. 1240 * @param srcEnd index after the last character in the string 1241 * to copy. 1242 * @param dst the destination array. 1243 * @param dstBegin the start offset in the destination array. 1244 * @exception IndexOutOfBoundsException If any of the following 1245 * is true: 1246 * <ul><li><code>srcBegin</code> is negative. 1247 * <li><code>srcBegin</code> is greater than <code>srcEnd</code> 1248 * <li><code>srcEnd</code> is greater than the length of this 1249 * string 1250 * <li><code>dstBegin</code> is negative 1251 * <li><code>dstBegin+(srcEnd-srcBegin)</code> is larger than 1252 * <code>dst.length</code></ul> 1253 * @exception NullPointerException if <code>dst</code> is <code>null</code> 1254 */ getChars(int srcBegin, int srcEnd, char dst[], int dstBegin)1255 private void getChars(int srcBegin, int srcEnd, char dst[], int dstBegin) 1256 { 1257 // %TBD% Joe needs to write this function. Make public when implemented. 1258 } 1259 1260 /** 1261 * Encapsulation c'tor. After this is called, the source FastStringBuffer 1262 * will be reset to use the new object as its m_innerFSB, and will have 1263 * had its chunk size reset appropriately. IT SHOULD NEVER BE CALLED 1264 * EXCEPT WHEN source.length()==1<<(source.m_chunkBits+source.m_rebundleBits) 1265 * 1266 * NEEDSDOC @param source 1267 */ FastStringBuffer(FastStringBuffer source)1268 private FastStringBuffer(FastStringBuffer source) 1269 { 1270 1271 // Copy existing information into new encapsulation 1272 m_chunkBits = source.m_chunkBits; 1273 m_maxChunkBits = source.m_maxChunkBits; 1274 m_rebundleBits = source.m_rebundleBits; 1275 m_chunkSize = source.m_chunkSize; 1276 m_chunkMask = source.m_chunkMask; 1277 m_array = source.m_array; 1278 m_innerFSB = source.m_innerFSB; 1279 1280 // These have to be adjusted because we're calling just at the time 1281 // when we would be about to allocate another chunk 1282 m_lastChunk = source.m_lastChunk - 1; 1283 m_firstFree = source.m_chunkSize; 1284 1285 // Establish capsule as the Inner FSB, reset chunk sizes/addressing 1286 source.m_array = new char[16][]; 1287 source.m_innerFSB = this; 1288 1289 // Since we encapsulated just as we were about to append another 1290 // chunk, return ready to create the chunk after the innerFSB 1291 // -- 1, not 0. 1292 source.m_lastChunk = 1; 1293 source.m_firstFree = 0; 1294 source.m_chunkBits += m_rebundleBits; 1295 source.m_chunkSize = 1 << (source.m_chunkBits); 1296 source.m_chunkMask = source.m_chunkSize - 1; 1297 } 1298 } 1299