/* Copyright 2002-2005, 2010 Elliotte Rusty Harold

   This library is free software; you can redistribute it and/or modify
   it under the terms of version 2.1 of the GNU Lesser General Public
   License as published by the Free Software Foundation.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, write to the
   Free Software Foundation, Inc., 59 Temple Place, Suite 330,
   Boston, MA 02111-1307  USA

   You can contact Elliotte Rusty Harold by sending e-mail to
   elharo@ibiblio.org. Please include the word "XOM" in the
   subject line. The XOM home page is located at http://www.xom.nu/
*/

package nu.xom;

import java.io.UnsupportedEncodingException;

/**
 * <p>
 *   This class represents a run of text.
 *   CDATA sections are not treated differently than
 *   normal text. <code>Text</code> objects may be adjacent to other
 *   <code>Text</code> objects.
 * </p>
 *
 * <p>
 *   The maximum size of a String or an array in Java limits the maximum
 *   size of a text node to about 2 gigabytes. If you're stuffing Base-64
 *   encoded movies inside one node, you will have problems. Try breaking
 *   up the text into smaller, contiguous chunks. Even then you will
 *   probably not be able to use XPath on the result.
 * </p>
 *
 * @author Elliotte Rusty Harold
 * @version 1.2.7
 *
 */
public class Text extends Node {


    // The node's content, stored as UTF-8 encoded bytes. The array is
    // never modified in place; setters replace the reference.
    private byte[] data;


    /**
     * <p>
     * This constructor creates a new <code>Text</code> object.
     * The data is checked for legality according to XML 1.0 rules.
     * Characters that can be serialized by escaping them
     * such as &lt; and &amp; are allowed. However, characters
     * such as the form feed, null, vertical tab,
     * unmatched halves of surrogate pairs,
     * and 0xFFFE and 0xFFFF are not allowed. Passing null is the
     * same as passing the empty string.
     * </p>
     *
     * @param data the initial text of the object
     *
     * @throws IllegalCharacterDataException if data contains any
     *     characters which are illegal in well-formed XML 1.0 such as
     *     null, vertical tab, or unmatched halves of surrogate pairs
     */
    public Text(String data) {
        // Delegate to the private setter rather than the public,
        // overridable setValue().
        _setValue(data);
    }


    /**
     * <p>
     * Creates a copy of the specified <code>Text</code> object.
     * </p>
     *
     * @param text the <code>Text</code> object to copy
     */
    public Text(Text text) {
        // I'm relying here on the data array being immutable.
        // If this ever changes, e.g. by adding an append method,
        // this method needs to change too.
        this.data = text.data;
    }


    private Text() {}


    /**
     * Creates a <code>Text</code> node from the given string without
     * running <code>Verifier.checkPCDATA</code> on it.
     * NOTE(review): presumably callers only pass text that is already
     * known to be legal (e.g. parser output); confirm against callers.
     *
     * @param data the text of the new node; must not be null
     *
     * @return a new, parentless <code>Text</code> node
     */
    static Text build(String data) {

        Text result = new Text();
        result.data = toUTF8Bytes(data);
        return result;

    }


    /**
     * <p>
     * Sets the content of the <code>Text</code> object
     * to the specified data. The data is checked for
     * legality according to XML 1.0 rules. Characters that
     * can be serialized such as &lt; and &amp; are allowed.
     * However, characters such as the form feed, null,
     * vertical tab, unmatched halves of surrogate pairs,
     * and 0xFFFE and 0xFFFF are not allowed. Passing null is the same
     * as passing the empty string.
     * </p>
     *
     * @param data the text to install in the object
     *
     * @throws IllegalCharacterDataException if data contains any
     *     characters which are illegal in well-formed XML 1.0 such as
     *     null, vertical tab, or unmatched halves of surrogate pairs
     */
    public void setValue(String data) {
        _setValue(data);
    }


    // Shared implementation of the constructor and setValue():
    // null becomes the empty string, anything else is verified
    // before being stored.
    private void _setValue(String data) {

        if (data == null) {
            data = "";
        }
        else {
            Verifier.checkPCDATA(data);
        }
        this.data = toUTF8Bytes(data);

    }


    /**
     * <p>
     * Returns the XPath 1.0 string-value of this <code>Text</code>
     * node. The XPath string-value of a text node is the same as
     * the text of the node.
     * </p>
     *
     * @return the content of the node
     */
    public final String getValue() {
        return fromUTF8Bytes(data);
    }


    /**
     * <p>
     * Throws <code>IndexOutOfBoundsException</code> because
     * texts do not have children.
     * </p>
     *
     * @return never returns because texts do not have children;
     *     always throws an exception.
     *
     * @param position the index of the child node to return
     *
     * @throws IndexOutOfBoundsException because texts
     *     do not have children
     */
    public final Node getChild(int position) {
        throw new IndexOutOfBoundsException(
          "LeafNodes do not have children");
    }


    /**
     * <p>
     * Returns 0 because texts do not have children.
     * </p>
     *
     * @return zero
     */
    public final int getChildCount() {
        return 0;
    }


    /**
     * <p>
     * Returns a deep copy of this <code>Text</code> with no parent,
     * that can be added to this document or a different one.
     * </p>
     *
     * @return a deep copy of this text node with no parent
     */
    public Node copy() {

        if (isCDATASection()) {
            return new CDATASection(this);
        }
        else {
            return new Text(this);
        }

    }


    /**
     * <p>
     * Returns a string containing the XML serialization of this text
     * node.  Unlike <code>getValue</code>, this method escapes
     * characters such as &amp; and &lt; using entity references such
     * as <code>&amp;amp;</code> and <code>&amp;lt;</code>.
     * It escapes the carriage return (\r) as <code>&amp;#x0D;</code>.
     * If this text node is a CDATA section, then it may wrap the value
     * in CDATA section delimiters instead of escaping.
     * </p>
     *
     * @return the string form of this text node
     */
    public final String toXML() {
        return escapeText();
    }


    /**
     * Returns this node's value with the characters that cannot appear
     * literally in serialized character data replaced by references:
     * <code>&amp;</code>, <code>&lt;</code> and <code>&gt;</code> become
     * the predefined entity references, and the carriage return becomes
     * a numeric character reference. All other characters are copied
     * unchanged, except code points 14 through 31, which are omitted.
     *
     * @return the escaped text
     */
    String escapeText() {

        String s = getValue();
        int length = s.length();
        // Give the string buffer enough room for a couple of escaped characters
        StringBuffer result = new StringBuffer(length+12);
        for (int i = 0; i < length; i++) {
            char c = s.charAt(i);
            switch (c) {
                case '\r':
                    // A literal CR would not survive a reparse as a CR,
                    // so it is written as a character reference.
                    result.append("&#x0D;");
                    break;
                case 14: case 15: case 16: case 17: case 18: case 19:
                case 20: case 21: case 22: case 23: case 24: case 25:
                case 26: case 27: case 28: case 29: case 30: case 31:
                    // impossible (per the original author); such a
                    // character is skipped rather than emitted
                    break;
                case '&':
                    result.append("&amp;");
                    break;
                case '<':
                    result.append("&lt;");
                    break;
                case '>':
                    result.append("&gt;");
                    break;
                default:
                    result.append(c);
            }
        }

        return result.toString();

    }


    boolean isText() {
        return true;
    }


    /**
     * <p>
     * Returns a <code>String</code>
     * representation of this <code>Text</code> suitable for
     * debugging and diagnosis. This is <em>not</em>
     * the XML representation of this <code>Text</code> node.
     * </p>
     *
     * @return a non-XML string representation of this node
     */
    public final String toString() {

        return "[" + getClass().getName() + ": "
          + escapeLineBreaksAndTruncate(getValue()) + "]";

    }


    /**
     * Produces a short, single-line rendering of a string for
     * diagnostic output. A string longer than 40 characters is cut
     * to its first 35 characters and followed by <code>...</code>.
     * Line feeds, carriage returns and tabs are shown as the
     * two-character sequences <code>\n</code>, <code>\r</code> and
     * <code>\t</code>.
     *
     * @param s the string to render; must not be null
     *
     * @return the escaped and possibly truncated string
     */
    static String escapeLineBreaksAndTruncate(String s) {

        int length = s.length();
        boolean tooLong = length > 40;
        if (tooLong) {
            length = 35;
            s = s.substring(0, 35);
        }

        StringBuffer result = new StringBuffer(length);
        for (int i = 0; i < length; i++) {
            char c = s.charAt(i);
            switch (c) {
                case '\n':
                    result.append("\\n");
                    break;
                case '\r':
                    result.append("\\r");
                    break;
                case '\t':
                    result.append("\\t");
                    break;
                default:
                    result.append(c);
            }
        }
        if (tooLong) result.append("...");

        return result.toString();

    }


    boolean isCDATASection() {
        return false;
    }


    boolean isEmpty() {
        return this.data.length == 0;
    }


    // Encodes a string as UTF-8 bytes, converting the checked
    // exception for a missing charset into a RuntimeException.
    private static byte[] toUTF8Bytes(String s) {

        try {
            return s.getBytes("UTF8");
        }
        catch (UnsupportedEncodingException ex) {
            throw new RuntimeException(
              "Bad VM! Does not support UTF-8"
            );
        }

    }


    // Decodes UTF-8 bytes back into a string, converting the checked
    // exception for a missing charset into a RuntimeException.
    private static String fromUTF8Bytes(byte[] bytes) {

        try {
            return new String(bytes, "UTF8");
        }
        catch (UnsupportedEncodingException ex) {
            throw new RuntimeException(
              "Bad VM! Does not support UTF-8"
            );
        }

    }


}