1 /* 2 * Copyright 2001-2005 (C) MetaStuff, Ltd. All Rights Reserved. 3 * 4 * This software is open source. 5 * See the bottom of this file for the licence. 6 */ 7 8 package org.dom4j.io; 9 10 /** 11 * <p> 12 * <code>OutputFormat</code> represents the format configuration used by 13 * {@linkXMLWriter}and its base classes to format the XML output 14 * </p> 15 * 16 * @author <a href="mailto:james.strachan@metastuff.com">James Strachan </a> 17 * @version $Revision: 1.17 $ 18 */ 19 public class OutputFormat implements Cloneable { 20 /** standard value to indent by, if we are indenting */ 21 protected static final String STANDARD_INDENT = " "; 22 23 /** 24 * Whether or not to suppress the XML declaration - default is 25 * <code>false</code> 26 */ 27 private boolean suppressDeclaration = false; 28 29 /** 30 * Whether or not to print new line after the XML declaration - default is 31 * <code>true</code> 32 */ 33 private boolean newLineAfterDeclaration = true; 34 35 /** The encoding format */ 36 private String encoding = "UTF-8"; 37 38 /** 39 * Whether or not to output the encoding in the XML declaration - default is 40 * <code>false</code> 41 */ 42 private boolean omitEncoding = false; 43 44 /** The default indent is no spaces (as original document) */ 45 private String indent = null; 46 47 /** 48 * Whether or not to expand empty elements to 49 * <tagName></tagName> - default is <code>false</code> 50 */ 51 private boolean expandEmptyElements = false; 52 53 /** 54 * The default new line flag, set to do new lines only as in original 55 * document 56 */ 57 private boolean newlines = false; 58 59 /** New line separator */ 60 private String lineSeparator = "\n"; 61 62 /** should we preserve whitespace or not in text nodes? */ 63 private boolean trimText = false; 64 65 /** pad string-element boundaries with whitespace */ 66 private boolean padText = false; 67 68 /** Whether or not to use XHTML standard. */ 69 private boolean doXHTML = false; 70 71 /** 72 * Controls when to output a line.separtor every so many tags in case of no 73 * lines and total text trimming. 74 */ 75 private int newLineAfterNTags = 0; // zero means don't bother. 76 77 /** Quote character to use when writing attributes. */ 78 private char attributeQuoteChar = '\"'; 79 80 /** 81 * Creates an <code>OutputFormat</code> with no additional whitespace 82 * (indent or new lines) added. The whitespace from the element text content 83 * is fully preserved. 84 */ OutputFormat()85 public OutputFormat() { 86 } 87 88 /** 89 * Creates an <code>OutputFormat</code> with the given indent added but no 90 * new lines added. All whitespace from element text will be included. 91 * 92 * @param indent 93 * is the indent string to be used for indentation (usually a 94 * number of spaces). 95 */ OutputFormat(String indent)96 public OutputFormat(String indent) { 97 this.indent = indent; 98 } 99 100 /** 101 * Creates an <code>OutputFormat</code> with the given indent added with 102 * optional newlines between the Elements. All whitespace from element text 103 * will be included. 104 * 105 * @param indent 106 * is the indent string to be used for indentation (usually a 107 * number of spaces). 108 * @param newlines 109 * whether new lines are added to layout the 110 */ OutputFormat(String indent, boolean newlines)111 public OutputFormat(String indent, boolean newlines) { 112 this.indent = indent; 113 this.newlines = newlines; 114 } 115 116 /** 117 * Creates an <code>OutputFormat</code> with the given indent added with 118 * optional newlines between the Elements and the given encoding format. 119 * 120 * @param indent 121 * is the indent string to be used for indentation (usually a 122 * number of spaces). 123 * @param newlines 124 * whether new lines are added to layout the 125 * @param encoding 126 * is the text encoding to use for writing the XML 127 */ OutputFormat(String indent, boolean newlines, String encoding)128 public OutputFormat(String indent, boolean newlines, String encoding) { 129 this.indent = indent; 130 this.newlines = newlines; 131 this.encoding = encoding; 132 } 133 getLineSeparator()134 public String getLineSeparator() { 135 return lineSeparator; 136 } 137 138 /** 139 * <p> 140 * This will set the new-line separator. The default is <code>\n</code>. 141 * Note that if the "newlines" property is false, this value is irrelevant. 142 * To make it output the system default line ending string, call 143 * <code>setLineSeparator(System.getProperty("line.separator"))</code> 144 * </p> 145 * 146 * @param separator 147 * <code>String</code> line separator to use. 148 * 149 * @see #setNewlines(boolean) 150 */ setLineSeparator(String separator)151 public void setLineSeparator(String separator) { 152 lineSeparator = separator; 153 } 154 isNewlines()155 public boolean isNewlines() { 156 return newlines; 157 } 158 159 /** 160 * DOCUMENT ME! 161 * 162 * @param newlines 163 * <code>true</code> indicates new lines should be printed, 164 * else new lines are ignored (compacted). 165 * 166 * @see #setLineSeparator(String) 167 */ setNewlines(boolean newlines)168 public void setNewlines(boolean newlines) { 169 this.newlines = newlines; 170 } 171 getEncoding()172 public String getEncoding() { 173 return encoding; 174 } 175 176 /** 177 * DOCUMENT ME! 178 * 179 * @param encoding 180 * encoding format 181 */ setEncoding(String encoding)182 public void setEncoding(String encoding) { 183 if (encoding != null) { 184 this.encoding = encoding; 185 } 186 } 187 isOmitEncoding()188 public boolean isOmitEncoding() { 189 return omitEncoding; 190 } 191 192 /** 193 * <p> 194 * This will set whether the XML declaration (<code><?xml version="1.0" 195 * encoding="UTF-8"?></code>) 196 * includes the encoding of the document. It is common to suppress this in 197 * protocols such as WML and SOAP. 198 * </p> 199 * 200 * @param omitEncoding 201 * <code>boolean</code> indicating whether or not the XML 202 * declaration should indicate the document encoding. 203 */ setOmitEncoding(boolean omitEncoding)204 public void setOmitEncoding(boolean omitEncoding) { 205 this.omitEncoding = omitEncoding; 206 } 207 208 /** 209 * <p> 210 * This will set whether the XML declaration (<code><?xml version="1.0" 211 * encoding="UTF-8"?></code>) 212 * is included or not. It is common to suppress this in protocols such as 213 * WML and SOAP. 214 * </p> 215 * 216 * @param suppressDeclaration 217 * <code>boolean</code> indicating whether or not the XML 218 * declaration should be suppressed. 219 */ setSuppressDeclaration(boolean suppressDeclaration)220 public void setSuppressDeclaration(boolean suppressDeclaration) { 221 this.suppressDeclaration = suppressDeclaration; 222 } 223 224 /** 225 * DOCUMENT ME! 226 * 227 * @return true if the output of the XML declaration (<code><?xml 228 * version="1.0"?></code>) 229 * should be suppressed else false. 230 */ isSuppressDeclaration()231 public boolean isSuppressDeclaration() { 232 return suppressDeclaration; 233 } 234 235 /** 236 * <p> 237 * This will set whether a new line is printed after the XML declaration 238 * (assuming it is not supressed.) 239 * </p> 240 * 241 * @param newLineAfterDeclaration 242 * <code>boolean</code> indicating whether or not to print new 243 * line following the XML declaration. The default is true. 244 */ setNewLineAfterDeclaration(boolean newLineAfterDeclaration)245 public void setNewLineAfterDeclaration(boolean newLineAfterDeclaration) { 246 this.newLineAfterDeclaration = newLineAfterDeclaration; 247 } 248 249 /** 250 * DOCUMENT ME! 251 * 252 * @return true if a new line should be printed following XML declaration 253 */ isNewLineAfterDeclaration()254 public boolean isNewLineAfterDeclaration() { 255 return newLineAfterDeclaration; 256 } 257 isExpandEmptyElements()258 public boolean isExpandEmptyElements() { 259 return expandEmptyElements; 260 } 261 262 /** 263 * <p> 264 * This will set whether empty elements are expanded from 265 * <code><tagName></code> to 266 * <code><tagName></tagName></code>. 267 * </p> 268 * 269 * @param expandEmptyElements 270 * <code>boolean</code> indicating whether or not empty 271 * elements should be expanded. 272 */ setExpandEmptyElements(boolean expandEmptyElements)273 public void setExpandEmptyElements(boolean expandEmptyElements) { 274 this.expandEmptyElements = expandEmptyElements; 275 } 276 isTrimText()277 public boolean isTrimText() { 278 return trimText; 279 } 280 281 /** 282 * <p> 283 * This will set whether the text is output verbatim (false) or with 284 * whitespace stripped as per <code>{@link 285 * org.dom4j.Element#getTextTrim()}</code>. 286 * </p> 287 * 288 * <p> 289 * </p> 290 * 291 * <p> 292 * Default: false 293 * </p> 294 * 295 * @param trimText 296 * <code>boolean</code> true=>trim the whitespace, false=>use 297 * text verbatim 298 */ setTrimText(boolean trimText)299 public void setTrimText(boolean trimText) { 300 this.trimText = trimText; 301 } 302 isPadText()303 public boolean isPadText() { 304 return padText; 305 } 306 307 /** 308 * <p> 309 * Ensure that text immediately preceded by or followed by an element will 310 * be "padded" with a single space. This is used to allow make 311 * browser-friendly HTML, avoiding trimText's transformation of, e.g., 312 * <code>The quick <b>brown</b> fox</code> into <code>The 313 * quick<b>brown</b>fox</code> 314 * (the latter will run the three separate words together into a single 315 * word). This setting is not too useful if you haven't also called 316 * {@link #setTrimText}. 317 * </p> 318 * 319 * <p> 320 * The padding string will only be added if the text itself starts or ends 321 * with some whitespace characters. 322 * </p> 323 * 324 * <p> 325 * Default: false 326 * </p> 327 * 328 * @param padText 329 * <code>boolean</code> if true, pad string-element boundaries 330 */ setPadText(boolean padText)331 public void setPadText(boolean padText) { 332 this.padText = padText; 333 } 334 getIndent()335 public String getIndent() { 336 return indent; 337 } 338 339 /** 340 * <p> 341 * This will set the indent <code>String</code> to use; this is usually a 342 * <code>String</code> of empty spaces. If you pass null, or the empty 343 * string (""), then no indentation will happen. 344 * </p> 345 * Default: none (null) 346 * 347 * @param indent 348 * <code>String</code> to use for indentation. 349 */ setIndent(String indent)350 public void setIndent(String indent) { 351 // nullify empty string to void unnecessary indentation code 352 if ((indent != null) && (indent.length() <= 0)) { 353 indent = null; 354 } 355 356 this.indent = indent; 357 } 358 359 /** 360 * Set the indent on or off. If setting on, will use the value of 361 * STANDARD_INDENT, which is usually two spaces. 362 * 363 * @param doIndent 364 * if true, set indenting on; if false, set indenting off 365 */ setIndent(boolean doIndent)366 public void setIndent(boolean doIndent) { 367 if (doIndent) { 368 this.indent = STANDARD_INDENT; 369 } else { 370 this.indent = null; 371 } 372 } 373 374 /** 375 * <p> 376 * This will set the indent <code>String</code>'s size; an indentSize of 377 * 4 would result in the indention being equivalent to the 378 * <code>String</code> " " (four space characters). 379 * </p> 380 * 381 * @param indentSize 382 * <code>int</code> number of spaces in indentation. 383 */ setIndentSize(int indentSize)384 public void setIndentSize(int indentSize) { 385 StringBuffer indentBuffer = new StringBuffer(); 386 387 for (int i = 0; i < indentSize; i++) { 388 indentBuffer.append(" "); 389 } 390 391 this.indent = indentBuffer.toString(); 392 } 393 394 /** 395 * <p> 396 * Whether or not to use the XHTML standard: like HTML but passes an XML 397 * parser with real, closed tags. Also, XHTML CDATA sections will be output 398 * with the CDATA delimiters: ( " <b><![CDATA[ </b>" and " 399 * <b>]]> </b>" ) otherwise, the class HTMLWriter will output the 400 * CDATA text, but not the delimiters. 401 * </p> 402 * 403 * <p> 404 * Default is <code>false</code> 405 * </p> 406 * 407 * @return DOCUMENT ME! 408 */ isXHTML()409 public boolean isXHTML() { 410 return doXHTML; 411 } 412 413 /** 414 * <p> 415 * This will set whether or not to use the XHTML standard: like HTML but 416 * passes an XML parser with real, closed tags. Also, XHTML CDATA sections 417 * will be output with the CDATA delimiters: ( " <b><[CDATA[ 418 * </b>" and " <b>]]< </b>) otherwise, the class HTMLWriter 419 * will output the CDATA text, but not the delimiters. 420 * </p> 421 * 422 * <p> 423 * Default: false 424 * </p> 425 * 426 * @param xhtml 427 * <code>boolean</code> true=>conform to XHTML, false=>conform 428 * to HTML, can have unclosed tags, etc. 429 */ setXHTML(boolean xhtml)430 public void setXHTML(boolean xhtml) { 431 doXHTML = xhtml; 432 } 433 getNewLineAfterNTags()434 public int getNewLineAfterNTags() { 435 return newLineAfterNTags; 436 } 437 438 /** 439 * Controls output of a line.separator every tagCount tags when isNewlines 440 * is false. If tagCount equals zero, it means don't do anything special. If 441 * greater than zero, then a line.separator will be output after tagCount 442 * tags have been output. Used when you would like to squeeze the html as 443 * much as possible, but some browsers don't like really long lines. A tag 444 * count of 10 would produce a line.separator in the output after 10 close 445 * tags (including single tags). 446 * 447 * @param tagCount 448 * DOCUMENT ME! 449 */ setNewLineAfterNTags(int tagCount)450 public void setNewLineAfterNTags(int tagCount) { 451 newLineAfterNTags = tagCount; 452 } 453 getAttributeQuoteCharacter()454 public char getAttributeQuoteCharacter() { 455 return attributeQuoteChar; 456 } 457 458 /** 459 * Sets the character used to quote attribute values. The specified 460 * character must be a valid XML attribute quote character, otherwise an 461 * <code>IllegalArgumentException</code> will be thrown. 462 * 463 * @param quoteChar 464 * The character to use when quoting attribute values. 465 * 466 * @throws IllegalArgumentException 467 * If the specified character is not a valid XML attribute quote 468 * character. 469 */ setAttributeQuoteCharacter(char quoteChar)470 public void setAttributeQuoteCharacter(char quoteChar) { 471 if ((quoteChar == '\'') || (quoteChar == '"')) { 472 attributeQuoteChar = quoteChar; 473 } else { 474 throw new IllegalArgumentException("Invalid attribute quote " 475 + "character (" + quoteChar + ")"); 476 } 477 } 478 479 /** 480 * Parses command line arguments of the form <code>-omitEncoding 481 * -indentSize 3 -newlines -trimText</code> 482 * 483 * @param args 484 * is the array of command line arguments 485 * @param i 486 * is the index in args to start parsing options 487 * 488 * @return the index of first parameter that we didn't understand 489 */ parseOptions(String[] args, int i)490 public int parseOptions(String[] args, int i) { 491 for (int size = args.length; i < size; i++) { 492 if (args[i].equals("-suppressDeclaration")) { 493 setSuppressDeclaration(true); 494 } else if (args[i].equals("-omitEncoding")) { 495 setOmitEncoding(true); 496 } else if (args[i].equals("-indent")) { 497 setIndent(args[++i]); 498 } else if (args[i].equals("-indentSize")) { 499 setIndentSize(Integer.parseInt(args[++i])); 500 } else if (args[i].startsWith("-expandEmpty")) { 501 setExpandEmptyElements(true); 502 } else if (args[i].equals("-encoding")) { 503 setEncoding(args[++i]); 504 } else if (args[i].equals("-newlines")) { 505 setNewlines(true); 506 } else if (args[i].equals("-lineSeparator")) { 507 setLineSeparator(args[++i]); 508 } else if (args[i].equals("-trimText")) { 509 setTrimText(true); 510 } else if (args[i].equals("-padText")) { 511 setPadText(true); 512 } else if (args[i].startsWith("-xhtml")) { 513 setXHTML(true); 514 } else { 515 return i; 516 } 517 } 518 519 return i; 520 } 521 522 /** 523 * A static helper method to create the default pretty printing format. This 524 * format consists of an indent of 2 spaces, newlines after each element and 525 * all other whitespace trimmed, and XMTML is false. 526 * 527 * @return DOCUMENT ME! 528 */ createPrettyPrint()529 public static OutputFormat createPrettyPrint() { 530 OutputFormat format = new OutputFormat(); 531 format.setIndentSize(2); 532 format.setNewlines(true); 533 format.setTrimText(true); 534 format.setPadText(true); 535 536 return format; 537 } 538 539 /** 540 * A static helper method to create the default compact format. This format 541 * does not have any indentation or newlines after an alement and all other 542 * whitespace trimmed 543 * 544 * @return DOCUMENT ME! 545 */ createCompactFormat()546 public static OutputFormat createCompactFormat() { 547 OutputFormat format = new OutputFormat(); 548 format.setIndent(false); 549 format.setNewlines(false); 550 format.setTrimText(true); 551 552 return format; 553 } 554 } 555 556 /* 557 * Redistribution and use of this software and associated documentation 558 * ("Software"), with or without modification, are permitted provided that the 559 * following conditions are met: 560 * 561 * 1. Redistributions of source code must retain copyright statements and 562 * notices. Redistributions must also contain a copy of this document. 563 * 564 * 2. Redistributions in binary form must reproduce the above copyright notice, 565 * this list of conditions and the following disclaimer in the documentation 566 * and/or other materials provided with the distribution. 567 * 568 * 3. The name "DOM4J" must not be used to endorse or promote products derived 569 * from this Software without prior written permission of MetaStuff, Ltd. For 570 * written permission, please contact dom4j-info@metastuff.com. 571 * 572 * 4. Products derived from this Software may not be called "DOM4J" nor may 573 * "DOM4J" appear in their names without prior written permission of MetaStuff, 574 * Ltd. DOM4J is a registered trademark of MetaStuff, Ltd. 575 * 576 * 5. Due credit should be given to the DOM4J Project - http://www.dom4j.org 577 * 578 * THIS SOFTWARE IS PROVIDED BY METASTUFF, LTD. AND CONTRIBUTORS ``AS IS'' AND 579 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 580 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 581 * ARE DISCLAIMED. IN NO EVENT SHALL METASTUFF, LTD. OR ITS CONTRIBUTORS BE 582 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 583 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 584 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 585 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 586 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 587 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 588 * POSSIBILITY OF SUCH DAMAGE. 589 * 590 * Copyright 2001-2005 (C) MetaStuff, Ltd. All Rights Reserved. 591 */ 592