1 // Jericho HTML Parser - Java based library for analysing and manipulating HTML
2 // Version 3.2
3 // Copyright (C) 2004-2009 Martin Jericho
4 // http://jericho.htmlparser.net/
5 //
6 // This library is free software; you can redistribute it and/or
7 // modify it under the terms of either one of the following licences:
8 //
9 // 1. The Eclipse Public License (EPL) version 1.0,
10 // included in this distribution in the file licence-epl-1.0.html
11 // or available at http://www.eclipse.org/legal/epl-v10.html
12 //
13 // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
14 // included in this distribution in the file licence-lgpl-2.1.txt
15 // or available at http://www.gnu.org/licenses/lgpl.txt
16 //
17 // This library is distributed on an "AS IS" basis,
18 // WITHOUT WARRANTY OF ANY KIND, either express or implied.
19 // See the individual licence texts for more details.
20 
21 package net.htmlparser.jericho;
22 
23 import java.util.*;
24 import java.io.*;
25 
26 /**
27  * Performs a simple rendering of HTML markup into text.
28  * <p>
29  * This provides a human readable version of the segment content that is modelled on the way
30  * <a target="_blank" href="http://www.mozilla.com/thunderbird/">Mozilla Thunderbird</a> and other email clients provide an automatic conversion of
31  * HTML content to text in their <a target="_blank" href="http://tools.ietf.org/html/rfc2046#section-5.1.4">alternative MIME encoding</a> of emails.
32  * <p>
33  * The output using default settings complies with the "text/plain; format=flowed" (DelSp=No) protocol described in
34  * <a target="_blank" href="http://tools.ietf.org/html/rfc3676">RFC3676</a>.
35  * <p>
36  * Many properties are available to customise the output, possibly the most significant of which is {@link #setMaxLineLength(int) MaxLineLength}.
37  * See the individual property descriptions for details.
38  * <p>
39  * Use one of the following methods to obtain the output:
40  * <ul>
41  *  <li>{@link #writeTo(Writer)}</li>
42  *  <li>{@link #appendTo(Appendable)}</li>
43  *  <li>{@link #toString()}</li>
44  *  <li>{@link CharStreamSourceUtil#getReader(CharStreamSource) CharStreamSourceUtil.getReader(this)}</li>
45  * </ul>
46  * <p>
47  * The rendering of some constructs, especially tables, is very rudimentary.
48  * No attempt is made to render nested tables properly, except to ensure that all of the text content is included in the output.
49  * <p>
50  * Rendering an entire {@link Source} object performs a {@linkplain Source#fullSequentialParse() full sequential parse} automatically.
51  * <p>
52  * Any aspect of the algorithm not specifically mentioned here is subject to change without notice in future versions.
53  * <p>
54  * To extract pure text without any rendering of the markup, use the {@link TextExtractor} class instead.
55  */
56 public class Renderer implements CharStreamSource {
57 	private final Segment rootSegment;
58 	private int maxLineLength=76;
59 	private String newLine="\r\n";
60 	private boolean includeHyperlinkURLs=true;
61 	private boolean includeAlternateText=true;
62 	private boolean decorateFontStyles=false;
63 	private boolean convertNonBreakingSpaces=Config.ConvertNonBreakingSpaces;
64 	private int blockIndentSize=4;
65 	private int listIndentSize=6;
66 	private char[] listBullets=new char[] {'*','o','+','#'};
67 	private boolean includeFirstElementTopMargin=false;
68 	private String tableCellSeparator=" \t";
69 
70 	private static final int UNORDERED_LIST=-1;
71 
72 	private static Map<String,ElementHandler> ELEMENT_HANDLERS=new HashMap<String,ElementHandler>();
73 	static {
ELEMENT_HANDLERS.put(HTMLElementName.A,A_ElementHandler.INSTANCE)74 		ELEMENT_HANDLERS.put(HTMLElementName.A,A_ElementHandler.INSTANCE);
ELEMENT_HANDLERS.put(HTMLElementName.ADDRESS,StandardBlockElementHandler.INSTANCE_0_0)75 		ELEMENT_HANDLERS.put(HTMLElementName.ADDRESS,StandardBlockElementHandler.INSTANCE_0_0);
ELEMENT_HANDLERS.put(HTMLElementName.APPLET,AlternateTextElementHandler.INSTANCE)76 		ELEMENT_HANDLERS.put(HTMLElementName.APPLET,AlternateTextElementHandler.INSTANCE);
ELEMENT_HANDLERS.put(HTMLElementName.B,FontStyleElementHandler.INSTANCE_B)77 		ELEMENT_HANDLERS.put(HTMLElementName.B,FontStyleElementHandler.INSTANCE_B);
ELEMENT_HANDLERS.put(HTMLElementName.BLOCKQUOTE,StandardBlockElementHandler.INSTANCE_1_1_INDENT)78 		ELEMENT_HANDLERS.put(HTMLElementName.BLOCKQUOTE,StandardBlockElementHandler.INSTANCE_1_1_INDENT);
ELEMENT_HANDLERS.put(HTMLElementName.BR,BR_ElementHandler.INSTANCE)79 		ELEMENT_HANDLERS.put(HTMLElementName.BR,BR_ElementHandler.INSTANCE);
ELEMENT_HANDLERS.put(HTMLElementName.BUTTON,RemoveElementHandler.INSTANCE)80 		ELEMENT_HANDLERS.put(HTMLElementName.BUTTON,RemoveElementHandler.INSTANCE);
ELEMENT_HANDLERS.put(HTMLElementName.CAPTION,StandardBlockElementHandler.INSTANCE_0_0)81 		ELEMENT_HANDLERS.put(HTMLElementName.CAPTION,StandardBlockElementHandler.INSTANCE_0_0);
ELEMENT_HANDLERS.put(HTMLElementName.CENTER,StandardBlockElementHandler.INSTANCE_1_1)82 		ELEMENT_HANDLERS.put(HTMLElementName.CENTER,StandardBlockElementHandler.INSTANCE_1_1);
ELEMENT_HANDLERS.put(HTMLElementName.CODE,FontStyleElementHandler.INSTANCE_CODE)83 		ELEMENT_HANDLERS.put(HTMLElementName.CODE,FontStyleElementHandler.INSTANCE_CODE);
ELEMENT_HANDLERS.put(HTMLElementName.DD,StandardBlockElementHandler.INSTANCE_0_0_INDENT)84 		ELEMENT_HANDLERS.put(HTMLElementName.DD,StandardBlockElementHandler.INSTANCE_0_0_INDENT);
ELEMENT_HANDLERS.put(HTMLElementName.DIR,ListElementHandler.INSTANCE_UL)85 		ELEMENT_HANDLERS.put(HTMLElementName.DIR,ListElementHandler.INSTANCE_UL);
ELEMENT_HANDLERS.put(HTMLElementName.DIV,StandardBlockElementHandler.INSTANCE_0_0)86 		ELEMENT_HANDLERS.put(HTMLElementName.DIV,StandardBlockElementHandler.INSTANCE_0_0);
ELEMENT_HANDLERS.put(HTMLElementName.DT,StandardBlockElementHandler.INSTANCE_0_0)87 		ELEMENT_HANDLERS.put(HTMLElementName.DT,StandardBlockElementHandler.INSTANCE_0_0);
ELEMENT_HANDLERS.put(HTMLElementName.EM,FontStyleElementHandler.INSTANCE_I)88 		ELEMENT_HANDLERS.put(HTMLElementName.EM,FontStyleElementHandler.INSTANCE_I);
ELEMENT_HANDLERS.put(HTMLElementName.FIELDSET,StandardBlockElementHandler.INSTANCE_1_1)89 		ELEMENT_HANDLERS.put(HTMLElementName.FIELDSET,StandardBlockElementHandler.INSTANCE_1_1);
ELEMENT_HANDLERS.put(HTMLElementName.FORM,StandardBlockElementHandler.INSTANCE_1_1)90 		ELEMENT_HANDLERS.put(HTMLElementName.FORM,StandardBlockElementHandler.INSTANCE_1_1);
ELEMENT_HANDLERS.put(HTMLElementName.H1,StandardBlockElementHandler.INSTANCE_2_1)91 		ELEMENT_HANDLERS.put(HTMLElementName.H1,StandardBlockElementHandler.INSTANCE_2_1);
ELEMENT_HANDLERS.put(HTMLElementName.H2,StandardBlockElementHandler.INSTANCE_2_1)92 		ELEMENT_HANDLERS.put(HTMLElementName.H2,StandardBlockElementHandler.INSTANCE_2_1);
ELEMENT_HANDLERS.put(HTMLElementName.H3,StandardBlockElementHandler.INSTANCE_2_1)93 		ELEMENT_HANDLERS.put(HTMLElementName.H3,StandardBlockElementHandler.INSTANCE_2_1);
ELEMENT_HANDLERS.put(HTMLElementName.H4,StandardBlockElementHandler.INSTANCE_2_1)94 		ELEMENT_HANDLERS.put(HTMLElementName.H4,StandardBlockElementHandler.INSTANCE_2_1);
ELEMENT_HANDLERS.put(HTMLElementName.H5,StandardBlockElementHandler.INSTANCE_2_1)95 		ELEMENT_HANDLERS.put(HTMLElementName.H5,StandardBlockElementHandler.INSTANCE_2_1);
ELEMENT_HANDLERS.put(HTMLElementName.H6,StandardBlockElementHandler.INSTANCE_2_1)96 		ELEMENT_HANDLERS.put(HTMLElementName.H6,StandardBlockElementHandler.INSTANCE_2_1);
ELEMENT_HANDLERS.put(HTMLElementName.HEAD,RemoveElementHandler.INSTANCE)97 		ELEMENT_HANDLERS.put(HTMLElementName.HEAD,RemoveElementHandler.INSTANCE);
ELEMENT_HANDLERS.put(HTMLElementName.HR,HR_ElementHandler.INSTANCE)98 		ELEMENT_HANDLERS.put(HTMLElementName.HR,HR_ElementHandler.INSTANCE);
ELEMENT_HANDLERS.put(HTMLElementName.I,FontStyleElementHandler.INSTANCE_I)99 		ELEMENT_HANDLERS.put(HTMLElementName.I,FontStyleElementHandler.INSTANCE_I);
ELEMENT_HANDLERS.put(HTMLElementName.IMG,AlternateTextElementHandler.INSTANCE)100 		ELEMENT_HANDLERS.put(HTMLElementName.IMG,AlternateTextElementHandler.INSTANCE);
ELEMENT_HANDLERS.put(HTMLElementName.INPUT,AlternateTextElementHandler.INSTANCE)101 		ELEMENT_HANDLERS.put(HTMLElementName.INPUT,AlternateTextElementHandler.INSTANCE);
ELEMENT_HANDLERS.put(HTMLElementName.LEGEND,StandardBlockElementHandler.INSTANCE_0_0)102 		ELEMENT_HANDLERS.put(HTMLElementName.LEGEND,StandardBlockElementHandler.INSTANCE_0_0);
ELEMENT_HANDLERS.put(HTMLElementName.LI,LI_ElementHandler.INSTANCE)103 		ELEMENT_HANDLERS.put(HTMLElementName.LI,LI_ElementHandler.INSTANCE);
ELEMENT_HANDLERS.put(HTMLElementName.MENU,ListElementHandler.INSTANCE_UL)104 		ELEMENT_HANDLERS.put(HTMLElementName.MENU,ListElementHandler.INSTANCE_UL);
ELEMENT_HANDLERS.put(HTMLElementName.MAP,RemoveElementHandler.INSTANCE)105 		ELEMENT_HANDLERS.put(HTMLElementName.MAP,RemoveElementHandler.INSTANCE);
ELEMENT_HANDLERS.put(HTMLElementName.NOFRAMES,RemoveElementHandler.INSTANCE)106 		ELEMENT_HANDLERS.put(HTMLElementName.NOFRAMES,RemoveElementHandler.INSTANCE);
ELEMENT_HANDLERS.put(HTMLElementName.NOSCRIPT,RemoveElementHandler.INSTANCE)107 		ELEMENT_HANDLERS.put(HTMLElementName.NOSCRIPT,RemoveElementHandler.INSTANCE);
ELEMENT_HANDLERS.put(HTMLElementName.OL,ListElementHandler.INSTANCE_OL)108 		ELEMENT_HANDLERS.put(HTMLElementName.OL,ListElementHandler.INSTANCE_OL);
ELEMENT_HANDLERS.put(HTMLElementName.P,StandardBlockElementHandler.INSTANCE_1_1)109 		ELEMENT_HANDLERS.put(HTMLElementName.P,StandardBlockElementHandler.INSTANCE_1_1);
ELEMENT_HANDLERS.put(HTMLElementName.PRE,PRE_ElementHandler.INSTANCE)110 		ELEMENT_HANDLERS.put(HTMLElementName.PRE,PRE_ElementHandler.INSTANCE);
ELEMENT_HANDLERS.put(HTMLElementName.SCRIPT,RemoveElementHandler.INSTANCE)111 		ELEMENT_HANDLERS.put(HTMLElementName.SCRIPT,RemoveElementHandler.INSTANCE);
ELEMENT_HANDLERS.put(HTMLElementName.SELECT,RemoveElementHandler.INSTANCE)112 		ELEMENT_HANDLERS.put(HTMLElementName.SELECT,RemoveElementHandler.INSTANCE);
ELEMENT_HANDLERS.put(HTMLElementName.STRONG,FontStyleElementHandler.INSTANCE_B)113 		ELEMENT_HANDLERS.put(HTMLElementName.STRONG,FontStyleElementHandler.INSTANCE_B);
ELEMENT_HANDLERS.put(HTMLElementName.STYLE,RemoveElementHandler.INSTANCE)114 		ELEMENT_HANDLERS.put(HTMLElementName.STYLE,RemoveElementHandler.INSTANCE);
ELEMENT_HANDLERS.put(HTMLElementName.TEXTAREA,RemoveElementHandler.INSTANCE)115 		ELEMENT_HANDLERS.put(HTMLElementName.TEXTAREA,RemoveElementHandler.INSTANCE);
ELEMENT_HANDLERS.put(HTMLElementName.TD,TD_ElementHandler.INSTANCE)116 		ELEMENT_HANDLERS.put(HTMLElementName.TD,TD_ElementHandler.INSTANCE);
ELEMENT_HANDLERS.put(HTMLElementName.TH,TD_ElementHandler.INSTANCE)117 		ELEMENT_HANDLERS.put(HTMLElementName.TH,TD_ElementHandler.INSTANCE);
ELEMENT_HANDLERS.put(HTMLElementName.TR,StandardBlockElementHandler.INSTANCE_0_0)118 		ELEMENT_HANDLERS.put(HTMLElementName.TR,StandardBlockElementHandler.INSTANCE_0_0);
ELEMENT_HANDLERS.put(HTMLElementName.U,FontStyleElementHandler.INSTANCE_U)119 		ELEMENT_HANDLERS.put(HTMLElementName.U,FontStyleElementHandler.INSTANCE_U);
ELEMENT_HANDLERS.put(HTMLElementName.UL,ListElementHandler.INSTANCE_UL)120 		ELEMENT_HANDLERS.put(HTMLElementName.UL,ListElementHandler.INSTANCE_UL);
121 	}
122 
123 	/**
124 	 * Constructs a new <code>Renderer</code> based on the specified {@link Segment}.
125 	 * @param segment  the segment containing the HTML to be rendered.
126 	 * @see Segment#getRenderer()
127 	 */
public Renderer(final Segment segment) {
	// Only the reference is kept; nothing is rendered until output is requested.
	this.rootSegment = segment;
}
131 
132 	// Documentation inherited from CharStreamSource
writeTo(final Writer writer)133 	public void writeTo(final Writer writer) throws IOException {
134 		appendTo(writer);
135 		writer.flush();
136 	}
137 
138 	// Documentation inherited from CharStreamSource
appendTo(final Appendable appendable)139 	public void appendTo(final Appendable appendable) throws IOException {
140 		new Processor(this,rootSegment,getMaxLineLength(),getNewLine(),getIncludeHyperlinkURLs(),getIncludeAlternateText(),getDecorateFontStyles(),getConvertNonBreakingSpaces(),getBlockIndentSize(),getListIndentSize(),getListBullets(),getTableCellSeparator()).appendTo(appendable);
141 	}
142 
143 	// Documentation inherited from CharStreamSource
getEstimatedMaximumOutputLength()144 	public long getEstimatedMaximumOutputLength() {
145 		return rootSegment.length();
146 	}
147 
148 	// Documentation inherited from CharStreamSource
toString()149 	public String toString() {
150 		return CharStreamSourceUtil.toString(this);
151 	}
152 
153 	/**
154 	 * Sets the column at which lines are to be wrapped.
155 	 * <p>
156 	 * Lines that would otherwise exceed this length are wrapped onto a new line at a word boundary.
157 	 * <p>
158 	 * A line may still exceed this length if it consists of a single word, where the length of the word plus the line indent exceeds the maximum length.
159 	 * In this case the line is wrapped immediately after the end of the word.
160 	 * <p>
161 	 * The default value is <code>76</code>, which reflects the maximum line length for sending
162 	 * email data specified in <a target="_blank" href="http://rfc.net/rfc2049.html#s3.">RFC2049 section 3.5</a>.
163 	 *
164 	 * @param maxLineLength  the column at which lines are to be wrapped.
165 	 * @return this <code>Renderer</code> instance, allowing multiple property setting methods to be chained in a single statement.
166 	 * @see #getMaxLineLength()
167 	 */
setMaxLineLength(final int maxLineLength)168 	public Renderer setMaxLineLength(final int maxLineLength) {
169 		this.maxLineLength=maxLineLength;
170 		return this;
171 	}
172 
173 	/**
174 	 * Returns the column at which lines are to be wrapped.
175 	 * <p>
176 	 * See the {@link #setMaxLineLength(int)} method for a full description of this property.
177 	 *
178 	 * @return the column at which lines are to be wrapped.
179 	 */
getMaxLineLength()180 	public int getMaxLineLength() {
181 		return maxLineLength;
182 	}
183 
184 	/**
185 	 * Sets the string to be used to represent a <a target="_blank" href="http://en.wikipedia.org/wiki/Newline">newline</a> in the output.
186 	 * <p>
187 	 * The default value is <code>"\r\n"</code> <span title="carriage return + line feed">(CR+LF)</span> regardless of the platform on which the library is running.
188 	 * This is so that the default configuration produces valid
189 	 * <a target="_blank" href="http://tools.ietf.org/html/rfc1521#section-7.1.2">MIME text/plain</a> output, which mandates the use of CR+LF for line breaks.
190 	 * <p>
191 	 * Specifying a <code>null</code> argument causes the output to use the same new line string as is used in the source document, which is
192 	 * determined via the {@link Source#getNewLine()} method.
193 	 * If the source document does not contain any new lines, a "best guess" is made by either taking the new line string of a previously parsed document,
194 	 * or using the value from the static {@link Config#NewLine} property.
195 	 *
196 	 * @param newLine  the string to be used to represent a <a target="_blank" href="http://en.wikipedia.org/wiki/Newline">newline</a> in the output, may be <code>null</code>.
197 	 * @return this <code>Renderer</code> instance, allowing multiple property setting methods to be chained in a single statement.
198 	 * @see #getNewLine()
199 	 */
setNewLine(final String newLine)200 	public Renderer setNewLine(final String newLine) {
201 		this.newLine=newLine;
202 		return this;
203 	}
204 
205 	/**
206 	 * Returns the string to be used to represent a <a target="_blank" href="http://en.wikipedia.org/wiki/Newline">newline</a> in the output.
207 	 * <p>
208 	 * See the {@link #setNewLine(String)} method for a full description of this property.
209 	 *
210 	 * @return the string to be used to represent a <a target="_blank" href="http://en.wikipedia.org/wiki/Newline">newline</a> in the output.
211 	 */
getNewLine()212 	public String getNewLine() {
213 		if (newLine==null) newLine=rootSegment.source.getBestGuessNewLine();
214 		return newLine;
215 	}
216 
217 	/**
218 	 * Sets whether hyperlink URLs are included in the output.
219 	 * <p>
220 	 * The default value is <code>true</code>.
221 	 * <p>
222 	 * When this property is <code>true</code>, the URL of each hyperlink is included in the output as determined by the implementation of the
223 	 * {@link #renderHyperlinkURL(StartTag)} method.
224 	 * <p>
225 	 * <dl>
226 	 *  <dt>Example:</dt>
227 	 *  <dd>
228 	 *   <p>
229 	 *   Assuming the default implementation of {@link #renderHyperlinkURL(StartTag)}, when this property is <code>true</code>, the following HTML:
230 	 *   <blockquote class="code">
231 	 *    <code>&lt;a href="http://jericho.htmlparser.net/"&gt;Jericho HTML Parser&lt;/a&gt;</code>
232 	 *   </blockquote>
233 	 *   produces the following output:
234 	 *   <blockquote class="code">
235 	 *    <code>Jericho HTML Parser &lt;http://jericho.htmlparser.net/&gt;</code>
236 	 *   </blockquote>
237 	 *  </dd>
238 	 * </dl>
239 	 *
240 	 * @param includeHyperlinkURLs  specifies whether hyperlink URLs are included in the output.
241 	 * @return this <code>Renderer</code> instance, allowing multiple property setting methods to be chained in a single statement.
242 	 * @see #getIncludeHyperlinkURLs()
243 	 */
setIncludeHyperlinkURLs(final boolean includeHyperlinkURLs)244 	public Renderer setIncludeHyperlinkURLs(final boolean includeHyperlinkURLs) {
245 		this.includeHyperlinkURLs=includeHyperlinkURLs;
246 		return this;
247 	}
248 
249 	/**
250 	 * Indicates whether hyperlink URLs are included in the output.
251 	 * <p>
252 	 * See the {@link #setIncludeHyperlinkURLs(boolean)} method for a full description of this property.
253 	 *
254 	 * @return <code>true</code> if hyperlink URLs are included in the output, otherwise <code>false</code>.
255 	 */
getIncludeHyperlinkURLs()256 	public boolean getIncludeHyperlinkURLs() {
257 		return includeHyperlinkURLs;
258 	}
259 
260 	/**
261 	 * Renders the hyperlink URL from the specified {@link StartTag}.
262 	 * <p>
263 	 * A return value of <code>null</code> indicates that the hyperlink URL should not be rendered at all.
264 	 * <p>
265 	 * The default implementation of this method returns <code>null</code> if the <code>href</code> attribute of the specified start tag
266 	 * is '<code>#</code>', starts with "<code>javascript:</code>", or is missing.
267 	 * In all other cases it returns the value of the <code>href</code> attribute enclosed in angle brackets.
268 	 * <p>
269 	 * See the documentation of the {@link #setIncludeHyperlinkURLs(boolean)} method for an example of how a hyperlink is rendered by the default implementation.
270 	 * <p>
271 	 * This method can be overridden in a subclass to customise the rendering of hyperlink URLs.
272 	 * <p>
273 	 * Rendering of hyperlink URLs can be disabled completely without overriding this method by setting the
274 	 * {@link #setIncludeHyperlinkURLs(boolean) IncludeHyperlinkURLs} property to <code>false</code>.
275 	 * <p>
276 	 * <dl>
277 	 *  <dt>Example:</dt>
278 	 *  <dd>
279 	 *   To render hyperlink URLs without the enclosing angle brackets:<br /><br />
280 	 *   <code>
281 	 *    Renderer renderer=new Renderer(segment) {<br />
282 	 *    &nbsp; &nbsp; public String renderHyperlinkURL(StartTag startTag) {<br />
283 	 *    &nbsp; &nbsp; &nbsp; &nbsp; String href=startTag.getAttributeValue("href");<br />
284 	 *    &nbsp; &nbsp; &nbsp; &nbsp; if (href==null || href.equals("#") || href.startsWith("javascript:")) return null;<br />
285 	 *    &nbsp; &nbsp; &nbsp; &nbsp; return href;<br />
286 	 *    &nbsp; &nbsp; }<br />
287 	 *    };<br />
288 	 *    String renderedSegment=renderer.toString();
289 	 *   </code>
290 	 *  </dd>
291 	 * </dl>
292 	 * @param startTag  the start tag of the hyperlink element, must not be <code>null</code>.
293 	 * @return The rendered hyperlink URL from the specified {@link StartTag}, or <code>null</code> if the hyperlink URL should not be rendered.
294 	 */
renderHyperlinkURL(final StartTag startTag)295 	public String renderHyperlinkURL(final StartTag startTag) {
296 		final String href=startTag.getAttributeValue("href");
297 		if (href==null || href.equals("#") || href.startsWith("javascript:")) return null;
298 		return '<'+href+'>';
299 	}
300 
301 	/**
302 	 * Sets whether the alternate text of a tag that has an <code>alt</code> attribute is included in the output.
303 	 * <p>
304 	 * The default value is <code>true</code>.
305 	 * Note that this is not consistent with common email clients such as Mozilla Thunderbird which do not render alternate text at all,
306 	 * even when a tag specifies alternate text.
307 	 * <p>
308 	 * When this property is <code>true</code>, the alternate text is included in the output as determined by the implementation of the
309 	 * {@link #renderAlternateText(StartTag)} method.
310 	 * <p>
311 	 * <dl>
312 	 *  <dt>Example:</dt>
313 	 *  <dd>
314 	 *   <p>
315 	 *   Assuming the default implementation of {@link #renderAlternateText(StartTag)}, when this property is <code>true</code>, the following HTML:
316 	 *   <blockquote class="code">
317 	 *    <code>&lt;img src="smiley.png" alt="smiley face" /&gt;</code>
318 	 *   </blockquote>
319 	 *   produces the following output:
320 	 *   <blockquote class="code">
321 	 *    <code>[smiley face]</code>
322 	 *   </blockquote>
323 	 *  </dd>
324 	 * </dl>
325 	 *
326 	 * @param includeAlternateText  specifies whether the alternate text of a tag that has an <code>alt</code> attribute is included in the output.
327 	 * @return this <code>Renderer</code> instance, allowing multiple property setting methods to be chained in a single statement.
328 	 * @see #getIncludeAlternateText()
329 	 */
setIncludeAlternateText(final boolean includeAlternateText)330 	public Renderer setIncludeAlternateText(final boolean includeAlternateText) {
331 		this.includeAlternateText=includeAlternateText;
332 		return this;
333 	}
334 
335 	/**
336 	 * Indicates whether the alternate text of a tag that has an <code>alt</code> attribute is included in the output.
337 	 * <p>
338 	 * See the {@link #setIncludeAlternateText(boolean)} method for a full description of this property.
339 	 *
340 	 * @return <code>true</code> if the alternate text of a tag that has an <code>alt</code> attribute is included in the output, otherwise <code>false</code>.
341 	 */
getIncludeAlternateText()342 	public boolean getIncludeAlternateText() {
343 		return includeAlternateText;
344 	}
345 
346 	/**
347 	 * Renders the alternate text of the specified start tag.
348 	 * <p>
349 	 * A return value of <code>null</code> indicates that the alternate text is not to be rendered at all.
350 	 * <p>
351 	 * The default implementation of this method returns <code>null</code> if the <code>alt</code> attribute of the specified start tag is missing or empty, or if the
352 	 * specified start tag is from an {@link HTMLElementName#AREA AREA} element.
353 	 * In all other cases it returns the value of the <code>alt</code> attribute enclosed in square brackets <code>[&hellip;]</code>.
354 	 * <p>
355 	 * See the documentation of the {@link #setIncludeAlternateText(boolean)} method for an example of how alternate text is rendered by the default implementation.
356 	 * <p>
357 	 * This method can be overridden in a subclass to customise the rendering of alternate text.
358 	 * <p>
359 	 * Rendering of alternate text can be disabled completely without overriding this method by setting the
360 	 * {@link #setIncludeAlternateText(boolean) IncludeAlternateText} property to <code>false</code>.
361 	 * <p>
362 	 * <dl>
363 	 *  <dt>Example:</dt>
364 	 *  <dd>
365 	 *   To render alternate text with double angle quotation marks instead of square brackets:<br /><br />
366 	 *   <code>
367 	 *    Renderer renderer=new Renderer(segment) {<br />
368 	 *    &nbsp; &nbsp; public String renderAlternateText(StartTag startTag) {<br />
369 	 *    &nbsp; &nbsp; &nbsp; &nbsp; if (startTag.getName()==HTMLElementName.AREA) return null;
370 	 *    &nbsp; &nbsp; &nbsp; &nbsp; String alt=startTag.getAttributeValue("alt");<br />
371 	 *    &nbsp; &nbsp; &nbsp; &nbsp; if (alt==null || alt.length()==0) return null;<br />
372 	 *    &nbsp; &nbsp; &nbsp; &nbsp; return '�'+alt+'�';<br />
373 	 *    &nbsp; &nbsp; }<br />
374 	 *    };<br />
375 	 *    String renderedSegment=renderer.toString();
376 	 *   </code>
377 	 *  </dd>
378 	 * </dl>
379 	 * @param startTag  the start tag containing an <code>alt</code> attribute, must not be <code>null</code>.
380 	 * @return The rendered alternate text, or <code>null</code> if the alternate text should not be rendered.
381 	 */
renderAlternateText(final StartTag startTag)382 	public String renderAlternateText(final StartTag startTag) {
383 		if (startTag.getName()==HTMLElementName.AREA) return null;
384 		final String alt=startTag.getAttributeValue("alt");
385 		if (alt==null || alt.length()==0) return null;
386 		return '['+alt+']';
387 	}
388 
389 	/**
390 	 * Sets whether decoration characters are to be included around the content of some
391 	 * <a target="_blank" href="http://www.w3.org/TR/html401/present/graphics.html#h-15.2.1">font style elements</a> and
392 	 * <a target="_blank" href="http://www.w3.org/TR/html401/struct/text.html#h-9.2.1">phrase elements</a>.
393 	 * <p>
394 	 * The default value is <code>false</code>.
395 	 * <p>
396 	 * Below is a table summarising the decorated elements.
397 	 * <p>
398 	 * <style type="text/css">
399 	 *  table#FontStyleElementSummary td, table#FontStyleElementSummary th {text-align: center; padding-bottom: 2px}
400 	 * </style>
401 	 * <table id="FontStyleElementSummary" class="bordered" cellspacing="0">
402 	 *  <tr><th title="HTML elements decorated">Elements</th><th title="The character placed around the element content">Character</th><th>Example Output</th></tr>
403 	 *  <tr><td>{@link HTMLElementName#B B} and {@link HTMLElementName#STRONG STRONG}</td><td><code>*</code></td><td><code>*bold text*</code></td></tr>
404 	 *  <tr><td>{@link HTMLElementName#I I} and {@link HTMLElementName#EM EM}</td><td><code>/</code></td><td><code>/italic text/</code></td></tr>
405 	 *  <tr><td>{@link HTMLElementName#U U}</td><td><code>_</code></td><td><code>_underlined text_</code></td></tr>
406 	 *  <tr><td>{@link HTMLElementName#CODE CODE}</td><td><code>|</code></td><td><code>|code|</code></td></tr>
407 	 * </table>
408 	 *
409 	 * @param decorateFontStyles  specifies whether decoration characters are to be included around the content of some font style elements.
410 	 * @return this <code>Renderer</code> instance, allowing multiple property setting methods to be chained in a single statement.
411 	 * @see #getDecorateFontStyles()
412 	 */
setDecorateFontStyles(final boolean decorateFontStyles)413 	public Renderer setDecorateFontStyles(final boolean decorateFontStyles) {
414 		this.decorateFontStyles=decorateFontStyles;
415 		return this;
416 	}
417 
418 	/**
419 	 * Indicates whether decoration characters are to be included around the content of some
420 	 * <a target="_blank" href="http://www.w3.org/TR/html401/present/graphics.html#h-15.2.1">font style elements</a> and
421 	 * <a target="_blank" href="http://www.w3.org/TR/html401/struct/text.html#h-9.2.1">phrase elements</a>.
422 	 * <p>
423 	 * See the {@link #setDecorateFontStyles(boolean)} method for a full description of this property.
424 	 *
425 	 * @return <code>true</code> if decoration characters are to be included around the content of some font style elements, otherwise <code>false</code>.
426 	 */
getDecorateFontStyles()427 	public boolean getDecorateFontStyles() {
428 		return decorateFontStyles;
429 	}
430 
431 	/**
432 	 * Sets whether non-breaking space ({@link CharacterEntityReference#_nbsp &amp;nbsp;}) character entity references are converted to spaces.
433 	 * <p>
434 	 * The default value is that of the static {@link Config#ConvertNonBreakingSpaces} property at the time the <code>Renderer</code> is instantiated.
435 	 *
436 	 * @param convertNonBreakingSpaces  specifies whether non-breaking space ({@link CharacterEntityReference#_nbsp &amp;nbsp;}) character entity references are converted to spaces.
437 	 * @return this <code>Renderer</code> instance, allowing multiple property setting methods to be chained in a single statement.
438 	 * @see #getConvertNonBreakingSpaces()
439 	 */
setConvertNonBreakingSpaces(boolean convertNonBreakingSpaces)440 	public Renderer setConvertNonBreakingSpaces(boolean convertNonBreakingSpaces) {
441 		this.convertNonBreakingSpaces=convertNonBreakingSpaces;
442 		return this;
443 	}
444 
445 	/**
446 	 * Indicates whether non-breaking space ({@link CharacterEntityReference#_nbsp &amp;nbsp;}) character entity references are converted to spaces.
447 	 * <p>
448 	 * See the {@link #setConvertNonBreakingSpaces(boolean)} method for a full description of this property.
449 	 *
450 	 * @return <code>true</code> if non-breaking space ({@link CharacterEntityReference#_nbsp &amp;nbsp;}) character entity references are converted to spaces, otherwise <code>false</code>.
451 	 */
getConvertNonBreakingSpaces()452 	public boolean getConvertNonBreakingSpaces() {
453 		return convertNonBreakingSpaces;
454 	}
455 
456 	/**
457 	 * Sets the size of the indent to be used for anything other than {@link HTMLElementName#LI LI} elements.
458 	 * <p>
459 	 * At present this applies to {@link HTMLElementName#BLOCKQUOTE BLOCKQUOTE} and {@link HTMLElementName#DD DD} elements.
460 	 * <p>
461 	 * The default value is <code>4</code>.
462 	 *
463 	 * @param blockIndentSize  the size of the indent.
464 	 * @return this <code>Renderer</code> instance, allowing multiple property setting methods to be chained in a single statement.
465 	 * @see #getBlockIndentSize()
466 	 */
setBlockIndentSize(final int blockIndentSize)467 	public Renderer setBlockIndentSize(final int blockIndentSize) {
468 		this.blockIndentSize=blockIndentSize;
469 		return this;
470 	}
471 
472 	/**
473 	 * Returns the size of the indent to be used for anything other than {@link HTMLElementName#LI LI} elements.
474 	 * <p>
475 	 * See the {@link #setBlockIndentSize(int)} method for a full description of this property.
476 	 *
477 	 * @return the size of the indent to be used for anything other than {@link HTMLElementName#LI LI} elements.
478 	 */
getBlockIndentSize()479 	public int getBlockIndentSize() {
480 		return blockIndentSize;
481 	}
482 
483 	/**
484 	 * Sets the size of the indent to be used for {@link HTMLElementName#LI LI} elements.
485 	 * <p>
486 	 * The default value is <code>6</code>.
487 	 * <p>
488 	 * This applies to {@link HTMLElementName#LI LI} elements inside both {@link HTMLElementName#UL UL} and {@link HTMLElementName#OL OL} elements.
489 	 * <p>
490 	 * The bullet or number of the list item is included as part of the indent.
491 	 *
492 	 * @param listIndentSize  the size of the indent.
493 	 * @return this <code>Renderer</code> instance, allowing multiple property setting methods to be chained in a single statement.
494 	 * @see #getListIndentSize()
495 	 */
setListIndentSize(final int listIndentSize)496 	public Renderer setListIndentSize(final int listIndentSize) {
497 		this.listIndentSize=listIndentSize;
498 		return this;
499 	}
500 
501 	/**
502 	 * Returns the size of the indent to be used for {@link HTMLElementName#LI LI} elements.
503 	 * <p>
504 	 * See the {@link #setListIndentSize(int)} method for a full description of this property.
505 	 *
506 	 * @return the size of the indent to be used for {@link HTMLElementName#LI LI} elements.
507 	 */
getListIndentSize()508 	public int getListIndentSize() {
509 		return listIndentSize;
510 	}
511 
512 	/**
513 	 * Sets the bullet characters to use for list items inside {@link HTMLElementName#UL UL} elements.
514 	 * <p>
515 	 * The values in the default array are <code>*</code>, <code>o</code>, <code>+</code> and <code>#</code>.
516 	 * <p>
517 	 * If the nesting of rendered lists goes deeper than the length of this array, the bullet characters start repeating from the first in the array.
518 	 * <p>
519 	 * WARNING: If any of the characters in the default array are modified, this will affect all other instances of this class using the default array.
520 	 *
521 	 * @param listBullets  an array of characters to be used as bullets, must have at least one entry.
522 	 * @return this <code>Renderer</code> instance, allowing multiple property setting methods to be chained in a single statement.
523 	 * @see #getListBullets()
524 	 */
setListBullets(final char[] listBullets)525 	public Renderer setListBullets(final char[] listBullets) {
526 		if (listBullets==null || listBullets.length==0) throw new IllegalArgumentException("listBullets argument must be an array of at least one character");
527 		this.listBullets=listBullets;
528 		return this;
529 	}
530 
531 	/**
532 	 * Returns the bullet characters to use for list items inside {@link HTMLElementName#UL UL} elements.
533 	 * <p>
534 	 * See the {@link #setListBullets(char[])} method for a full description of this property.
535 	 *
536 	 * @return the bullet characters to use for list items inside {@link HTMLElementName#UL UL} elements.
537 	 */
getListBullets()538 	public char[] getListBullets() {
539 		return listBullets;
540 	}
541 
542 	/**
543 	 * Sets whether the top margin of the first element is rendered.
544 	 * <p>
545 	 * The default value is <code>false</code>.
546 	 * <p>
547 	 * If this property is set to <code>true</code>, then the source "<code>&lt;h1&gt;Heading&lt;/h1&gt;</code>" would be rendered as "<code>\r\n\r\nHeading</code>",
548 	 * assuming all other default settings.
549 	 * If this property is <code>false</code>, then the same source would be rendered as "<code>Heading</code>".
550 	 * <p>
551 	 * Note that the bottom margin of the last element is never rendered.
552 	 *
553 	 * @param includeFirstElementTopMargin  specifies whether the top margin of the first element is rendered.
554 	 * @return this <code>Renderer</code> instance, allowing multiple property setting methods to be chained in a single statement.
555 	 * @see #getIncludeFirstElementTopMargin()
556 	 */
setIncludeFirstElementTopMargin(final boolean includeFirstElementTopMargin)557 	public Renderer setIncludeFirstElementTopMargin(final boolean includeFirstElementTopMargin) {
558 		this.includeFirstElementTopMargin=includeFirstElementTopMargin;
559 		return this;
560 	}
561 
562 	/**
563 	 * Indicates whether the top margin of the first element is rendered.
564 	 * <p>
565 	 * See the {@link #setIncludeFirstElementTopMargin(boolean)} method for a full description of this property.
566 	 *
567 	 * @return <code>true</code> if the top margin of the first element is rendered, otherwise <code>false</code>.
568 	 */
getIncludeFirstElementTopMargin()569 	public boolean getIncludeFirstElementTopMargin() {
570 		return includeFirstElementTopMargin;
571 	}
572 
573 	/**
574 	 * Sets the string that is to separate table cells.
575 	 * <p>
576 	 * The default value is <code>" \t"</code> (a space followed by a tab).
577 	 *
578 	 * @param tableCellSeparator  the string that is to separate table cells.
579 	 * @return this <code>Renderer</code> instance, allowing multiple property setting methods to be chained in a single statement.
580 	 * @see #getTableCellSeparator()
581 	 */
setTableCellSeparator(final String tableCellSeparator)582 	public Renderer setTableCellSeparator(final String tableCellSeparator) {
583 		this.tableCellSeparator=tableCellSeparator;
584 		return this;
585 	}
586 
587 	/**
588 	 * Returns the string that is to separate table cells.
589 	 * <p>
590 	 * See the {@link #setTableCellSeparator(String)} method for a full description of this property.
591 	 *
592 	 * @return the string that is to separate table cells.
593 	 */
getTableCellSeparator()594 	public String getTableCellSeparator() {
595 		return tableCellSeparator;
596 	}
597 
598 	/**
599 	 * Sets the default top margin of an HTML block element with the specified name.
600 	 * <p>
601 	 * The top margin is the number of blank lines that are to be inserted above the rendered block.
602 	 * <p>
603 	 * As this is a static method, the setting affects all instances of the <code>Renderer</code> class.
604 	 * <p>
605 	 * The <code>htmlElementName</code> argument must be one of the following:<br />
606 	 * {@link HTMLElementName#ADDRESS ADDRESS},
607 	 * {@link HTMLElementName#BLOCKQUOTE BLOCKQUOTE},
608 	 * {@link HTMLElementName#CAPTION CAPTION},
609 	 * {@link HTMLElementName#CENTER CENTER},
610 	 * {@link HTMLElementName#DD DD},
611 	 * {@link HTMLElementName#DIR DIR},
612 	 * {@link HTMLElementName#DIV DIV},
613 	 * {@link HTMLElementName#DT DT},
614 	 * {@link HTMLElementName#FIELDSET FIELDSET},
615 	 * {@link HTMLElementName#FORM FORM},
616 	 * {@link HTMLElementName#H1 H1},
617 	 * {@link HTMLElementName#H2 H2},
618 	 * {@link HTMLElementName#H3 H3},
619 	 * {@link HTMLElementName#H4 H4},
620 	 * {@link HTMLElementName#H5 H5},
621 	 * {@link HTMLElementName#H6 H6},
622 	 * {@link HTMLElementName#HR HR},
623 	 * {@link HTMLElementName#LEGEND LEGEND},
624 	 * {@link HTMLElementName#LI LI},
625 	 * {@link HTMLElementName#MENU MENU},
626 	 * {@link HTMLElementName#OL OL},
627 	 * {@link HTMLElementName#P P},
628 	 * {@link HTMLElementName#PRE PRE},
629 	 * {@link HTMLElementName#TR TR},
630 	 * {@link HTMLElementName#UL UL}
631 	 *
632 	 * @param htmlElementName  (required) the case insensitive name of a supported HTML block element.
633 	 * @param topMargin  the new top margin of the specified element.
634 	 * @throws UnsupportedOperationException if an unsupported element name is specified.
635 	 */
setDefaultTopMargin(String htmlElementName, final int topMargin)636 	public static void setDefaultTopMargin(String htmlElementName, final int topMargin) {
637 		htmlElementName=HTMLElements.getConstantElementName(htmlElementName.toLowerCase());
638 		ELEMENT_HANDLERS.put(htmlElementName,getAbstractBlockElementHandler(htmlElementName).newTopMargin(topMargin));
639 	}
640 
641 	/**
642 	 * Returns the default top margin of an HTML block element with the specified name.
643 	 * <p>
644 	 * See the {@link #setDefaultTopMargin(String htmlElementName, int topMargin)} method for a full description of this property.
645 	 *
646 	 * @param htmlElementName  (required) the case insensitive name of a supported HTML block element.
647 	 * @return the default top margin of an HTML block element with the specified name.
648 	 * @throws UnsupportedOperationException if an unsupported element name is specified.
649 	 */
getDefaultTopMargin(final String htmlElementName)650 	public static int getDefaultTopMargin(final String htmlElementName) {
651 		return getAbstractBlockElementHandler(htmlElementName.toLowerCase()).getTopMargin();
652 	}
653 
654 	/**
655 	 * Sets the default bottom margin of an HTML block element with the specified name.
656 	 * <p>
657 	 * The bottom margin is the number of blank lines that are to be inserted below the rendered block.
658 	 * <p>
659 	 * As this is a static method, the setting affects all instances of the <code>Renderer</code> class.
660 	 * <p>
661 	 * The <code>htmlElementName</code> argument must be one of the following:<br />
662 	 * {@link HTMLElementName#ADDRESS ADDRESS},
663 	 * {@link HTMLElementName#BLOCKQUOTE BLOCKQUOTE},
664 	 * {@link HTMLElementName#CAPTION CAPTION},
665 	 * {@link HTMLElementName#CENTER CENTER},
666 	 * {@link HTMLElementName#DD DD},
667 	 * {@link HTMLElementName#DIR DIR},
668 	 * {@link HTMLElementName#DIV DIV},
669 	 * {@link HTMLElementName#DT DT},
670 	 * {@link HTMLElementName#FIELDSET FIELDSET},
671 	 * {@link HTMLElementName#FORM FORM},
672 	 * {@link HTMLElementName#H1 H1},
673 	 * {@link HTMLElementName#H2 H2},
674 	 * {@link HTMLElementName#H3 H3},
675 	 * {@link HTMLElementName#H4 H4},
676 	 * {@link HTMLElementName#H5 H5},
677 	 * {@link HTMLElementName#H6 H6},
678 	 * {@link HTMLElementName#HR HR},
679 	 * {@link HTMLElementName#LEGEND LEGEND},
680 	 * {@link HTMLElementName#LI LI},
681 	 * {@link HTMLElementName#MENU MENU},
682 	 * {@link HTMLElementName#OL OL},
683 	 * {@link HTMLElementName#P P},
684 	 * {@link HTMLElementName#PRE PRE},
685 	 * {@link HTMLElementName#TR TR},
686 	 * {@link HTMLElementName#UL UL}
687 	 *
688 	 * @param htmlElementName  (required) the case insensitive name of a supported HTML block element.
689 	 * @param bottomMargin  the new bottom margin of the specified element.
690 	 * @throws UnsupportedOperationException if an unsupported element name is specified.
691 	 */
setDefaultBottomMargin(String htmlElementName, final int bottomMargin)692 	public static void setDefaultBottomMargin(String htmlElementName, final int bottomMargin) {
693 		htmlElementName=HTMLElements.getConstantElementName(htmlElementName.toLowerCase());
694 		ELEMENT_HANDLERS.put(htmlElementName,getAbstractBlockElementHandler(htmlElementName).newBottomMargin(bottomMargin));
695 	}
696 
697 	/**
698 	 * Returns the default bottom margin of an HTML block element with the specified name.
699 	 * <p>
700 	 * See the {@link #setDefaultBottomMargin(String htmlElementName, int bottomMargin)} method for a full description of this property.
701 	 *
702 	 * @param htmlElementName  (required) the case insensitive name of a supported HTML block element.
703 	 * @return the default bottom margin of an HTML block element with the specified name.
704 	 * @throws UnsupportedOperationException if an unsupported element name is specified.
705 	 */
getDefaultBottomMargin(final String htmlElementName)706 	public static int getDefaultBottomMargin(final String htmlElementName) {
707 		return getAbstractBlockElementHandler(htmlElementName.toLowerCase()).getBottomMargin();
708 	}
709 
710 	/**
711 	 * Sets the default value of whether an HTML block element of the specified name is indented.
712 	 * <p>
713 	 * As this is a static method, the setting affects all instances of the <code>Renderer</code> class.
714 	 * <p>
715 	 * The <code>htmlElementName</code> argument must be one of the following:<br />
716 	 * {@link HTMLElementName#ADDRESS ADDRESS},
717 	 * {@link HTMLElementName#BLOCKQUOTE BLOCKQUOTE},
718 	 * {@link HTMLElementName#CAPTION CAPTION},
719 	 * {@link HTMLElementName#CENTER CENTER},
720 	 * {@link HTMLElementName#DD DD},
721 	 * {@link HTMLElementName#DIR DIR},
722 	 * {@link HTMLElementName#DIV DIV},
723 	 * {@link HTMLElementName#DT DT},
724 	 * {@link HTMLElementName#FIELDSET FIELDSET},
725 	 * {@link HTMLElementName#FORM FORM},
726 	 * {@link HTMLElementName#H1 H1},
727 	 * {@link HTMLElementName#H2 H2},
728 	 * {@link HTMLElementName#H3 H3},
729 	 * {@link HTMLElementName#H4 H4},
730 	 * {@link HTMLElementName#H5 H5},
731 	 * {@link HTMLElementName#H6 H6},
732 	 * {@link HTMLElementName#HR HR},
733 	 * {@link HTMLElementName#LEGEND LEGEND},
734 	 * {@link HTMLElementName#MENU MENU},
735 	 * {@link HTMLElementName#OL OL},
736 	 * {@link HTMLElementName#P P},
737 	 * {@link HTMLElementName#PRE PRE},
738 	 * {@link HTMLElementName#TR TR},
739 	 * {@link HTMLElementName#UL UL}
740 	 *
741 	 * @param htmlElementName  (required) the case insensitive name of a supported HTML block element.
	 * @param indent  whether the specified element is indented.
743 	 * @throws UnsupportedOperationException if an unsupported element name is specified.
744 	 */
setDefaultIndent(String htmlElementName, final boolean indent)745 	public static void setDefaultIndent(String htmlElementName, final boolean indent) {
746 		htmlElementName=HTMLElements.getConstantElementName(htmlElementName.toLowerCase());
747 		if (htmlElementName==HTMLElementName.LI) throw new UnsupportedOperationException();
748 		ELEMENT_HANDLERS.put(htmlElementName,getAbstractBlockElementHandler(htmlElementName).newIndent(indent));
749 	}
750 
751 	/**
752 	 * Returns the default value of whether an HTML block element of the specified name is indented.
753 	 * <p>
754 	 * See the {@link #setDefaultIndent(String htmlElementName, boolean indent)} method for a full description of this property.
755 	 *
756 	 * @param htmlElementName  (required) the case insensitive name of a supported HTML block element.
757 	 * @return the default value of whether an HTML block element of the specified name is indented.
758 	 * @throws UnsupportedOperationException if an unsupported element name is specified.
759 	 */
isDefaultIndent(String htmlElementName)760 	public static boolean isDefaultIndent(String htmlElementName) {
761 		htmlElementName=HTMLElements.getConstantElementName(htmlElementName.toLowerCase());
762 		if (htmlElementName==HTMLElementName.LI) throw new UnsupportedOperationException();
763 		return getAbstractBlockElementHandler(htmlElementName.toLowerCase()).isIndent();
764 	}
765 
getAbstractBlockElementHandler(String htmlElementName)766 	private static AbstractBlockElementHandler getAbstractBlockElementHandler(String htmlElementName) {
767 		ElementHandler elementHandler=ELEMENT_HANDLERS.get(htmlElementName);
768 		if (elementHandler==null || !(elementHandler instanceof AbstractBlockElementHandler)) throw new UnsupportedOperationException("Cannot set block properties on element "+htmlElementName);
769 		return (AbstractBlockElementHandler)elementHandler;
770 	}
771 
772 	/** This class does the actual work, but is first passed final copies of all the parameters for efficiency. */
773 	private static final class Processor {
		// ---- Immutable configuration, assigned once in the constructor ----
		private final Renderer renderer;
		private final Segment rootSegment;
		private final Source source; // taken from rootSegment.source in the constructor
		private final int maxLineLength;
		private final String newLine;
		private final boolean includeHyperlinkURLs;
		private final boolean includeAlternateText;
		private final boolean decorateFontStyles;
		private final boolean convertNonBreakingSpaces;
		private final int blockIndentSize;
		private final int listIndentSize;
		private final char[] listBullets;
		private final String tableCellSeparator;

		// ---- Mutable state of a rendering pass; most of it is reinitialised by reset() ----
		private Appendable appendable; // destination of the rendered text, assigned in appendTo(Appendable)
		private int renderedIndex; // keeps track of where rendering is up to in case of overlapping elements
		private boolean atStartOfLine; // set when a line break is written (and by reset()); cleared once the indent of the line has been dealt with
		private boolean skipInitialNewLines; // while set, the next block margin is suppressed instead of being output as blank lines
		private int col; // column position on the current output line; zeroed on every line break
		private int listIndentLevel; // multiplied by listIndentSize when computing the indent of a line
		private int indentSize; // accumulated left margin of the enclosing block elements
		private int blockVerticalMargin; // minimum number of blank lines to output at the current block boundary, or NO_MARGIN (-1) if we are not currently at a block boundary.
		private boolean preformatted; // selects preformatted handling in appendSegment(int,int)
		private boolean lastCharWhiteSpace; // a white space is pending and has not been written to the output yet
		private final boolean ignoreInitialWhiteSpace=false; // can remove this at some stage once we're sure it won't be used.
		private boolean bullet; // when set, the next call to appendIndent() writes a list bullet or number and clears the flag
		private int listBulletNumber; // number written for the pending bullet, or UNORDERED_LIST to write a bullet character instead

		// Value of blockVerticalMargin meaning "not at a block boundary".
		private static final int NO_MARGIN=-1;
803 
Processor(final Renderer renderer, final Segment rootSegment, final int maxLineLength, final String newLine, final boolean includeHyperlinkURLs, final boolean includeAlternateText, final boolean decorateFontStyles, final boolean convertNonBreakingSpaces, final int blockIndentSize, final int listIndentSize, final char[] listBullets, final String tableCellSeparator)804 		public Processor(final Renderer renderer, final Segment rootSegment, final int maxLineLength, final String newLine, final boolean includeHyperlinkURLs, final boolean includeAlternateText, final boolean decorateFontStyles, final boolean convertNonBreakingSpaces, final int blockIndentSize, final int listIndentSize, final char[] listBullets, final String tableCellSeparator) {
805 			this.renderer=renderer;
806 			this.rootSegment=rootSegment;
807 			source=rootSegment.source;
808 			this.maxLineLength=maxLineLength;
809 			this.newLine=newLine;
810 			this.includeHyperlinkURLs=includeHyperlinkURLs;
811 			this.includeAlternateText=includeAlternateText;
812 			this.decorateFontStyles=decorateFontStyles;
813 			this.convertNonBreakingSpaces=convertNonBreakingSpaces;
814 			this.blockIndentSize=blockIndentSize;
815 			this.listIndentSize=listIndentSize;
816 			this.listBullets=listBullets;
817 			this.tableCellSeparator=tableCellSeparator;
818 		}
819 
appendTo(final Appendable appendable)820 		public void appendTo(final Appendable appendable) throws IOException {
821 			reset();
822 			this.appendable=appendable;
823 			appendSegmentProcessingChildElements(rootSegment.begin,rootSegment.end,rootSegment.getChildElements());
824 		}
825 
reset()826 		private void reset() {
827 			renderedIndex=0;
828 			atStartOfLine=true;
829 			skipInitialNewLines=!renderer.includeFirstElementTopMargin;
830 			col=0;
831 			listIndentLevel=0;
832 			indentSize=0;
833 			blockVerticalMargin=NO_MARGIN;
834 			preformatted=false;
835 			lastCharWhiteSpace=false;
836 			//ignoreInitialWhiteSpace=false;
837 			bullet=false;
838 		}
839 
appendElementContent(final Element element)840 		private void appendElementContent(final Element element) throws IOException {
841 			final int contentEnd=element.getContentEnd();
842 			if (element.isEmpty() || renderedIndex>=contentEnd) return;
843 			final int contentBegin=element.getStartTag().end;
844 			appendSegmentProcessingChildElements(Math.max(renderedIndex,contentBegin),contentEnd,element.getChildElements());
845 		}
846 
appendSegmentProcessingChildElements(final int begin, final int end, final List<Element> childElements)847 		private void appendSegmentProcessingChildElements(final int begin, final int end, final List<Element> childElements) throws IOException {
848 			int index=begin;
849 			for (Element childElement : childElements) {
850 				if (index>=childElement.end) continue;
851 				if (index<childElement.begin) appendSegmentRemovingTags(index,childElement.begin);
852 				getElementHandler(childElement).process(this,childElement);
853 				index=Math.max(renderedIndex,childElement.end);
854 			}
855 			if (index<end) appendSegmentRemovingTags(index,end);
856 		}
857 
getElementHandler(final Element element)858 		private static ElementHandler getElementHandler(final Element element) {
859 			if (element.getStartTag().getStartTagType().isServerTag()) return RemoveElementHandler.INSTANCE; // hard-coded configuration does not include server tags in child element hierarchy, so this is normally not executed.
860 			ElementHandler elementHandler=ELEMENT_HANDLERS.get(element.getName());
861 			return (elementHandler!=null) ? elementHandler : StandardInlineElementHandler.INSTANCE;
862 		}
863 
appendSegmentRemovingTags(final int begin, final int end)864 		private void appendSegmentRemovingTags(final int begin, final int end) throws IOException {
865 			int index=begin;
866 			while (true) {
867 				Tag tag=source.getNextTag(index);
868 				if (tag==null || tag.begin>=end) break;
869 				appendSegment(index,tag.begin);
870 				index=tag.end;
871 			}
872 			appendSegment(index,end);
873 		}
874 
appendSegment(int begin, final int end)875 		private void appendSegment(int begin, final int end) throws IOException {
876  			assert begin<=end;
877 			if (begin<renderedIndex) begin=renderedIndex;
878 			if (begin>=end) return;
879 			try {
880 				if (preformatted)
881 					appendPreformattedSegment(begin,end);
882 				else
883 					appendNonPreformattedSegment(begin,end);
884 			} finally {
885 				if (renderedIndex<end) renderedIndex=end;
886 			}
887 		}
888 
appendPreformattedSegment(final int begin, final int end)889 		private void appendPreformattedSegment(final int begin, final int end) throws IOException {
890 			assert begin<end;
891 			assert begin>=renderedIndex;
892 			if (isBlockBoundary()) appendBlockVerticalMargin();
893 			final String text=CharacterReference.decode(source.subSequence(begin,end),false,convertNonBreakingSpaces);
894 			for (int i=0; i<text.length(); i++) {
895 				final char ch=text.charAt(i);
896 				if (ch=='\n') {
897 					newLine();
898 				} else if (ch=='\r') {
899 					newLine();
900 					final int nextI=i+1;
901 					if (nextI==text.length()) break;
902 					if (text.charAt(nextI)=='\n') i++;
903 				} else {
904 					append(ch);
905 				}
906 			}
907 		}
908 
appendNonPreformattedSegment(final int begin, final int end)909 		private void appendNonPreformattedSegment(final int begin, final int end) throws IOException {
910 			assert begin<end;
911 			assert begin>=renderedIndex;
912 			final String text=CharacterReference.decodeCollapseWhiteSpace(source.subSequence(begin,end),convertNonBreakingSpaces);
913 			if (text.length()==0) {
914 				// collapsed text is zero length but original segment wasn't, meaning it consists purely of white space.
915 				if (!ignoreInitialWhiteSpace) lastCharWhiteSpace=true;
916 				return;
917 			}
918 			appendNonPreformattedText(text,Segment.isWhiteSpace(source.charAt(begin)),Segment.isWhiteSpace(source.charAt(end-1)));
919 		}
920 
appendText(final String text)921 		private void appendText(final String text) throws IOException {
922 			assert text.length()>0;
923 			appendNonPreformattedText(text,Segment.isWhiteSpace(text.charAt(0)),Segment.isWhiteSpace(text.charAt(text.length()-1)));
924 		}
925 
		/**
		 * Writes text to the output, wrapping it at spaces so that lines stay within the maximum line length.
		 *
		 * @param text  the text to write; wrapping opportunities are searched for at its space characters only.
		 * @param isWhiteSpaceAtStart  whether the text was preceded by white space in its original form.
		 * @param isWhiteSpaceAtEnd  whether the text was followed by white space in its original form; becomes the pending white space state on exit.
		 * @throws IOException if the destination throws it while being written to.
		 */
		private void appendNonPreformattedText(final String text, final boolean isWhiteSpaceAtStart, final boolean isWhiteSpaceAtEnd) throws IOException {
			if (isBlockBoundary()) {
				appendBlockVerticalMargin();
			} else if (lastCharWhiteSpace || (isWhiteSpaceAtStart && !ignoreInitialWhiteSpace)) {
				// output white space only if not on a block boundary
				append(' ');
			}
			int textIndex=0; // start of the word (or unbreakable run of words) to be written next
			int i=0; // scan position; after the inner loop it is the end of that run
			lastCharWhiteSpace=false;
			//ignoreInitialWhiteSpace=false;
			while (true) {
				for (; i<text.length(); i++) {
					if (text.charAt(i)!=' ') continue; // search for end of word
					// At end of word. To comply with the Format=Flowed protocol (RFC 3676, which replaced RFC 2646), need to make sure we don't wrap immediately before ">" or "From ".
					if (i+1<text.length() && text.charAt(i+1)=='>') continue;
					if (i+6<text.length() && text.startsWith("From ",i+1)) continue;
					break; // OK to wrap here if necessary
				}
				if (col+i-textIndex+1>=maxLineLength) {
					// The run does not fit on the current line.  The pending space is written in front of the line break
					// only when there is no indentation at all: the bitwise OR is zero only if both values are zero.
					if (lastCharWhiteSpace && (listIndentLevel|indentSize)==0) append(' ');
					startNewLine(0);
				} else if (lastCharWhiteSpace) {
					append(' ');
				}
				append(text,textIndex,i);
				if (i==text.length()) break;
				lastCharWhiteSpace=true; // the space at index i is pending; it is written before the next run
				textIndex=++i;
			}
			lastCharWhiteSpace=isWhiteSpaceAtEnd;
		}
958 
isBlockBoundary()959 		private boolean isBlockBoundary() {
960 			return blockVerticalMargin!=NO_MARGIN;
961 		}
962 
appendBlockVerticalMargin()963 		private void appendBlockVerticalMargin() throws IOException {
964 			assert blockVerticalMargin!=NO_MARGIN;
965 			if (skipInitialNewLines) {
966 				// at first text after <li> element or start of document
967 				skipInitialNewLines=false;
968 				final int indentCol=indentSize+listIndentLevel*listIndentSize;
969 				if (col==indentCol) {
970 					atStartOfLine=false; // no need to call appendIndent() from appendTextInit().
971 				} else {
972 					// there was an indenting block since the <li> or start of document
973 					if (bullet || col>indentCol) {
974 						// just start new line as normal if the last indenting block is another <li>, or if the current column is already past the required indent
975 						startNewLine(0);
976 					} else {
977 						// just append spaces to get the column up to the required indent
978 						while (indentCol>col) {
979 							appendable.append(' ');
980 							col++;
981 						}
982 						atStartOfLine=false; // make sure appendIndent() isn't called again from appendTextInit()
983 					}
984 				}
985 			} else {
986 				startNewLine(blockVerticalMargin);
987 			}
988 			blockVerticalMargin=NO_MARGIN;
989 		}
990 
blockBoundary(final int verticalMargin)991 		private void blockBoundary(final int verticalMargin) throws IOException {
992 			// Set a block boundary with the given vertical margin.  The vertical margin is the minimum number of blank lines to output between the blocks.
993 			// This method can be called multiple times at a block boundary, and the next textual output will output the number of blank lines determined by the
994 			// maximum vertical margin of all the method calls.
995 			if (blockVerticalMargin<verticalMargin) blockVerticalMargin=verticalMargin;
996 		}
997 
startNewLine(int verticalMargin)998 		private void startNewLine(int verticalMargin) throws IOException {
999 			// ensures we end up at the start of a line with the specified vertical margin between the previous textual output and the next textual output.
1000 			final int requiredNewLines=verticalMargin+(atStartOfLine?0:1);
1001 			for (int i=0; i<requiredNewLines; i++) appendable.append(newLine);
1002 			atStartOfLine=true;
1003 			col=0;
1004 		}
1005 
newLine()1006 		private void newLine() throws IOException {
1007 			appendable.append(newLine);
1008 			atStartOfLine=true;
1009 			col=0;
1010 		}
1011 
appendTextInit()1012 		private void appendTextInit() throws IOException {
1013 			skipInitialNewLines=false;
1014 			if (atStartOfLine) appendIndent();
1015 		}
1016 
appendIndent()1017 		private void appendIndent() throws IOException {
1018 			for (int i=indentSize; i>0; i--) appendable.append(' ');
1019 			if (bullet) {
1020 				for (int i=(listIndentLevel-1)*listIndentSize; i>0; i--) appendable.append(' ');
1021 				if (listBulletNumber==UNORDERED_LIST) {
1022 					for (int i=listIndentSize-2; i>0; i--) appendable.append(' ');
1023 					appendable.append(listBullets[(listIndentLevel-1)%listBullets.length]).append(' ');
1024 				} else {
1025 					String bulletNumberString=Integer.toString(listBulletNumber);
1026 					for (int i=listIndentSize-bulletNumberString.length()-2; i>0; i--) appendable.append(' ');
1027 					appendable.append(bulletNumberString).append(". ");
1028 				}
1029 				bullet=false;
1030 			} else {
1031 				for (int i=listIndentLevel*listIndentSize; i>0; i--) appendable.append(' ');
1032 			}
1033 			col=indentSize+listIndentLevel*listIndentSize;
1034 			atStartOfLine=false;
1035 		}
1036 
append(final char ch)1037 		private Processor append(final char ch) throws IOException {
1038 			appendTextInit();
1039 			appendable.append(ch);
1040 			col++;
1041 			return this;
1042 		}
1043 
append(final String text)1044 		private Processor append(final String text) throws IOException {
1045 			appendTextInit();
1046 			appendable.append(text);
1047 			col+=text.length();
1048 			return this;
1049 		}
1050 
append(final CharSequence text, final int begin, final int end)1051 		private void append(final CharSequence text, final int begin, final int end) throws IOException {
1052 			appendTextInit();
1053 			for (int i=begin; i<end; i++) appendable.append(text.charAt(i));
1054 			col+=end-begin;
1055 		}
1056 	}
1057 
	/** Strategy for rendering one kind of element; handlers are looked up by element name in the handler registry. */
	private interface ElementHandler {
		/**
		 * Renders the specified element using the given processor.
		 *
		 * @param x  the processor performing the rendering pass.
		 * @param element  the element to render.
		 * @throws IOException if the destination throws it while being written to.
		 */
		void process(Processor x, Element element) throws IOException;
	}
1061 
1062 	private static final class RemoveElementHandler implements ElementHandler {
1063 		public static final ElementHandler INSTANCE=new RemoveElementHandler();
process(Processor x, Element element)1064 		public void process(Processor x, Element element) {}
1065 	}
1066 
1067 	private static final class StandardInlineElementHandler implements ElementHandler {
1068 		public static final ElementHandler INSTANCE=new StandardInlineElementHandler();
process(Processor x, Element element)1069 		public void process(Processor x, Element element) throws IOException {
1070 			x.appendElementContent(element);
1071 		}
1072 	}
1073 
1074 	private static final class FontStyleElementHandler implements ElementHandler {
1075 		public static final ElementHandler INSTANCE_B=new FontStyleElementHandler('*');
1076 		public static final ElementHandler INSTANCE_I=new FontStyleElementHandler('/');
1077 		public static final ElementHandler INSTANCE_U=new FontStyleElementHandler('_');
1078 		public static final ElementHandler INSTANCE_CODE=new FontStyleElementHandler('|');
1079 		private final char decorationChar;
FontStyleElementHandler(char decorationChar)1080 		public FontStyleElementHandler(char decorationChar) {
1081 			this.decorationChar=decorationChar;
1082 		}
process(Processor x, Element element)1083 		public void process(Processor x, Element element) throws IOException {
1084 			if (x.decorateFontStyles) {
1085 				if (x.lastCharWhiteSpace) {
1086 					x.append(' ');
1087 					x.lastCharWhiteSpace=false;
1088 				}
1089 				x.append(decorationChar);
1090 				x.appendElementContent(element);
1091 				if (x.decorateFontStyles) x.append(decorationChar);
1092 			} else {
1093 				x.appendElementContent(element);
1094 			}
1095 		}
1096 	}
1097 
1098 	abstract private static class AbstractBlockElementHandler implements ElementHandler {
1099 		private final int topMargin;
1100 		private final int bottomMargin;
1101 		private final boolean indent;
AbstractBlockElementHandler(int topMargin, int bottomMargin, boolean indent)1102 		protected AbstractBlockElementHandler(int topMargin, int bottomMargin, boolean indent) {
1103 			this.topMargin=topMargin;
1104 			this.bottomMargin=bottomMargin;
1105 			this.indent=indent;
1106 		}
process(Processor x, Element element)1107 		public void process(Processor x, Element element) throws IOException {
1108 			x.blockBoundary(RendererCSS.getTopMargin(element,topMargin));
1109 			int leftMargin=RendererCSS.getLeftMargin(element,indent ? x.blockIndentSize : 0);
1110 			x.indentSize+=leftMargin;
1111 			processBlockContent(x,element);
1112 			x.indentSize-=leftMargin;
1113 			x.blockBoundary(RendererCSS.getBottomMargin(element,bottomMargin));
1114 		}
newTopMargin(int topMargin)1115 		public AbstractBlockElementHandler newTopMargin(int topMargin) {
1116 			return newInstance(topMargin,this.bottomMargin,this.indent);
1117 		}
getTopMargin()1118 		public int getTopMargin() {
1119 			return topMargin;
1120 		}
newBottomMargin(int bottomMargin)1121 		public AbstractBlockElementHandler newBottomMargin(int bottomMargin) {
1122 			return newInstance(this.topMargin,bottomMargin,this.indent);
1123 		}
getBottomMargin()1124 		public int getBottomMargin() {
1125 			return bottomMargin;
1126 		}
newIndent(boolean indent)1127 		public AbstractBlockElementHandler newIndent(boolean indent) {
1128 			return newInstance(this.topMargin,this.bottomMargin,indent);
1129 		}
isIndent()1130 		public boolean isIndent() {
1131 			return indent;
1132 		}
processBlockContent(Processor x, Element element)1133 		abstract protected void processBlockContent(Processor x, Element element) throws IOException;
newInstance(int topMargin, int bottomMargin, boolean indent)1134 		abstract protected AbstractBlockElementHandler newInstance(int topMargin, int bottomMargin, boolean indent);
1135 	}
1136 
1137 	private static final class StandardBlockElementHandler extends AbstractBlockElementHandler {
1138 		public static final ElementHandler INSTANCE_0_0=new StandardBlockElementHandler(0,0,false);
1139 		public static final ElementHandler INSTANCE_1_1=new StandardBlockElementHandler(1,1,false);
1140 		public static final ElementHandler INSTANCE_2_1=new StandardBlockElementHandler(2,1,false);
1141 		public static final ElementHandler INSTANCE_0_0_INDENT=new StandardBlockElementHandler(0,0,true);
1142 		public static final ElementHandler INSTANCE_1_1_INDENT=new StandardBlockElementHandler(1,1,true);
StandardBlockElementHandler(int topMargin, int bottomMargin, boolean indent)1143 		private StandardBlockElementHandler(int topMargin, int bottomMargin, boolean indent) {
1144 			super(topMargin,bottomMargin,indent);
1145 		}
processBlockContent(Processor x, Element element)1146 		protected void processBlockContent(Processor x, Element element) throws IOException {
1147 			x.appendElementContent(element);
1148 		}
newInstance(int topMargin, int bottomMargin, boolean indent)1149 		protected AbstractBlockElementHandler newInstance(int topMargin, int bottomMargin, boolean indent) {
1150 			return new StandardBlockElementHandler(topMargin,bottomMargin,indent);
1151 		}
1152 	}
1153 
1154 	private static final class A_ElementHandler implements ElementHandler {
1155 		public static final ElementHandler INSTANCE=new A_ElementHandler();
process(Processor x, Element element)1156 		public void process(Processor x, Element element) throws IOException {
1157 			x.appendElementContent(element);
1158 			if (!x.includeHyperlinkURLs) return;
1159 			String renderedHyperlinkURL=x.renderer.renderHyperlinkURL(element.getStartTag());
1160 			if (renderedHyperlinkURL==null) return;
1161 			int linkLength=renderedHyperlinkURL.length()+1;
1162 			if (x.col+linkLength>=x.maxLineLength) {
1163 				x.startNewLine(0);
1164 			} else {
1165 				x.append(' ');
1166 			}
1167 			x.append(renderedHyperlinkURL);
1168 			x.lastCharWhiteSpace=true;
1169 		}
1170 	}
1171 
1172 	private static final class BR_ElementHandler implements ElementHandler {
1173 		public static final ElementHandler INSTANCE=new BR_ElementHandler();
process(Processor x, Element element)1174 		public void process(Processor x, Element element) throws IOException {
1175 			if (x.isBlockBoundary() && !x.atStartOfLine && !x.skipInitialNewLines) x.newLine(); // add an extra new line if we're at a block boundary and aren't already at the start of the next line and it's not the first element after <li>
1176 			x.newLine();
1177 			x.blockBoundary(0);
1178 		}
1179 	}
1180 
1181 	private static final class HR_ElementHandler extends AbstractBlockElementHandler {
1182 		public static final ElementHandler INSTANCE=new HR_ElementHandler();
HR_ElementHandler()1183 		private HR_ElementHandler() {
1184 			this(0,0,false);
1185 		}
HR_ElementHandler(int topMargin, int bottomMargin, boolean indent)1186 		private HR_ElementHandler(int topMargin, int bottomMargin, boolean indent) {
1187 			super(topMargin,bottomMargin,indent);
1188 		}
processBlockContent(Processor x, Element element)1189 		protected void processBlockContent(Processor x, Element element) throws IOException {
1190 			x.appendBlockVerticalMargin();
1191 			final int maxCol=x.maxLineLength-4;
1192 			x.append('-');
1193 			for (int i=x.col; i<maxCol; i++) x.appendable.append('-');
1194 			x.col=maxCol;
1195 		}
newInstance(int topMargin, int bottomMargin, boolean indent)1196 		protected AbstractBlockElementHandler newInstance(int topMargin, int bottomMargin, boolean indent) {
1197 			return new HR_ElementHandler(topMargin,bottomMargin,indent);
1198 		}
1199 	}
1200 
1201 	private static final class AlternateTextElementHandler implements ElementHandler {
1202 		public static final ElementHandler INSTANCE=new AlternateTextElementHandler();
process(Processor x, Element element)1203 		public void process(Processor x, Element element) throws IOException {
1204 			if (!x.includeAlternateText) return;
1205 			String text=x.renderer.renderAlternateText(element.getStartTag());
1206 			if (text==null) return;
1207 			x.appendText(text);
1208 		}
1209 	}
1210 
1211 	private static final class ListElementHandler extends AbstractBlockElementHandler {
1212 		public static final ElementHandler INSTANCE_OL=new ListElementHandler(0);
1213 		public static final ElementHandler INSTANCE_UL=new ListElementHandler(UNORDERED_LIST);
1214 		private final int initialListBulletNumber;
ListElementHandler(int initialListBulletNumber)1215 		private ListElementHandler(int initialListBulletNumber) {
1216 			this(initialListBulletNumber,0,0,false);
1217 		}
ListElementHandler(int initialListBulletNumber, int topMargin, int bottomMargin, boolean indent)1218 		private ListElementHandler(int initialListBulletNumber, int topMargin, int bottomMargin, boolean indent) {
1219 			super(topMargin,bottomMargin,indent);
1220 			this.initialListBulletNumber=initialListBulletNumber;
1221 		}
processBlockContent(Processor x, Element element)1222 		protected void processBlockContent(Processor x, Element element) throws IOException {
1223 			int oldListBulletNumber=x.listBulletNumber;
1224 			x.listBulletNumber=initialListBulletNumber;
1225 			x.listIndentLevel++;
1226 			x.appendElementContent(element);
1227 			x.listIndentLevel--;
1228 			x.listBulletNumber=oldListBulletNumber;
1229 		}
newInstance(int topMargin, int bottomMargin, boolean indent)1230 		protected AbstractBlockElementHandler newInstance(int topMargin, int bottomMargin, boolean indent) {
1231 			return new ListElementHandler(initialListBulletNumber,topMargin,bottomMargin,indent);
1232 		}
1233 	}
1234 
1235 	private static final class LI_ElementHandler extends AbstractBlockElementHandler {
1236 		public static final ElementHandler INSTANCE=new LI_ElementHandler();
LI_ElementHandler()1237 		private LI_ElementHandler() {
1238 			this(0,0,false);
1239 		}
LI_ElementHandler(int topMargin, int bottomMargin, boolean indent)1240 		private LI_ElementHandler(int topMargin, int bottomMargin, boolean indent) {
1241 			super(topMargin,bottomMargin,indent);
1242 		}
processBlockContent(Processor x, Element element)1243 		protected void processBlockContent(Processor x, Element element) throws IOException {
1244 			if (x.listBulletNumber!=UNORDERED_LIST) x.listBulletNumber++;
1245 			x.bullet=true;
1246 			x.appendBlockVerticalMargin();
1247 			x.appendIndent();
1248 			x.skipInitialNewLines=true;
1249 			x.blockBoundary(0); // this shouldn't result in the output of any new lines but ensures surrounding white space is ignored
1250 			x.appendElementContent(element);
1251 			x.bullet=false;
1252 		}
newInstance(int topMargin, int bottomMargin, boolean indent)1253 		protected AbstractBlockElementHandler newInstance(int topMargin, int bottomMargin, boolean indent) {
1254 			return new LI_ElementHandler(topMargin,bottomMargin,indent);
1255 		}
1256 	}
1257 
1258 	private static final class PRE_ElementHandler extends AbstractBlockElementHandler {
1259 		public static final ElementHandler INSTANCE=new PRE_ElementHandler();
PRE_ElementHandler()1260 		private PRE_ElementHandler() {
1261 			this(1,1,false);
1262 		}
PRE_ElementHandler(int topMargin, int bottomMargin, boolean indent)1263 		private PRE_ElementHandler(int topMargin, int bottomMargin, boolean indent) {
1264 			super(topMargin,bottomMargin,indent);
1265 		}
processBlockContent(Processor x, Element element)1266 		protected void processBlockContent(Processor x, Element element) throws IOException {
1267 			boolean oldPreformatted=x.preformatted; // should always be false
1268 			x.preformatted=true;
1269 			x.appendElementContent(element);
1270 			x.preformatted=oldPreformatted;
1271 		}
newInstance(int topMargin, int bottomMargin, boolean indent)1272 		protected AbstractBlockElementHandler newInstance(int topMargin, int bottomMargin, boolean indent) {
1273 			return new PRE_ElementHandler(topMargin,bottomMargin,indent);
1274 		}
1275 	}
1276 
1277 	private static final class TD_ElementHandler implements ElementHandler {
1278 		public static final ElementHandler INSTANCE=new TD_ElementHandler();
process(Processor x, Element element)1279 		public void process(Processor x, Element element) throws IOException {
1280 			if (!x.isBlockBoundary()) x.append(x.tableCellSeparator);
1281 			x.lastCharWhiteSpace=false;
1282 			x.appendElementContent(element);
1283 		}
1284 	}
1285 
1286 }