1 /*
2  * $Id$
3  *
4  * Copyright 2007 by Howard Shank (hgshank@yahoo.com)
5  *
6  * The contents of this file are subject to the Mozilla Public License Version 1.1
7  * (the "License"); you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at http://www.mozilla.org/MPL/
9  *
10  * Software distributed under the License is distributed on an "AS IS" basis,
11  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12  * for the specific language governing rights and limitations under the License.
13  *
14  * The Original Code is 'iText, a free JAVA-PDF library'.
15  *
16  * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
17  * the Initial Developer are Copyright (C) 1999-2006 by Bruno Lowagie.
18  * All Rights Reserved.
19  * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
20  * are Copyright (C) 2000-2006 by Paulo Soares. All Rights Reserved.
21  *
22  * Contributor(s): all the names of the contributors are added in the source code
23  * where applicable.
24  *
25  * Alternatively, the contents of this file may be used under the terms of the
 * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
27  * provisions of LGPL are applicable instead of those above.  If you wish to
28  * allow use of your version of this file only under the terms of the LGPL
29  * License and not to allow others to use your version of this file under
30  * the MPL, indicate your decision by deleting the provisions above and
31  * replace them with the notice and other provisions required by the LGPL.
32  * If you do not delete the provisions above, a recipient may use your version
33  * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
34  *
35  * This library is free software; you can redistribute it and/or modify it
36  * under the terms of the MPL as stated above or under the terms of the GNU
37  * Library General Public License as published by the Free Software Foundation;
38  * either version 2 of the License, or any later version.
39  *
40  * This library is distributed in the hope that it will be useful, but WITHOUT
41  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
42  * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
43  * details.
44  *
45  * If you didn't download this code from the following link, you should check if
46  * you aren't using an obsolete version:
47  * http://www.lowagie.com/iText/
48  */
49 package com.lowagie.text.rtf.parser;
50 
51 import java.awt.Color;
52 import java.io.BufferedInputStream;
53 import java.io.IOException;
54 import java.io.InputStream;
55 import java.io.PushbackInputStream;
56 import java.util.ArrayList;
57 import java.util.Arrays;
58 import java.util.Date;
59 import java.util.EventListener;
60 import java.util.Iterator;
61 import java.util.Stack;
62 
63 import com.lowagie.text.Document;
64 import com.lowagie.text.DocumentException;
65 import com.lowagie.text.Element;
66 import com.lowagie.text.List;
67 import com.lowagie.text.rtf.direct.RtfDirectContent;
68 import com.lowagie.text.rtf.document.RtfDocument;
69 import com.lowagie.text.rtf.parser.ctrlwords.RtfCtrlWordData;
70 import com.lowagie.text.rtf.parser.ctrlwords.RtfCtrlWordListener;
71 import com.lowagie.text.rtf.parser.ctrlwords.RtfCtrlWordMgr;
72 import com.lowagie.text.rtf.parser.destinations.RtfDestination;
73 import com.lowagie.text.rtf.parser.destinations.RtfDestinationMgr;
74 
75 /**
76  * The RtfParser allows the importing of RTF documents or
77  * RTF document fragments. The RTF document or fragment is tokenised,
78  * font and color definitions corrected and then added to
79  * the document being written.
80  *
81  * @author Mark Hall (Mark.Hall@mail.room3b.eu)
82  * @author Howard Shank (hgshank@yahoo.com)
83  * @since 2.0.8
84  */
85 
86 public class RtfParser {
87 	/**
88 	 * Debugging flag.
89 	 */
90 	private static final boolean debugParser = false;	// DEBUG Files are unlikely to be read by any reader!
91 	private String logFile = null;
92 	private boolean logging = false;
93 	private boolean logAppend = false;
94 
95 	/**
96 	 * The iText element to add the RTF document to.
97 	 * @since 2.1.3
98 	 */
99 	private Element elem = null;
100 	/**
101 	 * The iText document to add the RTF document to.
102 	 */
103 	private Document document = null;
104 	/**
105 	 * The RtfDocument to add the RTF document or fragment to.
106 	 */
107 	private RtfDocument rtfDoc = null;
108 	/**
109 	 * The RtfKeywords that creates and handles keywords that are implemented.
110 	 */
111 	private RtfCtrlWordMgr rtfKeywordMgr = null;
112 	/**
	 * The RtfImportMgr to store imported font and color mappings in.
114 	 */
115 	private RtfImportMgr importMgr = null;
116 	/**
117 	 * The RtfDestinationMgr object to manage destinations.
118 	 */
119 	private RtfDestinationMgr destinationMgr = null;
120 	/**
121 	 * Stack for saving states for groups
122 	 */
123 	private Stack stackState = null;
124 	/**
125 	 * The current parser state.
126 	 */
127 	private RtfParserState currentState = null;
128 	/**
129 	 * The pushback reader to read the input stream.
130 	 */
131 	private PushbackInputStream pbReader = null;
132 	/**
	 * Conversion type. Identifies if we are doing an import or a convert.
134 	 */
135 	private int conversionType = TYPE_IMPORT_FULL;
136 
137 
	/*
	 * Parser state layout:
	 *
	 * The top 4 bits (value << 28) select the major state, the low 28 bits
	 * select the specific state within it.
	 *
	 * 0x0....... = In Header
	 * 0x2....... = In Document
	 * 0x4....... = Other (start/stop)
	 * 0x8....... = Errors
	 * 0x8FFFFFFF = Unknown state
	 */
148 
149 	/*
150 	 * Header state values
151 	 */
152 
153 	/**
154 	 * Currently the RTF document header is being parsed.
155 	 */
156 	public static final int PARSER_IN_HEADER = (0x0 << 28) | 0x000000;
157 	/**
158 	 * Currently the RTF charset is being parsed.
159 	 */
160 	public static final int PARSER_IN_CHARSET = PARSER_IN_HEADER | 0x000001;
161 	/**
162 	 * Currently the RTF deffont is being parsed.
163 	 */
164 	public static final int PARSER_IN_DEFFONT = PARSER_IN_HEADER | 0x000002;
165 	/**
166 	 * Currently the RTF font table is being parsed.
167 	 */
168 	public static final int PARSER_IN_FONT_TABLE = PARSER_IN_HEADER | 0x000003;
169 	/**
170 	 * Currently a RTF font table info element is being parsed.
171 	 */
172 	public static final int PARSER_IN_FONT_TABLE_INFO = PARSER_IN_HEADER | 0x000004;
173 	/**
174 	 * Currently the RTF filetbl is being parsed.
175 	 */
176 	public static final int PARSER_IN_FILE_TABLE = PARSER_IN_HEADER | 0x000005;
177 	/**
178 	 * Currently the RTF color table is being parsed.
179 	 */
180 	public static final int PARSER_IN_COLOR_TABLE = PARSER_IN_HEADER | 0x000006;
181 	/**
182 	 * Currently the RTF  stylesheet is being parsed.
183 	 */
184 	public static final int PARSER_IN_STYLESHEET = PARSER_IN_HEADER | 0x000007;
185 	/**
186 	 * Currently the RTF listtables is being parsed.
187 	 */
188 	public static final int PARSER_IN_LIST_TABLE = PARSER_IN_HEADER | 0x000008;
189 	/**
190 	 * Currently the RTF listtable override is being parsed.
191 	 */
192 	public static final int PARSER_IN_LISTOVERRIDE_TABLE = PARSER_IN_HEADER | 0x000009;
193 	/**
194 	 * Currently the RTF revtbl is being parsed.
195 	 */
196 	public static final int PARSER_IN_REV_TABLE = PARSER_IN_HEADER | 0x00000A;
197 	/**
198 	 * Currently the RTF rsidtable is being parsed.
199 	 */
200 	public static final int PARSER_IN_RSID_TABLE = PARSER_IN_HEADER | 0x0000B;
201 	/**
202 	 * Currently the RTF generator is being parsed.
203 	 */
204 	public static final int PARSER_IN_GENERATOR = PARSER_IN_HEADER | 0x00000C;
205 	/**
206 	 * Currently the RTF Paragraph group properties Table (word 2002)
207 	 */
208 	public static final int PARSER_IN_PARAGRAPH_TABLE = PARSER_IN_HEADER | 0x00000E;
209 	/**
210 	 * Currently the RTF Old Properties.
211 	 */
212 	public static final int PARSER_IN_OLDCPROPS = PARSER_IN_HEADER | 0x00000F;
213 	/**
214 	 * Currently the RTF Old Properties.
215 	 */
216 	public static final int PARSER_IN_OLDPPROPS = PARSER_IN_HEADER | 0x000010;
217 	/**
218 	 * Currently the RTF Old Properties.
219 	 */
220 	public static final int PARSER_IN_OLDTPROPS = PARSER_IN_HEADER | 0x000012;
221 	/**
222 	 * Currently the RTF Old Properties.
223 	 */
224 	public static final int PARSER_IN_OLDSPROPS = PARSER_IN_HEADER | 0x000013;
225 	/**
226 	 * Currently the RTF User Protection Information.
227 	 */
228 	public static final int PARSER_IN_PROT_USER_TABLE = PARSER_IN_HEADER | 0x000014;
229 	/**
230 	 * Currently the Latent Style and Formatting usage restrictions
231 	 */
232 	public static final int PARSER_IN_LATENTSTYLES = PARSER_IN_HEADER | 0x000015;
233 
234 	public static final int PARSER_IN_PARAGRAPH_GROUP_PROPERTIES =PARSER_IN_HEADER | 0x000016;
235 
236 	/*
237 	 * Document state values
238 	 */
239 
240 	/**
241 	 * Currently the RTF document content is being parsed.
242 	 */
243 	public static final int PARSER_IN_DOCUMENT = (0x2 << 28 ) | 0x000000;
244 
245 	/**
246 	 * Currently the RTF info group is being parsed.
247 	 */
248 	public static final int PARSER_IN_INFO_GROUP = PARSER_IN_DOCUMENT | 0x000001;
249 
250 
251 	public static final int PARSER_IN_UPR = PARSER_IN_DOCUMENT | 0x000002;
252 	/**
253 	 * Currently a shppict control word is being parsed.
254 	 */
255 	public static final int PARSER_IN_SHPPICT = PARSER_IN_DOCUMENT | 0x000010; //16
256 	/**
257 	 * Currently a pict control word is being parsed.
258 	 */
259 	public static final int PARSER_IN_PICT = PARSER_IN_DOCUMENT | 0x000011; //17
260 	/**
261 	 * Currently a picprop control word is being parsed.
262 	 */
263 	public static final int PARSER_IN_PICPROP = PARSER_IN_DOCUMENT | 0x000012; //18
264 	/**
265 	 * Currently a blipuid control word is being parsed.
266 	 */
267 	public static final int PARSER_IN_BLIPUID = PARSER_IN_DOCUMENT | 0x000013; //19
268 
269 	/* other states */
270 	/**
271 	 * The parser is at the beginning or the end of the file.
272 	 */
273 	public static final int PARSER_STARTSTOP = (0x4 << 28)| 0x0001;
274 	/* ERRORS */
275 	/**
276 	 * Currently the parser is in an error state.
277 	 */
278 	public static final int PARSER_ERROR = (0x8 << 28) | 0x0000;
279 	/**
280 	 * The parser reached the end of the file.
281 	 */
282 	public static final int PARSER_ERROR_EOF = PARSER_ERROR | 0x0001;
283 	/**
284 	 * Currently the parser is in an unknown state.
285 	 */
286 	public static final int PARSER_IN_UNKNOWN = PARSER_ERROR | 0x0FFFFFFF;
287 
288 
289 	/**
290 	 * Conversion type is unknown
291 	 */
292 	public static final int TYPE_UNIDENTIFIED = -1;
293 	/**
294 	 * Conversion type is an import. Uses direct content to add everything.
295 	 * This is what the original import does.
296 	 */
297 	public static final int TYPE_IMPORT_FULL = 0;
298 	/**
299 	 * Conversion type is an import of a partial file/fragment. Uses direct content to add everything.
300 	 */
301 	public static final int TYPE_IMPORT_FRAGMENT = 1;
302 	/**
303 	 * Conversion type is a conversion. This uses the document (not rtfDoc) to add
304 	 * all the elements making it a different supported documents depending on the writer used.
305 	 */
306 	public static final int TYPE_CONVERT = 2;
307 	/**
308 	 * Conversion type to import a document into an element. i.e. Chapter, Section, Table Cell, etc.
309 	 * @since 2.1.4
310 	 */
311 	public static final int TYPE_IMPORT_INTO_ELEMENT = 3;
312 
313 
314 	/**
315 	 * Destination is normal. Text is processed.
316 	 */
317 	public static final int DESTINATION_NORMAL = 0;
318 	/**
319 	 * Destination is skipping. Text is ignored.
320 	 */
321 	public static final int DESTINATION_SKIP = 1;
322 
323 	//////////////////////////////////// TOKENISE VARIABLES ///////////////////
324 	/*
325 	 * State flags use 4/28 bitmask.
326 	 * First 4 bits (nibble) indicates major state. Used for unknown and error
327 	 * Last 28 bits indicates the value;
328 	 */
329 
330 	/**
331 	 * The RtfTokeniser is in its ground state. Any token may follow.
332 	 */
333 	public static final int TOKENISER_NORMAL = 0x00000000;
334 	/**
	 * The RtfTokeniser is currently skipping bytes.
336 	 */
337 	public static final int TOKENISER_SKIP_BYTES = 0x00000001;
338 	/**
	 * The RtfTokeniser is currently skipping a group.
340 	 */
341 	public static final int TOKENISER_SKIP_GROUP = 0x00000002;
342 	/**
343 	 * The RtfTokeniser is currently reading binary stream.
344 	 */
345 	public static final int TOKENISER_BINARY= 0x00000003;
346 	/**
347 	 * The RtfTokeniser is currently reading hex data.
348 	 */
349 	public static final int TOKENISER_HEX= 0x00000004;
350 	/**
351 	 * The RtfTokeniser ignore result
352 	 */
353 	public static final int TOKENISER_IGNORE_RESULT= 0x00000005;
354 	/**
355 	 * The RtfTokeniser is currently in error state
356 	 */
357 	public static final int TOKENISER_STATE_IN_ERROR =  0x80000000; // 1000 0000 0000 0000 0000 0000 0000 0000
358 	/**
	 * The RtfTokeniser is currently in an unknown state
360 	 */
361 	public static final int TOKENISER_STATE_IN_UNKOWN = 0xFF000000; // 1111 0000 0000 0000 0000 0000 0000 0000
362 
363 	/**
364 	 * The current group nesting level.
365 	 */
366 	private int groupLevel = 0;
367 	/**
368 	 * The current document group nesting level. Used for fragments.
369 	 */
370 	private int docGroupLevel = 0;
371 	/**
372 	 * When the tokeniser is Binary.
373 	 */
374 	private long binByteCount = 0;
375 	/**
376 	 * When the tokeniser is set to skip bytes, binSkipByteCount is the number of bytes to skip.
377 	 */
378 	private long binSkipByteCount = 0;
379 	/**
	 * When the tokeniser is set to skip to next group, this is the group identifier to return to.
381 	 */
382 	private int skipGroupLevel = 0;
383 
384 	//RTF parser error codes
385 	public static final int  errOK =0;                        // Everything's fine!
386 	public static final int  errStackUnderflow   =  -1;       // Unmatched '}'
387 	public static final int  errStackOverflow    =  -2;       // Too many '{' -- memory exhausted
388 	public static final int  errUnmatchedBrace   =  -3;       // RTF ended during an open group.
389 	public static final int  errInvalidHex       =  -4;       // invalid hex character found in data
390 	public static final int  errBadTable         =  -5;       // RTF table (sym or prop) invalid
391 	public static final int  errAssertion        =  -6;       // Assertion failure
392 	public static final int  errEndOfFile        =  -7;       // End of file reached while reading RTF
393 	public static final int  errCtrlWordNotFound =  -8;		  // control word was not found
394 	//////////////////////////////////// TOKENISE VARIABLES ///////////////////
395 
396 
397 	//////////////////////////////////// STATS VARIABLES ///////////////////
398 	/**
399 	 * Total bytes read.
400 	 */
401 	private long byteCount = 0;
402 	/**
403 	 * Total control words processed.
404 	 *
405 	 * Contains both known and unknown.
406 	 *
	 * <code>ctrlWordCount</code> should equal
	 * <code>ctrlWordHandledCount</code> + <code>ctrlWordNotHandledCount</code> + <code>ctrlWordSkippedCount</code>
409 	 */
410 	private long ctrlWordCount = 0;
411 	/**
412 	 * Total { encountered as an open group token.
413 	 */
414 	private long openGroupCount = 0;
415 	/**
416 	 * Total } encountered as a close group token.
417 	 */
418 	private long closeGroupCount = 0;
419 	/**
420 	 * Total clear text characters processed.
421 	 */
422 	private long characterCount = 0;
423 	/**
424 	 * Total control words recognized.
425 	 */
426 	private long ctrlWordHandledCount = 0;
427 	/**
428 	 * Total control words not handled.
429 	 */
430 	private long ctrlWordNotHandledCount = 0;
431 	/**
432 	 * Total control words skipped.
433 	 */
434 	private long ctrlWordSkippedCount = 0;
435 	/**
436 	 * Total groups skipped. Includes { and } as a group.
437 	 */
438 	private long groupSkippedCount = 0;
439 	/**
440 	 * Start time as a long.
441 	 */
442 	private long startTime = 0;
443 	/**
444 	 * Stop time as a long.
445 	 */
446 	private long endTime = 0;
447 	/**
448 	 * Start date as a date.
449 	 */
450 	private Date startDate = null;
451 	/**
452 	 * End date as a date.
453 	 */
454 	private Date endDate = null;
455 	//////////////////////////////////// STATS VARIABLES ///////////////////
456 	/**
457 	 * Last control word and parameter processed.
458 	 */
459 	private RtfCtrlWordData lastCtrlWordParam = null;
460 
461 	/** The <code>RtfCtrlWordListener</code>. */
462     private ArrayList listeners = new ArrayList();
463 
464 	/**
465 	 * Constructor
	 * @param doc The iText document to add the RTF document to.
467 	 * @since 2.1.3
468 	 */
RtfParser(Document doc)469     public RtfParser(Document doc) {
470     	this.document = doc;
471     }
472 	/* *********
473 	 *  READER *
474 	 ***********/
475 	/**
476 	 * Imports a complete RTF document.
477 	 *
478 	 * @param readerIn
479 	 * 		The Reader to read the RTF document from.
480 	 * @param rtfDoc
481 	 * 		The RtfDocument to add the imported document to.
482 	 * @throws IOException On I/O errors.
483 	 *  @since 2.1.3
484 	 */
importRtfDocument(InputStream readerIn, RtfDocument rtfDoc)485 	public void importRtfDocument(InputStream readerIn, RtfDocument rtfDoc) throws IOException {
486 		if(readerIn == null || rtfDoc == null) return;
487 		this.init(TYPE_IMPORT_FULL, rtfDoc, readerIn, this.document, null);
488 		this.setCurrentDestination(RtfDestinationMgr.DESTINATION_NULL);
489 		startDate = new Date();
490 		startTime = System.currentTimeMillis();
491 		this.groupLevel = 0;
492 		try {
493 			this.tokenise();
494 		} catch (RuntimeException e) {
495 			// TODO Auto-generated catch block
496 			e.printStackTrace();
497 		}
498 		catch (Exception e) {
499 			// TODO Auto-generated catch block
500 			e.printStackTrace();
501 		}
502 		endTime = System.currentTimeMillis();
503 		endDate = new Date();
504 	}
505 	/**
506 	 * Imports a complete RTF document into an Element, i.e. Chapter, section, Table Cell, etc.
507 	 *
508 	 * @param elem The Element the document is to be imported into.
509 	 * @param readerIn
510 	 * 		The Reader to read the RTF document from.
511 	 * @param rtfDoc
512 	 * 		The RtfDocument to add the imported document to.
513 	 * @throws IOException On I/O errors.
514 	 * @since 2.1.4
515 	 */
importRtfDocumentIntoElement(Element elem, InputStream readerIn, RtfDocument rtfDoc)516 	public void importRtfDocumentIntoElement(Element elem, InputStream readerIn, RtfDocument rtfDoc) throws IOException {
517 		if(readerIn == null || rtfDoc == null || elem == null) return;
518 		this.init(TYPE_IMPORT_INTO_ELEMENT, rtfDoc, readerIn, this.document, elem);
519 		this.setCurrentDestination(RtfDestinationMgr.DESTINATION_NULL);
520 		startDate = new Date();
521 		startTime = System.currentTimeMillis();
522 		this.groupLevel = 0;
523 		try {
524 			this.tokenise();
525 		} catch (RuntimeException e) {
526 			// TODO Auto-generated catch block
527 			e.printStackTrace();
528 		}
529 		catch (Exception e) {
530 			// TODO Auto-generated catch block
531 			e.printStackTrace();
532 		}
533 		endTime = System.currentTimeMillis();
534 		endDate = new Date();
535 	}
536 	/**
537 	 * Converts an RTF document to an iText document.
538 	 *
539 	 * Usage: Create a parser object and call this method with the input stream and the iText Document object
540 	 *
541 	 * @param readerIn
542 	 * 		The Reader to read the RTF file from.
543 	 * @param doc
544 	 * 		The iText document that the RTF file is to be added to.
545 	 * @throws IOException
546 	 * 		On I/O errors.
547 	 *  @since 2.1.3
548 	 */
convertRtfDocument(InputStream readerIn, Document doc)549 	public void convertRtfDocument(InputStream readerIn, Document doc) throws IOException {
550 		if(readerIn == null || doc == null) return;
551 		this.init(TYPE_CONVERT, null, readerIn, doc, null);
552 		this.setCurrentDestination(RtfDestinationMgr.DESTINATION_DOCUMENT);
553 		startDate = new Date();
554 		startTime = System.currentTimeMillis();
555 		this.groupLevel = 0;
556 		this.tokenise();
557 		endTime = System.currentTimeMillis();
558 		endDate = new Date();
559 	}
560 
561 	/**
562 	 * Imports an RTF fragment.
563 	 *
564 	 * @param readerIn
565 	 * 		The Reader to read the RTF fragment from.
566 	 * @param rtfDoc
567 	 * 		The RTF document to add the RTF fragment to.
568 	 * @param importMappings
569 	 * 		The RtfImportMappings defining font and color mappings for the fragment.
570 	 * @throws IOException
571 	 * 		On I/O errors.
572 	 *   @since 2.1.3
573 	 */
importRtfFragment(InputStream readerIn, RtfDocument rtfDoc, RtfImportMappings importMappings)574 	public void importRtfFragment(InputStream readerIn, RtfDocument rtfDoc, RtfImportMappings importMappings) throws IOException {
575 	//public void importRtfFragment2(Reader readerIn, RtfDocument rtfDoc, RtfImportMappings importMappings) throws IOException {
576 		if(readerIn == null || rtfDoc == null || importMappings==null) return;
577 		this.init(TYPE_IMPORT_FRAGMENT, rtfDoc, readerIn, null, null);
578 		this.handleImportMappings(importMappings);
579 		this.setCurrentDestination(RtfDestinationMgr.DESTINATION_DOCUMENT);
580 		this.groupLevel = 1;
581 		setParserState(RtfParser.PARSER_IN_DOCUMENT);
582 		startDate = new Date();
583 		startTime = System.currentTimeMillis();
584 		this.tokenise();
585 		endTime = System.currentTimeMillis();
586 		endDate = new Date();
587 	}
588 
589     // listener methods
590 
591 	/**
592 	 * Adds a <CODE>EventListener</CODE> to the <CODE>RtfCtrlWordMgr</CODE>.
593 	 *
594 	 * @param listener
595 	 *            the new EventListener.
596 	 * @since 2.1.3
597 	 */
addListener(EventListener listener)598 	public void addListener(EventListener listener) {
599 		listeners.add(listener);
600 	}
601 
602 	/**
603 	 * Removes a <CODE>EventListener</CODE> from the <CODE>RtfCtrlWordMgr</CODE>.
604 	 *
605 	 * @param listener
606 	 *            the EventListener that has to be removed.
607 	 *  @since 2.1.3
608 	 */
removeListener(EventListener listener)609 	public void removeListener(EventListener listener) {
610 		listeners.remove(listener);
611 	}
612 
613 	/**
614 	 * Initialize the parser object values.
615 	 *
616 	 * @param type Type of conversion or import
617 	 * @param rtfDoc The <code>RtfDocument</code>
618 	 * @param readerIn The input stream
	 * @param doc The iText <code>Document</code>
	 * @param elem The iText <code>Element</code> to import into; may be <code>null</code>
620 	 *   @since 2.1.3
621 	 */
init(int type, RtfDocument rtfDoc, InputStream readerIn, Document doc, Element elem)622 	private void init(int type, RtfDocument rtfDoc, InputStream readerIn, Document doc, Element elem) {
623 
624 		init_stats();
625 		// initialize reader to a PushbackReader
626 		this.pbReader = init_Reader(readerIn);
627 
628 		this.conversionType = type;
629 		this.rtfDoc = rtfDoc;
630 		this.document = doc;
631 		this.elem = elem;
632 		this.currentState = new RtfParserState();
633 		this.stackState = new Stack();
634 		this.setParserState(PARSER_STARTSTOP);
635 		this.importMgr = new RtfImportMgr(this.rtfDoc, this.document);
636 
637 		// get destination Mgr
638 		this.destinationMgr = RtfDestinationMgr.getInstance(this);
639 		// set the parser
640 		RtfDestinationMgr.setParser(this);
641 
642 
643 		// DEBUG INFO for timing and memory usage of RtfCtrlWordMgr object
644 		// create multiple new RtfCtrlWordMgr objects to check timing and memory usage
645 //		System.gc();
646 //		long endTime = 0;
647 //		Date endDate = null;
648 //		long endFree = 0;
649 //		DecimalFormat df = new DecimalFormat("#,##0");
650 //		Date startDate = new Date();
651 //		long startTime = System.currentTimeMillis();
652 //		long startFree = Runtime.getRuntime().freeMemory();
653 //		System.out.println("1:");
654 
655 		this.rtfKeywordMgr = new RtfCtrlWordMgr(this, this.pbReader);/////////DO NOT COMMENT OUT THIS LINE ///////////
656 
657 		Object listener;
658 		for (Iterator iterator = listeners.iterator(); iterator.hasNext();) {
659             listener = iterator.next();
660             if(listener instanceof RtfCtrlWordListener) {
661                 this.rtfKeywordMgr.addRtfCtrlWordListener((RtfCtrlWordListener)listener);
662             }
663         }
664 //		endFree = Runtime.getRuntime().freeMemory();
665 //		endTime = System.currentTimeMillis();
666 //		endDate = new Date();
667 //		System.out.println("RtfCtrlWordMgr start date: " + startDate.toLocaleString());
668 //		System.out.println("RtfCtrlWordMgr end date  : " + endDate.toLocaleString());
669 //		System.out.println("  Elapsed time    : " + Long.toString(endTime - startTime) + " milliseconds.");
670 //		System.out.println("Begin Constructor RtfCtrlWordMgr , free mem is " + df.format(startFree / 1024) + "k");
671 //		System.out.println("End Constructor RtfCtrlWordMgr , free mem is " + df.format(endFree / 1024) + "k");
672 //		System.out.println("RtfCtrlWordMgr used approximately " + df.format((startFree - endFree) / 1024) + "k");
673 //
674 //		System.gc();
675 //		System.out.println("2:");
676 //		startDate = new Date();
677 //		startTime = System.currentTimeMillis();
678 //		startFree = Runtime.getRuntime().freeMemory();
679 //		RtfCtrlWordMgr rtfKeywordMgr2 = new RtfCtrlWordMgr(this, this.pbReader);
680 //		endFree = Runtime.getRuntime().freeMemory();
681 //		endTime = System.currentTimeMillis();
682 //		endDate = new Date();
683 //		System.out.println("RtfCtrlWordMgr start date: " + startDate.toLocaleString());
684 //		System.out.println("RtfCtrlWordMgr end date  : " + endDate.toLocaleString());
685 //		System.out.println("  Elapsed time    : " + Long.toString(endTime - startTime) + " milliseconds.");
686 //		System.out.println("Begin Constructor RtfCtrlWordMgr , free mem is " + df.format(startFree / 1024) + "k");
687 //		System.out.println("End Constructor RtfCtrlWordMgr , free mem is " + df.format(endFree / 1024) + "k");
688 //		System.out.println("RtfCtrlWordMgr used approximately " + df.format((startFree - endFree) / 1024) + "k");
689 //
690 //		System.gc();
691 //		System.out.println("3:");
692 //		startDate = new Date();
693 //		startTime = System.currentTimeMillis();
694 //		startFree = Runtime.getRuntime().freeMemory();
695 //		RtfCtrlWordMgr rtfKeywordMgr3 = new RtfCtrlWordMgr(this, this.pbReader);
696 //		endFree = Runtime.getRuntime().freeMemory();
697 //		endTime = System.currentTimeMillis();
698 //		endDate = new Date();
699 //		System.out.println("RtfCtrlWordMgr start date: " + startDate.toLocaleString());
700 //		System.out.println("RtfCtrlWordMgr end date  : " + endDate.toLocaleString());
701 //		System.out.println("  Elapsed time    : " + Long.toString(endTime - startTime) + " milliseconds.");
702 //		System.out.println("Begin Constructor RtfCtrlWordMgr , free mem is " + df.format(startFree / 1024) + "k");
703 //		System.out.println("End Constructor RtfCtrlWordMgr , free mem is " + df.format(endFree / 1024) + "k");
704 //		System.out.println("RtfCtrlWordMgr used approximately " + df.format((startFree - endFree) / 1024) + "k");
705 //
706 //		System.gc();
707 //		System.out.println("4:");
708 //		startDate = new Date();
709 //		startTime = System.currentTimeMillis();
710 //		startFree = Runtime.getRuntime().freeMemory();
711 //		RtfCtrlWordMgr rtfKeywordMgr4 = new RtfCtrlWordMgr(this, this.pbReader);
712 //		endFree = Runtime.getRuntime().freeMemory();
713 //		endTime = System.currentTimeMillis();
714 //		endDate = new Date();
715 //		System.out.println("RtfCtrlWordMgr start date: " + startDate.toLocaleString());
716 //		System.out.println("RtfCtrlWordMgr end date  : " + endDate.toLocaleString());
717 //		System.out.println("  Elapsed time    : " + Long.toString(endTime - startTime) + " milliseconds.");
718 //		System.out.println("Begin Constructor RtfCtrlWordMgr , free mem is " + df.format(startFree / 1024) + "k");
719 //		System.out.println("End Constructor RtfCtrlWordMgr , free mem is " + df.format(endFree / 1024) + "k");
720 //		System.out.println("RtfCtrlWordMgr used approximately " + df.format((startFree - endFree) / 1024) + "k");
721 //
722 //		System.gc();
723 //		System.out.println("5:");
724 //		startDate = new Date();
725 //		startTime = System.currentTimeMillis();
726 //		startFree = Runtime.getRuntime().freeMemory();
727 //		RtfCtrlWordMgr rtfKeywordMgr5 = new RtfCtrlWordMgr(this, this.pbReader);
728 //		endFree = Runtime.getRuntime().freeMemory();
729 //		endTime = System.currentTimeMillis();
730 //		endDate = new Date();
731 //		System.out.println("RtfCtrlWordMgr start date: " + startDate.toLocaleString());
732 //		System.out.println("RtfCtrlWordMgr end date  : " + endDate.toLocaleString());
733 //		System.out.println("  Elapsed time    : " + Long.toString(endTime - startTime) + " milliseconds.");
734 //		System.out.println("Begin Constructor RtfCtrlWordMgr , free mem is " + df.format(startFree / 1024) + "k");
735 //		System.out.println("End Constructor RtfCtrlWordMgr , free mem is " + df.format(endFree / 1024) + "k");
736 //		System.out.println("RtfCtrlWordMgr used approximately " + df.format((startFree - endFree) / 1024) + "k");
737 //		System.gc();
738 //		System.out.println("At ed:");
739 //		startDate = new Date();
740 //		startTime = System.currentTimeMillis();
741 //		startFree = Runtime.getRuntime().freeMemory();
742 //		//RtfCtrlWordMgr rtfKeywordMgr6 = new RtfCtrlWordMgr(this, this.pbReader);
743 //		endFree = Runtime.getRuntime().freeMemory();
744 //		endTime = System.currentTimeMillis();
745 //		endDate = new Date();
746 //		System.out.println("RtfCtrlWordMgr start date: " + startDate.toLocaleString());
747 //		System.out.println("RtfCtrlWordMgr end date  : " + endDate.toLocaleString());
748 //		System.out.println("  Elapsed time    : " + Long.toString(endTime - startTime) + " milliseconds.");
749 //		System.out.println("Begin Constructor RtfCtrlWordMgr , free mem is " + df.format(startFree / 1024) + "k");
750 //		System.out.println("End Constructor RtfCtrlWordMgr , free mem is " + df.format(endFree / 1024) + "k");
751 //		System.out.println("RtfCtrlWordMgr used approximately " + df.format((startFree - endFree) / 1024) + "k");
752 	}
753 	/**
754 	 * Initialize the statistics values.
755 	 * @since 2.1.3
756 	 */
init_stats()757 	protected void init_stats() {
758 		byteCount = 0;
759 		ctrlWordCount = 0;
760 		openGroupCount = 0;
761 		closeGroupCount = 0;
762 		characterCount = 0;
763 		ctrlWordHandledCount = 0;
764 		ctrlWordNotHandledCount = 0;
765 		ctrlWordSkippedCount = 0;
766 		groupSkippedCount = 0;
767 		startTime = 0;
768 		endTime = 0;
769 		startDate = null;
770 		endDate = null;
771 	}
772 
	/**
	 * Wraps the input stream for use by the parser.
	 * The stream is wrapped in a BufferedInputStream if it is not one already,
	 * and the result is wrapped in a PushbackInputStream so that read-ahead
	 * bytes can be put back.
	 *
	 * @param readerIn
	 * 		The InputStream for the input file.
	 * @return
	 * 		PushbackInputStream object
	 * @since 2.1.3
	 */
init_Reader(InputStream readerIn)784 	private PushbackInputStream init_Reader(InputStream readerIn) {
785 //		Reader newReader = readerIn;
786 //		// Initializing the reader as a BufferedReader
787 //		// cut test processing time by approximately 50%
788 //		// default uses 8192 character buffer
789 //		if(!(newReader instanceof BufferedReader)) {
790 //			newReader = new BufferedReader(newReader);	// Since JDK1.1
791 //		}
792 //		// Initializing the reader as a PushbackReader is
793 //		// a requirement of the parser to be able to put back
794 //		// read ahead characters.
795 //		if(!(newReader instanceof PushbackReader)) {
796 //			newReader = new PushbackReader(newReader);	// Since JDK1.1
797 //		}
798 
799 		if(!(readerIn instanceof BufferedInputStream)) {
800 			readerIn = new BufferedInputStream(readerIn);
801 		}
802 		if(!(readerIn instanceof PushbackInputStream)) {
803 			readerIn = new PushbackInputStream(readerIn);
804 		}
805 		// return the proper reader object to the parser setup
806 		return  (PushbackInputStream)readerIn;
807 	}
808 
809 	/**
810 	 * Imports the mappings defined in the RtfImportMappings into the
811 	 * RtfImportHeader of this RtfParser2.
812 	 *
813 	 * @param importMappings
814 	 * 		The RtfImportMappings to import.
815 	 * @since 2.1.3
816 	 */
handleImportMappings(RtfImportMappings importMappings)817 	private void handleImportMappings(RtfImportMappings importMappings) {
818 		Iterator it = importMappings.getFontMappings().keySet().iterator();
819 		while(it.hasNext()) {
820 			String fontNr = (String) it.next();
821 			this.importMgr.importFont(fontNr, (String) importMappings.getFontMappings().get(fontNr));
822 		}
823 		it = importMappings.getColorMappings().keySet().iterator();
824 		while(it.hasNext()) {
825 			String colorNr = (String) it.next();
826 			this.importMgr.importColor(colorNr, (Color) importMappings.getColorMappings().get(colorNr));
827 		}
828 		it = importMappings.getListMappings().keySet().iterator();
829 		while(it.hasNext()) {
830 			String listNr = (String) it.next();
831 			this.importMgr.importList(listNr, (String)importMappings.getListMappings().get(listNr));
832 		}
833 		it = importMappings.getStylesheetListMappings().keySet().iterator();
834 		while(it.hasNext()) {
835 			String stylesheetListNr = (String) it.next();
836 			this.importMgr.importStylesheetList(stylesheetListNr, (List) importMappings.getStylesheetListMappings().get(stylesheetListNr));
837 		}
838 
839 	}
840 
841 
842 	/* *****************************************
843 	 *   DOCUMENT CONTROL METHODS
844 	 *
845 	 *   Handles -
846 	 *   handleOpenGroup: 	Open groups		- '{'
847 	 *   handleCloseGroup: 	Close groups	- '}'
848 	 *   handleCtrlWord: 	Ctrl Words		- '\...'
849 	 *   handleCharacter: 	Characters		- Plain Text, etc.
850 	 *
851 	 */
852 
853 	/**
854 	 * Handles open group tokens. ({)
855 	 *
856 	 * @return errOK if ok, other if an error occurred.
857 	 * @since 2.1.3
858 	 */
handleOpenGroup()859 	public int handleOpenGroup() {
860 		int result = errOK;
861 		this.openGroupCount++;	// stats
862 		this.groupLevel++;		// current group level in tokeniser
863 		this.docGroupLevel++;	// current group level in document
864 		if (this.getTokeniserState() == TOKENISER_SKIP_GROUP) {
865 			this.groupSkippedCount++;
866 		}
867 
868 		RtfDestination dest = this.getCurrentDestination();
869 		boolean handled = false;
870 
871 		if(dest != null) {
872 			if(debugParser) {
873 				RtfParser.outputDebug(this.rtfDoc, groupLevel, "DEBUG: before dest.handleOpeningSubGroup()");
874 				RtfParser.outputDebug(this.rtfDoc, groupLevel, "DEBUG: destination=" + dest.toString());
875 			}
876 			handled = dest.handleOpeningSubGroup();
877 			if(debugParser) {
878 				RtfParser.outputDebug(this.rtfDoc, groupLevel, "DEBUG: after dest.handleOpeningSubGroup()");
879 			}
880 		}
881 
882 		this.stackState.push(this.currentState);
883 		this.currentState = new RtfParserState(this.currentState);
884 		// do not set this true until after the state is pushed
885 		// otherwise it inserts a { where one does not belong.
886 		this.currentState.newGroup = true;
887 		dest = this.getCurrentDestination();
888 
889 		if(debugParser) {
890 			RtfParser.outputDebug(this.rtfDoc, groupLevel, "DEBUG: handleOpenGroup()");
891 			if(this.lastCtrlWordParam != null)
892 				RtfParser.outputDebug(this.rtfDoc, groupLevel, "DEBUG: LastCtrlWord=" + this.lastCtrlWordParam.ctrlWord);
893 			RtfParser.outputDebug(this.rtfDoc, groupLevel, "DEBUG: grouplevel=" + Integer.toString(groupLevel));
894 			RtfParser.outputDebug(this.rtfDoc, groupLevel, "DEBUG: destination=" + dest.toString());
895 		}
896 
897 		if(dest != null) {
898 			handled = dest.handleOpenGroup();
899 		}
900 
901 		if(debugParser) {
902 			RtfParser.outputDebug(this.rtfDoc, groupLevel, "DEBUG: after dest.handleOpenGroup(); handled=" + Boolean.toString(handled));
903 		}
904 
905 		return result;
906 	}
outputDebug(Object doc, int groupLevel, String str)907 	public static void outputDebug(Object doc, int groupLevel, String str) {
908 		System.out.println(str);
909 		if(doc == null) return;
910 		if(groupLevel<0) groupLevel = 0;
911 		char[] a; Arrays.fill(a= new char[groupLevel*2], ' ');
912 		String spaces= new String(a);
913 		if(doc instanceof RtfDocument) {
914 			((RtfDocument)doc).add(new RtfDirectContent("\n" + spaces + str));
915 		}
916 		else
917 			if(doc instanceof Document) {
918 				try {
919 					((Document)doc).add(new RtfDirectContent("\n" + spaces + str));
920 				} catch (DocumentException e) {
921 					// TODO Auto-generated catch block
922 					e.printStackTrace();
923 				}
924 			}
925 	}
926 	/**
927 	 * Handles close group tokens. (})
928 	 *
929 	 * @return errOK if ok, other if an error occurred.
930 	 * @since 2.1.3
931 	 */
handleCloseGroup()932 	public int handleCloseGroup() {
933 		int result = errOK;
934 		this.closeGroupCount++;	// stats
935 
936 		if (this.getTokeniserState() != TOKENISER_SKIP_GROUP) {
937 			if(debugParser) {
938 				RtfParser.outputDebug(this.rtfDoc, groupLevel, "DEBUG: handleCloseGroup()");
939 				if(this.lastCtrlWordParam != null)
940 					RtfParser.outputDebug(this.rtfDoc, groupLevel, "DEBUG: LastCtrlWord=" + this.lastCtrlWordParam.ctrlWord);
941 				RtfParser.outputDebug(this.rtfDoc, groupLevel, "DEBUG: grouplevel=" + Integer.toString(groupLevel));
942 				RtfParser.outputDebug(this.rtfDoc, groupLevel, "DEBUG: destination=" + this.getCurrentDestination().toString());
943 				RtfParser.outputDebug(this.rtfDoc, groupLevel, "");
944 			}
945 			RtfDestination dest = this.getCurrentDestination();
946 			boolean handled = false;
947 
948 			if(dest != null) {
949 				handled = dest.handleCloseGroup();
950 			}
951 			if(debugParser) {
952 				RtfParser.outputDebug(this.rtfDoc, groupLevel, "DEBUG: After dest.handleCloseGroup(); handled = " + Boolean.toString(handled));
953 				RtfParser.outputDebug(this.rtfDoc, groupLevel, "");
954 			}
955 		}
956 
957 		if(this.stackState.size() >0 ) {
958 			this.currentState = (RtfParserState)this.stackState.pop();
959 		} else {
960 			result = errStackUnderflow;
961 		}
962 
963 		this.docGroupLevel--;
964 		this.groupLevel--;
965 
966 		if (this.getTokeniserState() == TOKENISER_SKIP_GROUP && this.groupLevel < this.skipGroupLevel) {
967 			this.setTokeniserState(TOKENISER_NORMAL);
968 		}
969 
970 		return result;
971 	}
972 
973 
974 	/**
975 	 * Handles control word tokens. Depending on the current
976 	 * state a control word can lead to a state change. When
977 	 * parsing the actual document contents, certain tabled
978 	 * values are remapped. i.e. colors, fonts, styles, etc.
979 	 *
980 	 * @param ctrlWordData The control word to handle.
981 	 * @return errOK if ok, other if an error occurred.
982 	 * @since 2.1.3
983 	 */
handleCtrlWord(RtfCtrlWordData ctrlWordData)984 	public int handleCtrlWord(RtfCtrlWordData ctrlWordData) {
985 		int result = errOK;
986 		this.ctrlWordCount++; // stats
987 
988 		if(debugParser) {
989 			RtfParser.outputDebug(this.rtfDoc, groupLevel, "DEBUG: handleCtrlWord=" + ctrlWordData.ctrlWord + " param=[" + ctrlWordData.param + "]");
990 		}
991 
992 		if (this.getTokeniserState() == TOKENISER_SKIP_GROUP) {
993 			this.ctrlWordSkippedCount++;
994 			if(debugParser) {
995 				RtfParser.outputDebug(this.rtfDoc, groupLevel, "DEBUG: SKIPPED");
996 			}
997 			return result;
998 		}
999 
1000 		//		RtfDestination dest = (RtfDestination)this.getCurrentDestination();
1001 //		boolean handled = false;
1002 //		if(dest != null) {
1003 //			handled = dest.handleControlWord(ctrlWordData);
1004 //		}
1005 
1006 		result = this.rtfKeywordMgr.handleKeyword(ctrlWordData, this.groupLevel);
1007 
1008 		if( result == errOK){
1009 			this.ctrlWordHandledCount++;
1010 		} else {
1011 			this.ctrlWordNotHandledCount++;
1012 			result = errOK;	// hack for now.
1013 		}
1014 
1015 		return result;
1016 	}
1017 
1018 	/**
1019 	 * Handles text tokens. These are either handed on to the
1020 	 * appropriate destination handler.
1021 	 *
1022 	 * @param nextChar
1023 	 * 		The text token to handle.
1024 	 * @return errOK if ok, other if an error occurred.
1025 	 * @since 2.1.3
1026 	 */
1027 //	public int handleCharacter(char[] nextChar) {
handleCharacter(int nextChar)1028 	public int handleCharacter(int nextChar) {
1029 		this.characterCount++;	// stats
1030 
1031 		if (this.getTokeniserState() == TOKENISER_SKIP_GROUP) {
1032 			return errOK;
1033 		}
1034 
1035 		boolean handled = false;
1036 
1037 		RtfDestination dest = this.getCurrentDestination();
1038 		if(dest != null) {
1039 			handled = dest.handleCharacter(nextChar);
1040 		}
1041 
1042 		return errOK;
1043 	}
1044 
1045 	/**
1046 	 * Get the state of the parser.
1047 	 *
1048 	 * @return
1049 	 * 		The current RtfParserState state object.
1050 	 * @since 2.1.3
1051 	 */
getState()1052 	public RtfParserState getState(){
1053 		return this.currentState;
1054 	}
1055 
1056 	/**
1057 	 * Get the current state of the parser.
1058 	 *
1059 	 * @return
1060 	 * 		The current state of the parser.
1061 	 * @since 2.1.3
1062 	 */
getParserState()1063 	public int getParserState(){
1064 		return this.currentState.parserState;
1065 	}
1066 
1067 	/**
1068 	 * Set the state value of the parser.
1069 	 *
1070 	 * @param newState
1071 	 * 		The new state for the parser
1072 	 * @return
1073 	 * 		The state of the parser.
1074 	 * @since 2.1.3
1075 	 */
setParserState(int newState)1076 	public int setParserState(int newState){
1077 		this.currentState.parserState = newState;
1078 		return this.currentState.parserState;
1079 	}
1080 
1081 	/**
1082 	 * Get the conversion type.
1083 	 *
1084 	 * @return
1085 	 * 		The type of the conversion. Import or Convert.
1086 	 * @since 2.1.3
1087 	 */
getConversionType()1088 	public int getConversionType() {
1089 		return this.conversionType;
1090 	}
1091 
1092 	/**
1093 	 * Get the RTF Document object.
1094 	 * @return
1095 	 * 		Returns the object rtfDoc.
1096 	 * @since 2.1.3
1097 	 */
getRtfDocument()1098 	public RtfDocument getRtfDocument() {
1099 		return this.rtfDoc;
1100 	}
1101 
1102 	/**
1103 	 * Get the Document object.
1104 	 * @return
1105 	 * 		Returns the object rtfDoc.
1106 	 * @since 2.1.3
1107 	 */
getDocument()1108 	public Document getDocument() {
1109 		return this.document;
1110 	}
1111 
1112 	/**
1113 	 * Get the RtfImportHeader object.
1114 	 * @return
1115 	 * 		Returns the object importHeader.
1116 	 * @since 2.1.3
1117 	 */
getImportManager()1118 	public RtfImportMgr getImportManager() {
1119 		return importMgr;
1120 	}
1121 
1122 
1123 	/////////////////////////////////////////////////////////////
1124 	// accessors for destinations
1125 	/**
1126 	 * Set the current destination object for the current state.
1127 	 * @param destination The destination value to set.
1128 	 * @since 2.1.3
1129 	 */
setCurrentDestination(String destination)1130 	public boolean setCurrentDestination(String destination) {
1131 			RtfDestination dest = RtfDestinationMgr.getDestination(destination);
1132 			if(dest != null) {
1133 				this.currentState.destination = dest;
1134 				return false;
1135 			} else {
1136 				this.setTokeniserStateSkipGroup();
1137 				return false;
1138 			}
1139 	}
1140 	/**
1141 	 * Get the current destination object.
1142 	 *
1143 	 * @return The current state destination
1144 	 * @since 2.1.3
1145 	 */
getCurrentDestination()1146 	public RtfDestination getCurrentDestination() {
1147 		return this.currentState.destination;
1148 	}
1149 	/**
1150 	 * Get a destination from the map
1151 	 *
1152 	 * @param destination The string destination.
1153 	 * @return The destination object from the map
1154 	 * @since 2.1.3
1155 	 */
getDestination(String destination)1156 	public RtfDestination getDestination(String destination) {
1157 		return RtfDestinationMgr.getDestination(destination);
1158 	}
1159 
1160 	/**
1161 	 * Helper method to determine if this is a new group.
1162 	 *
1163 	 * @return true if this is a new group, otherwise it returns false.
1164 	 * @since 2.1.3
1165 	 */
isNewGroup()1166 	public boolean isNewGroup() {
1167 		return this.currentState.newGroup;
1168 	}
1169 	/**
1170 	 * Helper method to set the new group flag
1171 	 * @param value The boolean value to set the flag
1172 	 * @return The value of newGroup
1173 	 * @since 2.1.3
1174 	 */
setNewGroup(boolean value)1175 	public boolean setNewGroup(boolean value) {
1176 		this.currentState.newGroup = value;
1177 		return this.currentState.newGroup;
1178 	}
1179 
1180 	/* ************
1181 	 *  TOKENISER *
1182 	 **************/
1183 
1184 	/**
1185 	 * Read through the input file and parse the data stream into tokens.
1186 	 *
1187 	 * @throws IOException on IO error.
1188 	 * @since 2.1.3
1189 	 */
tokenise()1190 	public void tokenise() throws IOException {
1191 		int errorCode = errOK;	// error code
1192 		int nextChar = 0;
1193 //		char[] nextChar = new char[1]; // input variable
1194 //		nextChar[0]=0;	// set to 0
1195 		this.setTokeniserState(TOKENISER_NORMAL);	// set initial tokeniser state
1196 
1197 
1198 //		while(this.pbReader.read(nextChar) != -1) {
1199 		while((nextChar = this.pbReader.read()) != -1) {
1200 			this.byteCount++;
1201 
1202 	        if (this.getTokeniserState() == TOKENISER_BINARY)                      // if we're parsing binary data, handle it directly
1203 	        {
1204 	            if ((errorCode = parseChar(nextChar)) != errOK)
1205 	                return;
1206 	        }  else {
1207 //				switch(nextChar[0]) {
1208 				switch(nextChar) {
1209 					case '{':	// scope delimiter - Open
1210 						this.handleOpenGroup();
1211 						break;
1212 					case '}':  // scope delimiter - Close
1213 						this.handleCloseGroup();
1214 						break;
1215 					case 0x0a:	// noise character
1216 					case 0x0d:	// noise character
1217 //						if(this.isImport()) {
1218 //							this.rtfDoc.add(new RtfDirectContent(new String(nextChar)));
1219 //						}
1220 						break;
1221 					case '\\':	// Control word start delimiter
1222 							if(parseCtrlWord(pbReader) != errOK) {
1223 							// TODO: Indicate some type of error
1224 							return;
1225 						}
1226 						break;
1227 					default:
1228 						if(groupLevel == 0) { // BOMs
1229 							break;
1230 						}
1231 						if(this.getTokeniserState() == TOKENISER_HEX) {
1232 							StringBuffer hexChars = new StringBuffer();
1233 							hexChars.append(nextChar);
1234 //							if(pbReader.read(nextChar) == -1) {
1235 							if((nextChar = pbReader.read()) == -1) {
1236 								return;
1237 							}
1238 							this.byteCount++;
1239 							hexChars.append(nextChar);
1240 	                    	try {
1241 //								nextChar[0]=(char)Integer.parseInt(hexChars.toString(), 16);
1242 								nextChar=Integer.parseInt(hexChars.toString(), 16);
1243 							} catch (NumberFormatException e) {
1244 								return;
1245 							}
1246 		                    this.setTokeniserState(TOKENISER_NORMAL);
1247 						}
1248 						if ((errorCode = parseChar(nextChar)) != errOK) {
1249                         	return; // some error occurred. we should send a
1250 									// real error
1251 						}
1252 						break;
1253 				}	// switch(nextChar[0])
1254 			}	// end if (this.getTokeniserState() == TOKENISER_BINARY)
1255 
1256 //	        if(groupLevel < 1 && this.isImportFragment()) return; //return errOK;
1257 //	        if(groupLevel < 0 && this.isImportFull()) return; //return errStackUnderflow;
1258 //	        if(groupLevel < 0 && this.isConvert()) return; //return errStackUnderflow;
1259 
1260 		}// end while(reader.read(nextChar) != -1)
1261 		RtfDestination dest = this.getCurrentDestination();
1262 		if(dest != null) {
1263 			dest.closeDestination();
1264 		}
1265 	}
1266 
1267 	/**
1268 	 * Process the character and send it to the current destination.
1269 	 * @param nextChar
1270 	 * 		The character to process
1271 	 * @return
1272 	 * 		Returns an error code or errOK if no error.
1273 	 * @since 2.1.3
1274 	 */
parseChar(int nextChar)1275 	private int parseChar(int nextChar) {
1276 		// figure out where to put the character
1277 		// needs to handle group levels for parsing
1278 		// examples
1279 		/*
1280 		 * {\f3\froman\fcharset2\fprq2{\*\panose 05050102010706020507}Symbol;}
1281 		 * {\f7\fswiss\fcharset0\fprq2{\*\panose 020b0604020202030204}Helv{\*\falt Arial};} <- special case!!!!
1282 		 * {\f5\froman\fcharset0 Tahoma;}
1283 		 * {\f6\froman\fcharset0 Arial Black;}
1284 		 * {\info(\author name}{\company company name}}
1285 		 * ... document text ...
1286 		 */
1287 	    if (this.getTokeniserState() == TOKENISER_BINARY && --binByteCount <= 0)
1288 	    	this.setTokeniserStateNormal();
1289 	    if (this.getTokeniserState() == TOKENISER_SKIP_BYTES && --binSkipByteCount <= 0)
1290 	    	this.setTokeniserStateNormal();
1291 	    return this.handleCharacter(nextChar);
1292 	}
1293 
1294 	/**
1295 	 * Parses a keyword and it's parameter if one exists
1296 	 * @param reader
1297 	 * 		This is a pushback reader for file input.
1298 	 * @return
1299 	 * 		Returns an error code or errOK if no error.
1300 	 * @throws IOException
1301 	 * 		Catch any file read problem.
1302 	 * @since 2.1.3
1303 	 */
parseCtrlWord(PushbackInputStream reader)1304 	private int parseCtrlWord(PushbackInputStream reader) throws IOException {
1305 		int nextChar = 0;
1306 		int result = errOK;
1307 
1308 		if((nextChar = reader.read()) == -1) {
1309 			return errEndOfFile;
1310 		}
1311 		this.byteCount++;
1312 
1313 		StringBuffer parsedCtrlWord = new StringBuffer();
1314 		StringBuffer parsedParam= new StringBuffer();
1315 		RtfCtrlWordData ctrlWordParam = new RtfCtrlWordData();
1316 
1317 		if(!Character.isLetterOrDigit((char)nextChar)) {
1318 			parsedCtrlWord.append((char)nextChar);
1319 			ctrlWordParam.ctrlWord = parsedCtrlWord.toString();
1320 			result =  this.handleCtrlWord(ctrlWordParam);
1321 			lastCtrlWordParam = ctrlWordParam;
1322 			return result;
1323 		}
1324 
1325 		do {
1326 			parsedCtrlWord.append((char)nextChar);
1327 			//TODO: catch EOF
1328 			nextChar = reader.read();
1329 			this.byteCount++;
1330 		} while  (Character.isLetter((char)nextChar));
1331 
1332 		ctrlWordParam.ctrlWord = parsedCtrlWord.toString();
1333 
1334 		if(nextChar == '-') {
1335 			ctrlWordParam.isNeg = true;
1336 			if((nextChar = reader.read()) == -1) {
1337 					return errEndOfFile;
1338 			}
1339 			this.byteCount++;
1340 		}
1341 
1342 
1343 		if(Character.isDigit((char)nextChar)) {
1344 			ctrlWordParam.hasParam = true;
1345 			do {
1346 				parsedParam.append((char)nextChar);
1347 				//TODO: catch EOF
1348 				nextChar = reader.read();
1349 				this.byteCount++;
1350 				} while  (Character.isDigit((char)nextChar));
1351 
1352 			ctrlWordParam.param = parsedParam.toString();
1353 		}
1354 
1355 		// push this character back into the stream
1356 		if(nextChar != ' ') {
1357 			reader.unread(nextChar);
1358 		}
1359 
1360 	    if(debugParser) {
1361 	//	    // debug: insrsid6254399
1362 	//	    if(ctrlWordParam.ctrlWord.equals("proptype") && ctrlWordParam.param.equals("30")) {
1363 	//	    	System.out.print("Debug value found\n");
1364 	//	    }
1365 //		    if(ctrlWordParam.ctrlWord.equals("cf") ) {
1366 //		    	System.out.print("Debug value found\n");
1367 //		    }
1368 	    }
1369 
1370 		result = this.handleCtrlWord(ctrlWordParam);
1371 		lastCtrlWordParam = ctrlWordParam;
1372 		return result;
1373 
1374 	}
1375 
1376 	/**
1377 	 * Set the current state of the tokeniser.
1378 	 * @param value The new state of the tokeniser.
1379 	 * @return The state of the tokeniser.
1380 	 * @since 2.1.3
1381 	 */
setTokeniserState(int value)1382 	public int setTokeniserState(int value) {
1383 		this.currentState.tokeniserState = value;
1384 		return this.currentState.tokeniserState;
1385 	}
1386 
1387 	/**
1388 	 * Get the current state of the tokeniser.
1389 	 * @return The current state of the tokeniser.
1390 	 * @since 2.1.3
1391 	 */
getTokeniserState()1392 	public int getTokeniserState() {
1393 		return this.currentState.tokeniserState;
1394 	}
1395 
1396 	/**
1397 	 * Gets the current group level
1398 	 *
1399 	 * @return
1400 	 * 		The current group level value.
1401 	 * @since 2.1.3
1402 	 */
getLevel()1403 	public int getLevel() {
1404 		return this.groupLevel;
1405 	}
1406 
1407 
1408 	/**
1409 	 * Set the tokeniser state to skip to the end of the group.
1410 	 * Sets the state to TOKENISER_SKIP_GROUP and skipGroupLevel to the current group level.
1411 	 * @since 2.1.3
1412 	 */
setTokeniserStateNormal()1413 	public void setTokeniserStateNormal() {
1414 		this.setTokeniserState(TOKENISER_NORMAL);
1415 	}
1416 
1417 	/**
1418 	 * Set the tokeniser state to skip to the end of the group.
1419 	 * Sets the state to TOKENISER_SKIP_GROUP and skipGroupLevel to the current group level.
1420 	 * @since 2.1.3
1421 	 */
setTokeniserStateSkipGroup()1422 	public void setTokeniserStateSkipGroup() {
1423 		this.setTokeniserState(TOKENISER_SKIP_GROUP);
1424 		this.skipGroupLevel = this.groupLevel;
1425 	}
1426 
1427 	/**
1428 	 * Sets the number of bytes to skip and the state of the tokeniser.
1429 	 *
1430 	 * @param numberOfBytesToSkip
1431 	 * 			The numbere of bytes to skip in the file.
1432 	 * @since 2.1.3
1433 	 */
setTokeniserSkipBytes(long numberOfBytesToSkip)1434 	public void setTokeniserSkipBytes(long numberOfBytesToSkip) {
1435 		this.setTokeniserState(TOKENISER_SKIP_BYTES);
1436 		this.binSkipByteCount = numberOfBytesToSkip;
1437 	}
1438 
1439 	/**
1440 	 * Sets the number of binary bytes.
1441 	 *
1442 	 * @param binaryCount
1443 	 * 			The number of binary bytes.
1444 	 * @since 2.1.3
1445 	 */
setTokeniserStateBinary(int binaryCount)1446 	public void setTokeniserStateBinary(int binaryCount) {
1447 		this.setTokeniserState(TOKENISER_BINARY);
1448 		this.binByteCount = binaryCount;
1449 	}
1450 	/**
1451 	 * Sets the number of binary bytes.
1452 	 *
1453 	 * @param binaryCount
1454 	 * 			The number of binary bytes.
1455 	 * @since 2.1.3
1456 	 */
setTokeniserStateBinary(long binaryCount)1457 	public void setTokeniserStateBinary(long binaryCount) {
1458 		this.setTokeniserState(TOKENISER_BINARY);
1459 		this.binByteCount = binaryCount;
1460 	}
1461 	/**
1462 	 * Helper method to determin if conversion is TYPE_CONVERT
1463 	 * @return true if TYPE_CONVERT, otherwise false
1464 	 * @see com.lowagie.text.rtf.parser.RtfParser#TYPE_CONVERT
1465 	 * @since 2.1.3
1466 	 */
isConvert()1467 	public boolean isConvert() {
1468 		return (this.getConversionType() == RtfParser.TYPE_CONVERT);
1469 	}
1470 
1471 	/**
1472 	 * Helper method to determin if conversion is TYPE_IMPORT_FULL or TYPE_IMPORT_FRAGMENT
1473 	 * @return true if TYPE_CONVERT, otherwise false
1474 	 * @see com.lowagie.text.rtf.parser.RtfParser#TYPE_IMPORT_FULL
1475 	 * @see com.lowagie.text.rtf.parser.RtfParser#TYPE_IMPORT_FRAGMENT
1476 	 * @since 2.1.3
1477 	 */
isImport()1478 	public boolean isImport() {
1479 		return (isImportFull() || this.isImportFragment());
1480 	}
1481 	/**
1482 	 * Helper method to determin if conversion is TYPE_IMPORT_FULL
1483 	 * @return true if TYPE_CONVERT, otherwise false
1484 	 * @see com.lowagie.text.rtf.parser.RtfParser#TYPE_IMPORT_FULL
1485 	 * @since 2.1.3
1486 	 */
isImportFull()1487 	public boolean isImportFull() {
1488 		return (this.getConversionType() == RtfParser.TYPE_IMPORT_FULL);
1489 	}
1490 	/**
1491 	 * Helper method to determin if conversion is TYPE_IMPORT_FRAGMENT
1492 	 * @return true if TYPE_CONVERT, otherwise false
1493 	 * @see com.lowagie.text.rtf.parser.RtfParser#TYPE_IMPORT_FRAGMENT
1494 	 * @since 2.1.3
1495 	 */
isImportFragment()1496 	public boolean isImportFragment() {
1497 		return (this.getConversionType() == RtfParser.TYPE_IMPORT_FRAGMENT);
1498 	}
1499 	/**
1500 	 * Helper method to indicate if this control word was a \* control word.
1501 	 * @return true if it was a \* control word, otherwise false
1502 	 * @since 2.1.3
1503 	 */
getExtendedDestination()1504 	public boolean getExtendedDestination() {
1505 		return this.currentState.isExtendedDestination;
1506 	}
1507 	/**
1508 	 * Helper method to set the extended control word flag.
1509 	 * @param value Boolean to set the value to.
1510 	 * @return isExtendedDestination.
1511 	 * @since 2.1.3
1512 	 */
setExtendedDestination(boolean value)1513 	public boolean setExtendedDestination(boolean value) {
1514 		this.currentState.isExtendedDestination = value;
1515 		return this.currentState.isExtendedDestination;
1516 	}
1517 
1518 	/**
1519 	 * Get the logfile name.
1520 	 *
1521 	 * @return the logFile
1522 	 * @since 2.1.3
1523 	 */
getLogFile()1524 	public String getLogFile() {
1525 		return logFile;
1526 	}
1527 
1528 	/**
1529 	 * Set the logFile name
1530 	 *
1531 	 * @param logFile the logFile to set
1532 	 * @since 2.1.3
1533 	 */
setLogFile(String logFile)1534 	public void setLogFile(String logFile) {
1535 		this.logFile = logFile;
1536 	}
1537 	/**
1538 	 * Set the logFile name
1539 	 *
1540 	 * @param logFile the logFile to set
1541 	 * @since 2.1.3
1542 	 */
setLogFile(String logFile, boolean logAppend)1543 	public void setLogFile(String logFile, boolean logAppend) {
1544 		this.logFile = logFile;
1545 		this.setLogAppend(logAppend);
1546 	}
1547 
1548 	/**
1549 	 * Get flag indicating if logging is on or off.
1550 	 *
1551 	 * @return the logging
1552 	 * @since 2.1.3
1553 	 */
isLogging()1554 	public boolean isLogging() {
1555 		return logging;
1556 	}
1557 
1558 	/**
1559 	 * Set flag indicating if logging is on or off
1560 	 * @param logging <code>true</code> to turn on logging, <code>false</code> to turn off logging.
1561 	 * @since 2.1.3
1562 	 */
setLogging(boolean logging)1563 	public void setLogging(boolean logging) {
1564 		this.logging = logging;
1565 	}
1566 
1567 	/**
1568 	 * @return the logAppend
1569 	 * @since 2.1.3
1570 	 */
isLogAppend()1571 	public boolean isLogAppend() {
1572 		return logAppend;
1573 	}
1574 
1575 	/**
1576 	 * @param logAppend the logAppend to set
1577 	 * @since 2.1.3
1578 	 */
setLogAppend(boolean logAppend)1579 	public void setLogAppend(boolean logAppend) {
1580 		this.logAppend = logAppend;
1581 	}
1582 
1583 /*
1584  *	Statistics
1585  *
1586  	public void printStats(PrintStream out) {
1587 		if(out == null) return;
1588 
1589 		out.println("");
1590 		out.println("Parser statistics:");
1591 		out.println("Process start date: " + startDate.toLocaleString());
1592 		out.println("Process end date  : " + endDate.toLocaleString());
1593 		out.println("  Elapsed time    : " + Long.toString(endTime - startTime) + " milliseconds.");
1594 		out.println("Total bytes read  : " + Long.toString(byteCount));
1595 		out.println("Open group count  : " + Long.toString(openGroupCount));
1596 		out.print("Close group count : " + Long.toString(closeGroupCount));
1597 		out.println(" (Groups Skipped): " + Long.toString(groupSkippedCount));
1598 		out.print("Control word count: " + Long.toString(ctrlWordCount));
1599 		out.print(" - Handled: " + Long.toString(ctrlWordHandledCount));
1600 		out.print(" Not Handled: " + Long.toString(ctrlWordNotHandledCount));
1601 		out.println(" Skipped: " + Long.toString(ctrlWordSkippedCount));
1602 		out.println("Plain text char count: " + Long.toString(characterCount));
1603 	}*/
1604 }
1605