1 /* 2 * $Id$ 3 * 4 * Copyright 2007 by Howard Shank (hgshank@yahoo.com) 5 * 6 * The contents of this file are subject to the Mozilla Public License Version 1.1 7 * (the "License"); you may not use this file except in compliance with the License. 8 * You may obtain a copy of the License at http://www.mozilla.org/MPL/ 9 * 10 * Software distributed under the License is distributed on an "AS IS" basis, 11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 * for the specific language governing rights and limitations under the License. 13 * 14 * The Original Code is 'iText, a free JAVA-PDF library'. 15 * 16 * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by 17 * the Initial Developer are Copyright (C) 1999-2006 by Bruno Lowagie. 18 * All Rights Reserved. 19 * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer 20 * are Copyright (C) 2000-2006 by Paulo Soares. All Rights Reserved. 21 * 22 * Contributor(s): all the names of the contributors are added in the source code 23 * where applicable. 24 * 25 * Alternatively, the contents of this file may be used under the terms of the 26 * LGPL license (the ?GNU LIBRARY GENERAL PUBLIC LICENSE?), in which case the 27 * provisions of LGPL are applicable instead of those above. If you wish to 28 * allow use of your version of this file only under the terms of the LGPL 29 * License and not to allow others to use your version of this file under 30 * the MPL, indicate your decision by deleting the provisions above and 31 * replace them with the notice and other provisions required by the LGPL. 32 * If you do not delete the provisions above, a recipient may use your version 33 * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE. 34 * 35 * This library is free software; you can redistribute it and/or modify it 36 * under the terms of the MPL as stated above or under the terms of the GNU 37 * Library General Public License as published by the Free Software Foundation; 38 * either version 2 of the License, or any later version. 39 * 40 * This library is distributed in the hope that it will be useful, but WITHOUT 41 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 42 * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more 43 * details. 44 * 45 * If you didn't download this code from the following link, you should check if 46 * you aren't using an obsolete version: 47 * http://www.lowagie.com/iText/ 48 */ 49 package com.lowagie.text.rtf.parser; 50 51 import java.awt.Color; 52 import java.io.BufferedInputStream; 53 import java.io.IOException; 54 import java.io.InputStream; 55 import java.io.PushbackInputStream; 56 import java.util.ArrayList; 57 import java.util.Arrays; 58 import java.util.Date; 59 import java.util.EventListener; 60 import java.util.Iterator; 61 import java.util.Stack; 62 63 import com.lowagie.text.Document; 64 import com.lowagie.text.DocumentException; 65 import com.lowagie.text.Element; 66 import com.lowagie.text.List; 67 import com.lowagie.text.rtf.direct.RtfDirectContent; 68 import com.lowagie.text.rtf.document.RtfDocument; 69 import com.lowagie.text.rtf.parser.ctrlwords.RtfCtrlWordData; 70 import com.lowagie.text.rtf.parser.ctrlwords.RtfCtrlWordListener; 71 import com.lowagie.text.rtf.parser.ctrlwords.RtfCtrlWordMgr; 72 import com.lowagie.text.rtf.parser.destinations.RtfDestination; 73 import com.lowagie.text.rtf.parser.destinations.RtfDestinationMgr; 74 75 /** 76 * The RtfParser allows the importing of RTF documents or 77 * RTF document fragments. The RTF document or fragment is tokenised, 78 * font and color definitions corrected and then added to 79 * the document being written. 80 * 81 * @author Mark Hall (Mark.Hall@mail.room3b.eu) 82 * @author Howard Shank (hgshank@yahoo.com) 83 * @since 2.0.8 84 */ 85 86 public class RtfParser { 87 /** 88 * Debugging flag. 89 */ 90 private static final boolean debugParser = false; // DEBUG Files are unlikely to be read by any reader! 91 private String logFile = null; 92 private boolean logging = false; 93 private boolean logAppend = false; 94 95 /** 96 * The iText element to add the RTF document to. 97 * @since 2.1.3 98 */ 99 private Element elem = null; 100 /** 101 * The iText document to add the RTF document to. 102 */ 103 private Document document = null; 104 /** 105 * The RtfDocument to add the RTF document or fragment to. 106 */ 107 private RtfDocument rtfDoc = null; 108 /** 109 * The RtfKeywords that creates and handles keywords that are implemented. 110 */ 111 private RtfCtrlWordMgr rtfKeywordMgr = null; 112 /** 113 * The RtfImportHeader to store imported font and color mappings in. 114 */ 115 private RtfImportMgr importMgr = null; 116 /** 117 * The RtfDestinationMgr object to manage destinations. 118 */ 119 private RtfDestinationMgr destinationMgr = null; 120 /** 121 * Stack for saving states for groups 122 */ 123 private Stack stackState = null; 124 /** 125 * The current parser state. 126 */ 127 private RtfParserState currentState = null; 128 /** 129 * The pushback reader to read the input stream. 130 */ 131 private PushbackInputStream pbReader = null; 132 /** 133 * Conversion type. Identifies if we are doing in import or a convert. 134 */ 135 private int conversionType = TYPE_IMPORT_FULL; 136 137 138 /* 139 * Bitmapping: 140 * 141 * 0111 1111 1111 1111 = Unkown state 142 * 0xxx xxxx xxxx xxxx = In Header 143 * 1xxx xxxx xxxx xxxx = In Document 144 * 2xxx xxxx xxxx xxxx = Reserved 145 * 4xxx xxxx xxxx xxxx = Other 146 * 8xxx xxxx xxxx xxxx = Errors 147 */ 148 149 /* 150 * Header state values 151 */ 152 153 /** 154 * Currently the RTF document header is being parsed. 155 */ 156 public static final int PARSER_IN_HEADER = (0x0 << 28) | 0x000000; 157 /** 158 * Currently the RTF charset is being parsed. 159 */ 160 public static final int PARSER_IN_CHARSET = PARSER_IN_HEADER | 0x000001; 161 /** 162 * Currently the RTF deffont is being parsed. 163 */ 164 public static final int PARSER_IN_DEFFONT = PARSER_IN_HEADER | 0x000002; 165 /** 166 * Currently the RTF font table is being parsed. 167 */ 168 public static final int PARSER_IN_FONT_TABLE = PARSER_IN_HEADER | 0x000003; 169 /** 170 * Currently a RTF font table info element is being parsed. 171 */ 172 public static final int PARSER_IN_FONT_TABLE_INFO = PARSER_IN_HEADER | 0x000004; 173 /** 174 * Currently the RTF filetbl is being parsed. 175 */ 176 public static final int PARSER_IN_FILE_TABLE = PARSER_IN_HEADER | 0x000005; 177 /** 178 * Currently the RTF color table is being parsed. 179 */ 180 public static final int PARSER_IN_COLOR_TABLE = PARSER_IN_HEADER | 0x000006; 181 /** 182 * Currently the RTF stylesheet is being parsed. 183 */ 184 public static final int PARSER_IN_STYLESHEET = PARSER_IN_HEADER | 0x000007; 185 /** 186 * Currently the RTF listtables is being parsed. 187 */ 188 public static final int PARSER_IN_LIST_TABLE = PARSER_IN_HEADER | 0x000008; 189 /** 190 * Currently the RTF listtable override is being parsed. 191 */ 192 public static final int PARSER_IN_LISTOVERRIDE_TABLE = PARSER_IN_HEADER | 0x000009; 193 /** 194 * Currently the RTF revtbl is being parsed. 195 */ 196 public static final int PARSER_IN_REV_TABLE = PARSER_IN_HEADER | 0x00000A; 197 /** 198 * Currently the RTF rsidtable is being parsed. 199 */ 200 public static final int PARSER_IN_RSID_TABLE = PARSER_IN_HEADER | 0x0000B; 201 /** 202 * Currently the RTF generator is being parsed. 203 */ 204 public static final int PARSER_IN_GENERATOR = PARSER_IN_HEADER | 0x00000C; 205 /** 206 * Currently the RTF Paragraph group properties Table (word 2002) 207 */ 208 public static final int PARSER_IN_PARAGRAPH_TABLE = PARSER_IN_HEADER | 0x00000E; 209 /** 210 * Currently the RTF Old Properties. 211 */ 212 public static final int PARSER_IN_OLDCPROPS = PARSER_IN_HEADER | 0x00000F; 213 /** 214 * Currently the RTF Old Properties. 215 */ 216 public static final int PARSER_IN_OLDPPROPS = PARSER_IN_HEADER | 0x000010; 217 /** 218 * Currently the RTF Old Properties. 219 */ 220 public static final int PARSER_IN_OLDTPROPS = PARSER_IN_HEADER | 0x000012; 221 /** 222 * Currently the RTF Old Properties. 223 */ 224 public static final int PARSER_IN_OLDSPROPS = PARSER_IN_HEADER | 0x000013; 225 /** 226 * Currently the RTF User Protection Information. 227 */ 228 public static final int PARSER_IN_PROT_USER_TABLE = PARSER_IN_HEADER | 0x000014; 229 /** 230 * Currently the Latent Style and Formatting usage restrictions 231 */ 232 public static final int PARSER_IN_LATENTSTYLES = PARSER_IN_HEADER | 0x000015; 233 234 public static final int PARSER_IN_PARAGRAPH_GROUP_PROPERTIES =PARSER_IN_HEADER | 0x000016; 235 236 /* 237 * Document state values 238 */ 239 240 /** 241 * Currently the RTF document content is being parsed. 242 */ 243 public static final int PARSER_IN_DOCUMENT = (0x2 << 28 ) | 0x000000; 244 245 /** 246 * Currently the RTF info group is being parsed. 247 */ 248 public static final int PARSER_IN_INFO_GROUP = PARSER_IN_DOCUMENT | 0x000001; 249 250 251 public static final int PARSER_IN_UPR = PARSER_IN_DOCUMENT | 0x000002; 252 /** 253 * Currently a shppict control word is being parsed. 254 */ 255 public static final int PARSER_IN_SHPPICT = PARSER_IN_DOCUMENT | 0x000010; //16 256 /** 257 * Currently a pict control word is being parsed. 258 */ 259 public static final int PARSER_IN_PICT = PARSER_IN_DOCUMENT | 0x000011; //17 260 /** 261 * Currently a picprop control word is being parsed. 262 */ 263 public static final int PARSER_IN_PICPROP = PARSER_IN_DOCUMENT | 0x000012; //18 264 /** 265 * Currently a blipuid control word is being parsed. 266 */ 267 public static final int PARSER_IN_BLIPUID = PARSER_IN_DOCUMENT | 0x000013; //19 268 269 /* other states */ 270 /** 271 * The parser is at the beginning or the end of the file. 272 */ 273 public static final int PARSER_STARTSTOP = (0x4 << 28)| 0x0001; 274 /* ERRORS */ 275 /** 276 * Currently the parser is in an error state. 277 */ 278 public static final int PARSER_ERROR = (0x8 << 28) | 0x0000; 279 /** 280 * The parser reached the end of the file. 281 */ 282 public static final int PARSER_ERROR_EOF = PARSER_ERROR | 0x0001; 283 /** 284 * Currently the parser is in an unknown state. 285 */ 286 public static final int PARSER_IN_UNKNOWN = PARSER_ERROR | 0x0FFFFFFF; 287 288 289 /** 290 * Conversion type is unknown 291 */ 292 public static final int TYPE_UNIDENTIFIED = -1; 293 /** 294 * Conversion type is an import. Uses direct content to add everything. 295 * This is what the original import does. 296 */ 297 public static final int TYPE_IMPORT_FULL = 0; 298 /** 299 * Conversion type is an import of a partial file/fragment. Uses direct content to add everything. 300 */ 301 public static final int TYPE_IMPORT_FRAGMENT = 1; 302 /** 303 * Conversion type is a conversion. This uses the document (not rtfDoc) to add 304 * all the elements making it a different supported documents depending on the writer used. 305 */ 306 public static final int TYPE_CONVERT = 2; 307 /** 308 * Conversion type to import a document into an element. i.e. Chapter, Section, Table Cell, etc. 309 * @since 2.1.4 310 */ 311 public static final int TYPE_IMPORT_INTO_ELEMENT = 3; 312 313 314 /** 315 * Destination is normal. Text is processed. 316 */ 317 public static final int DESTINATION_NORMAL = 0; 318 /** 319 * Destination is skipping. Text is ignored. 320 */ 321 public static final int DESTINATION_SKIP = 1; 322 323 //////////////////////////////////// TOKENISE VARIABLES /////////////////// 324 /* 325 * State flags use 4/28 bitmask. 326 * First 4 bits (nibble) indicates major state. Used for unknown and error 327 * Last 28 bits indicates the value; 328 */ 329 330 /** 331 * The RtfTokeniser is in its ground state. Any token may follow. 332 */ 333 public static final int TOKENISER_NORMAL = 0x00000000; 334 /** 335 * The last token parsed was a slash. 336 */ 337 public static final int TOKENISER_SKIP_BYTES = 0x00000001; 338 /** 339 * The RtfTokeniser is currently tokenising a control word. 340 */ 341 public static final int TOKENISER_SKIP_GROUP = 0x00000002; 342 /** 343 * The RtfTokeniser is currently reading binary stream. 344 */ 345 public static final int TOKENISER_BINARY= 0x00000003; 346 /** 347 * The RtfTokeniser is currently reading hex data. 348 */ 349 public static final int TOKENISER_HEX= 0x00000004; 350 /** 351 * The RtfTokeniser ignore result 352 */ 353 public static final int TOKENISER_IGNORE_RESULT= 0x00000005; 354 /** 355 * The RtfTokeniser is currently in error state 356 */ 357 public static final int TOKENISER_STATE_IN_ERROR = 0x80000000; // 1000 0000 0000 0000 0000 0000 0000 0000 358 /** 359 * The RtfTokeniser is currently in an unkown state 360 */ 361 public static final int TOKENISER_STATE_IN_UNKOWN = 0xFF000000; // 1111 0000 0000 0000 0000 0000 0000 0000 362 363 /** 364 * The current group nesting level. 365 */ 366 private int groupLevel = 0; 367 /** 368 * The current document group nesting level. Used for fragments. 369 */ 370 private int docGroupLevel = 0; 371 /** 372 * When the tokeniser is Binary. 373 */ 374 private long binByteCount = 0; 375 /** 376 * When the tokeniser is set to skip bytes, binSkipByteCount is the number of bytes to skip. 377 */ 378 private long binSkipByteCount = 0; 379 /** 380 * When the tokeniser is set to skip to next group, this is the group indentifier to return to. 381 */ 382 private int skipGroupLevel = 0; 383 384 //RTF parser error codes 385 public static final int errOK =0; // Everything's fine! 386 public static final int errStackUnderflow = -1; // Unmatched '}' 387 public static final int errStackOverflow = -2; // Too many '{' -- memory exhausted 388 public static final int errUnmatchedBrace = -3; // RTF ended during an open group. 389 public static final int errInvalidHex = -4; // invalid hex character found in data 390 public static final int errBadTable = -5; // RTF table (sym or prop) invalid 391 public static final int errAssertion = -6; // Assertion failure 392 public static final int errEndOfFile = -7; // End of file reached while reading RTF 393 public static final int errCtrlWordNotFound = -8; // control word was not found 394 //////////////////////////////////// TOKENISE VARIABLES /////////////////// 395 396 397 //////////////////////////////////// STATS VARIABLES /////////////////// 398 /** 399 * Total bytes read. 400 */ 401 private long byteCount = 0; 402 /** 403 * Total control words processed. 404 * 405 * Contains both known and unknown. 406 * 407 * <code>ctrlWordCount</code> should equal 408 * <code>ctrlWrodHandlecCount</code> + <code>ctrlWordNotHandledCount</code + <code>ctrlWordSkippedCount</code> 409 */ 410 private long ctrlWordCount = 0; 411 /** 412 * Total { encountered as an open group token. 413 */ 414 private long openGroupCount = 0; 415 /** 416 * Total } encountered as a close group token. 417 */ 418 private long closeGroupCount = 0; 419 /** 420 * Total clear text characters processed. 421 */ 422 private long characterCount = 0; 423 /** 424 * Total control words recognized. 425 */ 426 private long ctrlWordHandledCount = 0; 427 /** 428 * Total control words not handled. 429 */ 430 private long ctrlWordNotHandledCount = 0; 431 /** 432 * Total control words skipped. 433 */ 434 private long ctrlWordSkippedCount = 0; 435 /** 436 * Total groups skipped. Includes { and } as a group. 437 */ 438 private long groupSkippedCount = 0; 439 /** 440 * Start time as a long. 441 */ 442 private long startTime = 0; 443 /** 444 * Stop time as a long. 445 */ 446 private long endTime = 0; 447 /** 448 * Start date as a date. 449 */ 450 private Date startDate = null; 451 /** 452 * End date as a date. 453 */ 454 private Date endDate = null; 455 //////////////////////////////////// STATS VARIABLES /////////////////// 456 /** 457 * Last control word and parameter processed. 458 */ 459 private RtfCtrlWordData lastCtrlWordParam = null; 460 461 /** The <code>RtfCtrlWordListener</code>. */ 462 private ArrayList listeners = new ArrayList(); 463 464 /** 465 * Constructor 466 * @param doc 467 * @since 2.1.3 468 */ RtfParser(Document doc)469 public RtfParser(Document doc) { 470 this.document = doc; 471 } 472 /* ********* 473 * READER * 474 ***********/ 475 /** 476 * Imports a complete RTF document. 477 * 478 * @param readerIn 479 * The Reader to read the RTF document from. 480 * @param rtfDoc 481 * The RtfDocument to add the imported document to. 482 * @throws IOException On I/O errors. 483 * @since 2.1.3 484 */ importRtfDocument(InputStream readerIn, RtfDocument rtfDoc)485 public void importRtfDocument(InputStream readerIn, RtfDocument rtfDoc) throws IOException { 486 if(readerIn == null || rtfDoc == null) return; 487 this.init(TYPE_IMPORT_FULL, rtfDoc, readerIn, this.document, null); 488 this.setCurrentDestination(RtfDestinationMgr.DESTINATION_NULL); 489 startDate = new Date(); 490 startTime = System.currentTimeMillis(); 491 this.groupLevel = 0; 492 try { 493 this.tokenise(); 494 } catch (RuntimeException e) { 495 // TODO Auto-generated catch block 496 e.printStackTrace(); 497 } 498 catch (Exception e) { 499 // TODO Auto-generated catch block 500 e.printStackTrace(); 501 } 502 endTime = System.currentTimeMillis(); 503 endDate = new Date(); 504 } 505 /** 506 * Imports a complete RTF document into an Element, i.e. Chapter, section, Table Cell, etc. 507 * 508 * @param elem The Element the document is to be imported into. 509 * @param readerIn 510 * The Reader to read the RTF document from. 511 * @param rtfDoc 512 * The RtfDocument to add the imported document to. 513 * @throws IOException On I/O errors. 514 * @since 2.1.4 515 */ importRtfDocumentIntoElement(Element elem, InputStream readerIn, RtfDocument rtfDoc)516 public void importRtfDocumentIntoElement(Element elem, InputStream readerIn, RtfDocument rtfDoc) throws IOException { 517 if(readerIn == null || rtfDoc == null || elem == null) return; 518 this.init(TYPE_IMPORT_INTO_ELEMENT, rtfDoc, readerIn, this.document, elem); 519 this.setCurrentDestination(RtfDestinationMgr.DESTINATION_NULL); 520 startDate = new Date(); 521 startTime = System.currentTimeMillis(); 522 this.groupLevel = 0; 523 try { 524 this.tokenise(); 525 } catch (RuntimeException e) { 526 // TODO Auto-generated catch block 527 e.printStackTrace(); 528 } 529 catch (Exception e) { 530 // TODO Auto-generated catch block 531 e.printStackTrace(); 532 } 533 endTime = System.currentTimeMillis(); 534 endDate = new Date(); 535 } 536 /** 537 * Converts an RTF document to an iText document. 538 * 539 * Usage: Create a parser object and call this method with the input stream and the iText Document object 540 * 541 * @param readerIn 542 * The Reader to read the RTF file from. 543 * @param doc 544 * The iText document that the RTF file is to be added to. 545 * @throws IOException 546 * On I/O errors. 547 * @since 2.1.3 548 */ convertRtfDocument(InputStream readerIn, Document doc)549 public void convertRtfDocument(InputStream readerIn, Document doc) throws IOException { 550 if(readerIn == null || doc == null) return; 551 this.init(TYPE_CONVERT, null, readerIn, doc, null); 552 this.setCurrentDestination(RtfDestinationMgr.DESTINATION_DOCUMENT); 553 startDate = new Date(); 554 startTime = System.currentTimeMillis(); 555 this.groupLevel = 0; 556 this.tokenise(); 557 endTime = System.currentTimeMillis(); 558 endDate = new Date(); 559 } 560 561 /** 562 * Imports an RTF fragment. 563 * 564 * @param readerIn 565 * The Reader to read the RTF fragment from. 566 * @param rtfDoc 567 * The RTF document to add the RTF fragment to. 568 * @param importMappings 569 * The RtfImportMappings defining font and color mappings for the fragment. 570 * @throws IOException 571 * On I/O errors. 572 * @since 2.1.3 573 */ importRtfFragment(InputStream readerIn, RtfDocument rtfDoc, RtfImportMappings importMappings)574 public void importRtfFragment(InputStream readerIn, RtfDocument rtfDoc, RtfImportMappings importMappings) throws IOException { 575 //public void importRtfFragment2(Reader readerIn, RtfDocument rtfDoc, RtfImportMappings importMappings) throws IOException { 576 if(readerIn == null || rtfDoc == null || importMappings==null) return; 577 this.init(TYPE_IMPORT_FRAGMENT, rtfDoc, readerIn, null, null); 578 this.handleImportMappings(importMappings); 579 this.setCurrentDestination(RtfDestinationMgr.DESTINATION_DOCUMENT); 580 this.groupLevel = 1; 581 setParserState(RtfParser.PARSER_IN_DOCUMENT); 582 startDate = new Date(); 583 startTime = System.currentTimeMillis(); 584 this.tokenise(); 585 endTime = System.currentTimeMillis(); 586 endDate = new Date(); 587 } 588 589 // listener methods 590 591 /** 592 * Adds a <CODE>EventListener</CODE> to the <CODE>RtfCtrlWordMgr</CODE>. 593 * 594 * @param listener 595 * the new EventListener. 596 * @since 2.1.3 597 */ addListener(EventListener listener)598 public void addListener(EventListener listener) { 599 listeners.add(listener); 600 } 601 602 /** 603 * Removes a <CODE>EventListener</CODE> from the <CODE>RtfCtrlWordMgr</CODE>. 604 * 605 * @param listener 606 * the EventListener that has to be removed. 607 * @since 2.1.3 608 */ removeListener(EventListener listener)609 public void removeListener(EventListener listener) { 610 listeners.remove(listener); 611 } 612 613 /** 614 * Initialize the parser object values. 615 * 616 * @param type Type of conversion or import 617 * @param rtfDoc The <code>RtfDocument</code> 618 * @param readerIn The input stream 619 * @param doc The iText <code>Document</code> 620 * @since 2.1.3 621 */ init(int type, RtfDocument rtfDoc, InputStream readerIn, Document doc, Element elem)622 private void init(int type, RtfDocument rtfDoc, InputStream readerIn, Document doc, Element elem) { 623 624 init_stats(); 625 // initialize reader to a PushbackReader 626 this.pbReader = init_Reader(readerIn); 627 628 this.conversionType = type; 629 this.rtfDoc = rtfDoc; 630 this.document = doc; 631 this.elem = elem; 632 this.currentState = new RtfParserState(); 633 this.stackState = new Stack(); 634 this.setParserState(PARSER_STARTSTOP); 635 this.importMgr = new RtfImportMgr(this.rtfDoc, this.document); 636 637 // get destination Mgr 638 this.destinationMgr = RtfDestinationMgr.getInstance(this); 639 // set the parser 640 RtfDestinationMgr.setParser(this); 641 642 643 // DEBUG INFO for timing and memory usage of RtfCtrlWordMgr object 644 // create multiple new RtfCtrlWordMgr objects to check timing and memory usage 645 // System.gc(); 646 // long endTime = 0; 647 // Date endDate = null; 648 // long endFree = 0; 649 // DecimalFormat df = new DecimalFormat("#,##0"); 650 // Date startDate = new Date(); 651 // long startTime = System.currentTimeMillis(); 652 // long startFree = Runtime.getRuntime().freeMemory(); 653 // System.out.println("1:"); 654 655 this.rtfKeywordMgr = new RtfCtrlWordMgr(this, this.pbReader);/////////DO NOT COMMENT OUT THIS LINE /////////// 656 657 Object listener; 658 for (Iterator iterator = listeners.iterator(); iterator.hasNext();) { 659 listener = iterator.next(); 660 if(listener instanceof RtfCtrlWordListener) { 661 this.rtfKeywordMgr.addRtfCtrlWordListener((RtfCtrlWordListener)listener); 662 } 663 } 664 // endFree = Runtime.getRuntime().freeMemory(); 665 // endTime = System.currentTimeMillis(); 666 // endDate = new Date(); 667 // System.out.println("RtfCtrlWordMgr start date: " + startDate.toLocaleString()); 668 // System.out.println("RtfCtrlWordMgr end date : " + endDate.toLocaleString()); 669 // System.out.println(" Elapsed time : " + Long.toString(endTime - startTime) + " milliseconds."); 670 // System.out.println("Begin Constructor RtfCtrlWordMgr , free mem is " + df.format(startFree / 1024) + "k"); 671 // System.out.println("End Constructor RtfCtrlWordMgr , free mem is " + df.format(endFree / 1024) + "k"); 672 // System.out.println("RtfCtrlWordMgr used approximately " + df.format((startFree - endFree) / 1024) + "k"); 673 // 674 // System.gc(); 675 // System.out.println("2:"); 676 // startDate = new Date(); 677 // startTime = System.currentTimeMillis(); 678 // startFree = Runtime.getRuntime().freeMemory(); 679 // RtfCtrlWordMgr rtfKeywordMgr2 = new RtfCtrlWordMgr(this, this.pbReader); 680 // endFree = Runtime.getRuntime().freeMemory(); 681 // endTime = System.currentTimeMillis(); 682 // endDate = new Date(); 683 // System.out.println("RtfCtrlWordMgr start date: " + startDate.toLocaleString()); 684 // System.out.println("RtfCtrlWordMgr end date : " + endDate.toLocaleString()); 685 // System.out.println(" Elapsed time : " + Long.toString(endTime - startTime) + " milliseconds."); 686 // System.out.println("Begin Constructor RtfCtrlWordMgr , free mem is " + df.format(startFree / 1024) + "k"); 687 // System.out.println("End Constructor RtfCtrlWordMgr , free mem is " + df.format(endFree / 1024) + "k"); 688 // System.out.println("RtfCtrlWordMgr used approximately " + df.format((startFree - endFree) / 1024) + "k"); 689 // 690 // System.gc(); 691 // System.out.println("3:"); 692 // startDate = new Date(); 693 // startTime = System.currentTimeMillis(); 694 // startFree = Runtime.getRuntime().freeMemory(); 695 // RtfCtrlWordMgr rtfKeywordMgr3 = new RtfCtrlWordMgr(this, this.pbReader); 696 // endFree = Runtime.getRuntime().freeMemory(); 697 // endTime = System.currentTimeMillis(); 698 // endDate = new Date(); 699 // System.out.println("RtfCtrlWordMgr start date: " + startDate.toLocaleString()); 700 // System.out.println("RtfCtrlWordMgr end date : " + endDate.toLocaleString()); 701 // System.out.println(" Elapsed time : " + Long.toString(endTime - startTime) + " milliseconds."); 702 // System.out.println("Begin Constructor RtfCtrlWordMgr , free mem is " + df.format(startFree / 1024) + "k"); 703 // System.out.println("End Constructor RtfCtrlWordMgr , free mem is " + df.format(endFree / 1024) + "k"); 704 // System.out.println("RtfCtrlWordMgr used approximately " + df.format((startFree - endFree) / 1024) + "k"); 705 // 706 // System.gc(); 707 // System.out.println("4:"); 708 // startDate = new Date(); 709 // startTime = System.currentTimeMillis(); 710 // startFree = Runtime.getRuntime().freeMemory(); 711 // RtfCtrlWordMgr rtfKeywordMgr4 = new RtfCtrlWordMgr(this, this.pbReader); 712 // endFree = Runtime.getRuntime().freeMemory(); 713 // endTime = System.currentTimeMillis(); 714 // endDate = new Date(); 715 // System.out.println("RtfCtrlWordMgr start date: " + startDate.toLocaleString()); 716 // System.out.println("RtfCtrlWordMgr end date : " + endDate.toLocaleString()); 717 // System.out.println(" Elapsed time : " + Long.toString(endTime - startTime) + " milliseconds."); 718 // System.out.println("Begin Constructor RtfCtrlWordMgr , free mem is " + df.format(startFree / 1024) + "k"); 719 // System.out.println("End Constructor RtfCtrlWordMgr , free mem is " + df.format(endFree / 1024) + "k"); 720 // System.out.println("RtfCtrlWordMgr used approximately " + df.format((startFree - endFree) / 1024) + "k"); 721 // 722 // System.gc(); 723 // System.out.println("5:"); 724 // startDate = new Date(); 725 // startTime = System.currentTimeMillis(); 726 // startFree = Runtime.getRuntime().freeMemory(); 727 // RtfCtrlWordMgr rtfKeywordMgr5 = new RtfCtrlWordMgr(this, this.pbReader); 728 // endFree = Runtime.getRuntime().freeMemory(); 729 // endTime = System.currentTimeMillis(); 730 // endDate = new Date(); 731 // System.out.println("RtfCtrlWordMgr start date: " + startDate.toLocaleString()); 732 // System.out.println("RtfCtrlWordMgr end date : " + endDate.toLocaleString()); 733 // System.out.println(" Elapsed time : " + Long.toString(endTime - startTime) + " milliseconds."); 734 // System.out.println("Begin Constructor RtfCtrlWordMgr , free mem is " + df.format(startFree / 1024) + "k"); 735 // System.out.println("End Constructor RtfCtrlWordMgr , free mem is " + df.format(endFree / 1024) + "k"); 736 // System.out.println("RtfCtrlWordMgr used approximately " + df.format((startFree - endFree) / 1024) + "k"); 737 // System.gc(); 738 // System.out.println("At ed:"); 739 // startDate = new Date(); 740 // startTime = System.currentTimeMillis(); 741 // startFree = Runtime.getRuntime().freeMemory(); 742 // //RtfCtrlWordMgr rtfKeywordMgr6 = new RtfCtrlWordMgr(this, this.pbReader); 743 // endFree = Runtime.getRuntime().freeMemory(); 744 // endTime = System.currentTimeMillis(); 745 // endDate = new Date(); 746 // System.out.println("RtfCtrlWordMgr start date: " + startDate.toLocaleString()); 747 // System.out.println("RtfCtrlWordMgr end date : " + endDate.toLocaleString()); 748 // System.out.println(" Elapsed time : " + Long.toString(endTime - startTime) + " milliseconds."); 749 // System.out.println("Begin Constructor RtfCtrlWordMgr , free mem is " + df.format(startFree / 1024) + "k"); 750 // System.out.println("End Constructor RtfCtrlWordMgr , free mem is " + df.format(endFree / 1024) + "k"); 751 // System.out.println("RtfCtrlWordMgr used approximately " + df.format((startFree - endFree) / 1024) + "k"); 752 } 753 /** 754 * Initialize the statistics values. 755 * @since 2.1.3 756 */ init_stats()757 protected void init_stats() { 758 byteCount = 0; 759 ctrlWordCount = 0; 760 openGroupCount = 0; 761 closeGroupCount = 0; 762 characterCount = 0; 763 ctrlWordHandledCount = 0; 764 ctrlWordNotHandledCount = 0; 765 ctrlWordSkippedCount = 0; 766 groupSkippedCount = 0; 767 startTime = 0; 768 endTime = 0; 769 startDate = null; 770 endDate = null; 771 } 772 773 /** 774 * Casts the input reader to a PushbackReader or 775 * creates a new PushbackReader from the Reader passed in. 776 * The reader is also transformed into a BufferedReader if necessary. 777 * 778 * @param readerIn 779 * The Reader object for the input file. 780 * @return 781 * PushbackReader object 782 * @since 2.1.3 783 */ init_Reader(InputStream readerIn)784 private PushbackInputStream init_Reader(InputStream readerIn) { 785 // Reader newReader = readerIn; 786 // // Initializing the reader as a BufferedReader 787 // // cut test processing time by approximately 50% 788 // // default uses 8192 character buffer 789 // if(!(newReader instanceof BufferedReader)) { 790 // newReader = new BufferedReader(newReader); // Since JDK1.1 791 // } 792 // // Initializing the reader as a PushbackReader is 793 // // a requirement of the parser to be able to put back 794 // // read ahead characters. 795 // if(!(newReader instanceof PushbackReader)) { 796 // newReader = new PushbackReader(newReader); // Since JDK1.1 797 // } 798 799 if(!(readerIn instanceof BufferedInputStream)) { 800 readerIn = new BufferedInputStream(readerIn); 801 } 802 if(!(readerIn instanceof PushbackInputStream)) { 803 readerIn = new PushbackInputStream(readerIn); 804 } 805 // return the proper reader object to the parser setup 806 return (PushbackInputStream)readerIn; 807 } 808 809 /** 810 * Imports the mappings defined in the RtfImportMappings into the 811 * RtfImportHeader of this RtfParser2. 812 * 813 * @param importMappings 814 * The RtfImportMappings to import. 815 * @since 2.1.3 816 */ handleImportMappings(RtfImportMappings importMappings)817 private void handleImportMappings(RtfImportMappings importMappings) { 818 Iterator it = importMappings.getFontMappings().keySet().iterator(); 819 while(it.hasNext()) { 820 String fontNr = (String) it.next(); 821 this.importMgr.importFont(fontNr, (String) importMappings.getFontMappings().get(fontNr)); 822 } 823 it = importMappings.getColorMappings().keySet().iterator(); 824 while(it.hasNext()) { 825 String colorNr = (String) it.next(); 826 this.importMgr.importColor(colorNr, (Color) importMappings.getColorMappings().get(colorNr)); 827 } 828 it = importMappings.getListMappings().keySet().iterator(); 829 while(it.hasNext()) { 830 String listNr = (String) it.next(); 831 this.importMgr.importList(listNr, (String)importMappings.getListMappings().get(listNr)); 832 } 833 it = importMappings.getStylesheetListMappings().keySet().iterator(); 834 while(it.hasNext()) { 835 String stylesheetListNr = (String) it.next(); 836 this.importMgr.importStylesheetList(stylesheetListNr, (List) importMappings.getStylesheetListMappings().get(stylesheetListNr)); 837 } 838 839 } 840 841 842 /* ***************************************** 843 * DOCUMENT CONTROL METHODS 844 * 845 * Handles - 846 * handleOpenGroup: Open groups - '{' 847 * handleCloseGroup: Close groups - '}' 848 * handleCtrlWord: Ctrl Words - '\...' 849 * handleCharacter: Characters - Plain Text, etc. 850 * 851 */ 852 853 /** 854 * Handles open group tokens. ({) 855 * 856 * @return errOK if ok, other if an error occurred. 857 * @since 2.1.3 858 */ handleOpenGroup()859 public int handleOpenGroup() { 860 int result = errOK; 861 this.openGroupCount++; // stats 862 this.groupLevel++; // current group level in tokeniser 863 this.docGroupLevel++; // current group level in document 864 if (this.getTokeniserState() == TOKENISER_SKIP_GROUP) { 865 this.groupSkippedCount++; 866 } 867 868 RtfDestination dest = this.getCurrentDestination(); 869 boolean handled = false; 870 871 if(dest != null) { 872 if(debugParser) { 873 RtfParser.outputDebug(this.rtfDoc, groupLevel, "DEBUG: before dest.handleOpeningSubGroup()"); 874 RtfParser.outputDebug(this.rtfDoc, groupLevel, "DEBUG: destination=" + dest.toString()); 875 } 876 handled = dest.handleOpeningSubGroup(); 877 if(debugParser) { 878 RtfParser.outputDebug(this.rtfDoc, groupLevel, "DEBUG: after dest.handleOpeningSubGroup()"); 879 } 880 } 881 882 this.stackState.push(this.currentState); 883 this.currentState = new RtfParserState(this.currentState); 884 // do not set this true until after the state is pushed 885 // otherwise it inserts a { where one does not belong. 886 this.currentState.newGroup = true; 887 dest = this.getCurrentDestination(); 888 889 if(debugParser) { 890 RtfParser.outputDebug(this.rtfDoc, groupLevel, "DEBUG: handleOpenGroup()"); 891 if(this.lastCtrlWordParam != null) 892 RtfParser.outputDebug(this.rtfDoc, groupLevel, "DEBUG: LastCtrlWord=" + this.lastCtrlWordParam.ctrlWord); 893 RtfParser.outputDebug(this.rtfDoc, groupLevel, "DEBUG: grouplevel=" + Integer.toString(groupLevel)); 894 RtfParser.outputDebug(this.rtfDoc, groupLevel, "DEBUG: destination=" + dest.toString()); 895 } 896 897 if(dest != null) { 898 handled = dest.handleOpenGroup(); 899 } 900 901 if(debugParser) { 902 RtfParser.outputDebug(this.rtfDoc, groupLevel, "DEBUG: after dest.handleOpenGroup(); handled=" + Boolean.toString(handled)); 903 } 904 905 return result; 906 } outputDebug(Object doc, int groupLevel, String str)907 public static void outputDebug(Object doc, int groupLevel, String str) { 908 System.out.println(str); 909 if(doc == null) return; 910 if(groupLevel<0) groupLevel = 0; 911 char[] a; Arrays.fill(a= new char[groupLevel*2], ' '); 912 String spaces= new String(a); 913 if(doc instanceof RtfDocument) { 914 ((RtfDocument)doc).add(new RtfDirectContent("\n" + spaces + str)); 915 } 916 else 917 if(doc instanceof Document) { 918 try { 919 ((Document)doc).add(new RtfDirectContent("\n" + spaces + str)); 920 } catch (DocumentException e) { 921 // TODO Auto-generated catch block 922 e.printStackTrace(); 923 } 924 } 925 } 926 /** 927 * Handles close group tokens. (}) 928 * 929 * @return errOK if ok, other if an error occurred. 930 * @since 2.1.3 931 */ handleCloseGroup()932 public int handleCloseGroup() { 933 int result = errOK; 934 this.closeGroupCount++; // stats 935 936 if (this.getTokeniserState() != TOKENISER_SKIP_GROUP) { 937 if(debugParser) { 938 RtfParser.outputDebug(this.rtfDoc, groupLevel, "DEBUG: handleCloseGroup()"); 939 if(this.lastCtrlWordParam != null) 940 RtfParser.outputDebug(this.rtfDoc, groupLevel, "DEBUG: LastCtrlWord=" + this.lastCtrlWordParam.ctrlWord); 941 RtfParser.outputDebug(this.rtfDoc, groupLevel, "DEBUG: grouplevel=" + Integer.toString(groupLevel)); 942 RtfParser.outputDebug(this.rtfDoc, groupLevel, "DEBUG: destination=" + this.getCurrentDestination().toString()); 943 RtfParser.outputDebug(this.rtfDoc, groupLevel, ""); 944 } 945 RtfDestination dest = this.getCurrentDestination(); 946 boolean handled = false; 947 948 if(dest != null) { 949 handled = dest.handleCloseGroup(); 950 } 951 if(debugParser) { 952 RtfParser.outputDebug(this.rtfDoc, groupLevel, "DEBUG: After dest.handleCloseGroup(); handled = " + Boolean.toString(handled)); 953 RtfParser.outputDebug(this.rtfDoc, groupLevel, ""); 954 } 955 } 956 957 if(this.stackState.size() >0 ) { 958 this.currentState = (RtfParserState)this.stackState.pop(); 959 } else { 960 result = errStackUnderflow; 961 } 962 963 this.docGroupLevel--; 964 this.groupLevel--; 965 966 if (this.getTokeniserState() == TOKENISER_SKIP_GROUP && this.groupLevel < this.skipGroupLevel) { 967 this.setTokeniserState(TOKENISER_NORMAL); 968 } 969 970 return result; 971 } 972 973 974 /** 975 * Handles control word tokens. Depending on the current 976 * state a control word can lead to a state change. When 977 * parsing the actual document contents, certain tabled 978 * values are remapped. i.e. colors, fonts, styles, etc. 979 * 980 * @param ctrlWordData The control word to handle. 981 * @return errOK if ok, other if an error occurred. 982 * @since 2.1.3 983 */ handleCtrlWord(RtfCtrlWordData ctrlWordData)984 public int handleCtrlWord(RtfCtrlWordData ctrlWordData) { 985 int result = errOK; 986 this.ctrlWordCount++; // stats 987 988 if(debugParser) { 989 RtfParser.outputDebug(this.rtfDoc, groupLevel, "DEBUG: handleCtrlWord=" + ctrlWordData.ctrlWord + " param=[" + ctrlWordData.param + "]"); 990 } 991 992 if (this.getTokeniserState() == TOKENISER_SKIP_GROUP) { 993 this.ctrlWordSkippedCount++; 994 if(debugParser) { 995 RtfParser.outputDebug(this.rtfDoc, groupLevel, "DEBUG: SKIPPED"); 996 } 997 return result; 998 } 999 1000 // RtfDestination dest = (RtfDestination)this.getCurrentDestination(); 1001 // boolean handled = false; 1002 // if(dest != null) { 1003 // handled = dest.handleControlWord(ctrlWordData); 1004 // } 1005 1006 result = this.rtfKeywordMgr.handleKeyword(ctrlWordData, this.groupLevel); 1007 1008 if( result == errOK){ 1009 this.ctrlWordHandledCount++; 1010 } else { 1011 this.ctrlWordNotHandledCount++; 1012 result = errOK; // hack for now. 1013 } 1014 1015 return result; 1016 } 1017 1018 /** 1019 * Handles text tokens. These are either handed on to the 1020 * appropriate destination handler. 1021 * 1022 * @param nextChar 1023 * The text token to handle. 1024 * @return errOK if ok, other if an error occurred. 1025 * @since 2.1.3 1026 */ 1027 // public int handleCharacter(char[] nextChar) { handleCharacter(int nextChar)1028 public int handleCharacter(int nextChar) { 1029 this.characterCount++; // stats 1030 1031 if (this.getTokeniserState() == TOKENISER_SKIP_GROUP) { 1032 return errOK; 1033 } 1034 1035 boolean handled = false; 1036 1037 RtfDestination dest = this.getCurrentDestination(); 1038 if(dest != null) { 1039 handled = dest.handleCharacter(nextChar); 1040 } 1041 1042 return errOK; 1043 } 1044 1045 /** 1046 * Get the state of the parser. 1047 * 1048 * @return 1049 * The current RtfParserState state object. 1050 * @since 2.1.3 1051 */ getState()1052 public RtfParserState getState(){ 1053 return this.currentState; 1054 } 1055 1056 /** 1057 * Get the current state of the parser. 1058 * 1059 * @return 1060 * The current state of the parser. 1061 * @since 2.1.3 1062 */ getParserState()1063 public int getParserState(){ 1064 return this.currentState.parserState; 1065 } 1066 1067 /** 1068 * Set the state value of the parser. 1069 * 1070 * @param newState 1071 * The new state for the parser 1072 * @return 1073 * The state of the parser. 1074 * @since 2.1.3 1075 */ setParserState(int newState)1076 public int setParserState(int newState){ 1077 this.currentState.parserState = newState; 1078 return this.currentState.parserState; 1079 } 1080 1081 /** 1082 * Get the conversion type. 1083 * 1084 * @return 1085 * The type of the conversion. Import or Convert. 1086 * @since 2.1.3 1087 */ getConversionType()1088 public int getConversionType() { 1089 return this.conversionType; 1090 } 1091 1092 /** 1093 * Get the RTF Document object. 1094 * @return 1095 * Returns the object rtfDoc. 1096 * @since 2.1.3 1097 */ getRtfDocument()1098 public RtfDocument getRtfDocument() { 1099 return this.rtfDoc; 1100 } 1101 1102 /** 1103 * Get the Document object. 1104 * @return 1105 * Returns the object rtfDoc. 1106 * @since 2.1.3 1107 */ getDocument()1108 public Document getDocument() { 1109 return this.document; 1110 } 1111 1112 /** 1113 * Get the RtfImportHeader object. 1114 * @return 1115 * Returns the object importHeader. 1116 * @since 2.1.3 1117 */ getImportManager()1118 public RtfImportMgr getImportManager() { 1119 return importMgr; 1120 } 1121 1122 1123 ///////////////////////////////////////////////////////////// 1124 // accessors for destinations 1125 /** 1126 * Set the current destination object for the current state. 1127 * @param destination The destination value to set. 1128 * @since 2.1.3 1129 */ setCurrentDestination(String destination)1130 public boolean setCurrentDestination(String destination) { 1131 RtfDestination dest = RtfDestinationMgr.getDestination(destination); 1132 if(dest != null) { 1133 this.currentState.destination = dest; 1134 return false; 1135 } else { 1136 this.setTokeniserStateSkipGroup(); 1137 return false; 1138 } 1139 } 1140 /** 1141 * Get the current destination object. 1142 * 1143 * @return The current state destination 1144 * @since 2.1.3 1145 */ getCurrentDestination()1146 public RtfDestination getCurrentDestination() { 1147 return this.currentState.destination; 1148 } 1149 /** 1150 * Get a destination from the map 1151 * 1152 * @param destination The string destination. 1153 * @return The destination object from the map 1154 * @since 2.1.3 1155 */ getDestination(String destination)1156 public RtfDestination getDestination(String destination) { 1157 return RtfDestinationMgr.getDestination(destination); 1158 } 1159 1160 /** 1161 * Helper method to determine if this is a new group. 1162 * 1163 * @return true if this is a new group, otherwise it returns false. 1164 * @since 2.1.3 1165 */ isNewGroup()1166 public boolean isNewGroup() { 1167 return this.currentState.newGroup; 1168 } 1169 /** 1170 * Helper method to set the new group flag 1171 * @param value The boolean value to set the flag 1172 * @return The value of newGroup 1173 * @since 2.1.3 1174 */ setNewGroup(boolean value)1175 public boolean setNewGroup(boolean value) { 1176 this.currentState.newGroup = value; 1177 return this.currentState.newGroup; 1178 } 1179 1180 /* ************ 1181 * TOKENISER * 1182 **************/ 1183 1184 /** 1185 * Read through the input file and parse the data stream into tokens. 1186 * 1187 * @throws IOException on IO error. 1188 * @since 2.1.3 1189 */ tokenise()1190 public void tokenise() throws IOException { 1191 int errorCode = errOK; // error code 1192 int nextChar = 0; 1193 // char[] nextChar = new char[1]; // input variable 1194 // nextChar[0]=0; // set to 0 1195 this.setTokeniserState(TOKENISER_NORMAL); // set initial tokeniser state 1196 1197 1198 // while(this.pbReader.read(nextChar) != -1) { 1199 while((nextChar = this.pbReader.read()) != -1) { 1200 this.byteCount++; 1201 1202 if (this.getTokeniserState() == TOKENISER_BINARY) // if we're parsing binary data, handle it directly 1203 { 1204 if ((errorCode = parseChar(nextChar)) != errOK) 1205 return; 1206 } else { 1207 // switch(nextChar[0]) { 1208 switch(nextChar) { 1209 case '{': // scope delimiter - Open 1210 this.handleOpenGroup(); 1211 break; 1212 case '}': // scope delimiter - Close 1213 this.handleCloseGroup(); 1214 break; 1215 case 0x0a: // noise character 1216 case 0x0d: // noise character 1217 // if(this.isImport()) { 1218 // this.rtfDoc.add(new RtfDirectContent(new String(nextChar))); 1219 // } 1220 break; 1221 case '\\': // Control word start delimiter 1222 if(parseCtrlWord(pbReader) != errOK) { 1223 // TODO: Indicate some type of error 1224 return; 1225 } 1226 break; 1227 default: 1228 if(groupLevel == 0) { // BOMs 1229 break; 1230 } 1231 if(this.getTokeniserState() == TOKENISER_HEX) { 1232 StringBuffer hexChars = new StringBuffer(); 1233 hexChars.append(nextChar); 1234 // if(pbReader.read(nextChar) == -1) { 1235 if((nextChar = pbReader.read()) == -1) { 1236 return; 1237 } 1238 this.byteCount++; 1239 hexChars.append(nextChar); 1240 try { 1241 // nextChar[0]=(char)Integer.parseInt(hexChars.toString(), 16); 1242 nextChar=Integer.parseInt(hexChars.toString(), 16); 1243 } catch (NumberFormatException e) { 1244 return; 1245 } 1246 this.setTokeniserState(TOKENISER_NORMAL); 1247 } 1248 if ((errorCode = parseChar(nextChar)) != errOK) { 1249 return; // some error occurred. we should send a 1250 // real error 1251 } 1252 break; 1253 } // switch(nextChar[0]) 1254 } // end if (this.getTokeniserState() == TOKENISER_BINARY) 1255 1256 // if(groupLevel < 1 && this.isImportFragment()) return; //return errOK; 1257 // if(groupLevel < 0 && this.isImportFull()) return; //return errStackUnderflow; 1258 // if(groupLevel < 0 && this.isConvert()) return; //return errStackUnderflow; 1259 1260 }// end while(reader.read(nextChar) != -1) 1261 RtfDestination dest = this.getCurrentDestination(); 1262 if(dest != null) { 1263 dest.closeDestination(); 1264 } 1265 } 1266 1267 /** 1268 * Process the character and send it to the current destination. 1269 * @param nextChar 1270 * The character to process 1271 * @return 1272 * Returns an error code or errOK if no error. 1273 * @since 2.1.3 1274 */ parseChar(int nextChar)1275 private int parseChar(int nextChar) { 1276 // figure out where to put the character 1277 // needs to handle group levels for parsing 1278 // examples 1279 /* 1280 * {\f3\froman\fcharset2\fprq2{\*\panose 05050102010706020507}Symbol;} 1281 * {\f7\fswiss\fcharset0\fprq2{\*\panose 020b0604020202030204}Helv{\*\falt Arial};} <- special case!!!! 1282 * {\f5\froman\fcharset0 Tahoma;} 1283 * {\f6\froman\fcharset0 Arial Black;} 1284 * {\info(\author name}{\company company name}} 1285 * ... document text ... 1286 */ 1287 if (this.getTokeniserState() == TOKENISER_BINARY && --binByteCount <= 0) 1288 this.setTokeniserStateNormal(); 1289 if (this.getTokeniserState() == TOKENISER_SKIP_BYTES && --binSkipByteCount <= 0) 1290 this.setTokeniserStateNormal(); 1291 return this.handleCharacter(nextChar); 1292 } 1293 1294 /** 1295 * Parses a keyword and it's parameter if one exists 1296 * @param reader 1297 * This is a pushback reader for file input. 1298 * @return 1299 * Returns an error code or errOK if no error. 1300 * @throws IOException 1301 * Catch any file read problem. 1302 * @since 2.1.3 1303 */ parseCtrlWord(PushbackInputStream reader)1304 private int parseCtrlWord(PushbackInputStream reader) throws IOException { 1305 int nextChar = 0; 1306 int result = errOK; 1307 1308 if((nextChar = reader.read()) == -1) { 1309 return errEndOfFile; 1310 } 1311 this.byteCount++; 1312 1313 StringBuffer parsedCtrlWord = new StringBuffer(); 1314 StringBuffer parsedParam= new StringBuffer(); 1315 RtfCtrlWordData ctrlWordParam = new RtfCtrlWordData(); 1316 1317 if(!Character.isLetterOrDigit((char)nextChar)) { 1318 parsedCtrlWord.append((char)nextChar); 1319 ctrlWordParam.ctrlWord = parsedCtrlWord.toString(); 1320 result = this.handleCtrlWord(ctrlWordParam); 1321 lastCtrlWordParam = ctrlWordParam; 1322 return result; 1323 } 1324 1325 do { 1326 parsedCtrlWord.append((char)nextChar); 1327 //TODO: catch EOF 1328 nextChar = reader.read(); 1329 this.byteCount++; 1330 } while (Character.isLetter((char)nextChar)); 1331 1332 ctrlWordParam.ctrlWord = parsedCtrlWord.toString(); 1333 1334 if(nextChar == '-') { 1335 ctrlWordParam.isNeg = true; 1336 if((nextChar = reader.read()) == -1) { 1337 return errEndOfFile; 1338 } 1339 this.byteCount++; 1340 } 1341 1342 1343 if(Character.isDigit((char)nextChar)) { 1344 ctrlWordParam.hasParam = true; 1345 do { 1346 parsedParam.append((char)nextChar); 1347 //TODO: catch EOF 1348 nextChar = reader.read(); 1349 this.byteCount++; 1350 } while (Character.isDigit((char)nextChar)); 1351 1352 ctrlWordParam.param = parsedParam.toString(); 1353 } 1354 1355 // push this character back into the stream 1356 if(nextChar != ' ') { 1357 reader.unread(nextChar); 1358 } 1359 1360 if(debugParser) { 1361 // // debug: insrsid6254399 1362 // if(ctrlWordParam.ctrlWord.equals("proptype") && ctrlWordParam.param.equals("30")) { 1363 // System.out.print("Debug value found\n"); 1364 // } 1365 // if(ctrlWordParam.ctrlWord.equals("cf") ) { 1366 // System.out.print("Debug value found\n"); 1367 // } 1368 } 1369 1370 result = this.handleCtrlWord(ctrlWordParam); 1371 lastCtrlWordParam = ctrlWordParam; 1372 return result; 1373 1374 } 1375 1376 /** 1377 * Set the current state of the tokeniser. 1378 * @param value The new state of the tokeniser. 1379 * @return The state of the tokeniser. 1380 * @since 2.1.3 1381 */ setTokeniserState(int value)1382 public int setTokeniserState(int value) { 1383 this.currentState.tokeniserState = value; 1384 return this.currentState.tokeniserState; 1385 } 1386 1387 /** 1388 * Get the current state of the tokeniser. 1389 * @return The current state of the tokeniser. 1390 * @since 2.1.3 1391 */ getTokeniserState()1392 public int getTokeniserState() { 1393 return this.currentState.tokeniserState; 1394 } 1395 1396 /** 1397 * Gets the current group level 1398 * 1399 * @return 1400 * The current group level value. 1401 * @since 2.1.3 1402 */ getLevel()1403 public int getLevel() { 1404 return this.groupLevel; 1405 } 1406 1407 1408 /** 1409 * Set the tokeniser state to skip to the end of the group. 1410 * Sets the state to TOKENISER_SKIP_GROUP and skipGroupLevel to the current group level. 1411 * @since 2.1.3 1412 */ setTokeniserStateNormal()1413 public void setTokeniserStateNormal() { 1414 this.setTokeniserState(TOKENISER_NORMAL); 1415 } 1416 1417 /** 1418 * Set the tokeniser state to skip to the end of the group. 1419 * Sets the state to TOKENISER_SKIP_GROUP and skipGroupLevel to the current group level. 1420 * @since 2.1.3 1421 */ setTokeniserStateSkipGroup()1422 public void setTokeniserStateSkipGroup() { 1423 this.setTokeniserState(TOKENISER_SKIP_GROUP); 1424 this.skipGroupLevel = this.groupLevel; 1425 } 1426 1427 /** 1428 * Sets the number of bytes to skip and the state of the tokeniser. 1429 * 1430 * @param numberOfBytesToSkip 1431 * The numbere of bytes to skip in the file. 1432 * @since 2.1.3 1433 */ setTokeniserSkipBytes(long numberOfBytesToSkip)1434 public void setTokeniserSkipBytes(long numberOfBytesToSkip) { 1435 this.setTokeniserState(TOKENISER_SKIP_BYTES); 1436 this.binSkipByteCount = numberOfBytesToSkip; 1437 } 1438 1439 /** 1440 * Sets the number of binary bytes. 1441 * 1442 * @param binaryCount 1443 * The number of binary bytes. 1444 * @since 2.1.3 1445 */ setTokeniserStateBinary(int binaryCount)1446 public void setTokeniserStateBinary(int binaryCount) { 1447 this.setTokeniserState(TOKENISER_BINARY); 1448 this.binByteCount = binaryCount; 1449 } 1450 /** 1451 * Sets the number of binary bytes. 1452 * 1453 * @param binaryCount 1454 * The number of binary bytes. 1455 * @since 2.1.3 1456 */ setTokeniserStateBinary(long binaryCount)1457 public void setTokeniserStateBinary(long binaryCount) { 1458 this.setTokeniserState(TOKENISER_BINARY); 1459 this.binByteCount = binaryCount; 1460 } 1461 /** 1462 * Helper method to determin if conversion is TYPE_CONVERT 1463 * @return true if TYPE_CONVERT, otherwise false 1464 * @see com.lowagie.text.rtf.parser.RtfParser#TYPE_CONVERT 1465 * @since 2.1.3 1466 */ isConvert()1467 public boolean isConvert() { 1468 return (this.getConversionType() == RtfParser.TYPE_CONVERT); 1469 } 1470 1471 /** 1472 * Helper method to determin if conversion is TYPE_IMPORT_FULL or TYPE_IMPORT_FRAGMENT 1473 * @return true if TYPE_CONVERT, otherwise false 1474 * @see com.lowagie.text.rtf.parser.RtfParser#TYPE_IMPORT_FULL 1475 * @see com.lowagie.text.rtf.parser.RtfParser#TYPE_IMPORT_FRAGMENT 1476 * @since 2.1.3 1477 */ isImport()1478 public boolean isImport() { 1479 return (isImportFull() || this.isImportFragment()); 1480 } 1481 /** 1482 * Helper method to determin if conversion is TYPE_IMPORT_FULL 1483 * @return true if TYPE_CONVERT, otherwise false 1484 * @see com.lowagie.text.rtf.parser.RtfParser#TYPE_IMPORT_FULL 1485 * @since 2.1.3 1486 */ isImportFull()1487 public boolean isImportFull() { 1488 return (this.getConversionType() == RtfParser.TYPE_IMPORT_FULL); 1489 } 1490 /** 1491 * Helper method to determin if conversion is TYPE_IMPORT_FRAGMENT 1492 * @return true if TYPE_CONVERT, otherwise false 1493 * @see com.lowagie.text.rtf.parser.RtfParser#TYPE_IMPORT_FRAGMENT 1494 * @since 2.1.3 1495 */ isImportFragment()1496 public boolean isImportFragment() { 1497 return (this.getConversionType() == RtfParser.TYPE_IMPORT_FRAGMENT); 1498 } 1499 /** 1500 * Helper method to indicate if this control word was a \* control word. 1501 * @return true if it was a \* control word, otherwise false 1502 * @since 2.1.3 1503 */ getExtendedDestination()1504 public boolean getExtendedDestination() { 1505 return this.currentState.isExtendedDestination; 1506 } 1507 /** 1508 * Helper method to set the extended control word flag. 1509 * @param value Boolean to set the value to. 1510 * @return isExtendedDestination. 1511 * @since 2.1.3 1512 */ setExtendedDestination(boolean value)1513 public boolean setExtendedDestination(boolean value) { 1514 this.currentState.isExtendedDestination = value; 1515 return this.currentState.isExtendedDestination; 1516 } 1517 1518 /** 1519 * Get the logfile name. 1520 * 1521 * @return the logFile 1522 * @since 2.1.3 1523 */ getLogFile()1524 public String getLogFile() { 1525 return logFile; 1526 } 1527 1528 /** 1529 * Set the logFile name 1530 * 1531 * @param logFile the logFile to set 1532 * @since 2.1.3 1533 */ setLogFile(String logFile)1534 public void setLogFile(String logFile) { 1535 this.logFile = logFile; 1536 } 1537 /** 1538 * Set the logFile name 1539 * 1540 * @param logFile the logFile to set 1541 * @since 2.1.3 1542 */ setLogFile(String logFile, boolean logAppend)1543 public void setLogFile(String logFile, boolean logAppend) { 1544 this.logFile = logFile; 1545 this.setLogAppend(logAppend); 1546 } 1547 1548 /** 1549 * Get flag indicating if logging is on or off. 1550 * 1551 * @return the logging 1552 * @since 2.1.3 1553 */ isLogging()1554 public boolean isLogging() { 1555 return logging; 1556 } 1557 1558 /** 1559 * Set flag indicating if logging is on or off 1560 * @param logging <code>true</code> to turn on logging, <code>false</code> to turn off logging. 1561 * @since 2.1.3 1562 */ setLogging(boolean logging)1563 public void setLogging(boolean logging) { 1564 this.logging = logging; 1565 } 1566 1567 /** 1568 * @return the logAppend 1569 * @since 2.1.3 1570 */ isLogAppend()1571 public boolean isLogAppend() { 1572 return logAppend; 1573 } 1574 1575 /** 1576 * @param logAppend the logAppend to set 1577 * @since 2.1.3 1578 */ setLogAppend(boolean logAppend)1579 public void setLogAppend(boolean logAppend) { 1580 this.logAppend = logAppend; 1581 } 1582 1583 /* 1584 * Statistics 1585 * 1586 public void printStats(PrintStream out) { 1587 if(out == null) return; 1588 1589 out.println(""); 1590 out.println("Parser statistics:"); 1591 out.println("Process start date: " + startDate.toLocaleString()); 1592 out.println("Process end date : " + endDate.toLocaleString()); 1593 out.println(" Elapsed time : " + Long.toString(endTime - startTime) + " milliseconds."); 1594 out.println("Total bytes read : " + Long.toString(byteCount)); 1595 out.println("Open group count : " + Long.toString(openGroupCount)); 1596 out.print("Close group count : " + Long.toString(closeGroupCount)); 1597 out.println(" (Groups Skipped): " + Long.toString(groupSkippedCount)); 1598 out.print("Control word count: " + Long.toString(ctrlWordCount)); 1599 out.print(" - Handled: " + Long.toString(ctrlWordHandledCount)); 1600 out.print(" Not Handled: " + Long.toString(ctrlWordNotHandledCount)); 1601 out.println(" Skipped: " + Long.toString(ctrlWordSkippedCount)); 1602 out.println("Plain text char count: " + Long.toString(characterCount)); 1603 }*/ 1604 } 1605