/*
 * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
package javax.swing.text.rtf;

import java.io.*;
import java.lang.*;

/**
 * <b>RTFParser</b> is a subclass of <b>AbstractFilter</b> which understands basic RTF syntax
 * and passes a stream of control words, text, and begin/end group
 * indications to its subclass.
 *
 * Normally programmers will only use <b>RTFReader</b>, a subclass of this class that knows what to
 * do with the tokens this class parses.
 *
 * <p>Internally this is a small state machine driven one character at a
 * time by {@link #write(char)}; the current state is one of the
 * <code>S_*</code> constants below.
 *
 * @see AbstractFilter
 * @see RTFReader
 */
abstract class RTFParser extends AbstractFilter
{
    /** The current RTF group nesting level. */
    public int level;

    /** Current parser state; one of the <code>S_*</code> constants. */
    private int state;
    /** Accumulates plain text, a keyword name, or a keyword parameter,
     *  depending on the current state. */
    private StringBuilder currentCharacters;
    private String pendingKeyword;    // where keywords go while we
                                      // read their parameters
    private int pendingCharacter;     // for the \'xx construct

    private long binaryBytesLeft;     // in a \bin blob?
    ByteArrayOutputStream binaryBuf;
    /** The specials table that was active before a \bin blob started;
     *  restored when the blob ends. */
    private boolean[] savedSpecials;

    /** A stream to which to write warnings and debugging information
     *  while parsing. This is set to <code>System.out</code> to log
     *  any anomalous information to stdout. */
    protected PrintStream warnings;

    // values for the 'state' variable
    private static final int S_text = 0;          // reading random text
    private static final int S_backslashed = 1;   // read a backslash, waiting for next
    private static final int S_token = 2;         // reading a multicharacter token
    private static final int S_parameter = 3;     // reading a token's parameter

    private static final int S_aftertick = 4;     // after reading \'
    private static final int S_aftertickc = 5;    // after reading \'x

    private static final int S_inblob = 6;        // in a \bin blob

    /** Upper bound on the buffer space pre-allocated for a \bin blob.
     *  The buffer still grows on demand; this only keeps a bogus length
     *  in the input from forcing a huge up-front allocation. */
    private static final int MAX_BLOB_PREALLOCATION = 4 * 1024 * 1024;

    /** Implemented by subclasses to interpret a parameter-less RTF keyword.
     *  The keyword is passed without the leading '\' or any delimiting
     *  whitespace.
     *  @param keyword The keyword that was read.
     *  @return <code>false</code> if the keyword was not recognized, in
     *          which case the parser logs a warning. */
    public abstract boolean handleKeyword(String keyword);

    /** Implemented by subclasses to interpret a keyword with a parameter.
     *  @param keyword   The keyword, as with <code>handleKeyword(String)</code>.
     *  @param parameter The parameter following the keyword.
     *  @return <code>false</code> if the keyword was not recognized, in
     *          which case the parser logs a warning. */
    public abstract boolean handleKeyword(String keyword, int parameter);

    /** Implemented by subclasses to interpret text from the RTF stream. */
    public abstract void handleText(String text);

    /** Convenience overload which forwards a single character to
     *  <code>handleText(String)</code>. */
    public void handleText(char ch)
    { handleText(String.valueOf(ch)); }

    /** Implemented by subclasses to handle the contents of the \bin keyword. */
    public abstract void handleBinaryBlob(byte[] data);

    /** Implemented by subclasses to react to an increase
     *  in the nesting level. */
    public abstract void begingroup();

    /** Implemented by subclasses to react to the end of a group. */
    public abstract void endgroup();

    // table of non-text characters in rtf
    static final boolean[] rtfSpecialsTable;
    static {
        rtfSpecialsTable = noSpecialsTable.clone();
        rtfSpecialsTable['\n'] = true;
        rtfSpecialsTable['\r'] = true;
        rtfSpecialsTable['{'] = true;
        rtfSpecialsTable['}'] = true;
        rtfSpecialsTable['\\'] = true;
    }

    /** Creates a parser in the plain-text state at nesting level 0. */
    public RTFParser()
    {
        currentCharacters = new StringBuilder();
        state = S_text;
        pendingKeyword = null;
        level = 0;
        //warnings = System.out;

        specialsTable = rtfSpecialsTable;
    }

    // TODO: Handle wrapup at end of file correctly.

    /** Feeds a single special character through the state machine. */
    public void writeSpecial(int b)
        throws IOException
    {
        write((char)b);
    }

    /** Prints a warning to {@link #warnings}, if that stream is set. */
    protected void warning(String s) {
        if (warnings != null) {
            warnings.println(s);
        }
    }

    /** Processes a run of characters.  While the parser is in the middle
     *  of a keyword, parameter, \'xx escape or blob, characters are fed
     *  one at a time; once it is back in the text state the remainder of
     *  the string is handled as a unit. */
    public void write(String s)
        throws IOException
    {
        if (state != S_text) {
            int index = 0;
            int length = s.length();
            while (index < length && state != S_text) {
                write(s.charAt(index));
                index++;
            }

            if (index >= length) {
                return;
            }

            s = s.substring(index);
        }

        // Keep ordering: if text is already buffered, append to it rather
        // than delivering the new text ahead of the buffered text.
        if (currentCharacters.length() > 0) {
            currentCharacters.append(s);
        } else {
            handleText(s);
        }
    }

    /** Advances the state machine by one character.
     *  @throws IOException if a close-group is seen at nesting level 0 */
    @SuppressWarnings("fallthrough")
    public void write(char ch)
        throws IOException
    {
        boolean ok;

        switch (state)
        {
          case S_text:
              if (ch == '\n' || ch == '\r') {
                  break;  // unadorned newlines are ignored
              } else if (ch == '{') {
                  emitBufferedText();
                  level++;
                  begingroup();
              } else if (ch == '}') {
                  emitBufferedText();
                  if (level == 0) {
                      throw new IOException("Too many close-groups in RTF text");
                  }
                  endgroup();
                  level--;
              } else if (ch == '\\') {
                  emitBufferedText();
                  state = S_backslashed;
              } else {
                  currentCharacters.append(ch);
              }
              break;
          case S_backslashed:
              if (ch == '\'') {
                  state = S_aftertick;
                  break;
              }
              if (!Character.isLetter(ch)) {
                  // A control symbol: backslash followed by one non-letter.
                  String symbol = String.valueOf(ch);
                  if (!handleKeyword(symbol)) {
                      warning("Unknown keyword: " + symbol + " (" + (byte)ch + ")");
                  }
                  state = S_text;
                  pendingKeyword = null;
                  /* currentCharacters is already empty */
                  break;
              }

              state = S_token;
              /* FALL THROUGH */
          case S_token:
              if (Character.isLetter(ch)) {
                  currentCharacters.append(ch);
              } else {
                  pendingKeyword = currentCharacters.toString();
                  currentCharacters.setLength(0);

                  // Parameter following?
                  if (Character.isDigit(ch) || (ch == '-')) {
                      state = S_parameter;
                      currentCharacters.append(ch);
                  } else {
                      ok = handleKeyword(pendingKeyword);
                      if (!ok) {
                          warning("Unknown keyword: " + pendingKeyword);
                      }
                      // Non-space delimiters get included in the text
                      resumeText(ch);
                  }
              }
              break;
          case S_parameter:
              if (Character.isDigit(ch)) {
                  currentCharacters.append(ch);
              } else {
                  /* TODO: Test correct behavior of \bin keyword */

                  if (pendingKeyword.equals("bin")) {  /* magic layer-breaking kwd */
                      long parameter;
                      try {
                          parameter = Long.parseLong(currentCharacters.toString());
                      } catch (NumberFormatException e) {
                          warning("Illegal number format " + currentCharacters.toString()
                                  + " in \\bin tag");
                          // Delimiters here are interpreted as text too
                          resumeText(ch);
                          break;
                      }
                      beginBinaryBlob(parameter);
                      break;
                  }

                  try {
                      int parameter = Integer.parseInt(currentCharacters.toString());
                      ok = handleKeyword(pendingKeyword, parameter);
                      if (!ok) {
                          warning("Unknown keyword: " + pendingKeyword +
                                  " (param " + currentCharacters + ")");
                      }
                  } catch (NumberFormatException e) {
                      warning("Illegal number format " + currentCharacters.toString()
                              + " in " + pendingKeyword + " tag");
                  }

                  // Delimiters here are interpreted as text too
                  resumeText(ch);
              }
              break;
          case S_aftertick:
              if (Character.digit(ch, 16) == -1) {
                  state = S_text;
              } else {
                  pendingCharacter = Character.digit(ch, 16);
                  state = S_aftertickc;
              }
              break;
          case S_aftertickc:
              state = S_text;
              if (Character.digit(ch, 16) != -1)
              {
                  pendingCharacter = pendingCharacter * 16 + Character.digit(ch, 16);
                  // NOTE(review): translationTable is inherited; assumed to be
                  // non-null and to cover indices 0..255 -- confirm in AbstractFilter.
                  ch = translationTable[pendingCharacter];
                  if (ch != 0) {
                      handleText(ch);
                  }
              }
              break;
          case S_inblob:
              if (binaryBytesLeft > 0) {
                  binaryBuf.write(ch);
                  binaryBytesLeft--;
              }
              if (binaryBytesLeft == 0) {
                  state = S_text;
                  specialsTable = savedSpecials;
                  savedSpecials = null;
                  handleBinaryBlob(binaryBuf.toByteArray());
                  binaryBuf = null;
              }
        }
    }

    /** Delivers any buffered text to <code>handleText</code> and empties
     *  the buffer.  Does nothing when the buffer is empty. */
    private void emitBufferedText()
    {
        if (currentCharacters.length() > 0) {
            handleText(currentCharacters.toString());
            currentCharacters.setLength(0);
        }
    }

    /** Ends keyword processing: discards the pending keyword and scratch
     *  buffer, returns to the text state, and re-processes the delimiter
     *  as text unless it is whitespace. */
    private void resumeText(char delimiter)
        throws IOException
    {
        pendingKeyword = null;
        currentCharacters.setLength(0);
        state = S_text;

        if (!Character.isWhitespace(delimiter)) {
            write(delimiter);
        }
    }

    /** Starts collecting the payload of a \bin keyword.
     *  @param declaredLength the byte count given as the keyword's
     *         parameter; negative values are treated as zero */
    private void beginBinaryBlob(long declaredLength)
    {
        pendingKeyword = null;
        // Drop the parameter digits; otherwise they would be emitted as
        // document text once the parser returns to the text state.
        currentCharacters.setLength(0);

        binaryBytesLeft = Math.max(declaredLength, 0L);
        if (binaryBytesLeft == 0) {
            // Nothing to read: deliver the empty blob right away instead of
            // entering S_inblob, which would swallow the next character.
            state = S_text;
            handleBinaryBlob(new byte[0]);
            return;
        }

        binaryBuf = new ByteArrayOutputStream(
            (int) Math.min(binaryBytesLeft, (long) MAX_BLOB_PREALLOCATION));
        // While in the blob every character must be routed to writeSpecial().
        savedSpecials = specialsTable;
        specialsTable = allSpecialsTable;
        state = S_inblob;
    }

    /** Flushes any buffered but not yet written characters.
     *  Subclasses which override this method should call this
     *  method <em>before</em> flushing
     *  any of their own buffers. */
    public void flush()
        throws IOException
    {
        super.flush();

        // Outside the text state the buffer holds a partial keyword or
        // parameter, not text, so it must not be delivered.
        if (state == S_text) {
            emitBufferedText();
        }
    }

    /** Closes the parser. Currently, this simply does a <code>flush()</code>,
     *  followed by some minimal consistency checks. */
    public void close()
        throws IOException
    {
        flush();

        if (state != S_text || level > 0) {
            warning("Truncated RTF file.");

            /* TODO: any sane way to handle termination in a non-S_text state? */
            /* probably not */

            /* this will cause subclasses to behave more reasonably
               some of the time */
            while (level > 0) {
                endgroup();
                level--;
            }
        }

        super.close();
    }

}