1 /*
2  * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 package javax.swing.text.rtf;
26 
27 import java.io.*;
28 import java.lang.*;
29 
30 /**
31  * <b>RTFParser</b> is a subclass of <b>AbstractFilter</b> which understands basic RTF syntax
32  * and passes a stream of control words, text, and begin/end group
33  * indications to its subclass.
34  *
35  * Normally programmers will only use <b>RTFReader</b>, a subclass of this class that knows what to
36  * do with the tokens this class parses.
37  *
38  * @see AbstractFilter
39  * @see RTFReader
40  */
41 abstract class RTFParser extends AbstractFilter
42 {
43   /** The current RTF group nesting level. */
44   public int level;
45 
46   private int state;
47   private StringBuffer currentCharacters;
48   private String pendingKeyword;                // where keywords go while we
49                                                 // read their parameters
50   private int pendingCharacter;                 // for the \'xx construct
51 
52   private long binaryBytesLeft;                  // in a \bin blob?
53   ByteArrayOutputStream binaryBuf;
54   private boolean[] savedSpecials;
55 
56   /** A stream to which to write warnings and debugging information
57    *  while parsing. This is set to <code>System.out</code> to log
58    *  any anomalous information to stdout. */
59   protected PrintStream warnings;
60 
61   // value for the 'state' variable
62   private final int S_text = 0;          // reading random text
63   private final int S_backslashed = 1;   // read a backslash, waiting for next
64   private final int S_token = 2;         // reading a multicharacter token
65   private final int S_parameter = 3;     // reading a token's parameter
66 
67   private final int S_aftertick = 4;     // after reading \'
68   private final int S_aftertickc = 5;    // after reading \'x
69 
70   private final int S_inblob = 6;        // in a \bin blob
71 
72   /** Implemented by subclasses to interpret a parameter-less RTF keyword.
73    *  The keyword is passed without the leading '/' or any delimiting
74    *  whitespace. */
handleKeyword(String keyword)75   public abstract boolean handleKeyword(String keyword);
76   /** Implemented by subclasses to interpret a keyword with a parameter.
77    *  @param keyword   The keyword, as with <code>handleKeyword(String)</code>.
78    *  @param parameter The parameter following the keyword. */
handleKeyword(String keyword, int parameter)79   public abstract boolean handleKeyword(String keyword, int parameter);
80   /** Implemented by subclasses to interpret text from the RTF stream. */
handleText(String text)81   public abstract void handleText(String text);
handleText(char ch)82   public void handleText(char ch)
83   { handleText(String.valueOf(ch)); }
84   /** Implemented by subclasses to handle the contents of the \bin keyword. */
handleBinaryBlob(byte[] data)85   public abstract void handleBinaryBlob(byte[] data);
86   /** Implemented by subclasses to react to an increase
87    *  in the nesting level. */
begingroup()88   public abstract void begingroup();
89   /** Implemented by subclasses to react to the end of a group. */
endgroup()90   public abstract void endgroup();
91 
92   // table of non-text characters in rtf
93   static final boolean[] rtfSpecialsTable;
94   static {
95     rtfSpecialsTable = noSpecialsTable.clone();
96     rtfSpecialsTable['\n'] = true;
97     rtfSpecialsTable['\r'] = true;
98     rtfSpecialsTable['{'] = true;
99     rtfSpecialsTable['}'] = true;
100     rtfSpecialsTable['\\'] = true;
101   }
102 
RTFParser()103   public RTFParser()
104   {
105     currentCharacters = new StringBuffer();
106     state = S_text;
107     pendingKeyword = null;
108     level = 0;
109     //warnings = System.out;
110 
111     specialsTable = rtfSpecialsTable;
112   }
113 
114   // TODO: Handle wrapup at end of file correctly.
115 
writeSpecial(int b)116   public void writeSpecial(int b)
117     throws IOException
118   {
119     write((char)b);
120   }
121 
warning(String s)122     protected void warning(String s) {
123         if (warnings != null) {
124             warnings.println(s);
125         }
126     }
127 
write(String s)128   public void write(String s)
129     throws IOException
130   {
131     if (state != S_text) {
132       int index = 0;
133       int length = s.length();
134       while(index < length && state != S_text) {
135         write(s.charAt(index));
136         index ++;
137       }
138 
139       if(index >= length)
140         return;
141 
142       s = s.substring(index);
143     }
144 
145     if (currentCharacters.length() > 0)
146       currentCharacters.append(s);
147     else
148       handleText(s);
149   }
150 
151   @SuppressWarnings("fallthrough")
write(char ch)152   public void write(char ch)
153     throws IOException
154   {
155     boolean ok;
156 
157     switch (state)
158     {
159       case S_text:
160         if (ch == '\n' || ch == '\r') {
161           break;  // unadorned newlines are ignored
162         } else if (ch == '{') {
163           if (currentCharacters.length() > 0) {
164             handleText(currentCharacters.toString());
165             currentCharacters = new StringBuffer();
166           }
167           level ++;
168           begingroup();
169         } else if(ch == '}') {
170           if (currentCharacters.length() > 0) {
171             handleText(currentCharacters.toString());
172             currentCharacters = new StringBuffer();
173           }
174           if (level == 0)
175             throw new IOException("Too many close-groups in RTF text");
176           endgroup();
177           level --;
178         } else if(ch == '\\') {
179           if (currentCharacters.length() > 0) {
180             handleText(currentCharacters.toString());
181             currentCharacters = new StringBuffer();
182           }
183           state = S_backslashed;
184         } else {
185           currentCharacters.append(ch);
186         }
187         break;
188       case S_backslashed:
189         if (ch == '\'') {
190           state = S_aftertick;
191           break;
192         }
193         if (!Character.isLetter(ch)) {
194           char[] newstring = new char[1];
195           newstring[0] = ch;
196           if (!handleKeyword(new String(newstring))) {
197             warning("Unknown keyword: " + newstring + " (" + (byte)ch + ")");
198           }
199           state = S_text;
200           pendingKeyword = null;
201           /* currentCharacters is already an empty stringBuffer */
202           break;
203         }
204 
205         state = S_token;
206         /* FALL THROUGH */
207       case S_token:
208         if (Character.isLetter(ch)) {
209           currentCharacters.append(ch);
210         } else {
211           pendingKeyword = currentCharacters.toString();
212           currentCharacters = new StringBuffer();
213 
214           // Parameter following?
215           if (Character.isDigit(ch) || (ch == '-')) {
216             state = S_parameter;
217             currentCharacters.append(ch);
218           } else {
219             ok = handleKeyword(pendingKeyword);
220             if (!ok)
221               warning("Unknown keyword: " + pendingKeyword);
222             pendingKeyword = null;
223             state = S_text;
224 
225             // Non-space delimiters get included in the text
226             if (!Character.isWhitespace(ch))
227               write(ch);
228           }
229         }
230         break;
231       case S_parameter:
232         if (Character.isDigit(ch)) {
233           currentCharacters.append(ch);
234         } else {
235           /* TODO: Test correct behavior of \bin keyword */
236 
237           if (pendingKeyword.equals("bin")) {  /* magic layer-breaking kwd */
238             long parameter = 0L;
239             try {
240               parameter = Long.parseLong(currentCharacters.toString());
241             } catch (NumberFormatException e) {
242               warning("Illegal number format " + currentCharacters.toString()
243                               + " in \bin tag");
244               pendingKeyword = null;
245               currentCharacters = new StringBuffer();
246               state = S_text;
247               // Delimiters here are interpreted as text too
248               if (!Character.isWhitespace(ch))
249                 write(ch);
250               break;
251             }
252             pendingKeyword = null;
253             state = S_inblob;
254             int maxBytes = 4 * 1024 * 1024;
255             binaryBytesLeft = parameter;
256 
257             if (binaryBytesLeft > maxBytes) {
258               binaryBuf = new ByteArrayOutputStream(maxBytes);
259             } else if (binaryBytesLeft < 0) {
260               binaryBytesLeft = 0;
261               binaryBuf = new ByteArrayOutputStream((int)binaryBytesLeft);
262             } else {
263               binaryBuf = new ByteArrayOutputStream((int) binaryBytesLeft);
264             }
265             savedSpecials = specialsTable;
266             specialsTable = allSpecialsTable;
267             break;
268           }
269 
270           int parameter = 0;
271           try {
272             parameter = Integer.parseInt(currentCharacters.toString());
273             ok = handleKeyword(pendingKeyword, parameter);
274             if (!ok) {
275                 warning("Unknown keyword: " + pendingKeyword +
276                         " (param " + currentCharacters + ")");
277             }
278           } catch (NumberFormatException e) {
279             warning("Illegal number format " + currentCharacters.toString()
280                     + " in " + pendingKeyword + " tag");
281           }
282           pendingKeyword = null;
283           currentCharacters = new StringBuffer();
284           state = S_text;
285 
286           // Delimiters here are interpreted as text too
287           if (!Character.isWhitespace(ch))
288             write(ch);
289         }
290         break;
291       case S_aftertick:
292         if (Character.digit(ch, 16) == -1)
293           state = S_text;
294         else {
295           pendingCharacter = Character.digit(ch, 16);
296           state = S_aftertickc;
297         }
298         break;
299       case S_aftertickc:
300         state = S_text;
301         if (Character.digit(ch, 16) != -1)
302         {
303           pendingCharacter = pendingCharacter * 16 + Character.digit(ch, 16);
304           ch = translationTable[pendingCharacter];
305           if (ch != 0)
306               handleText(ch);
307         }
308         break;
309       case S_inblob:
310         if (binaryBytesLeft > 0) {
311           binaryBuf.write(ch);
312           binaryBytesLeft--;
313         }
314         if (binaryBytesLeft == 0) {
315           state = S_text;
316           specialsTable = savedSpecials;
317           savedSpecials = null;
318           handleBinaryBlob(binaryBuf.toByteArray());
319           binaryBuf = null;
320         }
321       }
322   }
323 
324   /** Flushes any buffered but not yet written characters.
325    *  Subclasses which override this method should call this
326    *  method <em>before</em> flushing
327    *  any of their own buffers. */
flush()328   public void flush()
329     throws IOException
330   {
331     super.flush();
332 
333     if (state == S_text && currentCharacters.length() > 0) {
334       handleText(currentCharacters.toString());
335       currentCharacters = new StringBuffer();
336     }
337   }
338 
339   /** Closes the parser. Currently, this simply does a <code>flush()</code>,
340    *  followed by some minimal consistency checks. */
close()341   public void close()
342     throws IOException
343   {
344     flush();
345 
346     if (state != S_text || level > 0) {
347       warning("Truncated RTF file.");
348 
349       /* TODO: any sane way to handle termination in a non-S_text state? */
350       /* probably not */
351 
352       /* this will cause subclasses to behave more reasonably
353          some of the time */
354       while (level > 0) {
355           endgroup();
356           level --;
357       }
358     }
359 
360     super.close();
361   }
362 
363 }
364