1 /*
2  * $Id$
3  *
4  * Copyright 2001, 2002 by Paulo Soares.
5  *
6  * The contents of this file are subject to the Mozilla Public License Version 1.1
7  * (the "License"); you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at http://www.mozilla.org/MPL/
9  *
10  * Software distributed under the License is distributed on an "AS IS" basis,
11  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12  * for the specific language governing rights and limitations under the License.
13  *
14  * The Original Code is 'iText, a free JAVA-PDF library'.
15  *
16  * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
17  * the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie.
18  * All Rights Reserved.
19  * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
20  * are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved.
21  *
22  * Contributor(s): all the names of the contributors are added in the source code
23  * where applicable.
24  *
25  * Alternatively, the contents of this file may be used under the terms of the
26  * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
27  * provisions of LGPL are applicable instead of those above.  If you wish to
28  * allow use of your version of this file only under the terms of the LGPL
29  * License and not to allow others to use your version of this file under
30  * the MPL, indicate your decision by deleting the provisions above and
31  * replace them with the notice and other provisions required by the LGPL.
32  * If you do not delete the provisions above, a recipient may use your version
33  * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
34  *
35  * This library is free software; you can redistribute it and/or modify it
36  * under the terms of the MPL as stated above or under the terms of the GNU
37  * Library General Public License as published by the Free Software Foundation;
38  * either version 2 of the License, or any later version.
39  *
40  * This library is distributed in the hope that it will be useful, but WITHOUT
41  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
42  * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
43  * details.
44  *
45  * If you didn't download this code from the following link, you should check if
46  * you aren't using an obsolete version:
47  * http://www.lowagie.com/iText/
48  */
49 
50 package com.lowagie.text.pdf;
51 
52 import java.io.IOException;
53 import com.lowagie.text.exceptions.InvalidPdfException;
54 import com.lowagie.text.error_messages.MessageLocalization;
55 /**
56  *
57  * @author  Paulo Soares (psoares@consiste.pt)
58  */
59 public class PRTokeniser {
60 
61     public static final int TK_NUMBER = 1;
62     public static final int TK_STRING = 2;
63     public static final int TK_NAME = 3;
64     public static final int TK_COMMENT = 4;
65     public static final int TK_START_ARRAY = 5;
66     public static final int TK_END_ARRAY = 6;
67     public static final int TK_START_DIC = 7;
68     public static final int TK_END_DIC = 8;
69     public static final int TK_REF = 9;
70     public static final int TK_OTHER = 10;
71     public static final int TK_ENDOFFILE = 11;
72     public static final boolean delims[] = {
73         true,  true,  false, false, false, false, false, false, false, false,
74         true,  true,  false, true,  true,  false, false, false, false, false,
75         false, false, false, false, false, false, false, false, false, false,
76         false, false, false, true,  false, false, false, false, true,  false,
77         false, true,  true,  false, false, false, false, false, true,  false,
78         false, false, false, false, false, false, false, false, false, false,
79         false, true,  false, true,  false, false, false, false, false, false,
80         false, false, false, false, false, false, false, false, false, false,
81         false, false, false, false, false, false, false, false, false, false,
82         false, false, true,  false, true,  false, false, false, false, false,
83         false, false, false, false, false, false, false, false, false, false,
84         false, false, false, false, false, false, false, false, false, false,
85         false, false, false, false, false, false, false, false, false, false,
86         false, false, false, false, false, false, false, false, false, false,
87         false, false, false, false, false, false, false, false, false, false,
88         false, false, false, false, false, false, false, false, false, false,
89         false, false, false, false, false, false, false, false, false, false,
90         false, false, false, false, false, false, false, false, false, false,
91         false, false, false, false, false, false, false, false, false, false,
92         false, false, false, false, false, false, false, false, false, false,
93         false, false, false, false, false, false, false, false, false, false,
94         false, false, false, false, false, false, false, false, false, false,
95         false, false, false, false, false, false, false, false, false, false,
96         false, false, false, false, false, false, false, false, false, false,
97         false, false, false, false, false, false, false, false, false, false,
98         false, false, false, false, false, false, false};
99 
100     static final String EMPTY = "";
101 
102 
103     protected RandomAccessFileOrArray file;
104     protected int type;
105     protected String stringValue;
106     protected int reference;
107     protected int generation;
108     protected boolean hexString;
109 
PRTokeniser(String filename)110     public PRTokeniser(String filename) throws IOException {
111         file = new RandomAccessFileOrArray(filename);
112     }
113 
PRTokeniser(byte pdfIn[])114     public PRTokeniser(byte pdfIn[]) {
115         file = new RandomAccessFileOrArray(pdfIn);
116     }
117 
PRTokeniser(RandomAccessFileOrArray file)118     public PRTokeniser(RandomAccessFileOrArray file) {
119         this.file = file;
120     }
121 
seek(int pos)122     public void seek(int pos) throws IOException {
123         file.seek(pos);
124     }
125 
getFilePointer()126     public int getFilePointer() throws IOException {
127         return file.getFilePointer();
128     }
129 
close()130     public void close() throws IOException {
131         file.close();
132     }
133 
length()134     public int length() throws IOException {
135         return file.length();
136     }
137 
read()138     public int read() throws IOException {
139         return file.read();
140     }
141 
getSafeFile()142     public RandomAccessFileOrArray getSafeFile() {
143         return new RandomAccessFileOrArray(file);
144     }
145 
getFile()146     public RandomAccessFileOrArray getFile() {
147         return file;
148     }
149 
readString(int size)150     public String readString(int size) throws IOException {
151         StringBuffer buf = new StringBuffer();
152         int ch;
153         while ((size--) > 0) {
154             ch = file.read();
155             if (ch == -1)
156                 break;
157             buf.append((char)ch);
158         }
159         return buf.toString();
160     }
161 
isWhitespace(int ch)162     public static final boolean isWhitespace(int ch) {
163         return (ch == 0 || ch == 9 || ch == 10 || ch == 12 || ch == 13 || ch == 32);
164     }
165 
isDelimiter(int ch)166     public static final boolean isDelimiter(int ch) {
167         return (ch == '(' || ch == ')' || ch == '<' || ch == '>' || ch == '[' || ch == ']' || ch == '/' || ch == '%');
168     }
169 
isDelimiterWhitespace(int ch)170     public static final boolean isDelimiterWhitespace(int ch) {
171         return delims[ch + 1];
172     }
173 
getTokenType()174     public int getTokenType() {
175         return type;
176     }
177 
getStringValue()178     public String getStringValue() {
179         return stringValue;
180     }
181 
getReference()182     public int getReference() {
183         return reference;
184     }
185 
getGeneration()186     public int getGeneration() {
187         return generation;
188     }
189 
backOnePosition(int ch)190     public void backOnePosition(int ch) {
191         if (ch != -1)
192             file.pushBack((byte)ch);
193     }
194 
throwError(String error)195     public void throwError(String error) throws IOException {
196         throw new InvalidPdfException(MessageLocalization.getComposedMessage("1.at.file.pointer.2", error, String.valueOf(file.getFilePointer())));
197     }
198 
checkPdfHeader()199     public char checkPdfHeader() throws IOException {
200         file.setStartOffset(0);
201         String str = readString(1024);
202         int idx = str.indexOf("%PDF-");
203         if (idx < 0)
204             throw new InvalidPdfException(MessageLocalization.getComposedMessage("pdf.header.not.found"));
205         file.setStartOffset(idx);
206         return str.charAt(idx + 7);
207     }
208 
checkFdfHeader()209     public void checkFdfHeader() throws IOException {
210         file.setStartOffset(0);
211         String str = readString(1024);
212         int idx = str.indexOf("%FDF-1.2");
213         if (idx < 0)
214             throw new InvalidPdfException(MessageLocalization.getComposedMessage("fdf.header.not.found"));
215         file.setStartOffset(idx);
216     }
217 
getStartxref()218     public int getStartxref() throws IOException {
219         int size = Math.min(1024, file.length());
220         int pos = file.length() - size;
221         file.seek(pos);
222         String str = readString(1024);
223         int idx = str.lastIndexOf("startxref");
224         if (idx < 0)
225             throw new InvalidPdfException(MessageLocalization.getComposedMessage("pdf.startxref.not.found"));
226         return pos + idx;
227     }
228 
getHex(int v)229     public static int getHex(int v) {
230         if (v >= '0' && v <= '9')
231             return v - '0';
232         if (v >= 'A' && v <= 'F')
233             return v - 'A' + 10;
234         if (v >= 'a' && v <= 'f')
235             return v - 'a' + 10;
236         return -1;
237     }
238 
nextValidToken()239     public void nextValidToken() throws IOException {
240         int level = 0;
241         String n1 = null;
242         String n2 = null;
243         int ptr = 0;
244         while (nextToken()) {
245             if (type == TK_COMMENT)
246                 continue;
247             switch (level) {
248                 case 0:
249                 {
250                     if (type != TK_NUMBER)
251                         return;
252                     ptr = file.getFilePointer();
253                     n1 = stringValue;
254                     ++level;
255                     break;
256                 }
257                 case 1:
258                 {
259                     if (type != TK_NUMBER) {
260                         file.seek(ptr);
261                         type = TK_NUMBER;
262                         stringValue = n1;
263                         return;
264                     }
265                     n2 = stringValue;
266                     ++level;
267                     break;
268                 }
269                 default:
270                 {
271                     if (type != TK_OTHER || !stringValue.equals("R")) {
272                         file.seek(ptr);
273                         type = TK_NUMBER;
274                         stringValue = n1;
275                         return;
276                     }
277                     type = TK_REF;
278                     reference = Integer.parseInt(n1);
279                     generation = Integer.parseInt(n2);
280                     return;
281                 }
282             }
283         }
284         if (level > 0) {
285             file.seek(ptr);
286             type = TK_NUMBER;
287             stringValue = n1;
288             return;
289         }
290         // if we hit here, the file is either corrupt (stream ended unexpectedly),
291         // or the last token ended exactly at the end of a stream.  This last
292         // case can occur inside an Object Stream.
293     }
294 
nextToken()295     public boolean nextToken() throws IOException {
296         int ch = 0;
297         do {
298             ch = file.read();
299         } while (ch != -1 && isWhitespace(ch));
300         if (ch == -1){
301             type = TK_ENDOFFILE;
302             return false;
303         }
304 
305         // Note:  We have to initialize stringValue here, after we've looked for the end of the stream,
306         // to ensure that we don't lose the value of a token that might end exactly at the end
307         // of the stream
308         StringBuffer outBuf = null;
309         stringValue = EMPTY;
310 
311         switch (ch) {
312             case '[':
313                 type = TK_START_ARRAY;
314                 break;
315             case ']':
316                 type = TK_END_ARRAY;
317                 break;
318             case '/':
319             {
320                 outBuf = new StringBuffer();
321                 type = TK_NAME;
322                 while (true) {
323                     ch = file.read();
324                     if (delims[ch + 1])
325                         break;
326                     if (ch == '#') {
327                         ch = (getHex(file.read()) << 4) + getHex(file.read());
328                     }
329                     outBuf.append((char)ch);
330                 }
331                 backOnePosition(ch);
332                 break;
333             }
334             case '>':
335                 ch = file.read();
336                 if (ch != '>')
337                     throwError(MessageLocalization.getComposedMessage("greaterthan.not.expected"));
338                 type = TK_END_DIC;
339                 break;
340             case '<':
341             {
342                 int v1 = file.read();
343                 if (v1 == '<') {
344                     type = TK_START_DIC;
345                     break;
346                 }
347                 outBuf = new StringBuffer();
348                 type = TK_STRING;
349                 hexString = true;
350                 int v2 = 0;
351                 while (true) {
352                     while (isWhitespace(v1))
353                         v1 = file.read();
354                     if (v1 == '>')
355                         break;
356                     v1 = getHex(v1);
357                     if (v1 < 0)
358                         break;
359                     v2 = file.read();
360                     while (isWhitespace(v2))
361                         v2 = file.read();
362                     if (v2 == '>') {
363                         ch = v1 << 4;
364                         outBuf.append((char)ch);
365                         break;
366                     }
367                     v2 = getHex(v2);
368                     if (v2 < 0)
369                         break;
370                     ch = (v1 << 4) + v2;
371                     outBuf.append((char)ch);
372                     v1 = file.read();
373                 }
374                 if (v1 < 0 || v2 < 0)
375                     throwError(MessageLocalization.getComposedMessage("error.reading.string"));
376                 break;
377             }
378             case '%':
379                 type = TK_COMMENT;
380                 do {
381                     ch = file.read();
382                 } while (ch != -1 && ch != '\r' && ch != '\n');
383                 break;
384             case '(':
385             {
386                 outBuf = new StringBuffer();
387                 type = TK_STRING;
388                 hexString = false;
389                 int nesting = 0;
390                 while (true) {
391                     ch = file.read();
392                     if (ch == -1)
393                         break;
394                     if (ch == '(') {
395                         ++nesting;
396                     }
397                     else if (ch == ')') {
398                         --nesting;
399                     }
400                     else if (ch == '\\') {
401                         boolean lineBreak = false;
402                         ch = file.read();
403                         switch (ch) {
404                             case 'n':
405                                 ch = '\n';
406                                 break;
407                             case 'r':
408                                 ch = '\r';
409                                 break;
410                             case 't':
411                                 ch = '\t';
412                                 break;
413                             case 'b':
414                                 ch = '\b';
415                                 break;
416                             case 'f':
417                                 ch = '\f';
418                                 break;
419                             case '(':
420                             case ')':
421                             case '\\':
422                                 break;
423                             case '\r':
424                                 lineBreak = true;
425                                 ch = file.read();
426                                 if (ch != '\n')
427                                     backOnePosition(ch);
428                                 break;
429                             case '\n':
430                                 lineBreak = true;
431                                 break;
432                             default:
433                             {
434                                 if (ch < '0' || ch > '7') {
435                                     break;
436                                 }
437                                 int octal = ch - '0';
438                                 ch = file.read();
439                                 if (ch < '0' || ch > '7') {
440                                     backOnePosition(ch);
441                                     ch = octal;
442                                     break;
443                                 }
444                                 octal = (octal << 3) + ch - '0';
445                                 ch = file.read();
446                                 if (ch < '0' || ch > '7') {
447                                     backOnePosition(ch);
448                                     ch = octal;
449                                     break;
450                                 }
451                                 octal = (octal << 3) + ch - '0';
452                                 ch = octal & 0xff;
453                                 break;
454                             }
455                         }
456                         if (lineBreak)
457                             continue;
458                         if (ch < 0)
459                             break;
460                     }
461                     else if (ch == '\r') {
462                         ch = file.read();
463                         if (ch < 0)
464                             break;
465                         if (ch != '\n') {
466                             backOnePosition(ch);
467                             ch = '\n';
468                         }
469                     }
470                     if (nesting == -1)
471                         break;
472                     outBuf.append((char)ch);
473                 }
474                 if (ch == -1)
475                     throwError(MessageLocalization.getComposedMessage("error.reading.string"));
476                 break;
477             }
478             default:
479             {
480                 outBuf = new StringBuffer();
481                 if (ch == '-' || ch == '+' || ch == '.' || (ch >= '0' && ch <= '9')) {
482                     type = TK_NUMBER;
483                     do {
484                         outBuf.append((char)ch);
485                         ch = file.read();
486                     } while (ch != -1 && ((ch >= '0' && ch <= '9') || ch == '.'));
487                 }
488                 else {
489                     type = TK_OTHER;
490                     do {
491                         outBuf.append((char)ch);
492                         ch = file.read();
493                     } while (!delims[ch + 1]);
494                 }
495                 backOnePosition(ch);
496                 break;
497             }
498         }
499         if (outBuf != null)
500             stringValue = outBuf.toString();
501         return true;
502     }
503 
intValue()504     public int intValue() {
505         return Integer.parseInt(stringValue);
506     }
507 
readLineSegment(byte input[])508     public boolean readLineSegment(byte input[]) throws IOException {
509         int c = -1;
510         boolean eol = false;
511         int ptr = 0;
512         int len = input.length;
513 	// ssteward, pdftk-1.10, 040922:
514 	// skip initial whitespace; added this because PdfReader.rebuildXref()
515 	// assumes that line provided by readLineSegment does not have init. whitespace;
516 	if ( ptr < len ) {
517 	    while ( isWhitespace( (c = read()) ) );
518 	}
519 	while ( !eol && ptr < len ) {
520 	    switch (c) {
521                 case -1:
522                 case '\n':
523                     eol = true;
524                     break;
525                 case '\r':
526                     eol = true;
527                     int cur = getFilePointer();
528                     if ((read()) != '\n') {
529                         seek(cur);
530                     }
531                     break;
532                 default:
533                     input[ptr++] = (byte)c;
534                     break;
535             }
536 
537 	    // break loop? do it before we read() again
538 	    if( eol || len <= ptr ) {
539 		break;
540 	    }
541 	    else {
542 		c = read();
543 	    }
544         }
545         if (ptr >= len) {
546             eol = false;
547             while (!eol) {
548                 switch (c = read()) {
549                     case -1:
550                     case '\n':
551                         eol = true;
552                         break;
553                     case '\r':
554                         eol = true;
555                         int cur = getFilePointer();
556                         if ((read()) != '\n') {
557                             seek(cur);
558                         }
559                         break;
560                 }
561             }
562         }
563 
564         if ((c == -1) && (ptr == 0)) {
565             return false;
566         }
567         if (ptr + 2 <= len) {
568             input[ptr++] = (byte)' ';
569             input[ptr] = (byte)'X';
570         }
571         return true;
572     }
573 
checkObjectStart(byte line[])574     public static int[] checkObjectStart(byte line[]) {
575         try {
576             PRTokeniser tk = new PRTokeniser(line);
577             int num = 0;
578             int gen = 0;
579             if (!tk.nextToken() || tk.getTokenType() != TK_NUMBER)
580                 return null;
581             num = tk.intValue();
582             if (!tk.nextToken() || tk.getTokenType() != TK_NUMBER)
583                 return null;
584             gen = tk.intValue();
585             if (!tk.nextToken())
586                 return null;
587             if (!tk.getStringValue().equals("obj"))
588                 return null;
589             return new int[]{num, gen};
590         }
591         catch (Exception ioe) {
592             // empty on purpose
593         }
594         return null;
595     }
596 
isHexString()597     public boolean isHexString() {
598         return this.hexString;
599     }
600 
601 }
602