1 /* 2 * $Id$ 3 * 4 * Copyright 2001, 2002 by Paulo Soares. 5 * 6 * The contents of this file are subject to the Mozilla Public License Version 1.1 7 * (the "License"); you may not use this file except in compliance with the License. 8 * You may obtain a copy of the License at http://www.mozilla.org/MPL/ 9 * 10 * Software distributed under the License is distributed on an "AS IS" basis, 11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 * for the specific language governing rights and limitations under the License. 13 * 14 * The Original Code is 'iText, a free JAVA-PDF library'. 15 * 16 * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by 17 * the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie. 18 * All Rights Reserved. 19 * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer 20 * are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved. 21 * 22 * Contributor(s): all the names of the contributors are added in the source code 23 * where applicable. 24 * 25 * Alternatively, the contents of this file may be used under the terms of the 26 * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the 27 * provisions of LGPL are applicable instead of those above. If you wish to 28 * allow use of your version of this file only under the terms of the LGPL 29 * License and not to allow others to use your version of this file under 30 * the MPL, indicate your decision by deleting the provisions above and 31 * replace them with the notice and other provisions required by the LGPL. 32 * If you do not delete the provisions above, a recipient may use your version 33 * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE. 34 * 35 * This library is free software; you can redistribute it and/or modify it 36 * under the terms of the MPL as stated above or under the terms of the GNU 37 * Library General Public License as published by the Free Software Foundation; 38 * either version 2 of the License, or any later version. 39 * 40 * This library is distributed in the hope that it will be useful, but WITHOUT 41 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 42 * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more 43 * details. 44 * 45 * If you didn't download this code from the following link, you should check if 46 * you aren't using an obsolete version: 47 * http://www.lowagie.com/iText/ 48 */ 49 50 package com.lowagie.text.pdf; 51 52 import java.io.IOException; 53 import com.lowagie.text.exceptions.InvalidPdfException; 54 import com.lowagie.text.error_messages.MessageLocalization; 55 /** 56 * 57 * @author Paulo Soares (psoares@consiste.pt) 58 */ 59 public class PRTokeniser { 60 61 public static final int TK_NUMBER = 1; 62 public static final int TK_STRING = 2; 63 public static final int TK_NAME = 3; 64 public static final int TK_COMMENT = 4; 65 public static final int TK_START_ARRAY = 5; 66 public static final int TK_END_ARRAY = 6; 67 public static final int TK_START_DIC = 7; 68 public static final int TK_END_DIC = 8; 69 public static final int TK_REF = 9; 70 public static final int TK_OTHER = 10; 71 public static final int TK_ENDOFFILE = 11; 72 public static final boolean delims[] = { 73 true, true, false, false, false, false, false, false, false, false, 74 true, true, false, true, true, false, false, false, false, false, 75 false, false, false, false, false, false, false, false, false, false, 76 false, false, false, true, false, false, false, false, true, false, 77 false, true, true, false, false, false, false, false, true, false, 78 false, false, false, false, false, false, false, false, false, false, 79 false, true, false, true, false, false, false, false, false, false, 80 false, false, false, false, false, false, false, false, false, false, 81 false, false, false, false, false, false, false, false, false, false, 82 false, false, true, false, true, false, false, false, false, false, 83 false, false, false, false, false, false, false, false, false, false, 84 false, false, false, false, false, false, false, false, false, false, 85 false, false, false, false, false, false, false, false, false, false, 86 false, false, false, false, false, false, false, false, false, false, 87 false, false, false, false, false, false, false, false, false, false, 88 false, false, false, false, false, false, false, false, false, false, 89 false, false, false, false, false, false, false, false, false, false, 90 false, false, false, false, false, false, false, false, false, false, 91 false, false, false, false, false, false, false, false, false, false, 92 false, false, false, false, false, false, false, false, false, false, 93 false, false, false, false, false, false, false, false, false, false, 94 false, false, false, false, false, false, false, false, false, false, 95 false, false, false, false, false, false, false, false, false, false, 96 false, false, false, false, false, false, false, false, false, false, 97 false, false, false, false, false, false, false, false, false, false, 98 false, false, false, false, false, false, false}; 99 100 static final String EMPTY = ""; 101 102 103 protected RandomAccessFileOrArray file; 104 protected int type; 105 protected String stringValue; 106 protected int reference; 107 protected int generation; 108 protected boolean hexString; 109 PRTokeniser(String filename)110 public PRTokeniser(String filename) throws IOException { 111 file = new RandomAccessFileOrArray(filename); 112 } 113 PRTokeniser(byte pdfIn[])114 public PRTokeniser(byte pdfIn[]) { 115 file = new RandomAccessFileOrArray(pdfIn); 116 } 117 PRTokeniser(RandomAccessFileOrArray file)118 public PRTokeniser(RandomAccessFileOrArray file) { 119 this.file = file; 120 } 121 seek(int pos)122 public void seek(int pos) throws IOException { 123 file.seek(pos); 124 } 125 getFilePointer()126 public int getFilePointer() throws IOException { 127 return file.getFilePointer(); 128 } 129 close()130 public void close() throws IOException { 131 file.close(); 132 } 133 length()134 public int length() throws IOException { 135 return file.length(); 136 } 137 read()138 public int read() throws IOException { 139 return file.read(); 140 } 141 getSafeFile()142 public RandomAccessFileOrArray getSafeFile() { 143 return new RandomAccessFileOrArray(file); 144 } 145 getFile()146 public RandomAccessFileOrArray getFile() { 147 return file; 148 } 149 readString(int size)150 public String readString(int size) throws IOException { 151 StringBuffer buf = new StringBuffer(); 152 int ch; 153 while ((size--) > 0) { 154 ch = file.read(); 155 if (ch == -1) 156 break; 157 buf.append((char)ch); 158 } 159 return buf.toString(); 160 } 161 isWhitespace(int ch)162 public static final boolean isWhitespace(int ch) { 163 return (ch == 0 || ch == 9 || ch == 10 || ch == 12 || ch == 13 || ch == 32); 164 } 165 isDelimiter(int ch)166 public static final boolean isDelimiter(int ch) { 167 return (ch == '(' || ch == ')' || ch == '<' || ch == '>' || ch == '[' || ch == ']' || ch == '/' || ch == '%'); 168 } 169 isDelimiterWhitespace(int ch)170 public static final boolean isDelimiterWhitespace(int ch) { 171 return delims[ch + 1]; 172 } 173 getTokenType()174 public int getTokenType() { 175 return type; 176 } 177 getStringValue()178 public String getStringValue() { 179 return stringValue; 180 } 181 getReference()182 public int getReference() { 183 return reference; 184 } 185 getGeneration()186 public int getGeneration() { 187 return generation; 188 } 189 backOnePosition(int ch)190 public void backOnePosition(int ch) { 191 if (ch != -1) 192 file.pushBack((byte)ch); 193 } 194 throwError(String error)195 public void throwError(String error) throws IOException { 196 throw new InvalidPdfException(MessageLocalization.getComposedMessage("1.at.file.pointer.2", error, String.valueOf(file.getFilePointer()))); 197 } 198 checkPdfHeader()199 public char checkPdfHeader() throws IOException { 200 file.setStartOffset(0); 201 String str = readString(1024); 202 int idx = str.indexOf("%PDF-"); 203 if (idx < 0) 204 throw new InvalidPdfException(MessageLocalization.getComposedMessage("pdf.header.not.found")); 205 file.setStartOffset(idx); 206 return str.charAt(idx + 7); 207 } 208 checkFdfHeader()209 public void checkFdfHeader() throws IOException { 210 file.setStartOffset(0); 211 String str = readString(1024); 212 int idx = str.indexOf("%FDF-1.2"); 213 if (idx < 0) 214 throw new InvalidPdfException(MessageLocalization.getComposedMessage("fdf.header.not.found")); 215 file.setStartOffset(idx); 216 } 217 getStartxref()218 public int getStartxref() throws IOException { 219 int size = Math.min(1024, file.length()); 220 int pos = file.length() - size; 221 file.seek(pos); 222 String str = readString(1024); 223 int idx = str.lastIndexOf("startxref"); 224 if (idx < 0) 225 throw new InvalidPdfException(MessageLocalization.getComposedMessage("pdf.startxref.not.found")); 226 return pos + idx; 227 } 228 getHex(int v)229 public static int getHex(int v) { 230 if (v >= '0' && v <= '9') 231 return v - '0'; 232 if (v >= 'A' && v <= 'F') 233 return v - 'A' + 10; 234 if (v >= 'a' && v <= 'f') 235 return v - 'a' + 10; 236 return -1; 237 } 238 nextValidToken()239 public void nextValidToken() throws IOException { 240 int level = 0; 241 String n1 = null; 242 String n2 = null; 243 int ptr = 0; 244 while (nextToken()) { 245 if (type == TK_COMMENT) 246 continue; 247 switch (level) { 248 case 0: 249 { 250 if (type != TK_NUMBER) 251 return; 252 ptr = file.getFilePointer(); 253 n1 = stringValue; 254 ++level; 255 break; 256 } 257 case 1: 258 { 259 if (type != TK_NUMBER) { 260 file.seek(ptr); 261 type = TK_NUMBER; 262 stringValue = n1; 263 return; 264 } 265 n2 = stringValue; 266 ++level; 267 break; 268 } 269 default: 270 { 271 if (type != TK_OTHER || !stringValue.equals("R")) { 272 file.seek(ptr); 273 type = TK_NUMBER; 274 stringValue = n1; 275 return; 276 } 277 type = TK_REF; 278 reference = Integer.parseInt(n1); 279 generation = Integer.parseInt(n2); 280 return; 281 } 282 } 283 } 284 if (level > 0) { 285 file.seek(ptr); 286 type = TK_NUMBER; 287 stringValue = n1; 288 return; 289 } 290 // if we hit here, the file is either corrupt (stream ended unexpectedly), 291 // or the last token ended exactly at the end of a stream. This last 292 // case can occur inside an Object Stream. 293 } 294 nextToken()295 public boolean nextToken() throws IOException { 296 int ch = 0; 297 do { 298 ch = file.read(); 299 } while (ch != -1 && isWhitespace(ch)); 300 if (ch == -1){ 301 type = TK_ENDOFFILE; 302 return false; 303 } 304 305 // Note: We have to initialize stringValue here, after we've looked for the end of the stream, 306 // to ensure that we don't lose the value of a token that might end exactly at the end 307 // of the stream 308 StringBuffer outBuf = null; 309 stringValue = EMPTY; 310 311 switch (ch) { 312 case '[': 313 type = TK_START_ARRAY; 314 break; 315 case ']': 316 type = TK_END_ARRAY; 317 break; 318 case '/': 319 { 320 outBuf = new StringBuffer(); 321 type = TK_NAME; 322 while (true) { 323 ch = file.read(); 324 if (delims[ch + 1]) 325 break; 326 if (ch == '#') { 327 ch = (getHex(file.read()) << 4) + getHex(file.read()); 328 } 329 outBuf.append((char)ch); 330 } 331 backOnePosition(ch); 332 break; 333 } 334 case '>': 335 ch = file.read(); 336 if (ch != '>') 337 throwError(MessageLocalization.getComposedMessage("greaterthan.not.expected")); 338 type = TK_END_DIC; 339 break; 340 case '<': 341 { 342 int v1 = file.read(); 343 if (v1 == '<') { 344 type = TK_START_DIC; 345 break; 346 } 347 outBuf = new StringBuffer(); 348 type = TK_STRING; 349 hexString = true; 350 int v2 = 0; 351 while (true) { 352 while (isWhitespace(v1)) 353 v1 = file.read(); 354 if (v1 == '>') 355 break; 356 v1 = getHex(v1); 357 if (v1 < 0) 358 break; 359 v2 = file.read(); 360 while (isWhitespace(v2)) 361 v2 = file.read(); 362 if (v2 == '>') { 363 ch = v1 << 4; 364 outBuf.append((char)ch); 365 break; 366 } 367 v2 = getHex(v2); 368 if (v2 < 0) 369 break; 370 ch = (v1 << 4) + v2; 371 outBuf.append((char)ch); 372 v1 = file.read(); 373 } 374 if (v1 < 0 || v2 < 0) 375 throwError(MessageLocalization.getComposedMessage("error.reading.string")); 376 break; 377 } 378 case '%': 379 type = TK_COMMENT; 380 do { 381 ch = file.read(); 382 } while (ch != -1 && ch != '\r' && ch != '\n'); 383 break; 384 case '(': 385 { 386 outBuf = new StringBuffer(); 387 type = TK_STRING; 388 hexString = false; 389 int nesting = 0; 390 while (true) { 391 ch = file.read(); 392 if (ch == -1) 393 break; 394 if (ch == '(') { 395 ++nesting; 396 } 397 else if (ch == ')') { 398 --nesting; 399 } 400 else if (ch == '\\') { 401 boolean lineBreak = false; 402 ch = file.read(); 403 switch (ch) { 404 case 'n': 405 ch = '\n'; 406 break; 407 case 'r': 408 ch = '\r'; 409 break; 410 case 't': 411 ch = '\t'; 412 break; 413 case 'b': 414 ch = '\b'; 415 break; 416 case 'f': 417 ch = '\f'; 418 break; 419 case '(': 420 case ')': 421 case '\\': 422 break; 423 case '\r': 424 lineBreak = true; 425 ch = file.read(); 426 if (ch != '\n') 427 backOnePosition(ch); 428 break; 429 case '\n': 430 lineBreak = true; 431 break; 432 default: 433 { 434 if (ch < '0' || ch > '7') { 435 break; 436 } 437 int octal = ch - '0'; 438 ch = file.read(); 439 if (ch < '0' || ch > '7') { 440 backOnePosition(ch); 441 ch = octal; 442 break; 443 } 444 octal = (octal << 3) + ch - '0'; 445 ch = file.read(); 446 if (ch < '0' || ch > '7') { 447 backOnePosition(ch); 448 ch = octal; 449 break; 450 } 451 octal = (octal << 3) + ch - '0'; 452 ch = octal & 0xff; 453 break; 454 } 455 } 456 if (lineBreak) 457 continue; 458 if (ch < 0) 459 break; 460 } 461 else if (ch == '\r') { 462 ch = file.read(); 463 if (ch < 0) 464 break; 465 if (ch != '\n') { 466 backOnePosition(ch); 467 ch = '\n'; 468 } 469 } 470 if (nesting == -1) 471 break; 472 outBuf.append((char)ch); 473 } 474 if (ch == -1) 475 throwError(MessageLocalization.getComposedMessage("error.reading.string")); 476 break; 477 } 478 default: 479 { 480 outBuf = new StringBuffer(); 481 if (ch == '-' || ch == '+' || ch == '.' || (ch >= '0' && ch <= '9')) { 482 type = TK_NUMBER; 483 do { 484 outBuf.append((char)ch); 485 ch = file.read(); 486 } while (ch != -1 && ((ch >= '0' && ch <= '9') || ch == '.')); 487 } 488 else { 489 type = TK_OTHER; 490 do { 491 outBuf.append((char)ch); 492 ch = file.read(); 493 } while (!delims[ch + 1]); 494 } 495 backOnePosition(ch); 496 break; 497 } 498 } 499 if (outBuf != null) 500 stringValue = outBuf.toString(); 501 return true; 502 } 503 intValue()504 public int intValue() { 505 return Integer.parseInt(stringValue); 506 } 507 readLineSegment(byte input[])508 public boolean readLineSegment(byte input[]) throws IOException { 509 int c = -1; 510 boolean eol = false; 511 int ptr = 0; 512 int len = input.length; 513 // ssteward, pdftk-1.10, 040922: 514 // skip initial whitespace; added this because PdfReader.rebuildXref() 515 // assumes that line provided by readLineSegment does not have init. whitespace; 516 if ( ptr < len ) { 517 while ( isWhitespace( (c = read()) ) ); 518 } 519 while ( !eol && ptr < len ) { 520 switch (c) { 521 case -1: 522 case '\n': 523 eol = true; 524 break; 525 case '\r': 526 eol = true; 527 int cur = getFilePointer(); 528 if ((read()) != '\n') { 529 seek(cur); 530 } 531 break; 532 default: 533 input[ptr++] = (byte)c; 534 break; 535 } 536 537 // break loop? do it before we read() again 538 if( eol || len <= ptr ) { 539 break; 540 } 541 else { 542 c = read(); 543 } 544 } 545 if (ptr >= len) { 546 eol = false; 547 while (!eol) { 548 switch (c = read()) { 549 case -1: 550 case '\n': 551 eol = true; 552 break; 553 case '\r': 554 eol = true; 555 int cur = getFilePointer(); 556 if ((read()) != '\n') { 557 seek(cur); 558 } 559 break; 560 } 561 } 562 } 563 564 if ((c == -1) && (ptr == 0)) { 565 return false; 566 } 567 if (ptr + 2 <= len) { 568 input[ptr++] = (byte)' '; 569 input[ptr] = (byte)'X'; 570 } 571 return true; 572 } 573 checkObjectStart(byte line[])574 public static int[] checkObjectStart(byte line[]) { 575 try { 576 PRTokeniser tk = new PRTokeniser(line); 577 int num = 0; 578 int gen = 0; 579 if (!tk.nextToken() || tk.getTokenType() != TK_NUMBER) 580 return null; 581 num = tk.intValue(); 582 if (!tk.nextToken() || tk.getTokenType() != TK_NUMBER) 583 return null; 584 gen = tk.intValue(); 585 if (!tk.nextToken()) 586 return null; 587 if (!tk.getStringValue().equals("obj")) 588 return null; 589 return new int[]{num, gen}; 590 } 591 catch (Exception ioe) { 592 // empty on purpose 593 } 594 return null; 595 } 596 isHexString()597 public boolean isHexString() { 598 return this.hexString; 599 } 600 601 } 602