1 /* 2 * $Id$ 3 * 4 * Copyright 2009 by Nigel Kerr. 5 * 6 * The contents of this file are subject to the Mozilla Public License Version 1.1 7 * (the "License"); you may not use this file except in compliance with the License. 8 * You may obtain a copy of the License at http://www.mozilla.org/MPL/ 9 * 10 * Software distributed under the License is distributed on an "AS IS" basis, 11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 * for the specific language governing rights and limitations under the License. 13 * 14 * The Original Code is 'iText, a free JAVA-PDF library'. 15 * 16 * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by 17 * the Initial Developer are Copyright (C) 1999-2009 by Bruno Lowagie. 18 * All Rights Reserved. 19 * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer 20 * are Copyright (C) 2000-2009 by Paulo Soares. All Rights Reserved. 21 * 22 * Contributor(s): all the names of the contributors are added in the source code 23 * where applicable. 24 * 25 * Alternatively, the contents of this file may be used under the terms of the 26 * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the 27 * provisions of LGPL are applicable instead of those above. If you wish to 28 * allow use of your version of this file only under the terms of the LGPL 29 * License and not to allow others to use your version of this file under 30 * the MPL, indicate your decision by deleting the provisions above and 31 * replace them with the notice and other provisions required by the LGPL. 32 * If you do not delete the provisions above, a recipient may use your version 33 * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE. 34 * 35 * This library is free software; you can redistribute it and/or modify it 36 * under the terms of the MPL as stated above or under the terms of the GNU 37 * Library General Public License as published by the Free Software Foundation; 38 * either version 2 of the License, or any later version. 39 * 40 * This library is distributed in the hope that it will be useful, but WITHOUT 41 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 42 * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more 43 * details. 44 * 45 * If you didn't download this code from the following link, you should check if 46 * you aren't using an obsolete version: 47 * http://www.lowagie.com/iText/ 48 */ 49 50 package com.lowagie.text.pdf.codec; 51 52 import java.io.ByteArrayOutputStream; 53 import java.io.IOException; 54 import java.util.Iterator; 55 import java.util.SortedMap; 56 import java.util.SortedSet; 57 import java.util.TreeMap; 58 import java.util.TreeSet; 59 import com.lowagie.text.error_messages.MessageLocalization; 60 61 import com.lowagie.text.pdf.RandomAccessFileOrArray; 62 63 /** 64 * Class to read a JBIG2 file at a basic level: understand all the segments, 65 * understand what segments belong to which pages, how many pages there are, 66 * what the width and height of each page is, and global segments if there 67 * are any. Or: the minimum required to be able to take a normal sequential 68 * or random-access organized file, and be able to embed JBIG2 pages as images 69 * in a PDF. 70 * 71 * TODO: the indeterminate-segment-size value of dataLength, else? 72 * 73 * @since 2.1.5 74 */ 75 76 public class JBIG2SegmentReader { 77 78 public static final int SYMBOL_DICTIONARY = 0; //see 7.4.2. 79 80 public static final int INTERMEDIATE_TEXT_REGION = 4; //see 7.4.3. 81 public static final int IMMEDIATE_TEXT_REGION = 6; //see 7.4.3. 82 public static final int IMMEDIATE_LOSSLESS_TEXT_REGION = 7; //see 7.4.3. 83 public static final int PATTERN_DICTIONARY = 16; //see 7.4.4. 84 public static final int INTERMEDIATE_HALFTONE_REGION = 20; //see 7.4.5. 85 public static final int IMMEDIATE_HALFTONE_REGION = 22; //see 7.4.5. 86 public static final int IMMEDIATE_LOSSLESS_HALFTONE_REGION = 23; //see 7.4.5. 87 public static final int INTERMEDIATE_GENERIC_REGION = 36; //see 7.4.6. 88 public static final int IMMEDIATE_GENERIC_REGION = 38; //see 7.4.6. 89 public static final int IMMEDIATE_LOSSLESS_GENERIC_REGION = 39; //see 7.4.6. 90 public static final int INTERMEDIATE_GENERIC_REFINEMENT_REGION = 40; //see 7.4.7. 91 public static final int IMMEDIATE_GENERIC_REFINEMENT_REGION = 42; //see 7.4.7. 92 public static final int IMMEDIATE_LOSSLESS_GENERIC_REFINEMENT_REGION = 43; //see 7.4.7. 93 94 public static final int PAGE_INFORMATION = 48; //see 7.4.8. 95 public static final int END_OF_PAGE = 49; //see 7.4.9. 96 public static final int END_OF_STRIPE = 50; //see 7.4.10. 97 public static final int END_OF_FILE = 51; //see 7.4.11. 98 public static final int PROFILES = 52; //see 7.4.12. 99 public static final int TABLES = 53; //see 7.4.13. 100 public static final int EXTENSION = 62; //see 7.4.14. 101 102 private final SortedMap segments = new TreeMap(); 103 private final SortedMap pages = new TreeMap(); 104 private final SortedSet globals = new TreeSet(); 105 private RandomAccessFileOrArray ra; 106 private boolean sequential; 107 private boolean number_of_pages_known; 108 private int number_of_pages = -1; 109 private boolean read = false; 110 111 /** 112 * Inner class that holds information about a JBIG2 segment. 113 * @since 2.1.5 114 */ 115 public static class JBIG2Segment implements Comparable { 116 117 public final int segmentNumber; 118 public long dataLength = -1; 119 public int page = -1; 120 public int[] referredToSegmentNumbers = null; 121 public boolean[] segmentRetentionFlags = null; 122 public int type = -1; 123 public boolean deferredNonRetain = false; 124 public int countOfReferredToSegments = -1; 125 public byte[] data = null; 126 public byte[] headerData = null; 127 public boolean page_association_size = false; 128 public int page_association_offset = -1; 129 JBIG2Segment(int segment_number)130 public JBIG2Segment(int segment_number) { 131 this.segmentNumber = segment_number; 132 } 133 134 // for the globals treeset compareTo(Object o)135 public int compareTo(Object o) { 136 return this.compareTo((JBIG2Segment)o); 137 } compareTo(JBIG2Segment s)138 public int compareTo(JBIG2Segment s) { 139 return this.segmentNumber - s.segmentNumber; 140 } 141 142 143 } 144 /** 145 * Inner class that holds information about a JBIG2 page. 146 * @since 2.1.5 147 */ 148 public static class JBIG2Page { 149 public final int page; 150 private final JBIG2SegmentReader sr; 151 private final SortedMap segs = new TreeMap(); 152 public int pageBitmapWidth = -1; 153 public int pageBitmapHeight = -1; JBIG2Page(int page, JBIG2SegmentReader sr)154 public JBIG2Page(int page, JBIG2SegmentReader sr) { 155 this.page = page; 156 this.sr = sr; 157 } 158 /** 159 * return as a single byte array the header-data for each segment in segment number 160 * order, EMBEDDED organization, but i am putting the needed segments in SEQUENTIAL organization. 161 * if for_embedding, skip the segment types that are known to be not for acrobat. 162 * @param for_embedding 163 * @return a byte array 164 * @throws IOException 165 */ getData(boolean for_embedding)166 public byte[] getData(boolean for_embedding) throws IOException { 167 ByteArrayOutputStream os = new ByteArrayOutputStream(); 168 for (Iterator i = segs.keySet().iterator(); i.hasNext(); ) { 169 Integer sn = (Integer) i.next(); 170 JBIG2Segment s = (JBIG2Segment) segs.get(sn); 171 172 // pdf reference 1.4, section 3.3.6 JBIG2Decode Filter 173 // D.3 Embedded organisation 174 if ( for_embedding && 175 ( s.type == END_OF_FILE || s.type == END_OF_PAGE ) ) { 176 continue; 177 } 178 179 if ( for_embedding ) { 180 // change the page association to page 1 181 byte[] headerData_emb = copyByteArray(s.headerData); 182 if ( s.page_association_size ) { 183 headerData_emb[s.page_association_offset] = 0x0; 184 headerData_emb[s.page_association_offset+1] = 0x0; 185 headerData_emb[s.page_association_offset+2] = 0x0; 186 headerData_emb[s.page_association_offset+3] = 0x1; 187 } else { 188 headerData_emb[s.page_association_offset] = 0x1; 189 } 190 os.write(headerData_emb); 191 } else { 192 os.write(s.headerData); 193 } 194 os.write(s.data); 195 } 196 os.close(); 197 return os.toByteArray(); 198 } addSegment(JBIG2Segment s)199 public void addSegment(JBIG2Segment s) { 200 segs.put(new Integer(s.segmentNumber), s); 201 } 202 203 } 204 JBIG2SegmentReader(RandomAccessFileOrArray ra )205 public JBIG2SegmentReader(RandomAccessFileOrArray ra ) throws IOException { 206 this.ra = ra; 207 } 208 copyByteArray(byte[] b)209 public static byte[] copyByteArray(byte[] b) { 210 byte[] bc = new byte[b.length]; 211 System.arraycopy(b, 0, bc, 0, b.length); 212 return bc; 213 } 214 read()215 public void read() throws IOException { 216 if ( this.read ) { 217 throw new IllegalStateException(MessageLocalization.getComposedMessage("already.attempted.a.read.on.this.jbig2.file")); 218 } 219 this.read = true; 220 221 readFileHeader(); 222 // Annex D 223 if ( this.sequential ) { 224 // D.1 225 do { 226 JBIG2Segment tmp = readHeader(); 227 readSegment(tmp); 228 segments.put(new Integer(tmp.segmentNumber), tmp); 229 } while ( this.ra.getFilePointer() < this.ra.length() ); 230 } else { 231 // D.2 232 JBIG2Segment tmp; 233 do { 234 tmp = readHeader(); 235 segments.put(new Integer(tmp.segmentNumber), tmp); 236 } while ( tmp.type != END_OF_FILE ); 237 Iterator segs = segments.keySet().iterator(); 238 while ( segs.hasNext() ) { 239 readSegment((JBIG2Segment)segments.get(segs.next())); 240 } 241 } 242 } 243 readSegment(JBIG2Segment s)244 void readSegment(JBIG2Segment s) throws IOException { 245 int ptr = ra.getFilePointer(); 246 247 if ( s.dataLength == 0xffffffffl ) { 248 // TODO figure this bit out, 7.2.7 249 return; 250 } 251 252 byte[] data = new byte[(int)s.dataLength]; 253 ra.read(data); 254 s.data = data; 255 256 if ( s.type == PAGE_INFORMATION ) { 257 int last = ra.getFilePointer(); 258 ra.seek(ptr); 259 int page_bitmap_width = ra.readInt(); 260 int page_bitmap_height = ra.readInt(); 261 ra.seek(last); 262 JBIG2Page p = (JBIG2Page)pages.get(new Integer(s.page)); 263 if ( p == null ) { 264 throw new IllegalStateException(MessageLocalization.getComposedMessage("referring.to.widht.height.of.page.we.havent.seen.yet.1", s.page)); 265 } 266 267 p.pageBitmapWidth = page_bitmap_width; 268 p.pageBitmapHeight = page_bitmap_height; 269 } 270 } 271 readHeader()272 JBIG2Segment readHeader() throws IOException { 273 int ptr = ra.getFilePointer(); 274 // 7.2.1 275 int segment_number = ra.readInt(); 276 JBIG2Segment s = new JBIG2Segment(segment_number); 277 278 // 7.2.3 279 int segment_header_flags = ra.read(); 280 boolean deferred_non_retain = (( segment_header_flags & 0x80 ) == 0x80); 281 s.deferredNonRetain = deferred_non_retain; 282 boolean page_association_size = (( segment_header_flags & 0x40 ) == 0x40); 283 int segment_type = ( segment_header_flags & 0x3f ); 284 s.type = segment_type; 285 286 //7.2.4 287 int referred_to_byte0 = ra.read(); 288 int count_of_referred_to_segments = (referred_to_byte0 & 0xE0) >> 5; 289 int[] referred_to_segment_numbers = null; 290 boolean[] segment_retention_flags = null; 291 292 if ( count_of_referred_to_segments == 7 ) { 293 // at least five bytes 294 ra.seek(ra.getFilePointer() - 1); 295 count_of_referred_to_segments = ( ra.readInt() & 0x1fffffff ); 296 segment_retention_flags = new boolean[count_of_referred_to_segments+1]; 297 int i = 0; 298 int referred_to_current_byte = 0; 299 do { 300 int j = i % 8; 301 if ( j == 0) { 302 referred_to_current_byte = ra.read(); 303 } 304 segment_retention_flags[i] = (((( 0x1 << j ) & referred_to_current_byte) >> j) == 0x1); 305 i++; 306 } while ( i <= count_of_referred_to_segments ); 307 308 } else if ( count_of_referred_to_segments <= 4 ) { 309 // only one byte 310 segment_retention_flags = new boolean[count_of_referred_to_segments+1]; 311 referred_to_byte0 &= 0x1f; 312 for ( int i = 0; i <= count_of_referred_to_segments; i++ ) { 313 segment_retention_flags[i] = (((( 0x1 << i ) & referred_to_byte0) >> i) == 0x1); 314 } 315 316 } else if ( count_of_referred_to_segments == 5 || count_of_referred_to_segments == 6 ) { 317 throw new IllegalStateException(MessageLocalization.getComposedMessage("count.of.referred.to.segments.had.bad.value.in.header.for.segment.1.starting.at.2", String.valueOf(segment_number), String.valueOf(ptr))); 318 } 319 s.segmentRetentionFlags = segment_retention_flags; 320 s.countOfReferredToSegments = count_of_referred_to_segments; 321 322 // 7.2.5 323 referred_to_segment_numbers = new int[count_of_referred_to_segments+1]; 324 for ( int i = 1; i <= count_of_referred_to_segments; i++ ) { 325 if ( segment_number <= 256 ) { 326 referred_to_segment_numbers[i] = ra.read(); 327 } else if ( segment_number <= 65536 ) { 328 referred_to_segment_numbers[i] = ra.readUnsignedShort(); 329 } else { 330 referred_to_segment_numbers[i] = (int)ra.readUnsignedInt(); // TODO wtf ack 331 } 332 } 333 s.referredToSegmentNumbers = referred_to_segment_numbers; 334 335 // 7.2.6 336 int segment_page_association; 337 int page_association_offset = ra.getFilePointer() - ptr; 338 if ( page_association_size ) { 339 segment_page_association = ra.readInt(); 340 } else { 341 segment_page_association = ra.read(); 342 } 343 if ( segment_page_association < 0 ) { 344 throw new IllegalStateException(MessageLocalization.getComposedMessage("page.1.invalid.for.segment.2.starting.at.3", String.valueOf(segment_page_association), String.valueOf(segment_number), String.valueOf(ptr))); 345 } 346 s.page = segment_page_association; 347 // so we can change the page association at embedding time. 348 s.page_association_size = page_association_size; 349 s.page_association_offset = page_association_offset; 350 351 if ( segment_page_association > 0 && ! pages.containsKey(new Integer(segment_page_association)) ) { 352 pages.put(new Integer(segment_page_association), new JBIG2Page(segment_page_association, this)); 353 } 354 if ( segment_page_association > 0 ) { 355 ((JBIG2Page)pages.get(new Integer(segment_page_association))).addSegment(s); 356 } else { 357 globals.add(s); 358 } 359 360 // 7.2.7 361 long segment_data_length = ra.readUnsignedInt(); 362 // TODO the 0xffffffff value that might be here, and how to understand those afflicted segments 363 s.dataLength = segment_data_length; 364 365 int end_ptr = ra.getFilePointer(); 366 ra.seek(ptr); 367 byte[] header_data = new byte[end_ptr - ptr]; 368 ra.read(header_data); 369 s.headerData = header_data; 370 371 return s; 372 } 373 readFileHeader()374 void readFileHeader() throws IOException { 375 ra.seek(0); 376 byte[] idstring = new byte[8]; 377 ra.read(idstring); 378 379 byte[] refidstring = {(byte)0x97, 0x4A, 0x42, 0x32, 0x0D, 0x0A, 0x1A, 0x0A}; 380 381 for ( int i = 0; i < idstring.length; i++ ) { 382 if ( idstring[i] != refidstring[i] ) { 383 throw new IllegalStateException(MessageLocalization.getComposedMessage("file.header.idstring.not.good.at.byte.1", i)); 384 } 385 } 386 387 int fileheaderflags = ra.read(); 388 389 this.sequential = (( fileheaderflags & 0x1 ) == 0x1); 390 this.number_of_pages_known = (( fileheaderflags & 0x2) == 0x0); 391 392 if ( (fileheaderflags & 0xfc) != 0x0 ) { 393 throw new IllegalStateException(MessageLocalization.getComposedMessage("file.header.flags.bits.2.7.not.0")); 394 } 395 396 if ( this.number_of_pages_known ) { 397 this.number_of_pages = ra.readInt(); 398 } 399 } 400 numberOfPages()401 public int numberOfPages() { 402 return pages.size(); 403 } 404 getPageHeight(int i)405 public int getPageHeight(int i) { 406 return ((JBIG2Page)pages.get(new Integer(i))).pageBitmapHeight; 407 } 408 getPageWidth(int i)409 public int getPageWidth(int i) { 410 return ((JBIG2Page)pages.get(new Integer(i))).pageBitmapWidth; 411 } 412 getPage(int page)413 public JBIG2Page getPage(int page) { 414 return (JBIG2Page)pages.get(new Integer(page)); 415 } 416 getGlobal(boolean for_embedding)417 public byte[] getGlobal(boolean for_embedding) { 418 ByteArrayOutputStream os = new ByteArrayOutputStream(); 419 try { 420 for (Iterator gitr = globals.iterator(); gitr.hasNext();) { 421 JBIG2Segment s = (JBIG2Segment)gitr.next(); 422 if ( for_embedding && 423 ( s.type == END_OF_FILE || s.type == END_OF_PAGE ) ) { 424 continue; 425 } 426 os.write(s.headerData); 427 os.write(s.data); 428 } 429 os.close(); 430 } catch (IOException e) { 431 e.printStackTrace(); 432 } 433 if ( os.size() <= 0 ) { 434 return null; 435 } 436 return os.toByteArray(); 437 } 438 toString()439 public String toString() { 440 if ( this.read ) { 441 return "Jbig2SegmentReader: number of pages: " + this.numberOfPages(); 442 } else { 443 return "Jbig2SegmentReader in indeterminate state."; 444 } 445 } 446 } 447