1 /*
2  * $Id$
3  *
4  * Copyright 2009 by Nigel Kerr.
5  *
6  * The contents of this file are subject to the Mozilla Public License Version 1.1
7  * (the "License"); you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at http://www.mozilla.org/MPL/
9  *
10  * Software distributed under the License is distributed on an "AS IS" basis,
11  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12  * for the specific language governing rights and limitations under the License.
13  *
14  * The Original Code is 'iText, a free JAVA-PDF library'.
15  *
16  * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
17  * the Initial Developer are Copyright (C) 1999-2009 by Bruno Lowagie.
18  * All Rights Reserved.
19  * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
20  * are Copyright (C) 2000-2009 by Paulo Soares. All Rights Reserved.
21  *
22  * Contributor(s): all the names of the contributors are added in the source code
23  * where applicable.
24  *
25  * Alternatively, the contents of this file may be used under the terms of the
26  * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
27  * provisions of LGPL are applicable instead of those above.  If you wish to
28  * allow use of your version of this file only under the terms of the LGPL
29  * License and not to allow others to use your version of this file under
30  * the MPL, indicate your decision by deleting the provisions above and
31  * replace them with the notice and other provisions required by the LGPL.
32  * If you do not delete the provisions above, a recipient may use your version
33  * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
34  *
35  * This library is free software; you can redistribute it and/or modify it
36  * under the terms of the MPL as stated above or under the terms of the GNU
37  * Library General Public License as published by the Free Software Foundation;
38  * either version 2 of the License, or any later version.
39  *
40  * This library is distributed in the hope that it will be useful, but WITHOUT
41  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
42  * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
43  * details.
44  *
45  * If you didn't download this code from the following link, you should check if
46  * you aren't using an obsolete version:
47  * http://www.lowagie.com/iText/
48  */
49 
50 package com.lowagie.text.pdf.codec;
51 
52 import java.io.ByteArrayOutputStream;
53 import java.io.IOException;
54 import java.util.Iterator;
55 import java.util.SortedMap;
56 import java.util.SortedSet;
57 import java.util.TreeMap;
58 import java.util.TreeSet;
59 import com.lowagie.text.error_messages.MessageLocalization;
60 
61 import com.lowagie.text.pdf.RandomAccessFileOrArray;
62 
63 /**
64  * Class to read a JBIG2 file at a basic level: understand all the segments,
65  * understand what segments belong to which pages, how many pages there are,
66  * what the width and height of each page is, and global segments if there
67  * are any.  Or: the minimum required to be able to take a normal sequential
68  * or random-access organized file, and be able to embed JBIG2 pages as images
69  * in a PDF.
70  *
 * TODO: handle segments whose dataLength is the indeterminate-length marker 0xffffffff; readSegment() currently leaves the data of such segments unread.
72  *
73  * @since 2.1.5
74  */
75 
76 public class JBIG2SegmentReader {
77 
	// Segment type codes. readHeader() stores the low six bits of the segment header
	// flags byte in JBIG2Segment.type, which is then compared against these values.
	// The section numbers in the trailing comments are the original author's references.
	public static final int SYMBOL_DICTIONARY = 0; //see 7.4.2.

	public static final int INTERMEDIATE_TEXT_REGION = 4; //see 7.4.3.
	public static final int IMMEDIATE_TEXT_REGION = 6; //see 7.4.3.
	public static final int IMMEDIATE_LOSSLESS_TEXT_REGION = 7; //see 7.4.3.
	public static final int PATTERN_DICTIONARY = 16; //see 7.4.4.
	public static final int INTERMEDIATE_HALFTONE_REGION = 20; //see 7.4.5.
	public static final int IMMEDIATE_HALFTONE_REGION = 22; //see 7.4.5.
	public static final int IMMEDIATE_LOSSLESS_HALFTONE_REGION = 23; //see 7.4.5.
	public static final int INTERMEDIATE_GENERIC_REGION = 36; //see 7.4.6.
	public static final int IMMEDIATE_GENERIC_REGION = 38; //see 7.4.6.
	public static final int IMMEDIATE_LOSSLESS_GENERIC_REGION = 39; //see 7.4.6.
	public static final int INTERMEDIATE_GENERIC_REFINEMENT_REGION = 40; //see 7.4.7.
	public static final int IMMEDIATE_GENERIC_REFINEMENT_REGION = 42; //see 7.4.7.
	public static final int IMMEDIATE_LOSSLESS_GENERIC_REFINEMENT_REGION = 43; //see 7.4.7.

	public static final int PAGE_INFORMATION = 48; //see 7.4.8.
	public static final int END_OF_PAGE = 49; //see 7.4.9.
	public static final int END_OF_STRIPE = 50; //see 7.4.10.
	public static final int END_OF_FILE = 51; //see 7.4.11.
	public static final int PROFILES = 52; //see 7.4.12.
	public static final int TABLES = 53; //see 7.4.13.
	public static final int EXTENSION = 62; //see 7.4.14.

	// every segment seen so far: Integer segment number -> JBIG2Segment
	private final SortedMap segments = new TreeMap();
	// every page seen so far: Integer page number -> JBIG2Page
	private final SortedMap pages = new TreeMap();
	// segments whose page association is 0 (not tied to a page), ordered by segment number
	private final SortedSet globals = new TreeSet();
	// the source the JBIG2 bytes are read from
	private RandomAccessFileOrArray ra;
	// bit 0 of the file header flags; selects the D.1 (sequential) branch in read()
	private boolean sequential;
	// true when bit 1 of the file header flags is clear
	private boolean number_of_pages_known;
	// page count read from the file header when known; note that numberOfPages() reports pages.size() instead
	private int number_of_pages = -1;
	// set once read() has been called, so a second call can be rejected
	private boolean read = false;
110 
111 	/**
112 	 * Inner class that holds information about a JBIG2 segment.
113 	 * @since	2.1.5
114 	 */
115 	public static class JBIG2Segment implements Comparable {
116 
117 		public final int segmentNumber;
118 		public long dataLength = -1;
119 		public int page = -1;
120 		public int[] referredToSegmentNumbers = null;
121 		public boolean[] segmentRetentionFlags = null;
122 		public int type = -1;
123 		public boolean deferredNonRetain = false;
124 		public int countOfReferredToSegments = -1;
125 		public byte[] data = null;
126 		public byte[] headerData = null;
127 		public boolean page_association_size = false;
128 		public int page_association_offset = -1;
129 
JBIG2Segment(int segment_number)130 		public JBIG2Segment(int segment_number) {
131 			this.segmentNumber = segment_number;
132 		}
133 
134 		// for the globals treeset
compareTo(Object o)135 		public int compareTo(Object o) {
136 			return this.compareTo((JBIG2Segment)o);
137 		}
compareTo(JBIG2Segment s)138 		public int compareTo(JBIG2Segment s) {
139 			return this.segmentNumber - s.segmentNumber;
140 		}
141 
142 
143 	}
144 	/**
145 	 * Inner class that holds information about a JBIG2 page.
146 	 * @since	2.1.5
147 	 */
148 	public static class JBIG2Page {
149 		public final int page;
150 		private final JBIG2SegmentReader sr;
151 		private final SortedMap segs = new TreeMap();
152 		public int pageBitmapWidth = -1;
153 		public int pageBitmapHeight = -1;
JBIG2Page(int page, JBIG2SegmentReader sr)154 		public JBIG2Page(int page, JBIG2SegmentReader sr) {
155 			this.page = page;
156 			this.sr = sr;
157 		}
158 		/**
159 		 * return as a single byte array the header-data for each segment in segment number
160 		 * order, EMBEDDED organization, but i am putting the needed segments in SEQUENTIAL organization.
161 		 * if for_embedding, skip the segment types that are known to be not for acrobat.
162 		 * @param for_embedding
163 		 * @return	a byte array
164 		 * @throws IOException
165 		 */
getData(boolean for_embedding)166 		public byte[] getData(boolean for_embedding) throws IOException {
167 			ByteArrayOutputStream os = new ByteArrayOutputStream();
168 			for (Iterator i = segs.keySet().iterator(); i.hasNext();  ) {
169 				Integer sn = (Integer) i.next();
170 				JBIG2Segment s = (JBIG2Segment) segs.get(sn);
171 
172 				// pdf reference 1.4, section 3.3.6 JBIG2Decode Filter
173 				// D.3 Embedded organisation
174 				if ( for_embedding &&
175 						( s.type == END_OF_FILE || s.type == END_OF_PAGE ) ) {
176 					continue;
177 				}
178 
179 				if ( for_embedding ) {
180 					// change the page association to page 1
181 					byte[] headerData_emb = copyByteArray(s.headerData);
182 					if ( s.page_association_size ) {
183 						headerData_emb[s.page_association_offset] = 0x0;
184 						headerData_emb[s.page_association_offset+1] = 0x0;
185 						headerData_emb[s.page_association_offset+2] = 0x0;
186 						headerData_emb[s.page_association_offset+3] = 0x1;
187 					} else {
188 						headerData_emb[s.page_association_offset] = 0x1;
189 					}
190 					os.write(headerData_emb);
191 				} else {
192 					os.write(s.headerData);
193 				}
194 				os.write(s.data);
195 			}
196 			os.close();
197 			return os.toByteArray();
198 		}
addSegment(JBIG2Segment s)199 		public void addSegment(JBIG2Segment s) {
200 			segs.put(new Integer(s.segmentNumber), s);
201 		}
202 
203 	}
204 
	/**
	 * Creates a reader for the given source. The constructor only stores the
	 * reference; nothing is read from it until {@link #read()} is called.
	 * @param ra	the source holding the JBIG2 bytes
	 * @throws IOException	declared for callers; this constructor itself performs no I/O
	 */
	public JBIG2SegmentReader(RandomAccessFileOrArray ra ) throws IOException {
		this.ra = ra;
	}
208 
copyByteArray(byte[] b)209 	public static byte[] copyByteArray(byte[] b) {
210 		byte[] bc = new byte[b.length];
211 		System.arraycopy(b, 0, bc, 0, b.length);
212 		return bc;
213 	}
214 
read()215 	public void read() throws IOException {
216 		if ( this.read ) {
217 			throw new IllegalStateException(MessageLocalization.getComposedMessage("already.attempted.a.read.on.this.jbig2.file"));
218 		}
219 		this.read = true;
220 
221 		readFileHeader();
222 		// Annex D
223 		if ( this.sequential ) {
224 			// D.1
225 			do {
226 				JBIG2Segment tmp = readHeader();
227 				readSegment(tmp);
228 				segments.put(new Integer(tmp.segmentNumber), tmp);
229 			} while ( this.ra.getFilePointer() < this.ra.length() );
230 		} else {
231 			// D.2
232 			JBIG2Segment tmp;
233 			do {
234 				tmp = readHeader();
235 				segments.put(new Integer(tmp.segmentNumber), tmp);
236 			} while ( tmp.type != END_OF_FILE );
237 			Iterator segs = segments.keySet().iterator();
238 			while ( segs.hasNext() ) {
239 				readSegment((JBIG2Segment)segments.get(segs.next()));
240 			}
241 		}
242 	}
243 
readSegment(JBIG2Segment s)244 	void readSegment(JBIG2Segment s) throws IOException {
245 		int ptr = ra.getFilePointer();
246 
247 		if ( s.dataLength == 0xffffffffl ) {
248 			// TODO figure this bit out, 7.2.7
249 			return;
250 		}
251 
252 		byte[] data = new byte[(int)s.dataLength];
253 		ra.read(data);
254 		s.data = data;
255 
256 		if ( s.type == PAGE_INFORMATION ) {
257 			int last = ra.getFilePointer();
258 			ra.seek(ptr);
259 			int page_bitmap_width = ra.readInt();
260 			int page_bitmap_height = ra.readInt();
261 			ra.seek(last);
262 			JBIG2Page p = (JBIG2Page)pages.get(new Integer(s.page));
263 			if ( p == null ) {
264 				throw new IllegalStateException(MessageLocalization.getComposedMessage("referring.to.widht.height.of.page.we.havent.seen.yet.1", s.page));
265 			}
266 
267 			p.pageBitmapWidth = page_bitmap_width;
268 			p.pageBitmapHeight = page_bitmap_height;
269 		}
270 	}
271 
readHeader()272 	JBIG2Segment readHeader() throws IOException {
273 		int ptr = ra.getFilePointer();
274 		// 7.2.1
275 		int segment_number = ra.readInt();
276 		JBIG2Segment s = new JBIG2Segment(segment_number);
277 
278 		// 7.2.3
279 		int segment_header_flags = ra.read();
280 		boolean deferred_non_retain = (( segment_header_flags & 0x80 ) == 0x80);
281 		s.deferredNonRetain = deferred_non_retain;
282 		boolean page_association_size = (( segment_header_flags & 0x40 ) == 0x40);
283 		int segment_type = ( segment_header_flags & 0x3f );
284 		s.type = segment_type;
285 
286 		//7.2.4
287 		int referred_to_byte0 = ra.read();
288 		int count_of_referred_to_segments = (referred_to_byte0 & 0xE0) >> 5;
289 		int[] referred_to_segment_numbers = null;
290 		boolean[] segment_retention_flags = null;
291 
292 		if ( count_of_referred_to_segments == 7 ) {
293 			// at least five bytes
294 			ra.seek(ra.getFilePointer() - 1);
295 			count_of_referred_to_segments = ( ra.readInt() & 0x1fffffff );
296 			segment_retention_flags = new boolean[count_of_referred_to_segments+1];
297 			int i = 0;
298 			int referred_to_current_byte = 0;
299 			do {
300 				int j = i % 8;
301 				if ( j == 0) {
302 					referred_to_current_byte = ra.read();
303 				}
304 				segment_retention_flags[i] = (((( 0x1 << j ) & referred_to_current_byte) >> j) == 0x1);
305 				i++;
306 			} while ( i <= count_of_referred_to_segments );
307 
308 		} else if ( count_of_referred_to_segments <= 4 ) {
309 			// only one byte
310 			segment_retention_flags = new boolean[count_of_referred_to_segments+1];
311 			referred_to_byte0 &= 0x1f;
312 			for ( int i = 0; i <= count_of_referred_to_segments; i++ ) {
313 				segment_retention_flags[i] = (((( 0x1 << i ) & referred_to_byte0) >> i) == 0x1);
314 			}
315 
316 		} else if ( count_of_referred_to_segments == 5 || count_of_referred_to_segments == 6 ) {
317 			throw new IllegalStateException(MessageLocalization.getComposedMessage("count.of.referred.to.segments.had.bad.value.in.header.for.segment.1.starting.at.2", String.valueOf(segment_number), String.valueOf(ptr)));
318 		}
319 		s.segmentRetentionFlags = segment_retention_flags;
320 		s.countOfReferredToSegments = count_of_referred_to_segments;
321 
322 		// 7.2.5
323 		referred_to_segment_numbers = new int[count_of_referred_to_segments+1];
324 		for ( int i = 1; i <= count_of_referred_to_segments; i++ ) {
325 			if ( segment_number <= 256 ) {
326 				referred_to_segment_numbers[i] = ra.read();
327 			} else if ( segment_number <= 65536 ) {
328 				referred_to_segment_numbers[i] = ra.readUnsignedShort();
329 			} else {
330 				referred_to_segment_numbers[i] = (int)ra.readUnsignedInt(); // TODO wtf ack
331 			}
332 		}
333 		s.referredToSegmentNumbers = referred_to_segment_numbers;
334 
335 		// 7.2.6
336 		int segment_page_association;
337 		int page_association_offset = ra.getFilePointer() - ptr;
338 		if ( page_association_size ) {
339 			segment_page_association = ra.readInt();
340 		} else {
341 			segment_page_association = ra.read();
342 		}
343 		if ( segment_page_association < 0 ) {
344 			throw new IllegalStateException(MessageLocalization.getComposedMessage("page.1.invalid.for.segment.2.starting.at.3", String.valueOf(segment_page_association), String.valueOf(segment_number), String.valueOf(ptr)));
345 		}
346 		s.page = segment_page_association;
347 		// so we can change the page association at embedding time.
348 		s.page_association_size = page_association_size;
349 		s.page_association_offset = page_association_offset;
350 
351 		if ( segment_page_association > 0 && ! pages.containsKey(new Integer(segment_page_association)) ) {
352 			pages.put(new Integer(segment_page_association), new JBIG2Page(segment_page_association, this));
353 		}
354 		if ( segment_page_association > 0 ) {
355 			((JBIG2Page)pages.get(new Integer(segment_page_association))).addSegment(s);
356 		} else {
357 			globals.add(s);
358 		}
359 
360 		// 7.2.7
361 		long segment_data_length = ra.readUnsignedInt();
362 		// TODO the 0xffffffff value that might be here, and how to understand those afflicted segments
363 		s.dataLength = segment_data_length;
364 
365 		int end_ptr = ra.getFilePointer();
366 		ra.seek(ptr);
367 		byte[] header_data = new byte[end_ptr - ptr];
368 		ra.read(header_data);
369 		s.headerData  = header_data;
370 
371 		return s;
372 	}
373 
readFileHeader()374 	void readFileHeader() throws IOException {
375 		ra.seek(0);
376 		byte[] idstring = new byte[8];
377 		ra.read(idstring);
378 
379 		byte[] refidstring = {(byte)0x97, 0x4A, 0x42, 0x32, 0x0D, 0x0A, 0x1A, 0x0A};
380 
381 		for ( int i = 0; i < idstring.length; i++ ) {
382 			if ( idstring[i] != refidstring[i] ) {
383 				throw new IllegalStateException(MessageLocalization.getComposedMessage("file.header.idstring.not.good.at.byte.1", i));
384 			}
385 		}
386 
387 		int fileheaderflags = ra.read();
388 
389 		this.sequential = (( fileheaderflags & 0x1 ) == 0x1);
390 		this.number_of_pages_known = (( fileheaderflags & 0x2) == 0x0);
391 
392 		if ( (fileheaderflags & 0xfc) != 0x0 ) {
393 			throw new IllegalStateException(MessageLocalization.getComposedMessage("file.header.flags.bits.2.7.not.0"));
394 		}
395 
396 		if ( this.number_of_pages_known ) {
397 			this.number_of_pages = ra.readInt();
398 		}
399 	}
400 
	/**
	 * Returns the number of distinct pages encountered in segment headers so far.
	 * This is the size of the pages map, not the page count stated in the file header.
	 * @return	the number of pages found
	 */
	public int numberOfPages() {
		return pages.size();
	}
404 
getPageHeight(int i)405 	public int getPageHeight(int i) {
406 		return ((JBIG2Page)pages.get(new Integer(i))).pageBitmapHeight;
407 	}
408 
getPageWidth(int i)409 	public int getPageWidth(int i) {
410 		return ((JBIG2Page)pages.get(new Integer(i))).pageBitmapWidth;
411 	}
412 
getPage(int page)413 	public JBIG2Page getPage(int page) {
414 		return (JBIG2Page)pages.get(new Integer(page));
415 	}
416 
getGlobal(boolean for_embedding)417 	public byte[] getGlobal(boolean for_embedding) {
418 		ByteArrayOutputStream os = new ByteArrayOutputStream();
419 		try {
420 			for (Iterator gitr = globals.iterator(); gitr.hasNext();) {
421 				JBIG2Segment s = (JBIG2Segment)gitr.next();
422 				if ( for_embedding &&
423 						( s.type == END_OF_FILE || s.type == END_OF_PAGE ) ) {
424 					continue;
425 				}
426 				os.write(s.headerData);
427 				os.write(s.data);
428 			}
429 			os.close();
430 		} catch (IOException e) {
431 			e.printStackTrace();
432 		}
433 		if ( os.size() <= 0 ) {
434 			return null;
435 		}
436 		return os.toByteArray();
437 	}
438 
toString()439 	public String toString() {
440 		if ( this.read ) {
441 			return "Jbig2SegmentReader: number of pages: " + this.numberOfPages();
442 		} else {
443 			return "Jbig2SegmentReader in indeterminate state.";
444 		}
445 	}
446 }
447