#include "mupdf/fitz.h"
#include "mupdf/pdf.h"

#include <limits.h>
#include <string.h>
5 
6 int
pdf_obj_num_is_stream(fz_context * ctx,pdf_document * doc,int num)7 pdf_obj_num_is_stream(fz_context *ctx, pdf_document *doc, int num)
8 {
9 	pdf_xref_entry *entry;
10 
11 	if (num <= 0 || num >= pdf_xref_len(ctx, doc))
12 		return 0;
13 
14 	fz_try(ctx)
15 		entry = pdf_cache_object(ctx, doc, num);
16 	fz_catch(ctx)
17 	{
18 		fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
19 		return 0;
20 	}
21 
22 	return entry->stm_ofs != 0 || entry->stm_buf;
23 }
24 
25 int
pdf_is_stream(fz_context * ctx,pdf_obj * ref)26 pdf_is_stream(fz_context *ctx, pdf_obj *ref)
27 {
28 	pdf_document *doc = pdf_get_indirect_document(ctx, ref);
29 	if (doc)
30 		return pdf_obj_num_is_stream(ctx, doc, pdf_to_num(ctx, ref));
31 	return 0;
32 }
33 
34 /*
35  * Scan stream dictionary for an explicit /Crypt filter
36  */
37 static int
pdf_stream_has_crypt(fz_context * ctx,pdf_obj * stm)38 pdf_stream_has_crypt(fz_context *ctx, pdf_obj *stm)
39 {
40 	pdf_obj *filters;
41 	pdf_obj *obj;
42 	int i;
43 
44 	filters = pdf_dict_geta(ctx, stm, PDF_NAME(Filter), PDF_NAME(F));
45 	if (filters)
46 	{
47 		if (pdf_name_eq(ctx, filters, PDF_NAME(Crypt)))
48 			return 1;
49 		if (pdf_is_array(ctx, filters))
50 		{
51 			int n = pdf_array_len(ctx, filters);
52 			for (i = 0; i < n; i++)
53 			{
54 				obj = pdf_array_get(ctx, filters, i);
55 				if (pdf_name_eq(ctx, obj, PDF_NAME(Crypt)))
56 					return 1;
57 			}
58 		}
59 	}
60 	return 0;
61 }
62 
63 static fz_jbig2_globals *
pdf_load_jbig2_globals(fz_context * ctx,pdf_obj * dict)64 pdf_load_jbig2_globals(fz_context *ctx, pdf_obj *dict)
65 {
66 	fz_jbig2_globals *globals;
67 	fz_buffer *buf = NULL;
68 
69 	fz_var(buf);
70 
71 	if ((globals = pdf_find_item(ctx, fz_drop_jbig2_globals_imp, dict)) != NULL)
72 		return globals;
73 
74 	if (pdf_mark_obj(ctx, dict))
75 		fz_throw(ctx, FZ_ERROR_GENERIC, "cyclic reference when loading JBIG2 globals");
76 
77 	fz_try(ctx)
78 	{
79 		buf = pdf_load_stream(ctx, dict);
80 		globals = fz_load_jbig2_globals(ctx, buf);
81 		pdf_store_item(ctx, dict, globals, fz_buffer_storage(ctx, buf, NULL));
82 	}
83 	fz_always(ctx)
84 	{
85 		fz_drop_buffer(ctx, buf);
86 		pdf_unmark_obj(ctx, dict);
87 	}
88 	fz_catch(ctx)
89 	{
90 		fz_rethrow(ctx);
91 	}
92 
93 	return globals;
94 }
95 
96 static void
build_compression_params(fz_context * ctx,pdf_obj * f,pdf_obj * p,fz_compression_params * params)97 build_compression_params(fz_context *ctx, pdf_obj *f, pdf_obj *p, fz_compression_params *params)
98 {
99 	int predictor = pdf_dict_get_int(ctx, p, PDF_NAME(Predictor));
100 	pdf_obj *columns_obj = pdf_dict_get(ctx, p, PDF_NAME(Columns));
101 	int columns = pdf_to_int(ctx, columns_obj);
102 	int colors = pdf_dict_get_int(ctx, p, PDF_NAME(Colors));
103 	int bpc = pdf_dict_get_int(ctx, p, PDF_NAME(BitsPerComponent));
104 
105 	params->type = FZ_IMAGE_RAW;
106 
107 	if (pdf_name_eq(ctx, f, PDF_NAME(CCITTFaxDecode)) || pdf_name_eq(ctx, f, PDF_NAME(CCF)))
108 	{
109 		pdf_obj *k = pdf_dict_get(ctx, p, PDF_NAME(K));
110 		pdf_obj *eol = pdf_dict_get(ctx, p, PDF_NAME(EndOfLine));
111 		pdf_obj *eba = pdf_dict_get(ctx, p, PDF_NAME(EncodedByteAlign));
112 		pdf_obj *rows = pdf_dict_get(ctx, p, PDF_NAME(Rows));
113 		pdf_obj *eob = pdf_dict_get(ctx, p, PDF_NAME(EndOfBlock));
114 		pdf_obj *bi1 = pdf_dict_get(ctx, p, PDF_NAME(BlackIs1));
115 
116 		params->type = FZ_IMAGE_FAX;
117 		params->u.fax.k = (k ? pdf_to_int(ctx, k) : 0);
118 		params->u.fax.end_of_line = (eol ? pdf_to_bool(ctx, eol) : 0);
119 		params->u.fax.encoded_byte_align = (eba ? pdf_to_bool(ctx, eba) : 0);
120 		params->u.fax.columns = (columns_obj ? columns : 1728);
121 		params->u.fax.rows = (rows ? pdf_to_int(ctx, rows) : 0);
122 		params->u.fax.end_of_block = (eob ? pdf_to_bool(ctx, eob) : 1);
123 		params->u.fax.black_is_1 = (bi1 ? pdf_to_bool(ctx, bi1) : 0);
124 	}
125 	else if (pdf_name_eq(ctx, f, PDF_NAME(DCTDecode)) || pdf_name_eq(ctx, f, PDF_NAME(DCT)))
126 	{
127 		pdf_obj *ct = pdf_dict_get(ctx, p, PDF_NAME(ColorTransform));
128 
129 		params->type = FZ_IMAGE_JPEG;
130 		params->u.jpeg.color_transform = (ct ? pdf_to_int(ctx, ct) : -1);
131 	}
132 	else if (pdf_name_eq(ctx, f, PDF_NAME(RunLengthDecode)) || pdf_name_eq(ctx, f, PDF_NAME(RL)))
133 	{
134 		params->type = FZ_IMAGE_RLD;
135 	}
136 	else if (pdf_name_eq(ctx, f, PDF_NAME(FlateDecode)) || pdf_name_eq(ctx, f, PDF_NAME(Fl)))
137 	{
138 		params->type = FZ_IMAGE_FLATE;
139 		params->u.flate.predictor = predictor;
140 		params->u.flate.columns = columns;
141 		params->u.flate.colors = colors;
142 		params->u.flate.bpc = bpc;
143 	}
144 	else if (pdf_name_eq(ctx, f, PDF_NAME(LZWDecode)) || pdf_name_eq(ctx, f, PDF_NAME(LZW)))
145 	{
146 		pdf_obj *ec = pdf_dict_get(ctx, p, PDF_NAME(EarlyChange));
147 
148 		params->type = FZ_IMAGE_LZW;
149 		params->u.lzw.predictor = predictor;
150 		params->u.lzw.columns = columns;
151 		params->u.lzw.colors = colors;
152 		params->u.lzw.bpc = bpc;
153 		params->u.lzw.early_change = (ec ? pdf_to_int(ctx, ec) : 1);
154 	}
155 	else if (pdf_name_eq(ctx, f, PDF_NAME(JBIG2Decode)))
156 	{
157 		pdf_obj *g = pdf_dict_get(ctx, p, PDF_NAME(JBIG2Globals));
158 
159 		params->type = FZ_IMAGE_JBIG2;
160 		params->u.jbig2.globals = NULL;
161 		if (g)
162 		{
163 			if (!pdf_is_stream(ctx, g))
164 				fz_warn(ctx, "jbig2 globals is not a stream, skipping globals");
165 			else
166 				params->u.jbig2.globals = pdf_load_jbig2_globals(ctx, g);
167 		}
168 	}
169 }
170 
171 /*
172  * Create a filter given a name and param dictionary.
173  */
174 static fz_stream *
build_filter(fz_context * ctx,fz_stream * chain,pdf_document * doc,pdf_obj * f,pdf_obj * p,int num,int gen,fz_compression_params * params)175 build_filter(fz_context *ctx, fz_stream *chain, pdf_document *doc, pdf_obj *f, pdf_obj *p, int num, int gen, fz_compression_params *params)
176 {
177 	fz_compression_params local_params;
178 
179 	local_params.u.jbig2.globals = NULL;
180 	if (params == NULL)
181 		params = &local_params;
182 
183 	build_compression_params(ctx, f, p, params);
184 
185 	/* If we were using params we were passed in, and we successfully
186 	 * recognised the image type, we can use the existing filter and
187 	 * shortstop here. */
188 	if (params != &local_params && params->type != FZ_IMAGE_RAW)
189 		return fz_keep_stream(ctx, chain); /* nothing to do */
190 
191 	else if (params->type == FZ_IMAGE_JBIG2)
192 	{
193 		fz_stream *stm;
194 		fz_try(ctx)
195 			stm = fz_open_image_decomp_stream(ctx, chain, params, NULL);
196 		fz_always(ctx)
197 			fz_drop_jbig2_globals(ctx, local_params.u.jbig2.globals);
198 		fz_catch(ctx)
199 			fz_rethrow(ctx);
200 		return stm;
201 	}
202 
203 	else if (params->type != FZ_IMAGE_RAW)
204 		return fz_open_image_decomp_stream(ctx, chain, params, NULL);
205 
206 	else if (pdf_name_eq(ctx, f, PDF_NAME(ASCIIHexDecode)) || pdf_name_eq(ctx, f, PDF_NAME(AHx)))
207 		return fz_open_ahxd(ctx, chain);
208 
209 	else if (pdf_name_eq(ctx, f, PDF_NAME(ASCII85Decode)) || pdf_name_eq(ctx, f, PDF_NAME(A85)))
210 		return fz_open_a85d(ctx, chain);
211 
212 	else if (pdf_name_eq(ctx, f, PDF_NAME(JPXDecode)))
213 		return fz_keep_stream(ctx, chain); /* JPX decoding is special cased in the image loading code */
214 
215 	else if (pdf_name_eq(ctx, f, PDF_NAME(Crypt)))
216 	{
217 		if (!doc->crypt)
218 			fz_warn(ctx, "crypt filter in unencrypted document");
219 		else
220 		{
221 			pdf_obj *name = pdf_dict_get(ctx, p, PDF_NAME(Name));
222 			if (pdf_is_name(ctx, name))
223 				return pdf_open_crypt_with_filter(ctx, chain, doc->crypt, name, num, gen);
224 		}
225 	}
226 
227 	else
228 		fz_warn(ctx, "unknown filter name (%s)", pdf_to_name(ctx, f));
229 
230 	return fz_keep_stream(ctx, chain);
231 }
232 
233 /* Build filter, and assume ownership of chain */
234 static fz_stream *
build_filter_drop(fz_context * ctx,fz_stream * tail,pdf_document * doc,pdf_obj * f,pdf_obj * p,int num,int gen,fz_compression_params * params)235 build_filter_drop(fz_context *ctx, fz_stream *tail, pdf_document *doc, pdf_obj *f, pdf_obj *p, int num, int gen, fz_compression_params *params)
236 {
237 	fz_stream *head;
238 	fz_try(ctx)
239 		head = build_filter(ctx, tail, doc, f, p, num, gen, params);
240 	fz_always(ctx)
241 		fz_drop_stream(ctx, tail);
242 	fz_catch(ctx)
243 		fz_rethrow(ctx);
244 	return head;
245 }
246 
247 /*
248  * Build a chain of filters given filter names and param dicts.
249  * If chain is given, start filter chain with it.
250  * Assume ownership of chain.
251  */
252 static fz_stream *
build_filter_chain_drop(fz_context * ctx,fz_stream * chain,pdf_document * doc,pdf_obj * fs,pdf_obj * ps,int num,int gen,fz_compression_params * params)253 build_filter_chain_drop(fz_context *ctx, fz_stream *chain, pdf_document *doc, pdf_obj *fs, pdf_obj *ps, int num, int gen, fz_compression_params *params)
254 {
255 	fz_var(chain);
256 	fz_try(ctx)
257 	{
258 		int i, n = pdf_array_len(ctx, fs);
259 		for (i = 0; i < n; i++)
260 		{
261 			pdf_obj *f = pdf_array_get(ctx, fs, i);
262 			pdf_obj *p = pdf_array_get(ctx, ps, i);
263 			chain = build_filter_drop(ctx, chain, doc, f, p, num, gen, (i == n-1 ? params : NULL));
264 		}
265 	}
266 	fz_catch(ctx)
267 		fz_rethrow(ctx);
268 	return chain;
269 }
270 
271 static fz_stream *
build_filter_chain(fz_context * ctx,fz_stream * chain,pdf_document * doc,pdf_obj * fs,pdf_obj * ps,int num,int gen,fz_compression_params * params)272 build_filter_chain(fz_context *ctx, fz_stream *chain, pdf_document *doc, pdf_obj *fs, pdf_obj *ps, int num, int gen, fz_compression_params *params)
273 {
274 	return build_filter_chain_drop(ctx, fz_keep_stream(ctx, chain), doc, fs, ps, num, gen, params);
275 }
276 
277 /*
278  * Build a filter for reading raw stream data.
279  * This is a null filter to constrain reading to the stream length (and to
280  * allow for other people accessing the file), followed by a decryption
281  * filter.
282  *
283  * orig_num and orig_gen are used purely to seed the encryption.
284  */
285 static fz_stream *
pdf_open_raw_filter(fz_context * ctx,fz_stream * file_stm,pdf_document * doc,pdf_obj * stmobj,int num,int * orig_num,int * orig_gen,int64_t offset)286 pdf_open_raw_filter(fz_context *ctx, fz_stream *file_stm, pdf_document *doc, pdf_obj *stmobj, int num, int *orig_num, int *orig_gen, int64_t offset)
287 {
288 	pdf_xref_entry *x = NULL;
289 	fz_stream *null_stm, *crypt_stm;
290 	int hascrypt;
291 	int len;
292 
293 	if (num > 0 && num < pdf_xref_len(ctx, doc))
294 	{
295 		x = pdf_get_xref_entry(ctx, doc, num);
296 		*orig_num = x->num;
297 		*orig_gen = x->gen;
298 		if (x->stm_buf)
299 			return fz_open_buffer(ctx, x->stm_buf);
300 	}
301 	else
302 	{
303 		/* We only end up here when called from pdf_open_stream_with_offset to parse new format XRef sections. */
304 		/* New style XRef sections must have generation number 0. */
305 		*orig_num = num;
306 		*orig_gen = 0;
307 	}
308 
309 	hascrypt = pdf_stream_has_crypt(ctx, stmobj);
310 	len = pdf_dict_get_int(ctx, stmobj, PDF_NAME(Length));
311 	null_stm = fz_open_endstream_filter(ctx, file_stm, len, offset);
312 	if (doc->crypt && !hascrypt)
313 	{
314 		fz_try(ctx)
315 			crypt_stm = pdf_open_crypt(ctx, null_stm, doc->crypt, *orig_num, *orig_gen);
316 		fz_always(ctx)
317 			fz_drop_stream(ctx, null_stm);
318 		fz_catch(ctx)
319 			fz_rethrow(ctx);
320 		return crypt_stm;
321 	}
322 	return null_stm;
323 }
324 
325 /*
326  * Construct a filter to decode a stream, constraining
327  * to stream length and decrypting.
328  */
329 static fz_stream *
pdf_open_filter(fz_context * ctx,pdf_document * doc,fz_stream * file_stm,pdf_obj * stmobj,int num,int64_t offset,fz_compression_params * imparams)330 pdf_open_filter(fz_context *ctx, pdf_document *doc, fz_stream *file_stm, pdf_obj *stmobj, int num, int64_t offset, fz_compression_params *imparams)
331 {
332 	pdf_obj *filters = pdf_dict_geta(ctx, stmobj, PDF_NAME(Filter), PDF_NAME(F));
333 	pdf_obj *params = pdf_dict_geta(ctx, stmobj, PDF_NAME(DecodeParms), PDF_NAME(DP));
334 	int orig_num, orig_gen;
335 	fz_stream *rstm, *fstm;
336 
337 	rstm = pdf_open_raw_filter(ctx, file_stm, doc, stmobj, num, &orig_num, &orig_gen, offset);
338 	fz_try(ctx)
339 	{
340 		if (pdf_is_name(ctx, filters))
341 			fstm = build_filter(ctx, rstm, doc, filters, params, orig_num, orig_gen, imparams);
342 		else if (pdf_array_len(ctx, filters) > 0)
343 			fstm = build_filter_chain(ctx, rstm, doc, filters, params, orig_num, orig_gen, imparams);
344 		else
345 		{
346 			if (imparams)
347 				imparams->type = FZ_IMAGE_RAW;
348 			fstm = fz_keep_stream(ctx, rstm);
349 		}
350 	}
351 	fz_always(ctx)
352 		fz_drop_stream(ctx, rstm);
353 	fz_catch(ctx)
354 		fz_rethrow(ctx);
355 
356 	return fstm;
357 }
358 
359 fz_stream *
pdf_open_inline_stream(fz_context * ctx,pdf_document * doc,pdf_obj * stmobj,int length,fz_stream * file_stm,fz_compression_params * imparams)360 pdf_open_inline_stream(fz_context *ctx, pdf_document *doc, pdf_obj *stmobj, int length, fz_stream *file_stm, fz_compression_params *imparams)
361 {
362 	pdf_obj *filters = pdf_dict_geta(ctx, stmobj, PDF_NAME(Filter), PDF_NAME(F));
363 	pdf_obj *params = pdf_dict_geta(ctx, stmobj, PDF_NAME(DecodeParms), PDF_NAME(DP));
364 
365 	if (pdf_is_name(ctx, filters))
366 		return build_filter(ctx, file_stm, doc, filters, params, 0, 0, imparams);
367 	else if (pdf_array_len(ctx, filters) > 0)
368 		return build_filter_chain(ctx, file_stm, doc, filters, params, 0, 0, imparams);
369 
370 	if (imparams)
371 		imparams->type = FZ_IMAGE_RAW;
372 	return fz_open_null_filter(ctx, file_stm, length, fz_tell(ctx, file_stm));
373 }
374 
375 void
pdf_load_compressed_inline_image(fz_context * ctx,pdf_document * doc,pdf_obj * dict,int length,fz_stream * file_stm,int indexed,fz_compressed_image * image)376 pdf_load_compressed_inline_image(fz_context *ctx, pdf_document *doc, pdf_obj *dict, int length, fz_stream *file_stm, int indexed, fz_compressed_image *image)
377 {
378 	fz_stream *istm = NULL, *leech = NULL, *decomp = NULL;
379 	fz_pixmap *pixmap = NULL;
380 	fz_compressed_buffer *bc;
381 	int dummy_l2factor = 0;
382 
383 	fz_var(istm);
384 	fz_var(leech);
385 	fz_var(decomp);
386 	fz_var(pixmap);
387 
388 	bc = fz_malloc_struct(ctx, fz_compressed_buffer);
389 	fz_try(ctx)
390 	{
391 		bc->buffer = fz_new_buffer(ctx, 1024);
392 		istm = pdf_open_inline_stream(ctx, doc, dict, length, file_stm, &bc->params);
393 		leech = fz_open_leecher(ctx, istm, bc->buffer);
394 		decomp = fz_open_image_decomp_stream(ctx, leech, &bc->params, &dummy_l2factor);
395 		pixmap = fz_decomp_image_from_stream(ctx, decomp, image, NULL, indexed, 0);
396 		fz_set_compressed_image_buffer(ctx, image, bc);
397 	}
398 	fz_always(ctx)
399 	{
400 		fz_drop_stream(ctx, istm);
401 		fz_drop_stream(ctx, leech);
402 		fz_drop_stream(ctx, decomp);
403 		fz_drop_pixmap(ctx, pixmap);
404 	}
405 	fz_catch(ctx)
406 	{
407 		fz_drop_compressed_buffer(ctx, bc);
408 		fz_rethrow(ctx);
409 	}
410 }
411 
412 fz_stream *
pdf_open_raw_stream_number(fz_context * ctx,pdf_document * doc,int num)413 pdf_open_raw_stream_number(fz_context *ctx, pdf_document *doc, int num)
414 {
415 	pdf_xref_entry *x;
416 	int orig_num, orig_gen;
417 
418 	if (num <= 0 || num >= pdf_xref_len(ctx, doc))
419 		fz_throw(ctx, FZ_ERROR_GENERIC, "object id out of range (%d 0 R)", num);
420 
421 	x = pdf_cache_object(ctx, doc, num);
422 	if (x->stm_ofs == 0)
423 		fz_throw(ctx, FZ_ERROR_GENERIC, "object is not a stream");
424 
425 	return pdf_open_raw_filter(ctx, doc->file, doc, x->obj, num, &orig_num, &orig_gen, x->stm_ofs);
426 }
427 
428 static fz_stream *
pdf_open_image_stream(fz_context * ctx,pdf_document * doc,int num,fz_compression_params * params)429 pdf_open_image_stream(fz_context *ctx, pdf_document *doc, int num, fz_compression_params *params)
430 {
431 	pdf_xref_entry *x;
432 
433 	if (num <= 0 || num >= pdf_xref_len(ctx, doc))
434 		fz_throw(ctx, FZ_ERROR_GENERIC, "object id out of range (%d 0 R)", num);
435 
436 	x = pdf_cache_object(ctx, doc, num);
437 	if (x->stm_ofs == 0 && x->stm_buf == NULL)
438 		fz_throw(ctx, FZ_ERROR_GENERIC, "object is not a stream");
439 
440 	return pdf_open_filter(ctx, doc, doc->file, x->obj, num, x->stm_ofs, params);
441 }
442 
443 fz_stream *
pdf_open_stream_number(fz_context * ctx,pdf_document * doc,int num)444 pdf_open_stream_number(fz_context *ctx, pdf_document *doc, int num)
445 {
446 	return pdf_open_image_stream(ctx, doc, num, NULL);
447 }
448 
449 fz_stream *
pdf_open_stream_with_offset(fz_context * ctx,pdf_document * doc,int num,pdf_obj * dict,int64_t stm_ofs)450 pdf_open_stream_with_offset(fz_context *ctx, pdf_document *doc, int num, pdf_obj *dict, int64_t stm_ofs)
451 {
452 	if (stm_ofs == 0)
453 		fz_throw(ctx, FZ_ERROR_GENERIC, "object is not a stream");
454 	return pdf_open_filter(ctx, doc, doc->file, dict, num, stm_ofs, NULL);
455 }
456 
457 fz_buffer *
pdf_load_raw_stream_number(fz_context * ctx,pdf_document * doc,int num)458 pdf_load_raw_stream_number(fz_context *ctx, pdf_document *doc, int num)
459 {
460 	fz_stream *stm;
461 	pdf_obj *dict;
462 	int len;
463 	fz_buffer *buf = NULL;
464 	pdf_xref_entry *x;
465 
466 	if (num > 0 && num < pdf_xref_len(ctx, doc))
467 	{
468 		x = pdf_get_xref_entry(ctx, doc, num);
469 		if (x->stm_buf)
470 			return fz_keep_buffer(ctx, x->stm_buf);
471 	}
472 
473 	dict = pdf_load_object(ctx, doc, num);
474 
475 	fz_try(ctx)
476 		len = pdf_dict_get_int(ctx, dict, PDF_NAME(Length));
477 	fz_always(ctx)
478 		pdf_drop_obj(ctx, dict);
479 	fz_catch(ctx)
480 		fz_rethrow(ctx);
481 
482 	stm = pdf_open_raw_stream_number(ctx, doc, num);
483 
484 	fz_try(ctx)
485 		buf = fz_read_all(ctx, stm, len);
486 	fz_always(ctx)
487 		fz_drop_stream(ctx, stm);
488 	fz_catch(ctx)
489 		fz_rethrow(ctx);
490 
491 	return buf;
492 }
493 
/*
 * Guess the decoded size of 'len' bytes of data encoded with the named
 * filter. Only the full filter names below are recognised; any other
 * name leaves the length unchanged.
 *
 * The result never overflows: expanding filters saturate at INT_MAX,
 * and a negative 'len' is treated as 0. The caller applies this once
 * per filter in a chain, so without saturation a long filter array or
 * a large /Length would overflow a signed int.
 */
static int
pdf_guess_filter_length(int len, const char *filter)
{
	int factor;

	/* A negative length carries no information as a size guess. */
	if (len < 0)
		len = 0;

	if (!strcmp(filter, "ASCIIHexDecode"))
		return len / 2;

	/* Same value as len * 4 / 5 for non-negative len, computed without
	 * the intermediate product that could overflow. */
	if (!strcmp(filter, "ASCII85Decode"))
		return (len / 5) * 4 + ((len % 5) * 4) / 5;

	if (!strcmp(filter, "FlateDecode") || !strcmp(filter, "RunLengthDecode"))
		factor = 3;
	else if (!strcmp(filter, "LZWDecode"))
		factor = 2;
	else
		return len;

	if (len > INT_MAX / factor)
		return INT_MAX;
	return len * factor;
}
509 
510 /* Check if an entry has a cached stream and return whether it is directly
511  * reusable. A buffer is directly reusable only if the stream is
512  * uncompressed, or if it is compressed purely a compression method we can
513  * return details of in fz_compression_params.
514  *
515  * If the stream is reusable return 1, and set params as required, otherwise
516  * return 0. */
517 static int
can_reuse_buffer(fz_context * ctx,pdf_xref_entry * entry,fz_compression_params * params)518 can_reuse_buffer(fz_context *ctx, pdf_xref_entry *entry, fz_compression_params *params)
519 {
520 	pdf_obj *f;
521 	pdf_obj *p;
522 
523 	if (!entry || !entry->obj || !entry->stm_buf)
524 		return 0;
525 
526 	if (params)
527 		params->type = FZ_IMAGE_RAW;
528 
529 	f = pdf_dict_geta(ctx, entry->obj, PDF_NAME(Filter), PDF_NAME(F));
530 	/* If there are no filters, it's uncompressed, and we can use it */
531 	if (!f)
532 		return 1;
533 
534 	p = pdf_dict_geta(ctx, entry->obj, PDF_NAME(DecodeParms), PDF_NAME(DP));
535 	if (pdf_is_array(ctx, f))
536 	{
537 		int len = pdf_array_len(ctx, f);
538 
539 		/* Empty array of filters. Its uncompressed. We can cope. */
540 		if (len == 0)
541 			return 1;
542 		/* 1 filter is the most we can hope to cope with - if more,*/
543 		if (len != 1)
544 			return 0;
545 		p = pdf_array_get(ctx, p, 0);
546 	}
547 	if (pdf_is_null(ctx, f))
548 		return 1; /* Null filter is uncompressed */
549 	if (!pdf_is_name(ctx, f))
550 		return 0;
551 
552 	/* There are filters, so unless we have the option of shortstopping,
553 	 * we can't use the existing buffer. */
554 	if (!params)
555 		return 0;
556 
557 	build_compression_params(ctx, f, p, params);
558 
559 	return (params->type == FZ_IMAGE_RAW) ? 0 : 1;
560 }
561 
562 static fz_buffer *
pdf_load_image_stream(fz_context * ctx,pdf_document * doc,int num,fz_compression_params * params,int * truncated)563 pdf_load_image_stream(fz_context *ctx, pdf_document *doc, int num, fz_compression_params *params, int *truncated)
564 {
565 	fz_stream *stm = NULL;
566 	pdf_obj *dict, *obj;
567 	int i, len, n;
568 	fz_buffer *buf;
569 
570 	fz_var(buf);
571 
572 	if (num > 0 && num < pdf_xref_len(ctx, doc))
573 	{
574 		pdf_xref_entry *entry = pdf_get_xref_entry(ctx, doc, num);
575 		/* Return ref to existing buffer, but only if uncompressed,
576 		 * or shortstoppable */
577 		if (can_reuse_buffer(ctx, entry, params))
578 			return fz_keep_buffer(ctx, entry->stm_buf);
579 	}
580 
581 	dict = pdf_load_object(ctx, doc, num);
582 	fz_try(ctx)
583 	{
584 		len = pdf_dict_get_int(ctx, dict, PDF_NAME(Length));
585 		obj = pdf_dict_get(ctx, dict, PDF_NAME(Filter));
586 		len = pdf_guess_filter_length(len, pdf_to_name(ctx, obj));
587 		n = pdf_array_len(ctx, obj);
588 		for (i = 0; i < n; i++)
589 			len = pdf_guess_filter_length(len, pdf_to_name(ctx, pdf_array_get(ctx, obj, i)));
590 	}
591 	fz_always(ctx)
592 	{
593 		pdf_drop_obj(ctx, dict);
594 	}
595 	fz_catch(ctx)
596 	{
597 		fz_rethrow(ctx);
598 	}
599 
600 	stm = pdf_open_image_stream(ctx, doc, num, params);
601 
602 	fz_try(ctx)
603 	{
604 		if (truncated)
605 			buf = fz_read_best(ctx, stm, len, truncated);
606 		else
607 			buf = fz_read_all(ctx, stm, len);
608 	}
609 	fz_always(ctx)
610 	{
611 		fz_drop_stream(ctx, stm);
612 	}
613 	fz_catch(ctx)
614 	{
615 		fz_rethrow(ctx);
616 	}
617 
618 	return buf;
619 }
620 
621 fz_buffer *
pdf_load_stream_number(fz_context * ctx,pdf_document * doc,int num)622 pdf_load_stream_number(fz_context *ctx, pdf_document *doc, int num)
623 {
624 	return pdf_load_image_stream(ctx, doc, num, NULL, NULL);
625 }
626 
627 fz_compressed_buffer *
pdf_load_compressed_stream(fz_context * ctx,pdf_document * doc,int num)628 pdf_load_compressed_stream(fz_context *ctx, pdf_document *doc, int num)
629 {
630 	fz_compressed_buffer *bc = fz_malloc_struct(ctx, fz_compressed_buffer);
631 
632 	fz_try(ctx)
633 	{
634 		bc->buffer = pdf_load_image_stream(ctx, doc, num, &bc->params, NULL);
635 	}
636 	fz_catch(ctx)
637 	{
638 		fz_free(ctx, bc);
639 		fz_rethrow(ctx);
640 	}
641 	return bc;
642 }
643 
644 static fz_stream *
pdf_open_object_array(fz_context * ctx,pdf_document * doc,pdf_obj * list)645 pdf_open_object_array(fz_context *ctx, pdf_document *doc, pdf_obj *list)
646 {
647 	fz_stream *stm;
648 	int i, n;
649 
650 	n = pdf_array_len(ctx, list);
651 	stm = fz_open_concat(ctx, n, 1);
652 
653 	for (i = 0; i < n; i++)
654 	{
655 		pdf_obj *obj = pdf_array_get(ctx, list, i);
656 		fz_try(ctx)
657 			fz_concat_push_drop(ctx, stm, pdf_open_stream(ctx, obj));
658 		fz_catch(ctx)
659 		{
660 			if (fz_caught(ctx) == FZ_ERROR_TRYLATER)
661 			{
662 				fz_drop_stream(ctx, stm);
663 				fz_rethrow(ctx);
664 			}
665 			fz_warn(ctx, "cannot load content stream part %d/%d", i + 1, n);
666 		}
667 	}
668 
669 	return stm;
670 }
671 
672 fz_stream *
pdf_open_contents_stream(fz_context * ctx,pdf_document * doc,pdf_obj * obj)673 pdf_open_contents_stream(fz_context *ctx, pdf_document *doc, pdf_obj *obj)
674 {
675 	int num;
676 
677 	if (pdf_is_array(ctx, obj))
678 		return pdf_open_object_array(ctx, doc, obj);
679 
680 	num = pdf_to_num(ctx, obj);
681 	if (pdf_is_stream(ctx, obj))
682 		return pdf_open_image_stream(ctx, doc, num, NULL);
683 
684 	fz_warn(ctx, "content stream is not a stream (%d 0 R)", num);
685 	return fz_open_memory(ctx, (unsigned char *)"", 0);
686 }
687 
pdf_load_raw_stream(fz_context * ctx,pdf_obj * ref)688 fz_buffer *pdf_load_raw_stream(fz_context *ctx, pdf_obj *ref)
689 {
690 	if (pdf_is_stream(ctx, ref))
691 		return pdf_load_raw_stream_number(ctx, pdf_get_indirect_document(ctx, ref), pdf_to_num(ctx, ref));
692 	fz_throw(ctx, FZ_ERROR_GENERIC, "object is not a stream");
693 }
694 
pdf_load_stream(fz_context * ctx,pdf_obj * ref)695 fz_buffer *pdf_load_stream(fz_context *ctx, pdf_obj *ref)
696 {
697 	if (pdf_is_stream(ctx, ref))
698 		return pdf_load_stream_number(ctx, pdf_get_indirect_document(ctx, ref), pdf_to_num(ctx, ref));
699 	fz_throw(ctx, FZ_ERROR_GENERIC, "object is not a stream");
700 }
701 
pdf_open_raw_stream(fz_context * ctx,pdf_obj * ref)702 fz_stream *pdf_open_raw_stream(fz_context *ctx, pdf_obj *ref)
703 {
704 	if (pdf_is_stream(ctx, ref))
705 		return pdf_open_raw_stream_number(ctx, pdf_get_indirect_document(ctx, ref), pdf_to_num(ctx, ref));
706 	fz_throw(ctx, FZ_ERROR_GENERIC, "object is not a stream");
707 }
708 
pdf_open_stream(fz_context * ctx,pdf_obj * ref)709 fz_stream *pdf_open_stream(fz_context *ctx, pdf_obj *ref)
710 {
711 	if (pdf_is_stream(ctx, ref))
712 		return pdf_open_stream_number(ctx, pdf_get_indirect_document(ctx, ref), pdf_to_num(ctx, ref));
713 	fz_throw(ctx, FZ_ERROR_GENERIC, "object is not a stream");
714 }
715