#include "mupdf/fitz.h"
#include "mupdf/pdf.h"

#include <limits.h>
#include <string.h>
5
6 int
pdf_obj_num_is_stream(fz_context * ctx,pdf_document * doc,int num)7 pdf_obj_num_is_stream(fz_context *ctx, pdf_document *doc, int num)
8 {
9 pdf_xref_entry *entry;
10
11 if (num <= 0 || num >= pdf_xref_len(ctx, doc))
12 return 0;
13
14 fz_try(ctx)
15 entry = pdf_cache_object(ctx, doc, num);
16 fz_catch(ctx)
17 {
18 fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
19 return 0;
20 }
21
22 return entry->stm_ofs != 0 || entry->stm_buf;
23 }
24
25 int
pdf_is_stream(fz_context * ctx,pdf_obj * ref)26 pdf_is_stream(fz_context *ctx, pdf_obj *ref)
27 {
28 pdf_document *doc = pdf_get_indirect_document(ctx, ref);
29 if (doc)
30 return pdf_obj_num_is_stream(ctx, doc, pdf_to_num(ctx, ref));
31 return 0;
32 }
33
34 /*
35 * Scan stream dictionary for an explicit /Crypt filter
36 */
37 static int
pdf_stream_has_crypt(fz_context * ctx,pdf_obj * stm)38 pdf_stream_has_crypt(fz_context *ctx, pdf_obj *stm)
39 {
40 pdf_obj *filters;
41 pdf_obj *obj;
42 int i;
43
44 filters = pdf_dict_geta(ctx, stm, PDF_NAME(Filter), PDF_NAME(F));
45 if (filters)
46 {
47 if (pdf_name_eq(ctx, filters, PDF_NAME(Crypt)))
48 return 1;
49 if (pdf_is_array(ctx, filters))
50 {
51 int n = pdf_array_len(ctx, filters);
52 for (i = 0; i < n; i++)
53 {
54 obj = pdf_array_get(ctx, filters, i);
55 if (pdf_name_eq(ctx, obj, PDF_NAME(Crypt)))
56 return 1;
57 }
58 }
59 }
60 return 0;
61 }
62
63 static fz_jbig2_globals *
pdf_load_jbig2_globals(fz_context * ctx,pdf_obj * dict)64 pdf_load_jbig2_globals(fz_context *ctx, pdf_obj *dict)
65 {
66 fz_jbig2_globals *globals;
67 fz_buffer *buf = NULL;
68
69 fz_var(buf);
70
71 if ((globals = pdf_find_item(ctx, fz_drop_jbig2_globals_imp, dict)) != NULL)
72 return globals;
73
74 if (pdf_mark_obj(ctx, dict))
75 fz_throw(ctx, FZ_ERROR_GENERIC, "cyclic reference when loading JBIG2 globals");
76
77 fz_try(ctx)
78 {
79 buf = pdf_load_stream(ctx, dict);
80 globals = fz_load_jbig2_globals(ctx, buf);
81 pdf_store_item(ctx, dict, globals, fz_buffer_storage(ctx, buf, NULL));
82 }
83 fz_always(ctx)
84 {
85 fz_drop_buffer(ctx, buf);
86 pdf_unmark_obj(ctx, dict);
87 }
88 fz_catch(ctx)
89 {
90 fz_rethrow(ctx);
91 }
92
93 return globals;
94 }
95
96 static void
build_compression_params(fz_context * ctx,pdf_obj * f,pdf_obj * p,fz_compression_params * params)97 build_compression_params(fz_context *ctx, pdf_obj *f, pdf_obj *p, fz_compression_params *params)
98 {
99 int predictor = pdf_dict_get_int(ctx, p, PDF_NAME(Predictor));
100 pdf_obj *columns_obj = pdf_dict_get(ctx, p, PDF_NAME(Columns));
101 int columns = pdf_to_int(ctx, columns_obj);
102 int colors = pdf_dict_get_int(ctx, p, PDF_NAME(Colors));
103 int bpc = pdf_dict_get_int(ctx, p, PDF_NAME(BitsPerComponent));
104
105 params->type = FZ_IMAGE_RAW;
106
107 if (pdf_name_eq(ctx, f, PDF_NAME(CCITTFaxDecode)) || pdf_name_eq(ctx, f, PDF_NAME(CCF)))
108 {
109 pdf_obj *k = pdf_dict_get(ctx, p, PDF_NAME(K));
110 pdf_obj *eol = pdf_dict_get(ctx, p, PDF_NAME(EndOfLine));
111 pdf_obj *eba = pdf_dict_get(ctx, p, PDF_NAME(EncodedByteAlign));
112 pdf_obj *rows = pdf_dict_get(ctx, p, PDF_NAME(Rows));
113 pdf_obj *eob = pdf_dict_get(ctx, p, PDF_NAME(EndOfBlock));
114 pdf_obj *bi1 = pdf_dict_get(ctx, p, PDF_NAME(BlackIs1));
115
116 params->type = FZ_IMAGE_FAX;
117 params->u.fax.k = (k ? pdf_to_int(ctx, k) : 0);
118 params->u.fax.end_of_line = (eol ? pdf_to_bool(ctx, eol) : 0);
119 params->u.fax.encoded_byte_align = (eba ? pdf_to_bool(ctx, eba) : 0);
120 params->u.fax.columns = (columns_obj ? columns : 1728);
121 params->u.fax.rows = (rows ? pdf_to_int(ctx, rows) : 0);
122 params->u.fax.end_of_block = (eob ? pdf_to_bool(ctx, eob) : 1);
123 params->u.fax.black_is_1 = (bi1 ? pdf_to_bool(ctx, bi1) : 0);
124 }
125 else if (pdf_name_eq(ctx, f, PDF_NAME(DCTDecode)) || pdf_name_eq(ctx, f, PDF_NAME(DCT)))
126 {
127 pdf_obj *ct = pdf_dict_get(ctx, p, PDF_NAME(ColorTransform));
128
129 params->type = FZ_IMAGE_JPEG;
130 params->u.jpeg.color_transform = (ct ? pdf_to_int(ctx, ct) : -1);
131 }
132 else if (pdf_name_eq(ctx, f, PDF_NAME(RunLengthDecode)) || pdf_name_eq(ctx, f, PDF_NAME(RL)))
133 {
134 params->type = FZ_IMAGE_RLD;
135 }
136 else if (pdf_name_eq(ctx, f, PDF_NAME(FlateDecode)) || pdf_name_eq(ctx, f, PDF_NAME(Fl)))
137 {
138 params->type = FZ_IMAGE_FLATE;
139 params->u.flate.predictor = predictor;
140 params->u.flate.columns = columns;
141 params->u.flate.colors = colors;
142 params->u.flate.bpc = bpc;
143 }
144 else if (pdf_name_eq(ctx, f, PDF_NAME(LZWDecode)) || pdf_name_eq(ctx, f, PDF_NAME(LZW)))
145 {
146 pdf_obj *ec = pdf_dict_get(ctx, p, PDF_NAME(EarlyChange));
147
148 params->type = FZ_IMAGE_LZW;
149 params->u.lzw.predictor = predictor;
150 params->u.lzw.columns = columns;
151 params->u.lzw.colors = colors;
152 params->u.lzw.bpc = bpc;
153 params->u.lzw.early_change = (ec ? pdf_to_int(ctx, ec) : 1);
154 }
155 else if (pdf_name_eq(ctx, f, PDF_NAME(JBIG2Decode)))
156 {
157 pdf_obj *g = pdf_dict_get(ctx, p, PDF_NAME(JBIG2Globals));
158
159 params->type = FZ_IMAGE_JBIG2;
160 params->u.jbig2.globals = NULL;
161 if (g)
162 {
163 if (!pdf_is_stream(ctx, g))
164 fz_warn(ctx, "jbig2 globals is not a stream, skipping globals");
165 else
166 params->u.jbig2.globals = pdf_load_jbig2_globals(ctx, g);
167 }
168 }
169 }
170
171 /*
172 * Create a filter given a name and param dictionary.
173 */
174 static fz_stream *
build_filter(fz_context * ctx,fz_stream * chain,pdf_document * doc,pdf_obj * f,pdf_obj * p,int num,int gen,fz_compression_params * params)175 build_filter(fz_context *ctx, fz_stream *chain, pdf_document *doc, pdf_obj *f, pdf_obj *p, int num, int gen, fz_compression_params *params)
176 {
177 fz_compression_params local_params;
178
179 local_params.u.jbig2.globals = NULL;
180 if (params == NULL)
181 params = &local_params;
182
183 build_compression_params(ctx, f, p, params);
184
185 /* If we were using params we were passed in, and we successfully
186 * recognised the image type, we can use the existing filter and
187 * shortstop here. */
188 if (params != &local_params && params->type != FZ_IMAGE_RAW)
189 return fz_keep_stream(ctx, chain); /* nothing to do */
190
191 else if (params->type == FZ_IMAGE_JBIG2)
192 {
193 fz_stream *stm;
194 fz_try(ctx)
195 stm = fz_open_image_decomp_stream(ctx, chain, params, NULL);
196 fz_always(ctx)
197 fz_drop_jbig2_globals(ctx, local_params.u.jbig2.globals);
198 fz_catch(ctx)
199 fz_rethrow(ctx);
200 return stm;
201 }
202
203 else if (params->type != FZ_IMAGE_RAW)
204 return fz_open_image_decomp_stream(ctx, chain, params, NULL);
205
206 else if (pdf_name_eq(ctx, f, PDF_NAME(ASCIIHexDecode)) || pdf_name_eq(ctx, f, PDF_NAME(AHx)))
207 return fz_open_ahxd(ctx, chain);
208
209 else if (pdf_name_eq(ctx, f, PDF_NAME(ASCII85Decode)) || pdf_name_eq(ctx, f, PDF_NAME(A85)))
210 return fz_open_a85d(ctx, chain);
211
212 else if (pdf_name_eq(ctx, f, PDF_NAME(JPXDecode)))
213 return fz_keep_stream(ctx, chain); /* JPX decoding is special cased in the image loading code */
214
215 else if (pdf_name_eq(ctx, f, PDF_NAME(Crypt)))
216 {
217 if (!doc->crypt)
218 fz_warn(ctx, "crypt filter in unencrypted document");
219 else
220 {
221 pdf_obj *name = pdf_dict_get(ctx, p, PDF_NAME(Name));
222 if (pdf_is_name(ctx, name))
223 return pdf_open_crypt_with_filter(ctx, chain, doc->crypt, name, num, gen);
224 }
225 }
226
227 else
228 fz_warn(ctx, "unknown filter name (%s)", pdf_to_name(ctx, f));
229
230 return fz_keep_stream(ctx, chain);
231 }
232
233 /* Build filter, and assume ownership of chain */
234 static fz_stream *
build_filter_drop(fz_context * ctx,fz_stream * tail,pdf_document * doc,pdf_obj * f,pdf_obj * p,int num,int gen,fz_compression_params * params)235 build_filter_drop(fz_context *ctx, fz_stream *tail, pdf_document *doc, pdf_obj *f, pdf_obj *p, int num, int gen, fz_compression_params *params)
236 {
237 fz_stream *head;
238 fz_try(ctx)
239 head = build_filter(ctx, tail, doc, f, p, num, gen, params);
240 fz_always(ctx)
241 fz_drop_stream(ctx, tail);
242 fz_catch(ctx)
243 fz_rethrow(ctx);
244 return head;
245 }
246
247 /*
248 * Build a chain of filters given filter names and param dicts.
249 * If chain is given, start filter chain with it.
250 * Assume ownership of chain.
251 */
252 static fz_stream *
build_filter_chain_drop(fz_context * ctx,fz_stream * chain,pdf_document * doc,pdf_obj * fs,pdf_obj * ps,int num,int gen,fz_compression_params * params)253 build_filter_chain_drop(fz_context *ctx, fz_stream *chain, pdf_document *doc, pdf_obj *fs, pdf_obj *ps, int num, int gen, fz_compression_params *params)
254 {
255 fz_var(chain);
256 fz_try(ctx)
257 {
258 int i, n = pdf_array_len(ctx, fs);
259 for (i = 0; i < n; i++)
260 {
261 pdf_obj *f = pdf_array_get(ctx, fs, i);
262 pdf_obj *p = pdf_array_get(ctx, ps, i);
263 chain = build_filter_drop(ctx, chain, doc, f, p, num, gen, (i == n-1 ? params : NULL));
264 }
265 }
266 fz_catch(ctx)
267 fz_rethrow(ctx);
268 return chain;
269 }
270
271 static fz_stream *
build_filter_chain(fz_context * ctx,fz_stream * chain,pdf_document * doc,pdf_obj * fs,pdf_obj * ps,int num,int gen,fz_compression_params * params)272 build_filter_chain(fz_context *ctx, fz_stream *chain, pdf_document *doc, pdf_obj *fs, pdf_obj *ps, int num, int gen, fz_compression_params *params)
273 {
274 return build_filter_chain_drop(ctx, fz_keep_stream(ctx, chain), doc, fs, ps, num, gen, params);
275 }
276
277 /*
278 * Build a filter for reading raw stream data.
279 * This is a null filter to constrain reading to the stream length (and to
280 * allow for other people accessing the file), followed by a decryption
281 * filter.
282 *
283 * orig_num and orig_gen are used purely to seed the encryption.
284 */
285 static fz_stream *
pdf_open_raw_filter(fz_context * ctx,fz_stream * file_stm,pdf_document * doc,pdf_obj * stmobj,int num,int * orig_num,int * orig_gen,int64_t offset)286 pdf_open_raw_filter(fz_context *ctx, fz_stream *file_stm, pdf_document *doc, pdf_obj *stmobj, int num, int *orig_num, int *orig_gen, int64_t offset)
287 {
288 pdf_xref_entry *x = NULL;
289 fz_stream *null_stm, *crypt_stm;
290 int hascrypt;
291 int len;
292
293 if (num > 0 && num < pdf_xref_len(ctx, doc))
294 {
295 x = pdf_get_xref_entry(ctx, doc, num);
296 *orig_num = x->num;
297 *orig_gen = x->gen;
298 if (x->stm_buf)
299 return fz_open_buffer(ctx, x->stm_buf);
300 }
301 else
302 {
303 /* We only end up here when called from pdf_open_stream_with_offset to parse new format XRef sections. */
304 /* New style XRef sections must have generation number 0. */
305 *orig_num = num;
306 *orig_gen = 0;
307 }
308
309 hascrypt = pdf_stream_has_crypt(ctx, stmobj);
310 len = pdf_dict_get_int(ctx, stmobj, PDF_NAME(Length));
311 null_stm = fz_open_endstream_filter(ctx, file_stm, len, offset);
312 if (doc->crypt && !hascrypt)
313 {
314 fz_try(ctx)
315 crypt_stm = pdf_open_crypt(ctx, null_stm, doc->crypt, *orig_num, *orig_gen);
316 fz_always(ctx)
317 fz_drop_stream(ctx, null_stm);
318 fz_catch(ctx)
319 fz_rethrow(ctx);
320 return crypt_stm;
321 }
322 return null_stm;
323 }
324
325 /*
326 * Construct a filter to decode a stream, constraining
327 * to stream length and decrypting.
328 */
329 static fz_stream *
pdf_open_filter(fz_context * ctx,pdf_document * doc,fz_stream * file_stm,pdf_obj * stmobj,int num,int64_t offset,fz_compression_params * imparams)330 pdf_open_filter(fz_context *ctx, pdf_document *doc, fz_stream *file_stm, pdf_obj *stmobj, int num, int64_t offset, fz_compression_params *imparams)
331 {
332 pdf_obj *filters = pdf_dict_geta(ctx, stmobj, PDF_NAME(Filter), PDF_NAME(F));
333 pdf_obj *params = pdf_dict_geta(ctx, stmobj, PDF_NAME(DecodeParms), PDF_NAME(DP));
334 int orig_num, orig_gen;
335 fz_stream *rstm, *fstm;
336
337 rstm = pdf_open_raw_filter(ctx, file_stm, doc, stmobj, num, &orig_num, &orig_gen, offset);
338 fz_try(ctx)
339 {
340 if (pdf_is_name(ctx, filters))
341 fstm = build_filter(ctx, rstm, doc, filters, params, orig_num, orig_gen, imparams);
342 else if (pdf_array_len(ctx, filters) > 0)
343 fstm = build_filter_chain(ctx, rstm, doc, filters, params, orig_num, orig_gen, imparams);
344 else
345 {
346 if (imparams)
347 imparams->type = FZ_IMAGE_RAW;
348 fstm = fz_keep_stream(ctx, rstm);
349 }
350 }
351 fz_always(ctx)
352 fz_drop_stream(ctx, rstm);
353 fz_catch(ctx)
354 fz_rethrow(ctx);
355
356 return fstm;
357 }
358
359 fz_stream *
pdf_open_inline_stream(fz_context * ctx,pdf_document * doc,pdf_obj * stmobj,int length,fz_stream * file_stm,fz_compression_params * imparams)360 pdf_open_inline_stream(fz_context *ctx, pdf_document *doc, pdf_obj *stmobj, int length, fz_stream *file_stm, fz_compression_params *imparams)
361 {
362 pdf_obj *filters = pdf_dict_geta(ctx, stmobj, PDF_NAME(Filter), PDF_NAME(F));
363 pdf_obj *params = pdf_dict_geta(ctx, stmobj, PDF_NAME(DecodeParms), PDF_NAME(DP));
364
365 if (pdf_is_name(ctx, filters))
366 return build_filter(ctx, file_stm, doc, filters, params, 0, 0, imparams);
367 else if (pdf_array_len(ctx, filters) > 0)
368 return build_filter_chain(ctx, file_stm, doc, filters, params, 0, 0, imparams);
369
370 if (imparams)
371 imparams->type = FZ_IMAGE_RAW;
372 return fz_open_null_filter(ctx, file_stm, length, fz_tell(ctx, file_stm));
373 }
374
375 void
pdf_load_compressed_inline_image(fz_context * ctx,pdf_document * doc,pdf_obj * dict,int length,fz_stream * file_stm,int indexed,fz_compressed_image * image)376 pdf_load_compressed_inline_image(fz_context *ctx, pdf_document *doc, pdf_obj *dict, int length, fz_stream *file_stm, int indexed, fz_compressed_image *image)
377 {
378 fz_stream *istm = NULL, *leech = NULL, *decomp = NULL;
379 fz_pixmap *pixmap = NULL;
380 fz_compressed_buffer *bc;
381 int dummy_l2factor = 0;
382
383 fz_var(istm);
384 fz_var(leech);
385 fz_var(decomp);
386 fz_var(pixmap);
387
388 bc = fz_malloc_struct(ctx, fz_compressed_buffer);
389 fz_try(ctx)
390 {
391 bc->buffer = fz_new_buffer(ctx, 1024);
392 istm = pdf_open_inline_stream(ctx, doc, dict, length, file_stm, &bc->params);
393 leech = fz_open_leecher(ctx, istm, bc->buffer);
394 decomp = fz_open_image_decomp_stream(ctx, leech, &bc->params, &dummy_l2factor);
395 pixmap = fz_decomp_image_from_stream(ctx, decomp, image, NULL, indexed, 0);
396 fz_set_compressed_image_buffer(ctx, image, bc);
397 }
398 fz_always(ctx)
399 {
400 fz_drop_stream(ctx, istm);
401 fz_drop_stream(ctx, leech);
402 fz_drop_stream(ctx, decomp);
403 fz_drop_pixmap(ctx, pixmap);
404 }
405 fz_catch(ctx)
406 {
407 fz_drop_compressed_buffer(ctx, bc);
408 fz_rethrow(ctx);
409 }
410 }
411
412 fz_stream *
pdf_open_raw_stream_number(fz_context * ctx,pdf_document * doc,int num)413 pdf_open_raw_stream_number(fz_context *ctx, pdf_document *doc, int num)
414 {
415 pdf_xref_entry *x;
416 int orig_num, orig_gen;
417
418 if (num <= 0 || num >= pdf_xref_len(ctx, doc))
419 fz_throw(ctx, FZ_ERROR_GENERIC, "object id out of range (%d 0 R)", num);
420
421 x = pdf_cache_object(ctx, doc, num);
422 if (x->stm_ofs == 0)
423 fz_throw(ctx, FZ_ERROR_GENERIC, "object is not a stream");
424
425 return pdf_open_raw_filter(ctx, doc->file, doc, x->obj, num, &orig_num, &orig_gen, x->stm_ofs);
426 }
427
428 static fz_stream *
pdf_open_image_stream(fz_context * ctx,pdf_document * doc,int num,fz_compression_params * params)429 pdf_open_image_stream(fz_context *ctx, pdf_document *doc, int num, fz_compression_params *params)
430 {
431 pdf_xref_entry *x;
432
433 if (num <= 0 || num >= pdf_xref_len(ctx, doc))
434 fz_throw(ctx, FZ_ERROR_GENERIC, "object id out of range (%d 0 R)", num);
435
436 x = pdf_cache_object(ctx, doc, num);
437 if (x->stm_ofs == 0 && x->stm_buf == NULL)
438 fz_throw(ctx, FZ_ERROR_GENERIC, "object is not a stream");
439
440 return pdf_open_filter(ctx, doc, doc->file, x->obj, num, x->stm_ofs, params);
441 }
442
443 fz_stream *
pdf_open_stream_number(fz_context * ctx,pdf_document * doc,int num)444 pdf_open_stream_number(fz_context *ctx, pdf_document *doc, int num)
445 {
446 return pdf_open_image_stream(ctx, doc, num, NULL);
447 }
448
449 fz_stream *
pdf_open_stream_with_offset(fz_context * ctx,pdf_document * doc,int num,pdf_obj * dict,int64_t stm_ofs)450 pdf_open_stream_with_offset(fz_context *ctx, pdf_document *doc, int num, pdf_obj *dict, int64_t stm_ofs)
451 {
452 if (stm_ofs == 0)
453 fz_throw(ctx, FZ_ERROR_GENERIC, "object is not a stream");
454 return pdf_open_filter(ctx, doc, doc->file, dict, num, stm_ofs, NULL);
455 }
456
457 fz_buffer *
pdf_load_raw_stream_number(fz_context * ctx,pdf_document * doc,int num)458 pdf_load_raw_stream_number(fz_context *ctx, pdf_document *doc, int num)
459 {
460 fz_stream *stm;
461 pdf_obj *dict;
462 int len;
463 fz_buffer *buf = NULL;
464 pdf_xref_entry *x;
465
466 if (num > 0 && num < pdf_xref_len(ctx, doc))
467 {
468 x = pdf_get_xref_entry(ctx, doc, num);
469 if (x->stm_buf)
470 return fz_keep_buffer(ctx, x->stm_buf);
471 }
472
473 dict = pdf_load_object(ctx, doc, num);
474
475 fz_try(ctx)
476 len = pdf_dict_get_int(ctx, dict, PDF_NAME(Length));
477 fz_always(ctx)
478 pdf_drop_obj(ctx, dict);
479 fz_catch(ctx)
480 fz_rethrow(ctx);
481
482 stm = pdf_open_raw_stream_number(ctx, doc, num);
483
484 fz_try(ctx)
485 buf = fz_read_all(ctx, stm, len);
486 fz_always(ctx)
487 fz_drop_stream(ctx, stm);
488 fz_catch(ctx)
489 fz_rethrow(ctx);
490
491 return buf;
492 }
493
/*
 * Estimate the decoded size of 'len' encoded bytes for the named filter.
 *
 * The estimate is len/2 for ASCIIHexDecode, len*4/5 for ASCII85Decode,
 * len*3 for FlateDecode and RunLengthDecode, len*2 for LZWDecode, and
 * 'len' itself for any other name.
 *
 * The arithmetic is done in a wider type so that large lengths cannot
 * overflow a signed int. If the estimate does not fit in an int, 'len'
 * is returned unchanged.
 */
static int
pdf_guess_filter_length(int len, const char *filter)
{
	long long guess = len;

	if (!strcmp(filter, "ASCIIHexDecode"))
		guess = (long long)len / 2;
	else if (!strcmp(filter, "ASCII85Decode"))
		guess = (long long)len * 4 / 5;
	else if (!strcmp(filter, "FlateDecode"))
		guess = (long long)len * 3;
	else if (!strcmp(filter, "RunLengthDecode"))
		guess = (long long)len * 3;
	else if (!strcmp(filter, "LZWDecode"))
		guess = (long long)len * 2;

	/* Out of int range: fall back to the undecorated length. */
	if (guess > INT_MAX || guess < INT_MIN)
		return len;
	return (int)guess;
}
509
510 /* Check if an entry has a cached stream and return whether it is directly
511 * reusable. A buffer is directly reusable only if the stream is
512 * uncompressed, or if it is compressed purely a compression method we can
513 * return details of in fz_compression_params.
514 *
515 * If the stream is reusable return 1, and set params as required, otherwise
516 * return 0. */
517 static int
can_reuse_buffer(fz_context * ctx,pdf_xref_entry * entry,fz_compression_params * params)518 can_reuse_buffer(fz_context *ctx, pdf_xref_entry *entry, fz_compression_params *params)
519 {
520 pdf_obj *f;
521 pdf_obj *p;
522
523 if (!entry || !entry->obj || !entry->stm_buf)
524 return 0;
525
526 if (params)
527 params->type = FZ_IMAGE_RAW;
528
529 f = pdf_dict_geta(ctx, entry->obj, PDF_NAME(Filter), PDF_NAME(F));
530 /* If there are no filters, it's uncompressed, and we can use it */
531 if (!f)
532 return 1;
533
534 p = pdf_dict_geta(ctx, entry->obj, PDF_NAME(DecodeParms), PDF_NAME(DP));
535 if (pdf_is_array(ctx, f))
536 {
537 int len = pdf_array_len(ctx, f);
538
539 /* Empty array of filters. Its uncompressed. We can cope. */
540 if (len == 0)
541 return 1;
542 /* 1 filter is the most we can hope to cope with - if more,*/
543 if (len != 1)
544 return 0;
545 p = pdf_array_get(ctx, p, 0);
546 }
547 if (pdf_is_null(ctx, f))
548 return 1; /* Null filter is uncompressed */
549 if (!pdf_is_name(ctx, f))
550 return 0;
551
552 /* There are filters, so unless we have the option of shortstopping,
553 * we can't use the existing buffer. */
554 if (!params)
555 return 0;
556
557 build_compression_params(ctx, f, p, params);
558
559 return (params->type == FZ_IMAGE_RAW) ? 0 : 1;
560 }
561
562 static fz_buffer *
pdf_load_image_stream(fz_context * ctx,pdf_document * doc,int num,fz_compression_params * params,int * truncated)563 pdf_load_image_stream(fz_context *ctx, pdf_document *doc, int num, fz_compression_params *params, int *truncated)
564 {
565 fz_stream *stm = NULL;
566 pdf_obj *dict, *obj;
567 int i, len, n;
568 fz_buffer *buf;
569
570 fz_var(buf);
571
572 if (num > 0 && num < pdf_xref_len(ctx, doc))
573 {
574 pdf_xref_entry *entry = pdf_get_xref_entry(ctx, doc, num);
575 /* Return ref to existing buffer, but only if uncompressed,
576 * or shortstoppable */
577 if (can_reuse_buffer(ctx, entry, params))
578 return fz_keep_buffer(ctx, entry->stm_buf);
579 }
580
581 dict = pdf_load_object(ctx, doc, num);
582 fz_try(ctx)
583 {
584 len = pdf_dict_get_int(ctx, dict, PDF_NAME(Length));
585 obj = pdf_dict_get(ctx, dict, PDF_NAME(Filter));
586 len = pdf_guess_filter_length(len, pdf_to_name(ctx, obj));
587 n = pdf_array_len(ctx, obj);
588 for (i = 0; i < n; i++)
589 len = pdf_guess_filter_length(len, pdf_to_name(ctx, pdf_array_get(ctx, obj, i)));
590 }
591 fz_always(ctx)
592 {
593 pdf_drop_obj(ctx, dict);
594 }
595 fz_catch(ctx)
596 {
597 fz_rethrow(ctx);
598 }
599
600 stm = pdf_open_image_stream(ctx, doc, num, params);
601
602 fz_try(ctx)
603 {
604 if (truncated)
605 buf = fz_read_best(ctx, stm, len, truncated);
606 else
607 buf = fz_read_all(ctx, stm, len);
608 }
609 fz_always(ctx)
610 {
611 fz_drop_stream(ctx, stm);
612 }
613 fz_catch(ctx)
614 {
615 fz_rethrow(ctx);
616 }
617
618 return buf;
619 }
620
621 fz_buffer *
pdf_load_stream_number(fz_context * ctx,pdf_document * doc,int num)622 pdf_load_stream_number(fz_context *ctx, pdf_document *doc, int num)
623 {
624 return pdf_load_image_stream(ctx, doc, num, NULL, NULL);
625 }
626
627 fz_compressed_buffer *
pdf_load_compressed_stream(fz_context * ctx,pdf_document * doc,int num)628 pdf_load_compressed_stream(fz_context *ctx, pdf_document *doc, int num)
629 {
630 fz_compressed_buffer *bc = fz_malloc_struct(ctx, fz_compressed_buffer);
631
632 fz_try(ctx)
633 {
634 bc->buffer = pdf_load_image_stream(ctx, doc, num, &bc->params, NULL);
635 }
636 fz_catch(ctx)
637 {
638 fz_free(ctx, bc);
639 fz_rethrow(ctx);
640 }
641 return bc;
642 }
643
644 static fz_stream *
pdf_open_object_array(fz_context * ctx,pdf_document * doc,pdf_obj * list)645 pdf_open_object_array(fz_context *ctx, pdf_document *doc, pdf_obj *list)
646 {
647 fz_stream *stm;
648 int i, n;
649
650 n = pdf_array_len(ctx, list);
651 stm = fz_open_concat(ctx, n, 1);
652
653 for (i = 0; i < n; i++)
654 {
655 pdf_obj *obj = pdf_array_get(ctx, list, i);
656 fz_try(ctx)
657 fz_concat_push_drop(ctx, stm, pdf_open_stream(ctx, obj));
658 fz_catch(ctx)
659 {
660 if (fz_caught(ctx) == FZ_ERROR_TRYLATER)
661 {
662 fz_drop_stream(ctx, stm);
663 fz_rethrow(ctx);
664 }
665 fz_warn(ctx, "cannot load content stream part %d/%d", i + 1, n);
666 }
667 }
668
669 return stm;
670 }
671
672 fz_stream *
pdf_open_contents_stream(fz_context * ctx,pdf_document * doc,pdf_obj * obj)673 pdf_open_contents_stream(fz_context *ctx, pdf_document *doc, pdf_obj *obj)
674 {
675 int num;
676
677 if (pdf_is_array(ctx, obj))
678 return pdf_open_object_array(ctx, doc, obj);
679
680 num = pdf_to_num(ctx, obj);
681 if (pdf_is_stream(ctx, obj))
682 return pdf_open_image_stream(ctx, doc, num, NULL);
683
684 fz_warn(ctx, "content stream is not a stream (%d 0 R)", num);
685 return fz_open_memory(ctx, (unsigned char *)"", 0);
686 }
687
pdf_load_raw_stream(fz_context * ctx,pdf_obj * ref)688 fz_buffer *pdf_load_raw_stream(fz_context *ctx, pdf_obj *ref)
689 {
690 if (pdf_is_stream(ctx, ref))
691 return pdf_load_raw_stream_number(ctx, pdf_get_indirect_document(ctx, ref), pdf_to_num(ctx, ref));
692 fz_throw(ctx, FZ_ERROR_GENERIC, "object is not a stream");
693 }
694
pdf_load_stream(fz_context * ctx,pdf_obj * ref)695 fz_buffer *pdf_load_stream(fz_context *ctx, pdf_obj *ref)
696 {
697 if (pdf_is_stream(ctx, ref))
698 return pdf_load_stream_number(ctx, pdf_get_indirect_document(ctx, ref), pdf_to_num(ctx, ref));
699 fz_throw(ctx, FZ_ERROR_GENERIC, "object is not a stream");
700 }
701
pdf_open_raw_stream(fz_context * ctx,pdf_obj * ref)702 fz_stream *pdf_open_raw_stream(fz_context *ctx, pdf_obj *ref)
703 {
704 if (pdf_is_stream(ctx, ref))
705 return pdf_open_raw_stream_number(ctx, pdf_get_indirect_document(ctx, ref), pdf_to_num(ctx, ref));
706 fz_throw(ctx, FZ_ERROR_GENERIC, "object is not a stream");
707 }
708
pdf_open_stream(fz_context * ctx,pdf_obj * ref)709 fz_stream *pdf_open_stream(fz_context *ctx, pdf_obj *ref)
710 {
711 if (pdf_is_stream(ctx, ref))
712 return pdf_open_stream_number(ctx, pdf_get_indirect_document(ctx, ref), pdf_to_num(ctx, ref));
713 fz_throw(ctx, FZ_ERROR_GENERIC, "object is not a stream");
714 }
715