#include "mupdf/fitz.h"
#include "mupdf/pdf.h"

#include <limits.h>
#include <string.h>
5
6 static fz_image *pdf_load_jpx(fz_context *ctx, pdf_document *doc, pdf_obj *dict, int forcemask);
7
8 static fz_image *
pdf_load_jpx_imp(fz_context * ctx,pdf_document * doc,pdf_obj * rdb,pdf_obj * dict,fz_stream * cstm,int forcemask)9 pdf_load_jpx_imp(fz_context *ctx, pdf_document *doc, pdf_obj *rdb, pdf_obj *dict, fz_stream *cstm, int forcemask)
10 {
11 fz_image *image = pdf_load_jpx(ctx, doc, dict, forcemask);
12
13 if (forcemask)
14 {
15 fz_pixmap_image *cimg = (fz_pixmap_image *)image;
16 fz_pixmap *mask_pixmap;
17 fz_pixmap *tile = fz_pixmap_image_tile(ctx, cimg);
18
19 if (tile->n != 1)
20 {
21 fz_pixmap *gray = fz_convert_pixmap(ctx, tile, fz_device_gray(ctx), NULL, NULL, fz_default_color_params, 0);
22 fz_drop_pixmap(ctx, tile);
23 tile = gray;
24 }
25
26 mask_pixmap = fz_alpha_from_gray(ctx, tile);
27 fz_drop_pixmap(ctx, tile);
28 fz_set_pixmap_image_tile(ctx, cimg, mask_pixmap);
29 }
30
31 return image;
32 }
33
34 static fz_image *
pdf_load_image_imp(fz_context * ctx,pdf_document * doc,pdf_obj * rdb,pdf_obj * dict,fz_stream * cstm,int forcemask)35 pdf_load_image_imp(fz_context *ctx, pdf_document *doc, pdf_obj *rdb, pdf_obj *dict, fz_stream *cstm, int forcemask)
36 {
37 fz_image *image = NULL;
38 pdf_obj *obj, *res;
39
40 int w, h, bpc, n;
41 int imagemask;
42 int interpolate;
43 int indexed;
44 fz_image *mask = NULL; /* explicit mask/soft mask image */
45 int use_colorkey = 0;
46 fz_colorspace *colorspace = NULL;
47 float decode[FZ_MAX_COLORS * 2];
48 int colorkey[FZ_MAX_COLORS * 2];
49 int stride;
50
51 int i;
52 fz_compressed_buffer *buffer;
53
54 /* special case for JPEG2000 images */
55 if (pdf_is_jpx_image(ctx, dict))
56 return pdf_load_jpx_imp(ctx, doc, rdb, dict, cstm, forcemask);
57
58 w = pdf_to_int(ctx, pdf_dict_geta(ctx, dict, PDF_NAME(Width), PDF_NAME(W)));
59 h = pdf_to_int(ctx, pdf_dict_geta(ctx, dict, PDF_NAME(Height), PDF_NAME(H)));
60 bpc = pdf_to_int(ctx, pdf_dict_geta(ctx, dict, PDF_NAME(BitsPerComponent), PDF_NAME(BPC)));
61 if (bpc == 0)
62 bpc = 8;
63 imagemask = pdf_to_bool(ctx, pdf_dict_geta(ctx, dict, PDF_NAME(ImageMask), PDF_NAME(IM)));
64 interpolate = pdf_to_bool(ctx, pdf_dict_geta(ctx, dict, PDF_NAME(Interpolate), PDF_NAME(I)));
65
66 indexed = 0;
67 use_colorkey = 0;
68
69 if (imagemask)
70 bpc = 1;
71
72 if (w <= 0)
73 fz_throw(ctx, FZ_ERROR_GENERIC, "image width is zero (or less)");
74 if (h <= 0)
75 fz_throw(ctx, FZ_ERROR_GENERIC, "image height is zero (or less)");
76 if (bpc <= 0)
77 fz_throw(ctx, FZ_ERROR_GENERIC, "image depth is zero (or less)");
78 if (bpc > 16)
79 fz_throw(ctx, FZ_ERROR_GENERIC, "image depth is too large: %d", bpc);
80 if (w > (1 << 16))
81 fz_throw(ctx, FZ_ERROR_GENERIC, "image is too wide");
82 if (h > (1 << 16))
83 fz_throw(ctx, FZ_ERROR_GENERIC, "image is too high");
84
85 fz_var(mask);
86 fz_var(image);
87 fz_var(colorspace);
88
89 fz_try(ctx)
90 {
91 obj = pdf_dict_geta(ctx, dict, PDF_NAME(ColorSpace), PDF_NAME(CS));
92 if (obj && !imagemask && !forcemask)
93 {
94 /* colorspace resource lookup is only done for inline images */
95 if (pdf_is_name(ctx, obj))
96 {
97 res = pdf_dict_get(ctx, pdf_dict_get(ctx, rdb, PDF_NAME(ColorSpace)), obj);
98 if (res)
99 obj = res;
100 }
101
102 colorspace = pdf_load_colorspace(ctx, obj);
103 indexed = fz_colorspace_is_indexed(ctx, colorspace);
104
105 n = fz_colorspace_n(ctx, colorspace);
106 }
107 else
108 {
109 n = 1;
110 }
111
112 obj = pdf_dict_geta(ctx, dict, PDF_NAME(Decode), PDF_NAME(D));
113 if (obj)
114 {
115 for (i = 0; i < n * 2; i++)
116 decode[i] = pdf_array_get_real(ctx, obj, i);
117 }
118 else if (fz_colorspace_is_lab(ctx, colorspace))
119 {
120 decode[0] = 0;
121 decode[1] = 100;
122 decode[2] = -128;
123 decode[3] = 127;
124 decode[4] = -128;
125 decode[5] = 127;
126 }
127 else
128 {
129 float maxval = indexed ? (1 << bpc) - 1 : 1;
130 for (i = 0; i < n * 2; i++)
131 decode[i] = i & 1 ? maxval : 0;
132 }
133
134 obj = pdf_dict_geta(ctx, dict, PDF_NAME(SMask), PDF_NAME(Mask));
135 if (pdf_is_dict(ctx, obj))
136 {
137 /* Not allowed for inline images or soft masks */
138 if (cstm)
139 fz_warn(ctx, "Ignoring invalid inline image soft mask");
140 else if (forcemask)
141 fz_warn(ctx, "Ignoring recursive image soft mask");
142 else
143 {
144 mask = pdf_load_image_imp(ctx, doc, rdb, obj, NULL, 1);
145 obj = pdf_dict_get(ctx, obj, PDF_NAME(Matte));
146 if (pdf_is_array(ctx, obj))
147 {
148 use_colorkey = 1;
149 for (i = 0; i < n; i++)
150 colorkey[i] = pdf_array_get_real(ctx, obj, i) * 255;
151 }
152 }
153 }
154 else if (pdf_is_array(ctx, obj))
155 {
156 use_colorkey = 1;
157 for (i = 0; i < n * 2; i++)
158 {
159 if (!pdf_is_int(ctx, pdf_array_get(ctx, obj, i)))
160 {
161 fz_warn(ctx, "invalid value in color key mask");
162 use_colorkey = 0;
163 }
164 colorkey[i] = pdf_array_get_int(ctx, obj, i);
165 }
166 }
167
168 /* Do we load from a ref, or do we load an inline stream? */
169 if (cstm == NULL)
170 {
171 /* Just load the compressed image data now and we can decode it on demand. */
172 buffer = pdf_load_compressed_stream(ctx, doc, pdf_to_num(ctx, dict));
173 image = fz_new_image_from_compressed_buffer(ctx, w, h, bpc, colorspace, 96, 96, interpolate, imagemask, decode, use_colorkey ? colorkey : NULL, buffer, mask);
174 image->invert_cmyk_jpeg = 0;
175 }
176 else
177 {
178 /* Inline stream */
179 stride = (w * n * bpc + 7) / 8;
180 image = fz_new_image_from_compressed_buffer(ctx, w, h, bpc, colorspace, 96, 96, interpolate, imagemask, decode, use_colorkey ? colorkey : NULL, NULL, mask);
181 image->invert_cmyk_jpeg = 0;
182 pdf_load_compressed_inline_image(ctx, doc, dict, stride * h, cstm, indexed, (fz_compressed_image *)image);
183 }
184 }
185 fz_always(ctx)
186 {
187 fz_drop_colorspace(ctx, colorspace);
188 fz_drop_image(ctx, mask);
189 }
190 fz_catch(ctx)
191 {
192 fz_drop_image(ctx, image);
193 fz_rethrow(ctx);
194 }
195 return image;
196 }
197
198 fz_image *
pdf_load_inline_image(fz_context * ctx,pdf_document * doc,pdf_obj * rdb,pdf_obj * dict,fz_stream * file)199 pdf_load_inline_image(fz_context *ctx, pdf_document *doc, pdf_obj *rdb, pdf_obj *dict, fz_stream *file)
200 {
201 return pdf_load_image_imp(ctx, doc, rdb, dict, file, 0);
202 }
203
204 int
pdf_is_jpx_image(fz_context * ctx,pdf_obj * dict)205 pdf_is_jpx_image(fz_context *ctx, pdf_obj *dict)
206 {
207 pdf_obj *filter;
208 int i, n;
209
210 filter = pdf_dict_get(ctx, dict, PDF_NAME(Filter));
211 if (pdf_name_eq(ctx, filter, PDF_NAME(JPXDecode)))
212 return 1;
213 n = pdf_array_len(ctx, filter);
214 for (i = 0; i < n; i++)
215 if (pdf_name_eq(ctx, pdf_array_get(ctx, filter, i), PDF_NAME(JPXDecode)))
216 return 1;
217 return 0;
218 }
219
220 static fz_image *
pdf_load_jpx(fz_context * ctx,pdf_document * doc,pdf_obj * dict,int forcemask)221 pdf_load_jpx(fz_context *ctx, pdf_document *doc, pdf_obj *dict, int forcemask)
222 {
223 fz_buffer *buf = NULL;
224 fz_colorspace *colorspace = NULL;
225 fz_pixmap *pix = NULL;
226 pdf_obj *obj;
227 fz_image *mask = NULL;
228 fz_image *img = NULL;
229
230 fz_var(pix);
231 fz_var(buf);
232 fz_var(colorspace);
233 fz_var(mask);
234
235 buf = pdf_load_stream(ctx, dict);
236
237 /* FIXME: We can't handle decode arrays for indexed images currently */
238 fz_try(ctx)
239 {
240 unsigned char *data;
241 size_t len;
242
243 obj = pdf_dict_get(ctx, dict, PDF_NAME(ColorSpace));
244 if (obj)
245 colorspace = pdf_load_colorspace(ctx, obj);
246
247 len = fz_buffer_storage(ctx, buf, &data);
248 pix = fz_load_jpx(ctx, data, len, colorspace);
249
250 obj = pdf_dict_geta(ctx, dict, PDF_NAME(SMask), PDF_NAME(Mask));
251 if (pdf_is_dict(ctx, obj))
252 {
253 if (forcemask)
254 fz_warn(ctx, "Ignoring recursive JPX soft mask");
255 else
256 mask = pdf_load_image_imp(ctx, doc, NULL, obj, NULL, 1);
257 }
258
259 obj = pdf_dict_geta(ctx, dict, PDF_NAME(Decode), PDF_NAME(D));
260 if (obj && !fz_colorspace_is_indexed(ctx, colorspace))
261 {
262 float decode[FZ_MAX_COLORS * 2];
263 int i;
264
265 for (i = 0; i < pix->n * 2; i++)
266 decode[i] = pdf_array_get_real(ctx, obj, i);
267
268 fz_decode_tile(ctx, pix, decode);
269 }
270
271 img = fz_new_image_from_pixmap(ctx, pix, mask);
272 }
273 fz_always(ctx)
274 {
275 fz_drop_image(ctx, mask);
276 fz_drop_pixmap(ctx, pix);
277 fz_drop_colorspace(ctx, colorspace);
278 fz_drop_buffer(ctx, buf);
279 }
280 fz_catch(ctx)
281 {
282 fz_rethrow(ctx);
283 }
284
285 return img;
286 }
287
288 fz_image *
pdf_load_image(fz_context * ctx,pdf_document * doc,pdf_obj * dict)289 pdf_load_image(fz_context *ctx, pdf_document *doc, pdf_obj *dict)
290 {
291 fz_image *image;
292
293 if ((image = pdf_find_item(ctx, fz_drop_image_imp, dict)) != NULL)
294 return image;
295
296 image = pdf_load_image_imp(ctx, doc, NULL, dict, NULL, 0);
297 pdf_store_item(ctx, dict, image, fz_image_size(ctx, image));
298 return image;
299 }
300
301 pdf_obj *
pdf_add_image(fz_context * ctx,pdf_document * doc,fz_image * image)302 pdf_add_image(fz_context *ctx, pdf_document *doc, fz_image *image)
303 {
304 fz_pixmap *pixmap = NULL;
305 pdf_obj *imobj = NULL;
306 pdf_obj *dp;
307 fz_buffer *buffer = NULL;
308 fz_compressed_buffer *cbuffer;
309 int i, n;
310
311 /* If we can maintain compression, do so */
312 cbuffer = fz_compressed_image_buffer(ctx, image);
313
314 fz_var(pixmap);
315 fz_var(buffer);
316 fz_var(imobj);
317
318 imobj = pdf_add_new_dict(ctx, doc, 3);
319 fz_try(ctx)
320 {
321 dp = pdf_dict_put_dict(ctx, imobj, PDF_NAME(DecodeParms), 3);
322 pdf_dict_put(ctx, imobj, PDF_NAME(Type), PDF_NAME(XObject));
323 pdf_dict_put(ctx, imobj, PDF_NAME(Subtype), PDF_NAME(Image));
324
325 if (cbuffer)
326 {
327 fz_compression_params *cp = &cbuffer->params;
328 switch (cp->type)
329 {
330 default:
331 goto unknown_compression;
332 case FZ_IMAGE_RAW:
333 break;
334 case FZ_IMAGE_JPEG:
335 if (cp->u.jpeg.color_transform != -1)
336 pdf_dict_put_int(ctx, dp, PDF_NAME(ColorTransform), cp->u.jpeg.color_transform);
337 pdf_dict_put(ctx, imobj, PDF_NAME(Filter), PDF_NAME(DCTDecode));
338 break;
339 case FZ_IMAGE_JPX:
340 if (cp->u.jpx.smask_in_data)
341 pdf_dict_put_int(ctx, dp, PDF_NAME(SMaskInData), cp->u.jpx.smask_in_data);
342 pdf_dict_put(ctx, imobj, PDF_NAME(Filter), PDF_NAME(JPXDecode));
343 break;
344 case FZ_IMAGE_FAX:
345 if (cp->u.fax.columns)
346 pdf_dict_put_int(ctx, dp, PDF_NAME(Columns), cp->u.fax.columns);
347 if (cp->u.fax.rows)
348 pdf_dict_put_int(ctx, dp, PDF_NAME(Rows), cp->u.fax.rows);
349 if (cp->u.fax.k)
350 pdf_dict_put_int(ctx, dp, PDF_NAME(K), cp->u.fax.k);
351 if (cp->u.fax.end_of_line)
352 pdf_dict_put_bool(ctx, dp, PDF_NAME(EndOfLine), cp->u.fax.end_of_line);
353 if (cp->u.fax.encoded_byte_align)
354 pdf_dict_put_bool(ctx, dp, PDF_NAME(EncodedByteAlign), cp->u.fax.encoded_byte_align);
355 if (cp->u.fax.end_of_block)
356 pdf_dict_put_bool(ctx, dp, PDF_NAME(EndOfBlock), cp->u.fax.end_of_block);
357 if (cp->u.fax.black_is_1)
358 pdf_dict_put_bool(ctx, dp, PDF_NAME(BlackIs1), cp->u.fax.black_is_1);
359 if (cp->u.fax.damaged_rows_before_error)
360 pdf_dict_put_int(ctx, dp, PDF_NAME(DamagedRowsBeforeError), cp->u.fax.damaged_rows_before_error);
361 pdf_dict_put(ctx, imobj, PDF_NAME(Filter), PDF_NAME(CCITTFaxDecode));
362 break;
363 case FZ_IMAGE_FLATE:
364 if (cp->u.flate.columns)
365 pdf_dict_put_int(ctx, dp, PDF_NAME(Columns), cp->u.flate.columns);
366 if (cp->u.flate.colors)
367 pdf_dict_put_int(ctx, dp, PDF_NAME(Colors), cp->u.flate.colors);
368 if (cp->u.flate.predictor)
369 pdf_dict_put_int(ctx, dp, PDF_NAME(Predictor), cp->u.flate.predictor);
370 if (cp->u.flate.bpc)
371 pdf_dict_put_int(ctx, dp, PDF_NAME(BitsPerComponent), cp->u.flate.bpc);
372 pdf_dict_put(ctx, imobj, PDF_NAME(Filter), PDF_NAME(FlateDecode));
373 break;
374 case FZ_IMAGE_LZW:
375 if (cp->u.lzw.columns)
376 pdf_dict_put_int(ctx, dp, PDF_NAME(Columns), cp->u.lzw.columns);
377 if (cp->u.lzw.colors)
378 pdf_dict_put_int(ctx, dp, PDF_NAME(Colors), cp->u.lzw.colors);
379 if (cp->u.lzw.predictor)
380 pdf_dict_put_int(ctx, dp, PDF_NAME(Predictor), cp->u.lzw.predictor);
381 if (cp->u.lzw.early_change)
382 pdf_dict_put_int(ctx, dp, PDF_NAME(EarlyChange), cp->u.lzw.early_change);
383 if (cp->u.lzw.bpc)
384 pdf_dict_put_int(ctx, dp, PDF_NAME(BitsPerComponent), cp->u.lzw.bpc);
385 pdf_dict_put(ctx, imobj, PDF_NAME(Filter), PDF_NAME(LZWDecode));
386 break;
387 case FZ_IMAGE_RLD:
388 pdf_dict_put(ctx, imobj, PDF_NAME(Filter), PDF_NAME(RunLengthDecode));
389 break;
390 }
391
392 if (!pdf_dict_len(ctx, dp))
393 pdf_dict_del(ctx, imobj, PDF_NAME(DecodeParms));
394
395 pdf_dict_put_int(ctx, imobj, PDF_NAME(BitsPerComponent), image->bpc);
396 pdf_dict_put_int(ctx, imobj, PDF_NAME(Width), image->w);
397 pdf_dict_put_int(ctx, imobj, PDF_NAME(Height), image->h);
398
399 buffer = fz_keep_buffer(ctx, cbuffer->buffer);
400
401 if (image->use_decode)
402 {
403 pdf_obj *ary = pdf_dict_put_array(ctx, imobj, PDF_NAME(Decode), image->n * 2);
404 for (i = 0; i < image->n * 2; ++i)
405 pdf_array_push_real(ctx, ary, image->decode[i]);
406 }
407 }
408 else
409 {
410 unknown_compression:
411
412 pixmap = fz_get_pixmap_from_image(ctx, image, NULL, NULL, NULL, NULL);
413 n = pixmap->n - pixmap->alpha - pixmap->s; /* number of colorants */
414 if (n == 0)
415 n = 1; /* treat pixmaps with only alpha or spots as grayscale */
416
417 pdf_dict_put_int(ctx, imobj, PDF_NAME(Width), pixmap->w);
418 pdf_dict_put_int(ctx, imobj, PDF_NAME(Height), pixmap->h);
419
420 if (fz_is_pixmap_monochrome(ctx, pixmap))
421 {
422 int stride = (image->w + 7) / 8;
423 int h = pixmap->h;
424 int w = pixmap->w;
425 unsigned char *s = pixmap->samples;
426 unsigned char *d = fz_calloc(ctx, h, stride);
427 buffer = fz_new_buffer_from_data(ctx, d, (size_t)h * stride);
428
429 pdf_dict_put_int(ctx, imobj, PDF_NAME(BitsPerComponent), 1);
430
431 while (h--)
432 {
433 int x;
434 for (x = 0; x < w; ++x)
435 if (s[x] > 0)
436 d[x>>3] |= 1 << (7 - (x & 7));
437 s += pixmap->stride;
438 d += stride;
439 }
440 }
441 else
442 {
443 size_t size = (size_t)pixmap->w * n;
444 int h = pixmap->h;
445 unsigned char *s = pixmap->samples;
446 unsigned char *d = Memento_label(fz_malloc(ctx, size * h), "pdf_image_samples");
447 buffer = fz_new_buffer_from_data(ctx, d, size * h);
448
449 pdf_dict_put_int(ctx, imobj, PDF_NAME(BitsPerComponent), 8);
450
451 if (n == pixmap->n)
452 {
453 /* If we use all channels, we can copy the data as is. */
454 while (h--)
455 {
456 memcpy(d, s, size);
457 d += size;
458 s += pixmap->stride;
459 }
460 }
461 else
462 {
463 /* Need to remove the alpha and spot planes. */
464 /* TODO: extract alpha plane to a soft mask. */
465 /* TODO: convert spots to colors. */
466
467 int line_skip = pixmap->stride - pixmap->w * pixmap->n;
468 int skip = pixmap->n - n;
469 while (h--)
470 {
471 int w = pixmap->w;
472 while (w--)
473 {
474 int k;
475 for (k = 0; k < n; ++k)
476 *d++ = *s++;
477 s += skip;
478 }
479 s += line_skip;
480 }
481 }
482 }
483 }
484
485 if (image->imagemask)
486 {
487 pdf_dict_put_bool(ctx, imobj, PDF_NAME(ImageMask), 1);
488 }
489 else
490 {
491 fz_colorspace *cs;
492
493 cs = pixmap ? pixmap->colorspace : image->colorspace;
494 switch (fz_colorspace_type(ctx, cs))
495 {
496 case FZ_COLORSPACE_INDEXED:
497 {
498 fz_colorspace *basecs;
499 unsigned char *lookup = NULL;
500 int high = 0;
501 int basen;
502 pdf_obj *arr;
503
504 basecs = cs->u.indexed.base;
505 high = cs->u.indexed.high;
506 lookup = cs->u.indexed.lookup;
507 basen = basecs->n;
508
509 arr = pdf_dict_put_array(ctx, imobj, PDF_NAME(ColorSpace), 4);
510
511 pdf_array_push(ctx, arr, PDF_NAME(Indexed));
512 switch (fz_colorspace_type(ctx, basecs))
513 {
514 case FZ_COLORSPACE_GRAY:
515 pdf_array_push(ctx, arr, PDF_NAME(DeviceGray));
516 break;
517 case FZ_COLORSPACE_RGB:
518 pdf_array_push(ctx, arr, PDF_NAME(DeviceRGB));
519 break;
520 case FZ_COLORSPACE_CMYK:
521 pdf_array_push(ctx, arr, PDF_NAME(DeviceCMYK));
522 break;
523 default:
524 // TODO: convert to RGB!
525 fz_throw(ctx, FZ_ERROR_GENERIC, "only indexed Gray, RGB, and CMYK colorspaces supported");
526 break;
527 }
528
529 pdf_array_push_int(ctx, arr, high);
530 pdf_array_push_string(ctx, arr, (char *) lookup, (size_t)basen * (high + 1));
531 }
532 break;
533 case FZ_COLORSPACE_NONE:
534 case FZ_COLORSPACE_GRAY:
535 pdf_dict_put(ctx, imobj, PDF_NAME(ColorSpace), PDF_NAME(DeviceGray));
536 break;
537 case FZ_COLORSPACE_RGB:
538 pdf_dict_put(ctx, imobj, PDF_NAME(ColorSpace), PDF_NAME(DeviceRGB));
539 break;
540 case FZ_COLORSPACE_CMYK:
541 pdf_dict_put(ctx, imobj, PDF_NAME(ColorSpace), PDF_NAME(DeviceCMYK));
542 break;
543 default:
544 // TODO: convert to RGB!
545 fz_throw(ctx, FZ_ERROR_GENERIC, "only Gray, RGB, and CMYK colorspaces supported");
546 break;
547 }
548 }
549
550 if (image->mask)
551 {
552 if (image->mask->imagemask)
553 pdf_dict_put_drop(ctx, imobj, PDF_NAME(Mask), pdf_add_image(ctx, doc, image->mask));
554 else
555 pdf_dict_put_drop(ctx, imobj, PDF_NAME(SMask), pdf_add_image(ctx, doc, image->mask));
556 }
557
558 pdf_update_stream(ctx, doc, imobj, buffer, 1);
559 }
560 fz_always(ctx)
561 {
562 fz_drop_pixmap(ctx, pixmap);
563 fz_drop_buffer(ctx, buffer);
564 }
565 fz_catch(ctx)
566 {
567 pdf_drop_obj(ctx, imobj);
568 fz_rethrow(ctx);
569 }
570 return imobj;
571 }
572