1 #include "mupdf/fitz.h"
2 #include "mupdf/pdf.h"
3 
4 #include <string.h>
5 #include <math.h>
6 
/* Maximum number of syntax errors tolerated in one content stream before the rest of it is ignored */
8 #define MAX_SYNTAX_ERRORS 100
9 
10 void *
pdf_new_processor(fz_context * ctx,int size)11 pdf_new_processor(fz_context *ctx, int size)
12 {
13 	return Memento_label(fz_calloc(ctx, 1, size), "pdf_processor");
14 }
15 
16 void
pdf_close_processor(fz_context * ctx,pdf_processor * proc)17 pdf_close_processor(fz_context *ctx, pdf_processor *proc)
18 {
19 	if (proc && proc->close_processor)
20 	{
21 		proc->close_processor(ctx, proc);
22 		proc->close_processor = NULL;
23 	}
24 }
25 
26 void
pdf_drop_processor(fz_context * ctx,pdf_processor * proc)27 pdf_drop_processor(fz_context *ctx, pdf_processor *proc)
28 {
29 	if (proc)
30 	{
31 		if (proc->close_processor)
32 			fz_warn(ctx, "dropping unclosed PDF processor");
33 		if (proc->drop_processor)
34 			proc->drop_processor(ctx, proc);
35 	}
36 	fz_free(ctx, proc);
37 }
38 
39 static void
pdf_init_csi(fz_context * ctx,pdf_csi * csi,pdf_document * doc,pdf_obj * rdb,pdf_lexbuf * buf,fz_cookie * cookie)40 pdf_init_csi(fz_context *ctx, pdf_csi *csi, pdf_document *doc, pdf_obj *rdb, pdf_lexbuf *buf, fz_cookie *cookie)
41 {
42 	memset(csi, 0, sizeof *csi);
43 	csi->doc = doc;
44 	csi->rdb = rdb;
45 	csi->buf = buf;
46 	csi->cookie = cookie;
47 }
48 
49 static void
pdf_clear_stack(fz_context * ctx,pdf_csi * csi)50 pdf_clear_stack(fz_context *ctx, pdf_csi *csi)
51 {
52 	int i;
53 
54 	pdf_drop_obj(ctx, csi->obj);
55 	csi->obj = NULL;
56 
57 	csi->name[0] = 0;
58 	csi->string_len = 0;
59 	for (i = 0; i < csi->top; i++)
60 		csi->stack[i] = 0;
61 
62 	csi->top = 0;
63 }
64 
65 static pdf_font_desc *
pdf_try_load_font(fz_context * ctx,pdf_document * doc,pdf_obj * rdb,pdf_obj * font,fz_cookie * cookie)66 pdf_try_load_font(fz_context *ctx, pdf_document *doc, pdf_obj *rdb, pdf_obj *font, fz_cookie *cookie)
67 {
68 	pdf_font_desc *desc = NULL;
69 	fz_try(ctx)
70 		desc = pdf_load_font(ctx, doc, rdb, font);
71 	fz_catch(ctx)
72 	{
73 		if (fz_caught(ctx) == FZ_ERROR_TRYLATER)
74 			if (cookie)
75 				cookie->incomplete++;
76 	}
77 	if (desc == NULL)
78 		desc = pdf_load_hail_mary_font(ctx, doc);
79 	return desc;
80 }
81 
82 static fz_image *
parse_inline_image(fz_context * ctx,pdf_csi * csi,fz_stream * stm,char * csname,int cslen)83 parse_inline_image(fz_context *ctx, pdf_csi *csi, fz_stream *stm, char *csname, int cslen)
84 {
85 	pdf_document *doc = csi->doc;
86 	pdf_obj *rdb = csi->rdb;
87 	pdf_obj *obj = NULL;
88 	pdf_obj *cs;
89 	fz_image *img = NULL;
90 	int ch, found;
91 
92 	fz_var(obj);
93 	fz_var(img);
94 
95 	fz_try(ctx)
96 	{
97 		obj = pdf_parse_dict(ctx, doc, stm, &doc->lexbuf.base);
98 
99 		if (csname)
100 		{
101 			cs = pdf_dict_get(ctx, obj, PDF_NAME(CS));
102 			if (!pdf_is_indirect(ctx, cs) && pdf_is_name(ctx, cs))
103 				fz_strlcpy(csname, pdf_to_name(ctx, cs), cslen);
104 			else
105 				csname[0] = 0;
106 		}
107 
108 		/* read whitespace after ID keyword */
109 		ch = fz_read_byte(ctx, stm);
110 		if (ch == '\r')
111 			if (fz_peek_byte(ctx, stm) == '\n')
112 				fz_read_byte(ctx, stm);
113 
114 		img = pdf_load_inline_image(ctx, doc, rdb, obj, stm);
115 
116 		/* find EI */
117 		found = 0;
118 		ch = fz_read_byte(ctx, stm);
119 		do
120 		{
121 			while (ch != 'E' && ch != EOF)
122 				ch = fz_read_byte(ctx, stm);
123 			if (ch == 'E')
124 			{
125 				ch = fz_read_byte(ctx, stm);
126 				if (ch == 'I')
127 				{
128 					ch = fz_peek_byte(ctx, stm);
129 					if (ch == ' ' || ch <= 32 || ch == '<' || ch == '/')
130 					{
131 						found = 1;
132 						break;
133 					}
134 				}
135 			}
136 		} while (ch != EOF);
137 		if (!found)
138 			fz_throw(ctx, FZ_ERROR_SYNTAX, "syntax error after inline image");
139 	}
140 	fz_always(ctx)
141 	{
142 		pdf_drop_obj(ctx, obj);
143 	}
144 	fz_catch(ctx)
145 	{
146 		fz_drop_image(ctx, img);
147 		fz_rethrow(ctx);
148 	}
149 
150 	return img;
151 }
152 
153 static void
pdf_process_extgstate(fz_context * ctx,pdf_processor * proc,pdf_csi * csi,pdf_obj * dict)154 pdf_process_extgstate(fz_context *ctx, pdf_processor *proc, pdf_csi *csi, pdf_obj *dict)
155 {
156 	pdf_obj *obj;
157 
158 	obj = pdf_dict_get(ctx, dict, PDF_NAME(LW));
159 	if (pdf_is_number(ctx, obj) && proc->op_w)
160 		proc->op_w(ctx, proc, pdf_to_real(ctx, obj));
161 
162 	obj = pdf_dict_get(ctx, dict, PDF_NAME(LC));
163 	if (pdf_is_int(ctx, obj) && proc->op_J)
164 		proc->op_J(ctx, proc, fz_clampi(pdf_to_int(ctx, obj), 0, 2));
165 
166 	obj = pdf_dict_get(ctx, dict, PDF_NAME(LJ));
167 	if (pdf_is_int(ctx, obj) && proc->op_j)
168 		proc->op_j(ctx, proc, fz_clampi(pdf_to_int(ctx, obj), 0, 2));
169 
170 	obj = pdf_dict_get(ctx, dict, PDF_NAME(ML));
171 	if (pdf_is_number(ctx, obj) && proc->op_M)
172 		proc->op_M(ctx, proc, pdf_to_real(ctx, obj));
173 
174 	obj = pdf_dict_get(ctx, dict, PDF_NAME(D));
175 	if (pdf_is_array(ctx, obj) && proc->op_d)
176 	{
177 		pdf_obj *dash_array = pdf_array_get(ctx, obj, 0);
178 		pdf_obj *dash_phase = pdf_array_get(ctx, obj, 1);
179 		proc->op_d(ctx, proc, dash_array, pdf_to_real(ctx, dash_phase));
180 	}
181 
182 	obj = pdf_dict_get(ctx, dict, PDF_NAME(RI));
183 	if (pdf_is_name(ctx, obj) && proc->op_ri)
184 		proc->op_ri(ctx, proc, pdf_to_name(ctx, obj));
185 
186 	obj = pdf_dict_get(ctx, dict, PDF_NAME(FL));
187 	if (pdf_is_number(ctx, obj) && proc->op_i)
188 		proc->op_i(ctx, proc, pdf_to_real(ctx, obj));
189 
190 	obj = pdf_dict_get(ctx, dict, PDF_NAME(Font));
191 	if (pdf_is_array(ctx, obj) && proc->op_Tf)
192 	{
193 		pdf_obj *font_ref = pdf_array_get(ctx, obj, 0);
194 		pdf_obj *font_size = pdf_array_get(ctx, obj, 1);
195 		pdf_font_desc *font;
196 		if (pdf_is_dict(ctx, font_ref))
197 			font = pdf_try_load_font(ctx, csi->doc, csi->rdb, font_ref, csi->cookie);
198 		else
199 			font = pdf_load_hail_mary_font(ctx, csi->doc);
200 		fz_try(ctx)
201 			proc->op_Tf(ctx, proc, "ExtGState", font, pdf_to_real(ctx, font_size));
202 		fz_always(ctx)
203 			pdf_drop_font(ctx, font);
204 		fz_catch(ctx)
205 			fz_rethrow(ctx);
206 	}
207 
208 	/* overprint and color management */
209 
210 	obj = pdf_dict_get(ctx, dict, PDF_NAME(OP));
211 	if (pdf_is_bool(ctx, obj) && proc->op_gs_OP)
212 		proc->op_gs_OP(ctx, proc, pdf_to_bool(ctx, obj));
213 
214 	obj = pdf_dict_get(ctx, dict, PDF_NAME(op));
215 	if (pdf_is_bool(ctx, obj) && proc->op_gs_op)
216 		proc->op_gs_op(ctx, proc, pdf_to_bool(ctx, obj));
217 
218 	obj = pdf_dict_get(ctx, dict, PDF_NAME(OPM));
219 	if (pdf_is_int(ctx, obj) && proc->op_gs_OPM)
220 		proc->op_gs_OPM(ctx, proc, pdf_to_int(ctx, obj));
221 
222 	obj = pdf_dict_get(ctx, dict, PDF_NAME(UseBlackPtComp));
223 	if (pdf_is_name(ctx, obj) && proc->op_gs_UseBlackPtComp)
224 		proc->op_gs_UseBlackPtComp(ctx, proc, obj);
225 
226 	/* transfer functions */
227 
228 	obj = pdf_dict_get(ctx, dict, PDF_NAME(TR2));
229 	if (pdf_is_name(ctx, obj))
230 		if (!pdf_name_eq(ctx, obj, PDF_NAME(Identity)) && !pdf_name_eq(ctx, obj, PDF_NAME(Default)))
231 			fz_warn(ctx, "ignoring transfer function");
232 	if (!obj) /* TR is ignored in the presence of TR2 */
233 	{
234 		pdf_obj *tr = pdf_dict_get(ctx, dict, PDF_NAME(TR));
235 		if (pdf_is_name(ctx, tr))
236 			if (!pdf_name_eq(ctx, tr, PDF_NAME(Identity)))
237 				fz_warn(ctx, "ignoring transfer function");
238 	}
239 
240 	/* transparency state */
241 
242 	obj = pdf_dict_get(ctx, dict, PDF_NAME(CA));
243 	if (pdf_is_number(ctx, obj) && proc->op_gs_CA)
244 		proc->op_gs_CA(ctx, proc, pdf_to_real(ctx, obj));
245 
246 	obj = pdf_dict_get(ctx, dict, PDF_NAME(ca));
247 	if (pdf_is_number(ctx, obj) && proc->op_gs_ca)
248 		proc->op_gs_ca(ctx, proc, pdf_to_real(ctx, obj));
249 
250 	obj = pdf_dict_get(ctx, dict, PDF_NAME(BM));
251 	if (pdf_is_array(ctx, obj))
252 		obj = pdf_array_get(ctx, obj, 0);
253 	if (pdf_is_name(ctx, obj) && proc->op_gs_BM)
254 		proc->op_gs_BM(ctx, proc, pdf_to_name(ctx, obj));
255 
256 	obj = pdf_dict_get(ctx, dict, PDF_NAME(SMask));
257 	if (proc->op_gs_SMask)
258 	{
259 		if (pdf_is_dict(ctx, obj))
260 		{
261 			pdf_obj *xobj, *s, *bc, *tr;
262 			float softmask_bc[FZ_MAX_COLORS];
263 			fz_colorspace *colorspace;
264 			int colorspace_n = 1;
265 			int k, luminosity;
266 
267 			xobj = pdf_dict_get(ctx, obj, PDF_NAME(G));
268 
269 			colorspace = pdf_xobject_colorspace(ctx, xobj);
270 			if (colorspace)
271 				colorspace_n = fz_colorspace_n(ctx, colorspace);
272 
273 			/* Default background color is black. */
274 			for (k = 0; k < colorspace_n; k++)
275 				softmask_bc[k] = 0;
276 			/* Which in CMYK means not all zeros! This should really be
277 			 * a test for subtractive color spaces, but this will have
278 			 * to do for now. */
279 			if (fz_colorspace_is_cmyk(ctx, colorspace))
280 				softmask_bc[3] = 1.0f;
281 			fz_drop_colorspace(ctx, colorspace);
282 
283 			bc = pdf_dict_get(ctx, obj, PDF_NAME(BC));
284 			if (pdf_is_array(ctx, bc))
285 			{
286 				for (k = 0; k < colorspace_n; k++)
287 					softmask_bc[k] = pdf_array_get_real(ctx, bc, k);
288 			}
289 
290 			s = pdf_dict_get(ctx, obj, PDF_NAME(S));
291 			if (pdf_name_eq(ctx, s, PDF_NAME(Luminosity)))
292 				luminosity = 1;
293 			else
294 				luminosity = 0;
295 
296 			tr = pdf_dict_get(ctx, obj, PDF_NAME(TR));
297 			if (tr && !pdf_name_eq(ctx, tr, PDF_NAME(Identity)))
298 				fz_warn(ctx, "ignoring transfer function");
299 
300 			proc->op_gs_SMask(ctx, proc, xobj, csi->rdb, softmask_bc, luminosity);
301 		}
302 		else if (pdf_is_name(ctx, obj) && pdf_name_eq(ctx, obj, PDF_NAME(None)))
303 		{
304 			proc->op_gs_SMask(ctx, proc, NULL, NULL, NULL, 0);
305 		}
306 	}
307 }
308 
309 static void
pdf_process_Do(fz_context * ctx,pdf_processor * proc,pdf_csi * csi)310 pdf_process_Do(fz_context *ctx, pdf_processor *proc, pdf_csi *csi)
311 {
312 	pdf_obj *xres, *xobj, *subtype;
313 
314 	xres = pdf_dict_get(ctx, csi->rdb, PDF_NAME(XObject));
315 	xobj = pdf_dict_gets(ctx, xres, csi->name);
316 	if (!xobj)
317 		fz_throw(ctx, FZ_ERROR_MINOR, "cannot find XObject resource '%s'", csi->name);
318 	subtype = pdf_dict_get(ctx, xobj, PDF_NAME(Subtype));
319 	if (pdf_name_eq(ctx, subtype, PDF_NAME(Form)))
320 	{
321 		pdf_obj *st = pdf_dict_get(ctx, xobj, PDF_NAME(Subtype2));
322 		if (st)
323 			subtype = st;
324 	}
325 	if (!pdf_is_name(ctx, subtype))
326 		fz_throw(ctx, FZ_ERROR_MINOR, "no XObject subtype specified");
327 
328 	if (pdf_is_hidden_ocg(ctx, csi->doc->ocg, csi->rdb, proc->usage, pdf_dict_get(ctx, xobj, PDF_NAME(OC))))
329 		return;
330 
331 	if (pdf_name_eq(ctx, subtype, PDF_NAME(Form)))
332 	{
333 		if (proc->op_Do_form)
334 			proc->op_Do_form(ctx, proc, csi->name, xobj, csi->rdb);
335 	}
336 
337 	else if (pdf_name_eq(ctx, subtype, PDF_NAME(Image)))
338 	{
339 		if (proc->op_Do_image)
340 		{
341 			fz_image *image = pdf_load_image(ctx, csi->doc, xobj);
342 			fz_try(ctx)
343 				proc->op_Do_image(ctx, proc, csi->name, image);
344 			fz_always(ctx)
345 				fz_drop_image(ctx, image);
346 			fz_catch(ctx)
347 				fz_rethrow(ctx);
348 		}
349 	}
350 
351 	else if (!strcmp(pdf_to_name(ctx, subtype), "PS"))
352 		fz_warn(ctx, "ignoring XObject with subtype PS");
353 	else
354 		fz_warn(ctx, "ignoring XObject with unknown subtype: '%s'", pdf_to_name(ctx, subtype));
355 }
356 
357 static void
pdf_process_CS(fz_context * ctx,pdf_processor * proc,pdf_csi * csi,int stroke)358 pdf_process_CS(fz_context *ctx, pdf_processor *proc, pdf_csi *csi, int stroke)
359 {
360 	if (!proc->op_CS || !proc->op_cs)
361 		return;
362 
363 	if (!strcmp(csi->name, "Pattern"))
364 	{
365 		if (stroke)
366 			proc->op_CS(ctx, proc, "Pattern", NULL);
367 		else
368 			proc->op_cs(ctx, proc, "Pattern", NULL);
369 	}
370 	else
371 	{
372 		fz_colorspace *cs;
373 
374 		if (!strcmp(csi->name, "DeviceGray"))
375 			cs = fz_keep_colorspace(ctx, fz_device_gray(ctx));
376 		else if (!strcmp(csi->name, "DeviceRGB"))
377 			cs = fz_keep_colorspace(ctx, fz_device_rgb(ctx));
378 		else if (!strcmp(csi->name, "DeviceCMYK"))
379 			cs = fz_keep_colorspace(ctx, fz_device_cmyk(ctx));
380 		else
381 		{
382 			pdf_obj *csres, *csobj;
383 			csres = pdf_dict_get(ctx, csi->rdb, PDF_NAME(ColorSpace));
384 			csobj = pdf_dict_gets(ctx, csres, csi->name);
385 			if (!csobj)
386 				fz_throw(ctx, FZ_ERROR_MINOR, "cannot find ColorSpace resource '%s'", csi->name);
387 			cs = pdf_load_colorspace(ctx, csobj);
388 		}
389 
390 		fz_try(ctx)
391 		{
392 			if (stroke)
393 				proc->op_CS(ctx, proc, csi->name, cs);
394 			else
395 				proc->op_cs(ctx, proc, csi->name, cs);
396 		}
397 		fz_always(ctx)
398 			fz_drop_colorspace(ctx, cs);
399 		fz_catch(ctx)
400 			fz_rethrow(ctx);
401 	}
402 }
403 
404 static void
pdf_process_SC(fz_context * ctx,pdf_processor * proc,pdf_csi * csi,int stroke)405 pdf_process_SC(fz_context *ctx, pdf_processor *proc, pdf_csi *csi, int stroke)
406 {
407 	if (csi->name[0])
408 	{
409 		pdf_obj *patres, *patobj, *type;
410 
411 		patres = pdf_dict_get(ctx, csi->rdb, PDF_NAME(Pattern));
412 		patobj = pdf_dict_gets(ctx, patres, csi->name);
413 		if (!patobj)
414 			fz_throw(ctx, FZ_ERROR_MINOR, "cannot find Pattern resource '%s'", csi->name);
415 
416 		type = pdf_dict_get(ctx, patobj, PDF_NAME(PatternType));
417 
418 		if (pdf_to_int(ctx, type) == 1)
419 		{
420 			if (proc->op_SC_pattern && proc->op_sc_pattern)
421 			{
422 				pdf_pattern *pat = pdf_load_pattern(ctx, csi->doc, patobj);
423 				fz_try(ctx)
424 				{
425 					if (stroke)
426 						proc->op_SC_pattern(ctx, proc, csi->name, pat, csi->top, csi->stack);
427 					else
428 						proc->op_sc_pattern(ctx, proc, csi->name, pat, csi->top, csi->stack);
429 				}
430 				fz_always(ctx)
431 					pdf_drop_pattern(ctx, pat);
432 				fz_catch(ctx)
433 					fz_rethrow(ctx);
434 			}
435 		}
436 
437 		else if (pdf_to_int(ctx, type) == 2)
438 		{
439 			if (proc->op_SC_shade && proc->op_sc_shade)
440 			{
441 				fz_shade *shade = pdf_load_shading(ctx, csi->doc, patobj);
442 				fz_try(ctx)
443 				{
444 					if (stroke)
445 						proc->op_SC_shade(ctx, proc, csi->name, shade);
446 					else
447 						proc->op_sc_shade(ctx, proc, csi->name, shade);
448 				}
449 				fz_always(ctx)
450 					fz_drop_shade(ctx, shade);
451 				fz_catch(ctx)
452 					fz_rethrow(ctx);
453 			}
454 		}
455 
456 		else
457 		{
458 			fz_throw(ctx, FZ_ERROR_MINOR, "unknown pattern type: %d", pdf_to_int(ctx, type));
459 		}
460 	}
461 
462 	else
463 	{
464 		if (proc->op_SC_color && proc->op_sc_color)
465 		{
466 			if (stroke)
467 				proc->op_SC_color(ctx, proc, csi->top, csi->stack);
468 			else
469 				proc->op_sc_color(ctx, proc, csi->top, csi->stack);
470 		}
471 	}
472 }
473 
474 static pdf_obj *
resolve_properties(fz_context * ctx,pdf_csi * csi,pdf_obj * obj)475 resolve_properties(fz_context *ctx, pdf_csi *csi, pdf_obj *obj)
476 {
477 	if (pdf_is_name(ctx, obj))
478 		return pdf_dict_get(ctx, pdf_dict_get(ctx, csi->rdb, PDF_NAME(Properties)), obj);
479 	else
480 		return obj;
481 }
482 
483 static void
pdf_process_BDC(fz_context * ctx,pdf_processor * proc,pdf_csi * csi)484 pdf_process_BDC(fz_context *ctx, pdf_processor *proc, pdf_csi *csi)
485 {
486 	if (proc->op_BDC)
487 		proc->op_BDC(ctx, proc, csi->name, csi->obj, resolve_properties(ctx, csi, csi->obj));
488 
489 	/* Already hidden, no need to look further */
490 	if (proc->hidden > 0)
491 	{
492 		++proc->hidden;
493 		return;
494 	}
495 
496 	/* We only look at OC groups here */
497 	if (strcmp(csi->name, "OC"))
498 		return;
499 
500 	if (pdf_is_hidden_ocg(ctx, csi->doc->ocg, csi->rdb, proc->usage, csi->obj))
501 		++proc->hidden;
502 }
503 
504 static void
pdf_process_BMC(fz_context * ctx,pdf_processor * proc,pdf_csi * csi,const char * name)505 pdf_process_BMC(fz_context *ctx, pdf_processor *proc, pdf_csi *csi, const char *name)
506 {
507 	if (proc->op_BMC)
508 		proc->op_BMC(ctx, proc, name);
509 	if (proc->hidden > 0)
510 		++proc->hidden;
511 }
512 
513 static void
pdf_process_EMC(fz_context * ctx,pdf_processor * proc,pdf_csi * csi)514 pdf_process_EMC(fz_context *ctx, pdf_processor *proc, pdf_csi *csi)
515 {
516 	if (proc->op_EMC)
517 		proc->op_EMC(ctx, proc);
518 	if (proc->hidden > 0)
519 		--proc->hidden;
520 }
521 
522 static void
pdf_process_gsave(fz_context * ctx,pdf_processor * proc,pdf_csi * csi)523 pdf_process_gsave(fz_context *ctx, pdf_processor *proc, pdf_csi *csi)
524 {
525 	if (proc->op_q)
526 		proc->op_q(ctx, proc);
527 	++csi->gstate;
528 }
529 
530 static void
pdf_process_grestore(fz_context * ctx,pdf_processor * proc,pdf_csi * csi)531 pdf_process_grestore(fz_context *ctx, pdf_processor *proc, pdf_csi *csi)
532 {
533 	if (csi->gstate > 0)
534 	{
535 		if (proc->op_Q)
536 			proc->op_Q(ctx, proc);
537 		--csi->gstate;
538 	}
539 }
540 
541 static void
pdf_process_end(fz_context * ctx,pdf_processor * proc,pdf_csi * csi)542 pdf_process_end(fz_context *ctx, pdf_processor *proc, pdf_csi *csi)
543 {
544 	while (csi->gstate > 0)
545 		pdf_process_grestore(ctx, proc, csi);
546 	if (proc->op_END)
547 		proc->op_END(ctx, proc);
548 }
549 
/*
	Return 1 if word is exactly one of "Infinity", "NaN", "inf" or
	"nan" (case sensitive), and 0 otherwise.
*/
static int is_known_bad_word(const char *word)
{
	static const char *const bad_words[] = { "Infinity", "NaN", "inf", "nan" };
	size_t i;

	for (i = 0; i < sizeof bad_words / sizeof bad_words[0]; i++)
	{
		if (strcmp(word, bad_words[i]) == 0)
			return 1;
	}
	return 0;
}
561 
/*
	Pack a one, two or three character operator into a single int, one
	character per byte with the first character in the lowest byte. This
	is the same layout pdf_process_keyword builds from the keyword text,
	so the results can be used as its case labels.

	Every argument is parenthesized so that an argument which is itself
	an expression is shifted as a whole.
*/
#define A(a) (a)
#define B(a,b) ((a) | (b) << 8)
#define C(a,b,c) ((a) | (b) << 8 | (c) << 16)
565 
566 static void
pdf_process_keyword(fz_context * ctx,pdf_processor * proc,pdf_csi * csi,fz_stream * stm,char * word)567 pdf_process_keyword(fz_context *ctx, pdf_processor *proc, pdf_csi *csi, fz_stream *stm, char *word)
568 {
569 	float *s = csi->stack;
570 	char csname[40];
571 	int key;
572 
573 	key = word[0];
574 	if (word[1])
575 	{
576 		key |= word[1] << 8;
577 		if (word[2])
578 		{
579 			key |= word[2] << 16;
580 			if (word[3])
581 				key = 0;
582 		}
583 	}
584 
585 	switch (key)
586 	{
587 	default:
588 		if (!csi->xbalance)
589 		{
590 			if (is_known_bad_word(word))
591 				fz_throw(ctx, FZ_ERROR_MINOR, "unknown keyword: '%s'", word);
592 			else
593 				fz_throw(ctx, FZ_ERROR_SYNTAX, "unknown keyword: '%s'", word);
594 		}
595 		break;
596 
597 	/* general graphics state */
598 	case A('w'): if (proc->op_w) proc->op_w(ctx, proc, s[0]); break;
599 	case A('j'): if (proc->op_j) proc->op_j(ctx, proc, fz_clampi(s[0], 0, 2)); break;
600 	case A('J'): if (proc->op_J) proc->op_J(ctx, proc, fz_clampi(s[0], 0, 2)); break;
601 	case A('M'): if (proc->op_M) proc->op_M(ctx, proc, s[0]); break;
602 	case A('d'): if (proc->op_d) proc->op_d(ctx, proc, csi->obj, s[0]); break;
603 	case B('r','i'): if (proc->op_ri) proc->op_ri(ctx, proc, csi->name); break;
604 	case A('i'): if (proc->op_i) proc->op_i(ctx, proc, s[0]); break;
605 
606 	case B('g','s'):
607 		{
608 			pdf_obj *gsres, *gsobj;
609 			gsres = pdf_dict_get(ctx, csi->rdb, PDF_NAME(ExtGState));
610 			gsobj = pdf_dict_gets(ctx, gsres, csi->name);
611 			if (!gsobj)
612 				fz_throw(ctx, FZ_ERROR_MINOR, "cannot find ExtGState resource '%s'", csi->name);
613 			if (proc->op_gs_begin)
614 				proc->op_gs_begin(ctx, proc, csi->name, gsobj);
615 			pdf_process_extgstate(ctx, proc, csi, gsobj);
616 			if (proc->op_gs_end)
617 				proc->op_gs_end(ctx, proc);
618 		}
619 		break;
620 
621 	/* special graphics state */
622 	case A('q'): pdf_process_gsave(ctx, proc, csi); break;
623 	case A('Q'): pdf_process_grestore(ctx, proc, csi); break;
624 	case B('c','m'): if (proc->op_cm) proc->op_cm(ctx, proc, s[0], s[1], s[2], s[3], s[4], s[5]); break;
625 
626 	/* path construction */
627 	case A('m'): if (proc->op_m) proc->op_m(ctx, proc, s[0], s[1]); break;
628 	case A('l'): if (proc->op_l) proc->op_l(ctx, proc, s[0], s[1]); break;
629 	case A('c'): if (proc->op_c) proc->op_c(ctx, proc, s[0], s[1], s[2], s[3], s[4], s[5]); break;
630 	case A('v'): if (proc->op_v) proc->op_v(ctx, proc, s[0], s[1], s[2], s[3]); break;
631 	case A('y'): if (proc->op_y) proc->op_y(ctx, proc, s[0], s[1], s[2], s[3]); break;
632 	case A('h'): if (proc->op_h) proc->op_h(ctx, proc); break;
633 	case B('r','e'): if (proc->op_re) proc->op_re(ctx, proc, s[0], s[1], s[2], s[3]); break;
634 
635 	/* path painting */
636 	case A('S'): if (proc->op_S) proc->op_S(ctx, proc); break;
637 	case A('s'): if (proc->op_s) proc->op_s(ctx, proc); break;
638 	case A('F'): if (proc->op_F) proc->op_F(ctx, proc); break;
639 	case A('f'): if (proc->op_f) proc->op_f(ctx, proc); break;
640 	case B('f','*'): if (proc->op_fstar) proc->op_fstar(ctx, proc); break;
641 	case A('B'): if (proc->op_B) proc->op_B(ctx, proc); break;
642 	case B('B','*'): if (proc->op_Bstar) proc->op_Bstar(ctx, proc); break;
643 	case A('b'): if (proc->op_b) proc->op_b(ctx, proc); break;
644 	case B('b','*'): if (proc->op_bstar) proc->op_bstar(ctx, proc); break;
645 	case A('n'): if (proc->op_n) proc->op_n(ctx, proc); break;
646 
647 	/* path clipping */
648 	case A('W'): if (proc->op_W) proc->op_W(ctx, proc); break;
649 	case B('W','*'): if (proc->op_Wstar) proc->op_Wstar(ctx, proc); break;
650 
651 	/* text objects */
652 	case B('B','T'): csi->in_text = 1; if (proc->op_BT) proc->op_BT(ctx, proc); break;
653 	case B('E','T'): csi->in_text = 0; if (proc->op_ET) proc->op_ET(ctx, proc); break;
654 
655 	/* text state */
656 	case B('T','c'): if (proc->op_Tc) proc->op_Tc(ctx, proc, s[0]); break;
657 	case B('T','w'): if (proc->op_Tw) proc->op_Tw(ctx, proc, s[0]); break;
658 	case B('T','z'): if (proc->op_Tz) proc->op_Tz(ctx, proc, s[0]); break;
659 	case B('T','L'): if (proc->op_TL) proc->op_TL(ctx, proc, s[0]); break;
660 	case B('T','r'): if (proc->op_Tr) proc->op_Tr(ctx, proc, s[0]); break;
661 	case B('T','s'): if (proc->op_Ts) proc->op_Ts(ctx, proc, s[0]); break;
662 
663 	case B('T','f'):
664 		if (proc->op_Tf)
665 		{
666 			pdf_obj *fontres, *fontobj;
667 			pdf_font_desc *font;
668 			fontres = pdf_dict_get(ctx, csi->rdb, PDF_NAME(Font));
669 			fontobj = pdf_dict_gets(ctx, fontres, csi->name);
670 			if (pdf_is_dict(ctx, fontobj))
671 				font = pdf_try_load_font(ctx, csi->doc, csi->rdb, fontobj, csi->cookie);
672 			else
673 				font = pdf_load_hail_mary_font(ctx, csi->doc);
674 			fz_try(ctx)
675 				proc->op_Tf(ctx, proc, csi->name, font, s[0]);
676 			fz_always(ctx)
677 				pdf_drop_font(ctx, font);
678 			fz_catch(ctx)
679 				fz_rethrow(ctx);
680 		}
681 		break;
682 
683 	/* text positioning */
684 	case B('T','d'): if (proc->op_Td) proc->op_Td(ctx, proc, s[0], s[1]); break;
685 	case B('T','D'): if (proc->op_TD) proc->op_TD(ctx, proc, s[0], s[1]); break;
686 	case B('T','m'): if (proc->op_Tm) proc->op_Tm(ctx, proc, s[0], s[1], s[2], s[3], s[4], s[5]); break;
687 	case B('T','*'): if (proc->op_Tstar) proc->op_Tstar(ctx, proc); break;
688 
689 	/* text showing */
690 	case B('T','J'): if (proc->op_TJ) proc->op_TJ(ctx, proc, csi->obj); break;
691 	case B('T','j'):
692 		if (proc->op_Tj)
693 		{
694 			if (csi->string_len > 0)
695 				proc->op_Tj(ctx, proc, csi->string, csi->string_len);
696 			else
697 				proc->op_Tj(ctx, proc, pdf_to_str_buf(ctx, csi->obj), pdf_to_str_len(ctx, csi->obj));
698 		}
699 		break;
700 	case A('\''):
701 		if (proc->op_squote)
702 		{
703 			if (csi->string_len > 0)
704 				proc->op_squote(ctx, proc, csi->string, csi->string_len);
705 			else
706 				proc->op_squote(ctx, proc, pdf_to_str_buf(ctx, csi->obj), pdf_to_str_len(ctx, csi->obj));
707 		}
708 		break;
709 	case A('"'):
710 		if (proc->op_dquote)
711 		{
712 			if (csi->string_len > 0)
713 				proc->op_dquote(ctx, proc, s[0], s[1], csi->string, csi->string_len);
714 			else
715 				proc->op_dquote(ctx, proc, s[0], s[1], pdf_to_str_buf(ctx, csi->obj), pdf_to_str_len(ctx, csi->obj));
716 		}
717 		break;
718 
719 	/* type 3 fonts */
720 	case B('d','0'): if (proc->op_d0) proc->op_d0(ctx, proc, s[0], s[1]); break;
721 	case B('d','1'): if (proc->op_d1) proc->op_d1(ctx, proc, s[0], s[1], s[2], s[3], s[4], s[5]); break;
722 
723 	/* color */
724 	case B('C','S'): pdf_process_CS(ctx, proc, csi, 1); break;
725 	case B('c','s'): pdf_process_CS(ctx, proc, csi, 0); break;
726 	case B('S','C'): pdf_process_SC(ctx, proc, csi, 1); break;
727 	case B('s','c'): pdf_process_SC(ctx, proc, csi, 0); break;
728 	case C('S','C','N'): pdf_process_SC(ctx, proc, csi, 1); break;
729 	case C('s','c','n'): pdf_process_SC(ctx, proc, csi, 0); break;
730 
731 	case A('G'): if (proc->op_G) proc->op_G(ctx, proc, s[0]); break;
732 	case A('g'): if (proc->op_g) proc->op_g(ctx, proc, s[0]); break;
733 	case B('R','G'): if (proc->op_RG) proc->op_RG(ctx, proc, s[0], s[1], s[2]); break;
734 	case B('r','g'): if (proc->op_rg) proc->op_rg(ctx, proc, s[0], s[1], s[2]); break;
735 	case A('K'): if (proc->op_K) proc->op_K(ctx, proc, s[0], s[1], s[2], s[3]); break;
736 	case A('k'): if (proc->op_k) proc->op_k(ctx, proc, s[0], s[1], s[2], s[3]); break;
737 
738 	/* shadings, images, xobjects */
739 	case B('B','I'):
740 		{
741 			fz_image *img = parse_inline_image(ctx, csi, stm, csname, sizeof csname);
742 			fz_try(ctx)
743 			{
744 				if (proc->op_BI)
745 					proc->op_BI(ctx, proc, img, csname[0] ? csname : NULL);
746 			}
747 			fz_always(ctx)
748 				fz_drop_image(ctx, img);
749 			fz_catch(ctx)
750 				fz_rethrow(ctx);
751 		}
752 		break;
753 
754 	case B('s','h'):
755 		if (proc->op_sh)
756 		{
757 			pdf_obj *shaderes, *shadeobj;
758 			fz_shade *shade;
759 			shaderes = pdf_dict_get(ctx, csi->rdb, PDF_NAME(Shading));
760 			shadeobj = pdf_dict_gets(ctx, shaderes, csi->name);
761 			if (!shadeobj)
762 				fz_throw(ctx, FZ_ERROR_MINOR, "cannot find Shading resource '%s'", csi->name);
763 			shade = pdf_load_shading(ctx, csi->doc, shadeobj);
764 			fz_try(ctx)
765 				proc->op_sh(ctx, proc, csi->name, shade);
766 			fz_always(ctx)
767 				fz_drop_shade(ctx, shade);
768 			fz_catch(ctx)
769 				fz_rethrow(ctx);
770 		}
771 		break;
772 
773 	case B('D','o'): pdf_process_Do(ctx, proc, csi); break;
774 
775 	/* marked content */
776 	case B('M','P'): if (proc->op_MP) proc->op_MP(ctx, proc, csi->name); break;
777 	case B('D','P'): if (proc->op_DP) proc->op_DP(ctx, proc, csi->name, csi->obj, resolve_properties(ctx, csi, csi->obj)); break;
778 	case C('B','M','C'): pdf_process_BMC(ctx, proc, csi, csi->name); break;
779 	case C('B','D','C'): pdf_process_BDC(ctx, proc, csi); break;
780 	case C('E','M','C'): pdf_process_EMC(ctx, proc, csi); break;
781 
782 	/* compatibility */
783 	case B('B','X'): ++csi->xbalance; if (proc->op_BX) proc->op_BX(ctx, proc); break;
784 	case B('E','X'): --csi->xbalance; if (proc->op_EX) proc->op_EX(ctx, proc); break;
785 	}
786 }
787 
788 static void
pdf_process_stream(fz_context * ctx,pdf_processor * proc,pdf_csi * csi,fz_stream * stm)789 pdf_process_stream(fz_context *ctx, pdf_processor *proc, pdf_csi *csi, fz_stream *stm)
790 {
791 	pdf_document *doc = csi->doc;
792 	pdf_lexbuf *buf = csi->buf;
793 	fz_cookie *cookie = csi->cookie;
794 
795 	pdf_token tok = PDF_TOK_ERROR;
796 	int in_text_array = 0;
797 	int syntax_errors = 0;
798 
799 	/* make sure we have a clean slate if we come here from flush_text */
800 	pdf_clear_stack(ctx, csi);
801 
802 	fz_var(in_text_array);
803 	fz_var(tok);
804 
805 	if (cookie)
806 	{
807 		cookie->progress_max = -1;
808 		cookie->progress = 0;
809 	}
810 
811 	do
812 	{
813 		fz_try(ctx)
814 		{
815 			do
816 			{
817 				/* Check the cookie */
818 				if (cookie)
819 				{
820 					if (cookie->abort)
821 					{
822 						tok = PDF_TOK_EOF;
823 						break;
824 					}
825 					cookie->progress++;
826 				}
827 
828 				tok = pdf_lex(ctx, stm, buf);
829 
830 				if (in_text_array)
831 				{
832 					switch(tok)
833 					{
834 					case PDF_TOK_CLOSE_ARRAY:
835 						in_text_array = 0;
836 						break;
837 					case PDF_TOK_REAL:
838 						pdf_array_push_real(ctx, csi->obj, buf->f);
839 						break;
840 					case PDF_TOK_INT:
841 						pdf_array_push_int(ctx, csi->obj, buf->i);
842 						break;
843 					case PDF_TOK_STRING:
844 						pdf_array_push_string(ctx, csi->obj, buf->scratch, buf->len);
845 						break;
846 					case PDF_TOK_EOF:
847 						break;
848 					case PDF_TOK_KEYWORD:
849 						if (buf->scratch[0] == 'T' && (buf->scratch[1] == 'w' || buf->scratch[1] == 'c') && buf->scratch[2] == 0)
850 						{
851 							int n = pdf_array_len(ctx, csi->obj);
852 							if (n > 0)
853 							{
854 								pdf_obj *o = pdf_array_get(ctx, csi->obj, n-1);
855 								if (pdf_is_number(ctx, o))
856 								{
857 									csi->stack[0] = pdf_to_real(ctx, o);
858 									pdf_array_delete(ctx, csi->obj, n-1);
859 									pdf_process_keyword(ctx, proc, csi, stm, buf->scratch);
860 								}
861 							}
862 						}
863 						/* Deliberate Fallthrough! */
864 					default:
865 						fz_throw(ctx, FZ_ERROR_SYNTAX, "syntax error in array");
866 					}
867 				}
868 				else switch (tok)
869 				{
870 				case PDF_TOK_ENDSTREAM:
871 				case PDF_TOK_EOF:
872 					tok = PDF_TOK_EOF;
873 					break;
874 
875 				case PDF_TOK_OPEN_ARRAY:
876 					if (csi->obj)
877 					{
878 						pdf_drop_obj(ctx, csi->obj);
879 						csi->obj = NULL;
880 					}
881 					if (csi->in_text)
882 					{
883 						in_text_array = 1;
884 						csi->obj = pdf_new_array(ctx, doc, 4);
885 					}
886 					else
887 					{
888 						csi->obj = pdf_parse_array(ctx, doc, stm, buf);
889 					}
890 					break;
891 
892 				case PDF_TOK_OPEN_DICT:
893 					if (csi->obj)
894 					{
895 						pdf_drop_obj(ctx, csi->obj);
896 						csi->obj = NULL;
897 					}
898 					csi->obj = pdf_parse_dict(ctx, doc, stm, buf);
899 					break;
900 
901 				case PDF_TOK_NAME:
902 					if (csi->name[0])
903 					{
904 						pdf_drop_obj(ctx, csi->obj);
905 						csi->obj = NULL;
906 						csi->obj = pdf_new_name(ctx, buf->scratch);
907 					}
908 					else
909 						fz_strlcpy(csi->name, buf->scratch, sizeof(csi->name));
910 					break;
911 
912 				case PDF_TOK_INT:
913 					if (csi->top < (int)nelem(csi->stack)) {
914 						csi->stack[csi->top] = buf->i;
915 						csi->top ++;
916 					}
917 					else
918 						fz_throw(ctx, FZ_ERROR_SYNTAX, "stack overflow");
919 					break;
920 
921 				case PDF_TOK_REAL:
922 					if (csi->top < (int)nelem(csi->stack)) {
923 						csi->stack[csi->top] = buf->f;
924 						csi->top ++;
925 					}
926 					else
927 						fz_throw(ctx, FZ_ERROR_SYNTAX, "stack overflow");
928 					break;
929 
930 				case PDF_TOK_STRING:
931 					if (buf->len <= sizeof(csi->string))
932 					{
933 						memcpy(csi->string, buf->scratch, buf->len);
934 						csi->string_len = buf->len;
935 					}
936 					else
937 					{
938 						if (csi->obj)
939 						{
940 							pdf_drop_obj(ctx, csi->obj);
941 							csi->obj = NULL;
942 						}
943 						csi->obj = pdf_new_string(ctx, buf->scratch, buf->len);
944 					}
945 					break;
946 
947 				case PDF_TOK_KEYWORD:
948 					pdf_process_keyword(ctx, proc, csi, stm, buf->scratch);
949 					pdf_clear_stack(ctx, csi);
950 					break;
951 
952 				default:
953 					fz_throw(ctx, FZ_ERROR_SYNTAX, "syntax error in content stream");
954 				}
955 			}
956 			while (tok != PDF_TOK_EOF);
957 		}
958 		fz_always(ctx)
959 		{
960 			pdf_clear_stack(ctx, csi);
961 		}
962 		fz_catch(ctx)
963 		{
964 			int caught = fz_caught(ctx);
965 			if (cookie)
966 			{
967 				if (caught == FZ_ERROR_TRYLATER)
968 				{
969 					cookie->incomplete++;
970 					tok = PDF_TOK_EOF;
971 				}
972 				else if (caught == FZ_ERROR_ABORT)
973 				{
974 					fz_rethrow(ctx);
975 				}
976 				else if (caught == FZ_ERROR_MINOR)
977 				{
978 					cookie->errors++;
979 				}
980 				else if (caught == FZ_ERROR_SYNTAX)
981 				{
982 					cookie->errors++;
983 					if (++syntax_errors >= MAX_SYNTAX_ERRORS)
984 					{
985 						fz_warn(ctx, "too many syntax errors; ignoring rest of page");
986 						tok = PDF_TOK_EOF;
987 					}
988 				}
989 				else
990 				{
991 					fz_rethrow(ctx);
992 				}
993 			}
994 			else
995 			{
996 				if (caught == FZ_ERROR_TRYLATER)
997 					tok = PDF_TOK_EOF;
998 				else if (caught == FZ_ERROR_ABORT)
999 					fz_rethrow(ctx);
1000 				else if (caught == FZ_ERROR_MINOR)
1001 					/* ignore minor errors */ ;
1002 				else if (caught == FZ_ERROR_SYNTAX)
1003 				{
1004 					if (++syntax_errors >= MAX_SYNTAX_ERRORS)
1005 					{
1006 						fz_warn(ctx, "too many syntax errors; ignoring rest of page");
1007 						tok = PDF_TOK_EOF;
1008 					}
1009 				}
1010 				else
1011 				{
1012 					fz_rethrow(ctx);
1013 				}
1014 			}
1015 
1016 			/* If we do catch an error, then reset ourselves to a base lexing state */
1017 			in_text_array = 0;
1018 		}
1019 	}
1020 	while (tok != PDF_TOK_EOF);
1021 }
1022 
1023 void
pdf_process_contents(fz_context * ctx,pdf_processor * proc,pdf_document * doc,pdf_obj * rdb,pdf_obj * stmobj,fz_cookie * cookie)1024 pdf_process_contents(fz_context *ctx, pdf_processor *proc, pdf_document *doc, pdf_obj *rdb, pdf_obj *stmobj, fz_cookie *cookie)
1025 {
1026 	pdf_csi csi;
1027 	pdf_lexbuf buf;
1028 	fz_stream *stm = NULL;
1029 
1030 	if (!stmobj)
1031 		return;
1032 
1033 	fz_var(stm);
1034 
1035 	pdf_lexbuf_init(ctx, &buf, PDF_LEXBUF_SMALL);
1036 	pdf_init_csi(ctx, &csi, doc, rdb, &buf, cookie);
1037 
1038 	fz_try(ctx)
1039 	{
1040 		fz_defer_reap_start(ctx);
1041 		stm = pdf_open_contents_stream(ctx, doc, stmobj);
1042 		pdf_process_stream(ctx, proc, &csi, stm);
1043 		pdf_process_end(ctx, proc, &csi);
1044 	}
1045 	fz_always(ctx)
1046 	{
1047 		fz_defer_reap_end(ctx);
1048 		fz_drop_stream(ctx, stm);
1049 		pdf_clear_stack(ctx, &csi);
1050 		pdf_lexbuf_fin(ctx, &buf);
1051 	}
1052 	fz_catch(ctx)
1053 	{
1054 		proc->close_processor = NULL; /* aborted run, don't warn about unclosed processor */
1055 		fz_rethrow(ctx);
1056 	}
1057 }
1058 
1059 /* Bug 702543: It looks like certain types of annotation are never
1060  * printed. */
1061 static int
pdf_should_print_annot(fz_context * ctx,pdf_annot * annot)1062 pdf_should_print_annot(fz_context *ctx, pdf_annot *annot)
1063 {
1064 	enum pdf_annot_type type = pdf_annot_type(ctx, annot);
1065 
1066 	/* We may need to add more types here. */
1067 	if (type == PDF_ANNOT_FILE_ATTACHMENT)
1068 		return 0;
1069 
1070 	return 1;
1071 }
1072 
1073 void
pdf_process_annot(fz_context * ctx,pdf_processor * proc,pdf_document * doc,pdf_page * page,pdf_annot * annot,fz_cookie * cookie)1074 pdf_process_annot(fz_context *ctx, pdf_processor *proc, pdf_document *doc, pdf_page *page, pdf_annot *annot, fz_cookie *cookie)
1075 {
1076 	int flags = pdf_dict_get_int(ctx, annot->obj, PDF_NAME(F));
1077 
1078 	if (flags & (PDF_ANNOT_IS_INVISIBLE | PDF_ANNOT_IS_HIDDEN))
1079 		return;
1080 
1081 	/* popup annotations should never be drawn */
1082 	if (pdf_annot_type(ctx, annot) == PDF_ANNOT_POPUP)
1083 		return;
1084 
1085 	if (proc->usage)
1086 	{
1087 		if (!strcmp(proc->usage, "Print"))
1088 		{
1089 			if (!(flags & PDF_ANNOT_IS_PRINT))
1090 				return;
1091 			if (!pdf_should_print_annot(ctx, annot))
1092 				return;
1093 		}
1094 		if (!strcmp(proc->usage, "View") && (flags & PDF_ANNOT_IS_NO_VIEW))
1095 			return;
1096 	}
1097 
1098 	/* TODO: NoZoom and NoRotate */
1099 
1100 	/* XXX what resources, if any, to use for this check? */
1101 	if (pdf_is_hidden_ocg(ctx, doc->ocg, NULL, proc->usage, pdf_dict_get(ctx, annot->obj, PDF_NAME(OC))))
1102 		return;
1103 
1104 	if (proc->op_q && proc->op_cm && proc->op_Do_form && proc->op_Q && annot->ap)
1105 	{
1106 		fz_matrix matrix = pdf_annot_transform(ctx, annot);
1107 		proc->op_q(ctx, proc);
1108 		proc->op_cm(ctx, proc,
1109 			matrix.a, matrix.b,
1110 			matrix.c, matrix.d,
1111 			matrix.e, matrix.f);
1112 		proc->op_Do_form(ctx, proc, NULL, annot->ap, pdf_page_resources(ctx, page));
1113 		proc->op_Q(ctx, proc);
1114 	}
1115 }
1116 
1117 void
pdf_process_glyph(fz_context * ctx,pdf_processor * proc,pdf_document * doc,pdf_obj * rdb,fz_buffer * contents)1118 pdf_process_glyph(fz_context *ctx, pdf_processor *proc, pdf_document *doc, pdf_obj *rdb, fz_buffer *contents)
1119 {
1120 	pdf_csi csi;
1121 	pdf_lexbuf buf;
1122 	fz_stream *stm = NULL;
1123 
1124 	fz_var(stm);
1125 
1126 	if (!contents)
1127 		return;
1128 
1129 	pdf_lexbuf_init(ctx, &buf, PDF_LEXBUF_SMALL);
1130 	pdf_init_csi(ctx, &csi, doc, rdb, &buf, NULL);
1131 
1132 	fz_try(ctx)
1133 	{
1134 		stm = fz_open_buffer(ctx, contents);
1135 		pdf_process_stream(ctx, proc, &csi, stm);
1136 		pdf_process_end(ctx, proc, &csi);
1137 	}
1138 	fz_always(ctx)
1139 	{
1140 		fz_drop_stream(ctx, stm);
1141 		pdf_clear_stack(ctx, &csi);
1142 		pdf_lexbuf_fin(ctx, &buf);
1143 	}
1144 	fz_catch(ctx)
1145 	{
1146 		/* Note: Any SYNTAX errors should have been swallowed
1147 		 * by pdf_process_stream, but in case any escape from other
1148 		 * functions, recast the error type here to be safe. */
1149 		if (fz_caught(ctx) == FZ_ERROR_SYNTAX)
1150 			fz_throw(ctx, FZ_ERROR_GENERIC, "syntax error in content stream");
1151 		fz_rethrow(ctx);
1152 	}
1153 }
1154 
1155 void
pdf_tos_save(fz_context * ctx,pdf_text_object_state * tos,fz_matrix save[2])1156 pdf_tos_save(fz_context *ctx, pdf_text_object_state *tos, fz_matrix save[2])
1157 {
1158 	save[0] = tos->tm;
1159 	save[1] = tos->tlm;
1160 }
1161 
1162 void
pdf_tos_restore(fz_context * ctx,pdf_text_object_state * tos,fz_matrix save[2])1163 pdf_tos_restore(fz_context *ctx, pdf_text_object_state *tos, fz_matrix save[2])
1164 {
1165 	tos->tm = save[0];
1166 	tos->tlm = save[1];
1167 }
1168 
1169 fz_text *
pdf_tos_get_text(fz_context * ctx,pdf_text_object_state * tos)1170 pdf_tos_get_text(fz_context *ctx, pdf_text_object_state *tos)
1171 {
1172 	fz_text *text = tos->text;
1173 
1174 	tos->text = NULL;
1175 
1176 	return text;
1177 }
1178 
1179 void
pdf_tos_reset(fz_context * ctx,pdf_text_object_state * tos,int render)1180 pdf_tos_reset(fz_context *ctx, pdf_text_object_state *tos, int render)
1181 {
1182 	tos->text = fz_new_text(ctx);
1183 	tos->text_mode = render;
1184 	tos->text_bbox = fz_empty_rect;
1185 }
1186 
1187 int
pdf_tos_make_trm(fz_context * ctx,pdf_text_object_state * tos,pdf_text_state * text,pdf_font_desc * fontdesc,int cid,fz_matrix * trm)1188 pdf_tos_make_trm(fz_context *ctx, pdf_text_object_state *tos, pdf_text_state *text, pdf_font_desc *fontdesc, int cid, fz_matrix *trm)
1189 {
1190 	fz_matrix tsm;
1191 
1192 	tsm.a = text->size * text->scale;
1193 	tsm.b = 0;
1194 	tsm.c = 0;
1195 	tsm.d = text->size;
1196 	tsm.e = 0;
1197 	tsm.f = text->rise;
1198 
1199 	if (fontdesc->wmode == 0)
1200 	{
1201 		pdf_hmtx h = pdf_lookup_hmtx(ctx, fontdesc, cid);
1202 		float w0 = h.w * 0.001f;
1203 		tos->char_tx = (w0 * text->size + text->char_space) * text->scale;
1204 		tos->char_ty = 0;
1205 	}
1206 
1207 	if (fontdesc->wmode == 1)
1208 	{
1209 		pdf_vmtx v = pdf_lookup_vmtx(ctx, fontdesc, cid);
1210 		float w1 = v.w * 0.001f;
1211 		tsm.e -= v.x * fabsf(text->size) * 0.001f;
1212 		tsm.f -= v.y * text->size * 0.001f;
1213 		tos->char_tx = 0;
1214 		tos->char_ty = w1 * text->size + text->char_space;
1215 	}
1216 
1217 	*trm = fz_concat(tsm, tos->tm);
1218 
1219 	tos->cid = cid;
1220 	tos->gid = pdf_font_cid_to_gid(ctx, fontdesc, cid);
1221 	tos->fontdesc = fontdesc;
1222 
1223 	/* Compensate for the glyph cache limited positioning precision */
1224 	tos->char_bbox = fz_expand_rect(fz_bound_glyph(ctx, fontdesc->font, tos->gid, *trm), 1);
1225 
1226 	return tos->gid;
1227 }
1228 
1229 void
pdf_tos_move_after_char(fz_context * ctx,pdf_text_object_state * tos)1230 pdf_tos_move_after_char(fz_context *ctx, pdf_text_object_state *tos)
1231 {
1232 	tos->text_bbox = fz_union_rect(tos->text_bbox, tos->char_bbox);
1233 	tos->tm = fz_pre_translate(tos->tm, tos->char_tx, tos->char_ty);
1234 }
1235 
1236 void
pdf_tos_translate(pdf_text_object_state * tos,float tx,float ty)1237 pdf_tos_translate(pdf_text_object_state *tos, float tx, float ty)
1238 {
1239 	tos->tlm = fz_pre_translate(tos->tlm, tx, ty);
1240 	tos->tm = tos->tlm;
1241 }
1242 
1243 void
pdf_tos_set_matrix(pdf_text_object_state * tos,float a,float b,float c,float d,float e,float f)1244 pdf_tos_set_matrix(pdf_text_object_state *tos, float a, float b, float c, float d, float e, float f)
1245 {
1246 	tos->tm.a = a;
1247 	tos->tm.b = b;
1248 	tos->tm.c = c;
1249 	tos->tm.d = d;
1250 	tos->tm.e = e;
1251 	tos->tm.f = f;
1252 	tos->tlm = tos->tm;
1253 }
1254 
1255 void
pdf_tos_newline(pdf_text_object_state * tos,float leading)1256 pdf_tos_newline(pdf_text_object_state *tos, float leading)
1257 {
1258 	tos->tlm = fz_pre_translate(tos->tlm, 0, -leading);
1259 	tos->tm = tos->tlm;
1260 }
1261