1 #ifndef PDF_INTERPRET_H
2 #define PDF_INTERPRET_H
3 
4 #include "mupdf/pdf/font.h"
5 #include "mupdf/pdf/resource.h"
6 
/*
	Forward declarations.

	pdf_processor is defined further down in this header. pdf_gstate is
	not defined here; this header only ever refers to it through a
	pointer (see pdf_new_run_processor).
*/
7 typedef struct pdf_gstate pdf_gstate;
8 typedef struct pdf_processor pdf_processor;
9 
/*
	Processor lifetime functions.

	pdf_new_processor takes a size and returns void *.
	NOTE(review): presumably this lets an implementation allocate a
	larger structure that embeds a pdf_processor -- confirm against
	the definition.

	pdf_close_processor and pdf_drop_processor have counterparts in
	the close_processor and drop_processor members of struct
	pdf_processor.
	NOTE(review): presumably these functions dispatch to those
	members -- confirm.
*/
10 void *pdf_new_processor(fz_context *ctx, int size);
11 void pdf_close_processor(fz_context *ctx, pdf_processor *proc);
12 void pdf_drop_processor(fz_context *ctx, pdf_processor *proc);
13 
/*
	Table of callbacks making up a processor.

	Every callback receives the context and the processor itself,
	followed by the operands of that particular operation. The op_*
	members are grouped below by category (graphics state, path
	construction and painting, clipping, text, color, images and
	xobjects, marked content, compatibility).

	NOTE(review): members named with "star", "squote" or "dquote"
	presumably stand for the PDF operators whose spelling contains an
	asterisk, a single quote or a double quote, which cannot appear in
	a C identifier -- confirm.

	NOTE(review): the op_gs_* members are presumably invoked for the
	individual entries of an ExtGState dictionary, bracketed by
	op_gs_begin and op_gs_end -- confirm against the interpreter.

	close_processor and drop_processor are the hooks matching
	pdf_close_processor and pdf_drop_processor. usage and hidden are
	plain data members rather than callbacks.
*/
14 struct pdf_processor
15 {
16 	void (*close_processor)(fz_context *ctx, pdf_processor *proc);
17 	void (*drop_processor)(fz_context *ctx, pdf_processor *proc);
18 
19 	/* general graphics state */
20 	void (*op_w)(fz_context *ctx, pdf_processor *proc, float linewidth);
21 	void (*op_j)(fz_context *ctx, pdf_processor *proc, int linejoin);
22 	void (*op_J)(fz_context *ctx, pdf_processor *proc, int linecap);
23 	void (*op_M)(fz_context *ctx, pdf_processor *proc, float miterlimit);
24 	void (*op_d)(fz_context *ctx, pdf_processor *proc, pdf_obj *array, float phase);
25 	void (*op_ri)(fz_context *ctx, pdf_processor *proc, const char *intent);
26 	void (*op_i)(fz_context *ctx, pdf_processor *proc, float flatness);
27 
28 	void (*op_gs_begin)(fz_context *ctx, pdf_processor *proc, const char *name, pdf_obj *extgstate);
29 	void (*op_gs_BM)(fz_context *ctx, pdf_processor *proc, const char *blendmode);
30 	void (*op_gs_ca)(fz_context *ctx, pdf_processor *proc, float alpha);
31 	void (*op_gs_CA)(fz_context *ctx, pdf_processor *proc, float alpha);
32 	void (*op_gs_SMask)(fz_context *ctx, pdf_processor *proc, pdf_obj *smask, pdf_obj *page_resources, float *bc, int luminosity);
33 	void (*op_gs_end)(fz_context *ctx, pdf_processor *proc);
34 
35 	/* special graphics state */
36 	void (*op_q)(fz_context *ctx, pdf_processor *proc);
37 	void (*op_Q)(fz_context *ctx, pdf_processor *proc);
38 	void (*op_cm)(fz_context *ctx, pdf_processor *proc, float a, float b, float c, float d, float e, float f);
39 
40 	/* path construction */
41 	void (*op_m)(fz_context *ctx, pdf_processor *proc, float x, float y);
42 	void (*op_l)(fz_context *ctx, pdf_processor *proc, float x, float y);
43 	void (*op_c)(fz_context *ctx, pdf_processor *proc, float x1, float y1, float x2, float y2, float x3, float y3);
44 	void (*op_v)(fz_context *ctx, pdf_processor *proc, float x2, float y2, float x3, float y3);
45 	void (*op_y)(fz_context *ctx, pdf_processor *proc, float x1, float y1, float x3, float y3);
46 	void (*op_h)(fz_context *ctx, pdf_processor *proc);
47 	void (*op_re)(fz_context *ctx, pdf_processor *proc, float x, float y, float w, float h);
48 
49 	/* path painting */
50 	void (*op_S)(fz_context *ctx, pdf_processor *proc);
51 	void (*op_s)(fz_context *ctx, pdf_processor *proc);
52 	void (*op_F)(fz_context *ctx, pdf_processor *proc);
53 	void (*op_f)(fz_context *ctx, pdf_processor *proc);
54 	void (*op_fstar)(fz_context *ctx, pdf_processor *proc);
55 	void (*op_B)(fz_context *ctx, pdf_processor *proc);
56 	void (*op_Bstar)(fz_context *ctx, pdf_processor *proc);
57 	void (*op_b)(fz_context *ctx, pdf_processor *proc);
58 	void (*op_bstar)(fz_context *ctx, pdf_processor *proc);
59 	void (*op_n)(fz_context *ctx, pdf_processor *proc);
60 
61 	/* clipping paths */
62 	void (*op_W)(fz_context *ctx, pdf_processor *proc);
63 	void (*op_Wstar)(fz_context *ctx, pdf_processor *proc);
64 
65 	/* text objects */
66 	void (*op_BT)(fz_context *ctx, pdf_processor *proc);
67 	void (*op_ET)(fz_context *ctx, pdf_processor *proc);
68 
69 	/* text state */
70 	void (*op_Tc)(fz_context *ctx, pdf_processor *proc, float charspace);
71 	void (*op_Tw)(fz_context *ctx, pdf_processor *proc, float wordspace);
72 	void (*op_Tz)(fz_context *ctx, pdf_processor *proc, float scale);
73 	void (*op_TL)(fz_context *ctx, pdf_processor *proc, float leading);
74 	void (*op_Tf)(fz_context *ctx, pdf_processor *proc, const char *name, pdf_font_desc *font, float size);
75 	void (*op_Tr)(fz_context *ctx, pdf_processor *proc, int render);
76 	void (*op_Ts)(fz_context *ctx, pdf_processor *proc, float rise);
77 
78 	/* text positioning */
79 	void (*op_Td)(fz_context *ctx, pdf_processor *proc, float tx, float ty);
80 	void (*op_TD)(fz_context *ctx, pdf_processor *proc, float tx, float ty);
81 	void (*op_Tm)(fz_context *ctx, pdf_processor *proc, float a, float b, float c, float d, float e, float f);
82 	void (*op_Tstar)(fz_context *ctx, pdf_processor *proc);
83 
84 	/* text showing */
85 	void (*op_TJ)(fz_context *ctx, pdf_processor *proc, pdf_obj *array);
86 	void (*op_Tj)(fz_context *ctx, pdf_processor *proc, char *str, size_t len);
87 	void (*op_squote)(fz_context *ctx, pdf_processor *proc, char *str, size_t len);
88 	void (*op_dquote)(fz_context *ctx, pdf_processor *proc, float aw, float ac, char *str, size_t len);
89 
90 	/* type 3 fonts */
91 	void (*op_d0)(fz_context *ctx, pdf_processor *proc, float wx, float wy);
92 	void (*op_d1)(fz_context *ctx, pdf_processor *proc, float wx, float wy, float llx, float lly, float urx, float ury);
93 
94 	/* color */
95 	void (*op_CS)(fz_context *ctx, pdf_processor *proc, const char *name, fz_colorspace *cs);
96 	void (*op_cs)(fz_context *ctx, pdf_processor *proc, const char *name, fz_colorspace *cs);
97 	void (*op_SC_pattern)(fz_context *ctx, pdf_processor *proc, const char *name, pdf_pattern *pat, int n, float *color);
98 	void (*op_sc_pattern)(fz_context *ctx, pdf_processor *proc, const char *name, pdf_pattern *pat, int n, float *color);
99 	void (*op_SC_shade)(fz_context *ctx, pdf_processor *proc, const char *name, fz_shade *shade);
100 	void (*op_sc_shade)(fz_context *ctx, pdf_processor *proc, const char *name, fz_shade *shade);
101 	void (*op_SC_color)(fz_context *ctx, pdf_processor *proc, int n, float *color);
102 	void (*op_sc_color)(fz_context *ctx, pdf_processor *proc, int n, float *color);
103 
104 	void (*op_G)(fz_context *ctx, pdf_processor *proc, float g);
105 	void (*op_g)(fz_context *ctx, pdf_processor *proc, float g);
106 	void (*op_RG)(fz_context *ctx, pdf_processor *proc, float r, float g, float b);
107 	void (*op_rg)(fz_context *ctx, pdf_processor *proc, float r, float g, float b);
108 	void (*op_K)(fz_context *ctx, pdf_processor *proc, float c, float m, float y, float k);
109 	void (*op_k)(fz_context *ctx, pdf_processor *proc, float c, float m, float y, float k);
110 
111 	/* shadings, images, xobjects */
112 	void (*op_BI)(fz_context *ctx, pdf_processor *proc, fz_image *image, const char *colorspace_name);
113 	void (*op_sh)(fz_context *ctx, pdf_processor *proc, const char *name, fz_shade *shade);
114 	void (*op_Do_image)(fz_context *ctx, pdf_processor *proc, const char *name, fz_image *image);
115 	void (*op_Do_form)(fz_context *ctx, pdf_processor *proc, const char *name, pdf_obj *form, pdf_obj *page_resources);
116 
117 	/* marked content */
118 	void (*op_MP)(fz_context *ctx, pdf_processor *proc, const char *tag);
119 	void (*op_DP)(fz_context *ctx, pdf_processor *proc, const char *tag, pdf_obj *raw, pdf_obj *cooked);
120 	void (*op_BMC)(fz_context *ctx, pdf_processor *proc, const char *tag);
121 	void (*op_BDC)(fz_context *ctx, pdf_processor *proc, const char *tag, pdf_obj *raw, pdf_obj *cooked);
122 	void (*op_EMC)(fz_context *ctx, pdf_processor *proc);
123 
124 	/* compatibility */
125 	void (*op_BX)(fz_context *ctx, pdf_processor *proc);
126 	void (*op_EX)(fz_context *ctx, pdf_processor *proc);
127 
128 	/* Virtual ops for ExtGState entries */
129 	void (*op_gs_OP)(fz_context *ctx, pdf_processor *proc, int b);
130 	void (*op_gs_op)(fz_context *ctx, pdf_processor *proc, int b);
131 	void (*op_gs_OPM)(fz_context *ctx, pdf_processor *proc, int i);
132 	void (*op_gs_UseBlackPtComp)(fz_context *ctx, pdf_processor *proc, pdf_obj *name);
133 
134 	/* END is used to signify end of stream (finalise and close down) */
135 	void (*op_END)(fz_context *ctx, pdf_processor *proc);
136 
137 	/* interpreter state that persists across content streams */
138 	const char *usage;
139 	int hidden;
140 };
141 
/*
	Interpreter state record.

	NOTE(review): the name presumably abbreviates "content stream
	interpreter" -- confirm.

	The members fall into the three groups marked below: inputs,
	running state, and operand storage ("stack"). name and string are
	fixed 256 byte buffers, and stack has room for 32 floats.

	NOTE(review): string_len is presumably the number of bytes in use
	in string, and top the number of entries in use in stack --
	confirm against the interpreter.
*/
142 typedef struct
143 {
144 	/* input */
145 	pdf_document *doc;
146 	pdf_obj *rdb;
147 	pdf_lexbuf *buf;
148 	fz_cookie *cookie;
149 
150 	/* state */
151 	int gstate;
152 	int xbalance;
153 	int in_text;
154 	fz_rect d1_rect;
155 
156 	/* stack */
157 	pdf_obj *obj;
158 	char name[256];
159 	char string[256];
160 	size_t string_len;
161 	int top;
162 	float stack[32];
163 } pdf_csi;
164 
165 /* Functions to set up pdf_processor structures */
166 
/*
	Create a run processor.

	NOTE(review): presumably this interprets the operators it is fed
	and draws them to dev using ctm. The roles of usage, gstate,
	default_cs and cookie are not visible in this header -- confirm
	against the implementation.
*/
167 pdf_processor *pdf_new_run_processor(fz_context *ctx, fz_device *dev, fz_matrix ctm, const char *usage, pdf_gstate *gstate, fz_default_colorspaces *default_cs, fz_cookie *cookie);
168 
169 /*
170 	Create a buffer processor.
171 
172 	This collects the incoming PDF operator stream into an fz_buffer.
173 
174 	buffer: The (possibly empty) buffer to which operators will be
175 	appended.
176 
177 	ahxencode: If 0, then image streams will be sent as binary,
178 	otherwise they will be ASCII hex encoded.
179 */
180 pdf_processor *pdf_new_buffer_processor(fz_context *ctx, fz_buffer *buffer, int ahxencode);
181 
182 /*
183 	Create an output processor.
184 	This sends the incoming PDF operator stream to an fz_output stream.
185 
186 	out: The output stream to which operators will be sent.
187 
188 	ahxencode: If 0, then image streams will be sent as binary,
189 	otherwise they will be ASCII hex encoded.
190 */
191 pdf_processor *pdf_new_output_processor(fz_context *ctx, fz_output *out, int ahxencode);
192 
193 /*
	Options for a filter processor. A pointer to this structure is
	taken by pdf_new_filter_processor and pdf_filter_xobject_instance.

194 	opaque: Opaque value that is passed to all the filter functions.
195 
196 	image_filter: A function called to assess whether a given
197 	image should be removed or not.
198 
199 	text_filter: A function called to assess whether a given
200 	character should be removed or not.
201 
202 	after_text_object: A function called after each text object.
203 	This allows the caller to insert some extra content if
204 	desired.
205 
206 	end_page: A function called at the end of a page.
207 	This allows the caller to insert some extra content after
208 	all other content.
209 
210 	sanitize: If false, will only clean the syntax. This disables all filtering!
211 
212 	recurse: Clean/sanitize/filter resources recursively.
213 
214 	instance_forms: Always recurse on XObject Form resources, but will
215 	create a new instance of each XObject Form that is used, filtered
216 	individually.
217 
218 	ascii: If true, escape all binary data in the output.

	NOTE(review): unlike the other callbacks, end_page takes its
	user pointer as the last argument (named arg) rather than directly
	after ctx. Presumably it still receives opaque -- confirm.
219 */
220 typedef struct
221 {
222 	void *opaque;
223 	fz_image *(*image_filter)(fz_context *ctx, void *opaque, fz_matrix ctm, const char *name, fz_image *image);
224 	int (*text_filter)(fz_context *ctx, void *opaque, int *ucsbuf, int ucslen, fz_matrix trm, fz_matrix ctm, fz_rect bbox);
225 	void (*after_text_object)(fz_context *ctx, void *opaque, pdf_document *doc, pdf_processor *chain, fz_matrix ctm);
226 	void (*end_page)(fz_context *ctx, fz_buffer *buffer, void *arg);
227 
228 	int recurse;
229 	int instance_forms;
230 	int sanitize;
231 	int ascii;
232 } pdf_filter_options;
233 
234 /*
235 	Create a filter processor. This filters the PDF operators
236 	it is fed, and passes them down (with some changes) to the
237 	child processor (chain).
238 
239 	The changes made by the filter are:
240 
241 	* No operations are allowed to change the top level gstate.
242 	Additional q/Q operators are inserted to prevent this.
243 
244 	* Repeated/unnecessary colour operators are removed (so,
245 	for example, "0 0 0 rg 0 1 rg 0.5 g" would be sanitised to
246 	"0.5 g")
247 
248 	The intention of these changes is to provide a simpler,
249 	but equivalent stream, repairing problems with mismatched
250 	operators, maintaining structure (such as BMC, EMC calls)
251 	and leaving the graphics state in a known (default) state
252 	so that subsequent operations (such as synthesising new
253 	operators to be appended to the stream) are easier.
254 
255 	The net graphical effect of the filtered operator stream
256 	should be identical to the incoming operator stream.
257 
258 	chain: The child processor to which the filtered operators
259 	will be fed.
260 
261 	old_res: The incoming resource dictionary.
262 
263 	new_res: An (initially empty) resource dictionary that will
264 	be populated by copying entries from the old dictionary to
265 	the new one as they are used. At the end therefore, this
266 	contains exactly those resource objects actually required.
267 
268 	The filter options struct allows you to filter objects using callbacks.

	NOTE(review): the doc, struct_parents and transform parameters
	are not described above; their meaning should be confirmed against
	the implementation.
269 */
270 pdf_processor *pdf_new_filter_processor(fz_context *ctx, pdf_document *doc, pdf_processor *chain, pdf_obj *old_res, pdf_obj *new_res, int struct_parents, fz_matrix transform, pdf_filter_options *filter);
/*
	NOTE(review): undocumented. Judging by its name and by the
	instance_forms option in pdf_filter_options, this presumably
	returns a separately filtered instance of the XObject Form
	old_xobj -- confirm against the implementation.
*/
271 pdf_obj *pdf_filter_xobject_instance(fz_context *ctx, pdf_obj *old_xobj, pdf_obj *page_res, fz_matrix ctm, pdf_filter_options *filter);
272 
273 /*
274 	Functions to actually process annotations, glyphs and general stream objects.

	Each function takes the processor (proc) and the document (doc)
	involved. pdf_process_contents and pdf_process_annot also take a
	cookie; pdf_process_glyph does not, and takes its content as an
	fz_buffer rather than a pdf_obj.

	NOTE(review): res/resources is presumably the resource dictionary
	used to resolve names appearing in the stream -- confirm.
275 */
276 void pdf_process_contents(fz_context *ctx, pdf_processor *proc, pdf_document *doc, pdf_obj *obj, pdf_obj *res, fz_cookie *cookie);
277 void pdf_process_annot(fz_context *ctx, pdf_processor *proc, pdf_document *doc, pdf_page *page, pdf_annot *annot, fz_cookie *cookie);
278 void pdf_process_glyph(fz_context *ctx, pdf_processor *proc, pdf_document *doc, pdf_obj *resources, fz_buffer *contents);
279 
280 /* Text handling helper functions */
/*
	Text state parameters.

	The members parallel, in name and type, the operands of the text
	state callbacks in struct pdf_processor: op_Tc (char_space),
	op_Tw (word_space), op_Tz (scale), op_TL (leading), op_Tf (font
	and size), op_Tr (render) and op_Ts (rise).
*/
281 typedef struct
282 {
283 	float char_space;
284 	float word_space;
285 	float scale;
286 	float leading;
287 	pdf_font_desc *font;
288 	float size;
289 	int render;
290 	float rise;
291 } pdf_text_state;
292 
/*
	State of a text object, as operated on by the pdf_tos_* functions
	declared below.

	NOTE(review): tlm and tm are presumably the text line matrix and
	the text matrix. The members from cid onwards look like values for
	the character currently being handled (presumably set up by
	pdf_tos_make_trm and used by pdf_tos_move_after_char) -- confirm
	against the implementation.
*/
293 typedef struct
294 {
295 	fz_text *text;
296 	fz_rect text_bbox;
297 	fz_matrix tlm;
298 	fz_matrix tm;
299 	int text_mode;
300 
301 	int cid;
302 	int gid;
303 	fz_rect char_bbox;
304 	pdf_font_desc *fontdesc;
305 	float char_tx;
306 	float char_ty;
307 } pdf_text_object_state;
308 
/*
	Helpers operating on a pdf_text_object_state ("tos").

	pdf_tos_translate, pdf_tos_set_matrix and pdf_tos_newline do not
	take a context, unlike the other helpers. pdf_tos_set_matrix takes
	the same six floats as the op_Tm callback, and pdf_tos_translate
	the same two as op_Td/op_TD.

	NOTE(review): the two matrices exchanged by pdf_tos_save and
	pdf_tos_restore are presumably tm and tlm -- confirm.

	NOTE(review): the meaning of the int returned by pdf_tos_make_trm
	is not visible here -- confirm before relying on it.
*/
309 void pdf_tos_save(fz_context *ctx, pdf_text_object_state *tos, fz_matrix save[2]);
310 void pdf_tos_restore(fz_context *ctx, pdf_text_object_state *tos, fz_matrix save[2]);
311 fz_text *pdf_tos_get_text(fz_context *ctx, pdf_text_object_state *tos);
312 void pdf_tos_reset(fz_context *ctx, pdf_text_object_state *tos, int render);
313 int pdf_tos_make_trm(fz_context *ctx, pdf_text_object_state *tos, pdf_text_state *text, pdf_font_desc *fontdesc, int cid, fz_matrix *trm);
314 void pdf_tos_move_after_char(fz_context *ctx, pdf_text_object_state *tos);
315 void pdf_tos_translate(pdf_text_object_state *tos, float tx, float ty);
316 void pdf_tos_set_matrix(pdf_text_object_state *tos, float a, float b, float c, float d, float e, float f);
317 void pdf_tos_newline(pdf_text_object_state *tos, float leading);
318 
319 #endif
320