1 #ifndef PDF_INTERPRET_H 2 #define PDF_INTERPRET_H 3 4 #include "mupdf/pdf/font.h" 5 #include "mupdf/pdf/resource.h" 6 7 typedef struct pdf_gstate pdf_gstate; 8 typedef struct pdf_processor pdf_processor; 9 10 void *pdf_new_processor(fz_context *ctx, int size); 11 void pdf_close_processor(fz_context *ctx, pdf_processor *proc); 12 void pdf_drop_processor(fz_context *ctx, pdf_processor *proc); 13 14 struct pdf_processor 15 { 16 void (*close_processor)(fz_context *ctx, pdf_processor *proc); 17 void (*drop_processor)(fz_context *ctx, pdf_processor *proc); 18 19 /* general graphics state */ 20 void (*op_w)(fz_context *ctx, pdf_processor *proc, float linewidth); 21 void (*op_j)(fz_context *ctx, pdf_processor *proc, int linejoin); 22 void (*op_J)(fz_context *ctx, pdf_processor *proc, int linecap); 23 void (*op_M)(fz_context *ctx, pdf_processor *proc, float miterlimit); 24 void (*op_d)(fz_context *ctx, pdf_processor *proc, pdf_obj *array, float phase); 25 void (*op_ri)(fz_context *ctx, pdf_processor *proc, const char *intent); 26 void (*op_i)(fz_context *ctx, pdf_processor *proc, float flatness); 27 28 void (*op_gs_begin)(fz_context *ctx, pdf_processor *proc, const char *name, pdf_obj *extgstate); 29 void (*op_gs_BM)(fz_context *ctx, pdf_processor *proc, const char *blendmode); 30 void (*op_gs_ca)(fz_context *ctx, pdf_processor *proc, float alpha); 31 void (*op_gs_CA)(fz_context *ctx, pdf_processor *proc, float alpha); 32 void (*op_gs_SMask)(fz_context *ctx, pdf_processor *proc, pdf_obj *smask, pdf_obj *page_resources, float *bc, int luminosity); 33 void (*op_gs_end)(fz_context *ctx, pdf_processor *proc); 34 35 /* special graphics state */ 36 void (*op_q)(fz_context *ctx, pdf_processor *proc); 37 void (*op_Q)(fz_context *ctx, pdf_processor *proc); 38 void (*op_cm)(fz_context *ctx, pdf_processor *proc, float a, float b, float c, float d, float e, float f); 39 40 /* path construction */ 41 void (*op_m)(fz_context *ctx, pdf_processor *proc, float x, float y); 42 void (*op_l)(fz_context *ctx, pdf_processor *proc, float x, float y); 43 void (*op_c)(fz_context *ctx, pdf_processor *proc, float x1, float y1, float x2, float y2, float x3, float y3); 44 void (*op_v)(fz_context *ctx, pdf_processor *proc, float x2, float y2, float x3, float y3); 45 void (*op_y)(fz_context *ctx, pdf_processor *proc, float x1, float y1, float x3, float y3); 46 void (*op_h)(fz_context *ctx, pdf_processor *proc); 47 void (*op_re)(fz_context *ctx, pdf_processor *proc, float x, float y, float w, float h); 48 49 /* path painting */ 50 void (*op_S)(fz_context *ctx, pdf_processor *proc); 51 void (*op_s)(fz_context *ctx, pdf_processor *proc); 52 void (*op_F)(fz_context *ctx, pdf_processor *proc); 53 void (*op_f)(fz_context *ctx, pdf_processor *proc); 54 void (*op_fstar)(fz_context *ctx, pdf_processor *proc); 55 void (*op_B)(fz_context *ctx, pdf_processor *proc); 56 void (*op_Bstar)(fz_context *ctx, pdf_processor *proc); 57 void (*op_b)(fz_context *ctx, pdf_processor *proc); 58 void (*op_bstar)(fz_context *ctx, pdf_processor *proc); 59 void (*op_n)(fz_context *ctx, pdf_processor *proc); 60 61 /* clipping paths */ 62 void (*op_W)(fz_context *ctx, pdf_processor *proc); 63 void (*op_Wstar)(fz_context *ctx, pdf_processor *proc); 64 65 /* text objects */ 66 void (*op_BT)(fz_context *ctx, pdf_processor *proc); 67 void (*op_ET)(fz_context *ctx, pdf_processor *proc); 68 69 /* text state */ 70 void (*op_Tc)(fz_context *ctx, pdf_processor *proc, float charspace); 71 void (*op_Tw)(fz_context *ctx, pdf_processor *proc, float wordspace); 72 void (*op_Tz)(fz_context *ctx, pdf_processor *proc, float scale); 73 void (*op_TL)(fz_context *ctx, pdf_processor *proc, float leading); 74 void (*op_Tf)(fz_context *ctx, pdf_processor *proc, const char *name, pdf_font_desc *font, float size); 75 void (*op_Tr)(fz_context *ctx, pdf_processor *proc, int render); 76 void (*op_Ts)(fz_context *ctx, pdf_processor *proc, float rise); 77 78 /* text positioning */ 79 void (*op_Td)(fz_context *ctx, pdf_processor *proc, float tx, float ty); 80 void (*op_TD)(fz_context *ctx, pdf_processor *proc, float tx, float ty); 81 void (*op_Tm)(fz_context *ctx, pdf_processor *proc, float a, float b, float c, float d, float e, float f); 82 void (*op_Tstar)(fz_context *ctx, pdf_processor *proc); 83 84 /* text showing */ 85 void (*op_TJ)(fz_context *ctx, pdf_processor *proc, pdf_obj *array); 86 void (*op_Tj)(fz_context *ctx, pdf_processor *proc, char *str, size_t len); 87 void (*op_squote)(fz_context *ctx, pdf_processor *proc, char *str, size_t len); 88 void (*op_dquote)(fz_context *ctx, pdf_processor *proc, float aw, float ac, char *str, size_t len); 89 90 /* type 3 fonts */ 91 void (*op_d0)(fz_context *ctx, pdf_processor *proc, float wx, float wy); 92 void (*op_d1)(fz_context *ctx, pdf_processor *proc, float wx, float wy, float llx, float lly, float urx, float ury); 93 94 /* color */ 95 void (*op_CS)(fz_context *ctx, pdf_processor *proc, const char *name, fz_colorspace *cs); 96 void (*op_cs)(fz_context *ctx, pdf_processor *proc, const char *name, fz_colorspace *cs); 97 void (*op_SC_pattern)(fz_context *ctx, pdf_processor *proc, const char *name, pdf_pattern *pat, int n, float *color); 98 void (*op_sc_pattern)(fz_context *ctx, pdf_processor *proc, const char *name, pdf_pattern *pat, int n, float *color); 99 void (*op_SC_shade)(fz_context *ctx, pdf_processor *proc, const char *name, fz_shade *shade); 100 void (*op_sc_shade)(fz_context *ctx, pdf_processor *proc, const char *name, fz_shade *shade); 101 void (*op_SC_color)(fz_context *ctx, pdf_processor *proc, int n, float *color); 102 void (*op_sc_color)(fz_context *ctx, pdf_processor *proc, int n, float *color); 103 104 void (*op_G)(fz_context *ctx, pdf_processor *proc, float g); 105 void (*op_g)(fz_context *ctx, pdf_processor *proc, float g); 106 void (*op_RG)(fz_context *ctx, pdf_processor *proc, float r, float g, float b); 107 void (*op_rg)(fz_context *ctx, pdf_processor *proc, float r, float g, float b); 108 void (*op_K)(fz_context *ctx, pdf_processor *proc, float c, float m, float y, float k); 109 void (*op_k)(fz_context *ctx, pdf_processor *proc, float c, float m, float y, float k); 110 111 /* shadings, images, xobjects */ 112 void (*op_BI)(fz_context *ctx, pdf_processor *proc, fz_image *image, const char *colorspace_name); 113 void (*op_sh)(fz_context *ctx, pdf_processor *proc, const char *name, fz_shade *shade); 114 void (*op_Do_image)(fz_context *ctx, pdf_processor *proc, const char *name, fz_image *image); 115 void (*op_Do_form)(fz_context *ctx, pdf_processor *proc, const char *name, pdf_obj *form, pdf_obj *page_resources); 116 117 /* marked content */ 118 void (*op_MP)(fz_context *ctx, pdf_processor *proc, const char *tag); 119 void (*op_DP)(fz_context *ctx, pdf_processor *proc, const char *tag, pdf_obj *raw, pdf_obj *cooked); 120 void (*op_BMC)(fz_context *ctx, pdf_processor *proc, const char *tag); 121 void (*op_BDC)(fz_context *ctx, pdf_processor *proc, const char *tag, pdf_obj *raw, pdf_obj *cooked); 122 void (*op_EMC)(fz_context *ctx, pdf_processor *proc); 123 124 /* compatibility */ 125 void (*op_BX)(fz_context *ctx, pdf_processor *proc); 126 void (*op_EX)(fz_context *ctx, pdf_processor *proc); 127 128 /* Virtual ops for ExtGState entries */ 129 void (*op_gs_OP)(fz_context *ctx, pdf_processor *proc, int b); 130 void (*op_gs_op)(fz_context *ctx, pdf_processor *proc, int b); 131 void (*op_gs_OPM)(fz_context *ctx, pdf_processor *proc, int i); 132 void (*op_gs_UseBlackPtComp)(fz_context *ctx, pdf_processor *proc, pdf_obj *name); 133 134 /* END is used to signify end of stream (finalise and close down) */ 135 void (*op_END)(fz_context *ctx, pdf_processor *proc); 136 137 /* interpreter state that persists across content streams */ 138 const char *usage; 139 int hidden; 140 }; 141 142 typedef struct 143 { 144 /* input */ 145 pdf_document *doc; 146 pdf_obj *rdb; 147 pdf_lexbuf *buf; 148 fz_cookie *cookie; 149 150 /* state */ 151 int gstate; 152 int xbalance; 153 int in_text; 154 fz_rect d1_rect; 155 156 /* stack */ 157 pdf_obj *obj; 158 char name[256]; 159 char string[256]; 160 size_t string_len; 161 int top; 162 float stack[32]; 163 } pdf_csi; 164 165 /* Functions to set up pdf_process structures */ 166 167 pdf_processor *pdf_new_run_processor(fz_context *ctx, fz_device *dev, fz_matrix ctm, const char *usage, pdf_gstate *gstate, fz_default_colorspaces *default_cs, fz_cookie *cookie); 168 169 /* 170 Create a buffer processor. 171 172 This collects the incoming PDF operator stream into an fz_buffer. 173 174 buffer: The (possibly empty) buffer to which operators will be 175 appended. 176 177 ahxencode: If 0, then image streams will be send as binary, 178 otherwise they will be asciihexencoded. 179 */ 180 pdf_processor *pdf_new_buffer_processor(fz_context *ctx, fz_buffer *buffer, int ahxencode); 181 182 /* 183 Create an output processor. This 184 sends the incoming PDF operator stream to an fz_output stream. 185 186 out: The output stream to which operators will be sent. 187 188 ahxencode: If 0, then image streams will be send as binary, 189 otherwise they will be asciihexencoded. 190 */ 191 pdf_processor *pdf_new_output_processor(fz_context *ctx, fz_output *out, int ahxencode); 192 193 /* 194 opaque: Opaque value that is passed to all the filter functions. 195 196 image_filter: A function called to assess whether a given 197 image should be removed or not. 198 199 text_filter: A function called to assess whether a given 200 character should be removed or not. 201 202 after_text_object: A function called after each text object. 203 This allows the caller to insert some extra content if 204 desired. 205 206 end_page: A function called at the end of a page. 207 This allows the caller to insert some extra content after 208 all other content. 209 210 sanitize: If false, will only clean the syntax. This disables all filtering! 211 212 recurse: Clean/sanitize/filter resources recursively. 213 214 instance_forms: Always recurse on XObject Form resources, but will 215 create a new instance of each XObject Form that is used, filtered 216 individually. 217 218 ascii: If true, escape all binary data in the output. 219 */ 220 typedef struct 221 { 222 void *opaque; 223 fz_image *(*image_filter)(fz_context *ctx, void *opaque, fz_matrix ctm, const char *name, fz_image *image); 224 int (*text_filter)(fz_context *ctx, void *opaque, int *ucsbuf, int ucslen, fz_matrix trm, fz_matrix ctm, fz_rect bbox); 225 void (*after_text_object)(fz_context *ctx, void *opaque, pdf_document *doc, pdf_processor *chain, fz_matrix ctm); 226 void (*end_page)(fz_context *ctx, fz_buffer *buffer, void *arg); 227 228 int recurse; 229 int instance_forms; 230 int sanitize; 231 int ascii; 232 } pdf_filter_options; 233 234 /* 235 Create a filter processor. This filters the PDF operators 236 it is fed, and passes them down (with some changes) to the 237 child filter. 238 239 The changes made by the filter are: 240 241 * No operations are allowed to change the top level gstate. 242 Additional q/Q operators are inserted to prevent this. 243 244 * Repeated/unnecessary colour operators are removed (so, 245 for example, "0 0 0 rg 0 1 rg 0.5 g" would be sanitised to 246 "0.5 g") 247 248 The intention of these changes is to provide a simpler, 249 but equivalent stream, repairing problems with mismatched 250 operators, maintaining structure (such as BMC, EMC calls) 251 and leaving the graphics state in an known (default) state 252 so that subsequent operations (such as synthesising new 253 operators to be appended to the stream) are easier. 254 255 The net graphical effect of the filtered operator stream 256 should be identical to the incoming operator stream. 257 258 chain: The child processor to which the filtered operators 259 will be fed. 260 261 old_res: The incoming resource dictionary. 262 263 new_res: An (initially empty) resource dictionary that will 264 be populated by copying entries from the old dictionary to 265 the new one as they are used. At the end therefore, this 266 contains exactly those resource objects actually required. 267 268 The filter options struct allows you to filter objects using callbacks. 269 */ 270 pdf_processor *pdf_new_filter_processor(fz_context *ctx, pdf_document *doc, pdf_processor *chain, pdf_obj *old_res, pdf_obj *new_res, int struct_parents, fz_matrix transform, pdf_filter_options *filter); 271 pdf_obj *pdf_filter_xobject_instance(fz_context *ctx, pdf_obj *old_xobj, pdf_obj *page_res, fz_matrix ctm, pdf_filter_options *filter); 272 273 /* 274 Functions to actually process annotations, glyphs and general stream objects. 275 */ 276 void pdf_process_contents(fz_context *ctx, pdf_processor *proc, pdf_document *doc, pdf_obj *obj, pdf_obj *res, fz_cookie *cookie); 277 void pdf_process_annot(fz_context *ctx, pdf_processor *proc, pdf_document *doc, pdf_page *page, pdf_annot *annot, fz_cookie *cookie); 278 void pdf_process_glyph(fz_context *ctx, pdf_processor *proc, pdf_document *doc, pdf_obj *resources, fz_buffer *contents); 279 280 /* Text handling helper functions */ 281 typedef struct 282 { 283 float char_space; 284 float word_space; 285 float scale; 286 float leading; 287 pdf_font_desc *font; 288 float size; 289 int render; 290 float rise; 291 } pdf_text_state; 292 293 typedef struct 294 { 295 fz_text *text; 296 fz_rect text_bbox; 297 fz_matrix tlm; 298 fz_matrix tm; 299 int text_mode; 300 301 int cid; 302 int gid; 303 fz_rect char_bbox; 304 pdf_font_desc *fontdesc; 305 float char_tx; 306 float char_ty; 307 } pdf_text_object_state; 308 309 void pdf_tos_save(fz_context *ctx, pdf_text_object_state *tos, fz_matrix save[2]); 310 void pdf_tos_restore(fz_context *ctx, pdf_text_object_state *tos, fz_matrix save[2]); 311 fz_text *pdf_tos_get_text(fz_context *ctx, pdf_text_object_state *tos); 312 void pdf_tos_reset(fz_context *ctx, pdf_text_object_state *tos, int render); 313 int pdf_tos_make_trm(fz_context *ctx, pdf_text_object_state *tos, pdf_text_state *text, pdf_font_desc *fontdesc, int cid, fz_matrix *trm); 314 void pdf_tos_move_after_char(fz_context *ctx, pdf_text_object_state *tos); 315 void pdf_tos_translate(pdf_text_object_state *tos, float tx, float ty); 316 void pdf_tos_set_matrix(pdf_text_object_state *tos, float a, float b, float c, float d, float e, float f); 317 void pdf_tos_newline(pdf_text_object_state *tos, float leading); 318 319 #endif 320