1 #ifndef MUPDF_PDF_DOCUMENT_H 2 #define MUPDF_PDF_DOCUMENT_H 3 4 typedef struct pdf_xref pdf_xref; 5 typedef struct pdf_ocg_descriptor pdf_ocg_descriptor; 6 7 typedef struct pdf_page pdf_page; 8 typedef struct pdf_annot pdf_annot; 9 typedef struct pdf_annot pdf_widget; 10 typedef struct pdf_js pdf_js; 11 12 enum 13 { 14 PDF_LEXBUF_SMALL = 256, 15 PDF_LEXBUF_LARGE = 65536 16 }; 17 18 typedef struct 19 { 20 size_t size; 21 size_t base_size; 22 size_t len; 23 int64_t i; 24 float f; 25 char *scratch; 26 char buffer[PDF_LEXBUF_SMALL]; 27 } pdf_lexbuf; 28 29 typedef struct 30 { 31 pdf_lexbuf base; 32 char buffer[PDF_LEXBUF_LARGE - PDF_LEXBUF_SMALL]; 33 } pdf_lexbuf_large; 34 35 /* 36 Document event structures are mostly opaque to the app. Only the type 37 is visible to the app. 38 */ 39 typedef struct pdf_doc_event pdf_doc_event; 40 41 /* 42 the type of function via which the app receives 43 document events. 44 */ 45 typedef void (pdf_doc_event_cb)(fz_context *ctx, pdf_document *doc, pdf_doc_event *event, void *data); 46 47 /* 48 Open a PDF document. 49 50 Open a PDF document by reading its cross reference table, so 51 MuPDF can locate PDF objects inside the file. Upon an broken 52 cross reference table or other parse errors MuPDF will restart 53 parsing the file from the beginning to try to rebuild a 54 (hopefully correct) cross reference table to allow further 55 processing of the file. 56 57 The returned pdf_document should be used when calling most 58 other PDF functions. Note that it wraps the context, so those 59 functions implicitly get access to the global state in 60 context. 61 62 filename: a path to a file as it would be given to open(2). 63 */ 64 pdf_document *pdf_open_document(fz_context *ctx, const char *filename); 65 66 /* 67 Opens a PDF document. 68 69 Same as pdf_open_document, but takes a stream instead of a 70 filename to locate the PDF document to open. Increments the 71 reference count of the stream. See fz_open_file, 72 fz_open_file_w or fz_open_fd for opening a stream, and 73 fz_drop_stream for closing an open stream. 74 */ 75 pdf_document *pdf_open_document_with_stream(fz_context *ctx, fz_stream *file); 76 77 /* 78 Closes and frees an opened PDF document. 79 80 The resource store in the context associated with pdf_document 81 is emptied. 82 */ 83 void pdf_drop_document(fz_context *ctx, pdf_document *doc); 84 85 pdf_document *pdf_keep_document(fz_context *ctx, pdf_document *doc); 86 87 /* 88 down-cast a fz_document to a pdf_document. 89 Returns NULL if underlying document is not PDF 90 */ 91 pdf_document *pdf_specifics(fz_context *ctx, fz_document *doc); 92 93 /* 94 Down-cast generic fitz objects into pdf specific variants. 95 Returns NULL if the objects are not from a PDF document. 96 */ 97 pdf_document *pdf_document_from_fz_document(fz_context *ctx, fz_document *ptr); 98 pdf_page *pdf_page_from_fz_page(fz_context *ctx, fz_page *ptr); 99 100 int pdf_needs_password(fz_context *ctx, pdf_document *doc); 101 102 /* 103 Attempt to authenticate a 104 password. 105 106 Returns 0 for failure, non-zero for success. 107 108 In the non-zero case: 109 bit 0 set => no password required 110 bit 1 set => user password authenticated 111 bit 2 set => owner password authenticated 112 */ 113 int pdf_authenticate_password(fz_context *ctx, pdf_document *doc, const char *pw); 114 115 int pdf_has_permission(fz_context *ctx, pdf_document *doc, fz_permission p); 116 int pdf_lookup_metadata(fz_context *ctx, pdf_document *doc, const char *key, char *ptr, int size); 117 118 fz_outline *pdf_load_outline(fz_context *ctx, pdf_document *doc); 119 120 /* 121 Get the number of layer configurations defined in this document. 122 123 doc: The document in question. 124 */ 125 int pdf_count_layer_configs(fz_context *ctx, pdf_document *doc); 126 127 void pdf_invalidate_xfa(fz_context *ctx, pdf_document *doc); 128 129 130 typedef struct 131 { 132 const char *name; 133 const char *creator; 134 } pdf_layer_config; 135 136 /* 137 Fetch the name (and optionally creator) of the given layer config. 138 139 doc: The document in question. 140 141 config_num: A value in the 0..n-1 range, where n is the 142 value returned from pdf_count_layer_configs. 143 144 info: Pointer to structure to fill in. Pointers within 145 this structure may be set to NULL if no information is 146 available. 147 */ 148 void pdf_layer_config_info(fz_context *ctx, pdf_document *doc, int config_num, pdf_layer_config *info); 149 150 /* 151 Set the current configuration. 152 This updates the visibility of the optional content groups 153 within the document. 154 155 doc: The document in question. 156 157 config_num: A value in the 0..n-1 range, where n is the 158 value returned from pdf_count_layer_configs. 159 */ 160 void pdf_select_layer_config(fz_context *ctx, pdf_document *doc, int config_num); 161 162 /* 163 Returns the number of entries in the 'UI' for this layer configuration. 164 165 doc: The document in question. 166 */ 167 int pdf_count_layer_config_ui(fz_context *ctx, pdf_document *doc); 168 169 /* 170 Select a checkbox/radiobox within the 'UI' for this layer 171 configuration. 172 173 Selecting a UI entry that is a radiobox may disable 174 other UI entries. 175 176 doc: The document in question. 177 178 ui: A value in the 0..m-1 range, where m is the value 179 returned by pdf_count_layer_config_ui. 180 */ 181 void pdf_select_layer_config_ui(fz_context *ctx, pdf_document *doc, int ui); 182 183 /* 184 Select a checkbox/radiobox within the 'UI' for this layer configuration. 185 186 doc: The document in question. 187 188 ui: A value in the 0..m-1 range, where m is the value 189 returned by pdf_count_layer_config_ui. 190 */ 191 void pdf_deselect_layer_config_ui(fz_context *ctx, pdf_document *doc, int ui); 192 193 /* 194 Toggle a checkbox/radiobox within the 'UI' for this layer configuration. 195 196 Toggling a UI entry that is a radiobox may disable 197 other UI entries. 198 199 doc: The document in question. 200 201 ui: A value in the 0..m-1 range, where m is the value 202 returned by pdf_count_layer_config_ui. 203 */ 204 void pdf_toggle_layer_config_ui(fz_context *ctx, pdf_document *doc, int ui); 205 206 typedef enum 207 { 208 PDF_LAYER_UI_LABEL = 0, 209 PDF_LAYER_UI_CHECKBOX = 1, 210 PDF_LAYER_UI_RADIOBOX = 2 211 } pdf_layer_config_ui_type; 212 213 typedef struct 214 { 215 const char *text; 216 int depth; 217 pdf_layer_config_ui_type type; 218 int selected; 219 int locked; 220 } pdf_layer_config_ui; 221 222 /* 223 Get the info for a given entry in the layer config ui. 224 225 doc: The document in question. 226 227 ui: A value in the 0..m-1 range, where m is the value 228 returned by pdf_count_layer_config_ui. 229 230 info: Pointer to a structure to fill in with information 231 about the requested ui entry. 232 */ 233 void pdf_layer_config_ui_info(fz_context *ctx, pdf_document *doc, int ui, pdf_layer_config_ui *info); 234 235 /* 236 Write the current layer config back into the document as the default state. 237 */ 238 void pdf_set_layer_config_as_default(fz_context *ctx, pdf_document *doc); 239 240 /* 241 Determine whether changes have been made since the 242 document was opened or last saved. 243 */ 244 int pdf_has_unsaved_changes(fz_context *ctx, pdf_document *doc); 245 246 /* 247 Determine if this PDF has been repaired since opening. 248 */ 249 int pdf_was_repaired(fz_context *ctx, pdf_document *doc); 250 251 /* Object that can perform the cryptographic operation necessary for document signing */ 252 typedef struct pdf_pkcs7_signer pdf_pkcs7_signer; 253 254 /* Unsaved signature fields */ 255 typedef struct pdf_unsaved_sig 256 { 257 pdf_obj *field; 258 size_t byte_range_start; 259 size_t byte_range_end; 260 size_t contents_start; 261 size_t contents_end; 262 pdf_pkcs7_signer *signer; 263 struct pdf_unsaved_sig *next; 264 } pdf_unsaved_sig; 265 266 typedef struct 267 { 268 int page; 269 int object; 270 } pdf_rev_page_map; 271 272 typedef struct 273 { 274 int number; /* Page object number */ 275 int64_t offset; /* Offset of page object */ 276 int64_t index; /* Index into shared hint_shared_ref */ 277 } pdf_hint_page; 278 279 typedef struct 280 { 281 int number; /* Object number of first object */ 282 int64_t offset; /* Offset of first object */ 283 } pdf_hint_shared; 284 285 typedef struct { 286 char *key; 287 fz_xml_doc *value; 288 } pdf_xfa_entry; 289 290 typedef struct { 291 int count; 292 pdf_xfa_entry *entries; 293 } pdf_xfa; 294 295 struct pdf_document 296 { 297 fz_document super; 298 299 fz_stream *file; 300 301 int version; 302 int64_t startxref; 303 int64_t file_size; 304 pdf_crypt *crypt; 305 pdf_ocg_descriptor *ocg; 306 fz_colorspace *oi; 307 308 int max_xref_len; 309 int num_xref_sections; 310 int saved_num_xref_sections; 311 int num_incremental_sections; 312 int xref_base; 313 int disallow_new_increments; 314 pdf_xref *xref_sections; 315 pdf_xref *saved_xref_sections; 316 int *xref_index; 317 int save_in_progress; 318 int has_xref_streams; 319 int has_old_style_xrefs; 320 int has_linearization_object; 321 322 int rev_page_count; 323 pdf_rev_page_map *rev_page_map; 324 325 int repair_attempted; 326 327 /* State indicating which file parsing method we are using */ 328 int file_reading_linearly; 329 int64_t file_length; 330 331 int linear_page_count; 332 pdf_obj *linear_obj; /* Linearized object (if used) */ 333 pdf_obj **linear_page_refs; /* Page objects for linear loading */ 334 int linear_page1_obj_num; 335 336 /* The state for the pdf_progressive_advance parser */ 337 int64_t linear_pos; 338 int linear_page_num; 339 340 int hint_object_offset; 341 int hint_object_length; 342 int hints_loaded; /* Set to 1 after the hints loading has completed, 343 * whether successful or not! */ 344 /* Page n references shared object references: 345 * hint_shared_ref[i] 346 * where 347 * i = s to e-1 348 * s = hint_page[n]->index 349 * e = hint_page[n+1]->index 350 * Shared object reference r accesses objects: 351 * rs to re-1 352 * where 353 * rs = hint_shared[r]->number 354 * re = hint_shared[r]->count + rs 355 * These are guaranteed to lie within the region starting at 356 * hint_shared[r]->offset of length hint_shared[r]->length 357 */ 358 pdf_hint_page *hint_page; 359 int *hint_shared_ref; 360 pdf_hint_shared *hint_shared; 361 int hint_obj_offsets_max; 362 int64_t *hint_obj_offsets; 363 364 int resources_localised; 365 366 pdf_lexbuf_large lexbuf; 367 368 pdf_js *js; 369 370 int recalculate; 371 int dirty; 372 int redacted; 373 374 pdf_doc_event_cb *event_cb; 375 void *event_cb_data; 376 377 int num_type3_fonts; 378 int max_type3_fonts; 379 fz_font **type3_fonts; 380 381 struct { 382 fz_hash_table *fonts; 383 } resources; 384 385 int orphans_max; 386 int orphans_count; 387 pdf_obj **orphans; 388 389 pdf_xfa xfa; 390 }; 391 392 pdf_document *pdf_create_document(fz_context *ctx); 393 394 typedef struct pdf_graft_map pdf_graft_map; 395 396 /* 397 Return a deep copied object equivalent to the 398 supplied object, suitable for use within the given document. 399 400 dst: The document in which the returned object is to be used. 401 402 obj: The object deep copy. 403 404 Note: If grafting multiple objects, you should use a pdf_graft_map 405 to avoid potential duplication of target objects. 406 */ 407 pdf_obj *pdf_graft_object(fz_context *ctx, pdf_document *dst, pdf_obj *obj); 408 409 /* 410 Prepare a graft map object to allow objects 411 to be deep copied from one document to the given one, avoiding 412 problems with duplicated child objects. 413 414 dst: The document to copy objects to. 415 416 Note: all the source objects must come from the same document. 417 */ 418 pdf_graft_map *pdf_new_graft_map(fz_context *ctx, pdf_document *dst); 419 420 pdf_graft_map *pdf_keep_graft_map(fz_context *ctx, pdf_graft_map *map); 421 void pdf_drop_graft_map(fz_context *ctx, pdf_graft_map *map); 422 423 /* 424 Return a deep copied object equivalent 425 to the supplied object, suitable for use within the target 426 document of the map. 427 428 map: A map targeted at the document in which the returned 429 object is to be used. 430 431 obj: The object to be copied. 432 433 Note: Copying multiple objects via the same graft map ensures 434 that any shared children are not copied more than once. 435 */ 436 pdf_obj *pdf_graft_mapped_object(fz_context *ctx, pdf_graft_map *map, pdf_obj *obj); 437 438 /* 439 Graft a page (and its resources) from the src document to the 440 destination document of the graft. This involves a deep copy 441 of the objects in question. 442 443 map: A map targetted at the document into which the page should 444 be inserted. 445 446 page_to: The position within the destination document at which 447 the page should be inserted (pages numbered from 0, with -1 448 meaning "at the end"). 449 450 src: The document from which the page should be copied. 451 452 page_from: The page number which should be copied from the src 453 document (pages numbered from 0, with -1 meaning "at the end"). 454 */ 455 void pdf_graft_page(fz_context *ctx, pdf_document *dst, int page_to, pdf_document *src, int page_from); 456 void pdf_graft_mapped_page(fz_context *ctx, pdf_graft_map *map, int page_to, pdf_document *src, int page_from); 457 458 /* 459 Create a device that will record the 460 graphical operations given to it into a sequence of 461 pdf operations, together with a set of resources. This 462 sequence/set pair can then be used as the basis for 463 adding a page to the document (see pdf_add_page). 464 465 doc: The document for which these are intended. 466 467 mediabox: The bbox for the created page. 468 469 presources: Pointer to a place to put the created 470 resources dictionary. 471 472 pcontents: Pointer to a place to put the created 473 contents buffer. 474 */ 475 fz_device *pdf_page_write(fz_context *ctx, pdf_document *doc, fz_rect mediabox, pdf_obj **presources, fz_buffer **pcontents); 476 477 /* 478 Create a pdf_obj within a document that 479 represents a page, from a previously created resources 480 dictionary and page content stream. This should then be 481 inserted into the document using pdf_insert_page. 482 483 After this call the page exists within the document 484 structure, but is not actually ever displayed as it is 485 not linked into the PDF page tree. 486 487 doc: The document to which to add the page. 488 489 mediabox: The mediabox for the page (should be identical 490 to that used when creating the resources/contents). 491 492 rotate: 0, 90, 180 or 270. The rotation to use for the 493 page. 494 495 resources: The resources dictionary for the new page 496 (typically created by pdf_page_write). 497 498 contents: The page contents for the new page (typically 499 create by pdf_page_write). 500 */ 501 pdf_obj *pdf_add_page(fz_context *ctx, pdf_document *doc, fz_rect mediabox, int rotate, pdf_obj *resources, fz_buffer *contents); 502 503 /* 504 Insert a page previously created by 505 pdf_add_page into the pages tree of the document. 506 507 doc: The document to insert into. 508 509 at: The page number to insert at. 0 inserts at the start. 510 negative numbers, or INT_MAX insert at the end. Otherwise 511 n inserts after page n. 512 513 page: The page to insert. 514 */ 515 void pdf_insert_page(fz_context *ctx, pdf_document *doc, int at, pdf_obj *page); 516 517 /* 518 Delete a page from the page tree of 519 a document. This does not remove the page contents 520 or resources from the file. 521 522 doc: The document to operate on. 523 524 number: The page to remove (numbered from 0) 525 */ 526 void pdf_delete_page(fz_context *ctx, pdf_document *doc, int number); 527 528 /* 529 Delete a range of pages from the 530 page tree of a document. This does not remove the page 531 contents or resources from the file. 532 533 doc: The document to operate on. 534 535 start, end: The range of pages (numbered from 0) 536 (inclusive, exclusive) to remove. If end is negative or 537 greater than the number of pages in the document, it 538 will be taken to be the end of the document. 539 */ 540 void pdf_delete_page_range(fz_context *ctx, pdf_document *doc, int start, int end); 541 542 fz_text_language pdf_document_language(fz_context *ctx, pdf_document *doc); 543 void pdf_set_document_language(fz_context *ctx, pdf_document *doc, fz_text_language lang); 544 545 /* 546 In calls to fz_save_document, the following options structure can be used 547 to control aspects of the writing process. This structure may grow 548 in the future, and should be zero-filled to allow forwards compatibility. 549 */ 550 typedef struct 551 { 552 int do_incremental; /* Write just the changed objects. */ 553 int do_pretty; /* Pretty-print dictionaries and arrays. */ 554 int do_ascii; /* ASCII hex encode binary streams. */ 555 int do_compress; /* Compress streams. */ 556 int do_compress_images; /* Compress (or leave compressed) image streams. */ 557 int do_compress_fonts; /* Compress (or leave compressed) font streams. */ 558 int do_decompress; /* Decompress streams (except when compressing images/fonts). */ 559 int do_garbage; /* Garbage collect objects before saving; 1=gc, 2=re-number, 3=de-duplicate. */ 560 int do_linear; /* Write linearised. */ 561 int do_clean; /* Clean content streams. */ 562 int do_sanitize; /* Sanitize content streams. */ 563 int do_appearance; /* (Re)create appearance streams. */ 564 int do_encrypt; /* Encryption method to use: keep, none, rc4-40, etc. */ 565 int permissions; /* Document encryption permissions. */ 566 char opwd_utf8[128]; /* Owner password. */ 567 char upwd_utf8[128]; /* User password. */ 568 } pdf_write_options; 569 570 extern const pdf_write_options pdf_default_write_options; 571 572 /* 573 Parse option string into a pdf_write_options struct. 574 Matches the command line options to 'mutool clean': 575 g: garbage collect 576 d, i, f: expand all, fonts, images 577 l: linearize 578 a: ascii hex encode 579 z: deflate 580 c: clean content streams 581 s: sanitize content streams 582 */ 583 pdf_write_options *pdf_parse_write_options(fz_context *ctx, pdf_write_options *opts, const char *args); 584 585 /* 586 Returns true if there are digital signatures waiting to 587 to updated on save. 588 */ 589 int pdf_has_unsaved_sigs(fz_context *ctx, pdf_document *doc); 590 591 /* 592 Write out the document to an output stream with all changes finalised. 593 */ 594 void pdf_write_document(fz_context *ctx, pdf_document *doc, fz_output *out, pdf_write_options *opts); 595 596 /* 597 Write out the document to a file with all changes finalised. 598 */ 599 void pdf_save_document(fz_context *ctx, pdf_document *doc, const char *filename, pdf_write_options *opts); 600 601 char *pdf_format_write_options(fz_context *ctx, char *buffer, size_t buffer_len, const pdf_write_options *opts); 602 603 /* 604 Return true if the document can be saved incrementally. Applying 605 redactions or having a repaired document make incremental saving 606 impossible. 607 */ 608 int pdf_can_be_saved_incrementally(fz_context *ctx, pdf_document *doc); 609 610 #endif 611