1 #ifndef MUPDF_PDF_DOCUMENT_H
2 #define MUPDF_PDF_DOCUMENT_H
3 
4 typedef struct pdf_xref pdf_xref; /* Opaque in this header; pdf_document refers to it through xref_sections and saved_xref_sections. */
5 typedef struct pdf_ocg_descriptor pdf_ocg_descriptor; /* Opaque in this header; the ocg member of pdf_document points to one. */
6 
7 typedef struct pdf_page pdf_page; /* Opaque in this header; obtained from an fz_page via pdf_page_from_fz_page. */
8 typedef struct pdf_annot pdf_annot; /* Opaque in this header. */
9 typedef struct pdf_annot pdf_widget; /* Second name for struct pdf_annot: pdf_widget and pdf_annot are the same type. */
10 typedef struct pdf_js pdf_js; /* Opaque in this header; the js member of pdf_document points to one. */
11 
12 enum /* Buffer sizes in bytes, used as array lengths by pdf_lexbuf and pdf_lexbuf_large. */
13 {
14 	PDF_LEXBUF_SMALL = 256, /* Length of the buffer embedded in pdf_lexbuf. */
15 	PDF_LEXBUF_LARGE = 65536 /* pdf_lexbuf_large adds LARGE - SMALL further bytes after its pdf_lexbuf base. */
16 };
17 
18 typedef struct /* Lexer buffer with a small built-in array. NOTE(review): the field roles below are inferred from names only - confirm against the lexer. */
19 {
20 	size_t size; /* presumably the current capacity of scratch */
21 	size_t base_size; /* presumably the capacity of the built-in buffer */
22 	size_t len; /* presumably the length of the text currently held in scratch */
23 	int64_t i; /* presumably the value of an integer token */
24 	float f; /* presumably the value of a real-number token */
25 	char *scratch; /* presumably the active text buffer; may point at buffer below */
26 	char buffer[PDF_LEXBUF_SMALL]; /* Built-in storage of PDF_LEXBUF_SMALL (256) bytes. */
27 } pdf_lexbuf;
28 
29 typedef struct /* A pdf_lexbuf followed by extra storage, giving PDF_LEXBUF_LARGE bytes of buffer in total. */
30 {
31 	pdf_lexbuf base; /* First member; its trailing buffer[PDF_LEXBUF_SMALL] is followed by the array below (contiguous only if there is no padding - TODO confirm). */
32 	char buffer[PDF_LEXBUF_LARGE - PDF_LEXBUF_SMALL]; /* The remaining 65536 - 256 = 65280 bytes. */
33 } pdf_lexbuf_large;
34 
35 /*
36 	Document event structures are mostly opaque to the app. Only the type
37 	is visible to the app.
38 */
39 typedef struct pdf_doc_event pdf_doc_event;
40 
41 /*
42 	The type of function via which the app receives document events.
43 	This names a function type, not a pointer type: hold one as pdf_doc_event_cb * (as the event_cb member of pdf_document does). NOTE(review): data is presumably the matching event_cb_data - confirm.
44 */
45 typedef void (pdf_doc_event_cb)(fz_context *ctx, pdf_document *doc, pdf_doc_event *event, void *data);
46 
47 /*
48 	Open a PDF document.
49 
50 	Open a PDF document by reading its cross reference table, so
51 	MuPDF can locate PDF objects inside the file. Upon a broken
52 	cross reference table or other parse errors MuPDF will restart
53 	parsing the file from the beginning to try to rebuild a
54 	(hopefully correct) cross reference table to allow further
55 	processing of the file.
56 
57 	The returned pdf_document should be used when calling most
58 	other PDF functions. Note that it wraps the context, so those
59 	functions implicitly get access to the global state in
60 	context.
61 
62 	filename: a path to a file as it would be given to open(2).
63 */
64 pdf_document *pdf_open_document(fz_context *ctx, const char *filename);
65 
66 /*
67 	Open a PDF document from a stream.
68 
69 	Same as pdf_open_document, but takes a stream instead of a
70 	filename to locate the PDF document to open. Increments the
71 	reference count of the stream. See fz_open_file,
72 	fz_open_file_w or fz_open_fd for opening a stream, and
73 	fz_drop_stream for closing an open stream.
74 */
75 pdf_document *pdf_open_document_with_stream(fz_context *ctx, fz_stream *file);
76 
77 /*
78 	Closes and frees an opened PDF document.
79 
80 	The resource store in the context associated with pdf_document
81 	is emptied.
82 */
83 void pdf_drop_document(fz_context *ctx, pdf_document *doc);
84 
85 pdf_document *pdf_keep_document(fz_context *ctx, pdf_document *doc); /* NOTE(review): undocumented here; by its name presumably takes an extra reference to doc, to be released with pdf_drop_document - confirm. */
86 
87 /*
88 	Down-cast a fz_document to a pdf_document.
89 	Returns NULL if the underlying document is not PDF.
90 */
91 pdf_document *pdf_specifics(fz_context *ctx, fz_document *doc);
92 
93 /*
94 	Down-cast generic fitz objects into pdf specific variants.
95 	Returns NULL if the objects are not from a PDF document.
96 */
97 pdf_document *pdf_document_from_fz_document(fz_context *ctx, fz_document *ptr);
98 pdf_page *pdf_page_from_fz_page(fz_context *ctx, fz_page *ptr);
99 
100 int pdf_needs_password(fz_context *ctx, pdf_document *doc); /* NOTE(review): undocumented here; presumably non-zero when a password must be supplied through pdf_authenticate_password - confirm. */
101 
102 /*
103 	Attempt to authenticate a password.
104 	pw: The password to try.
105 
106 	Returns 0 for failure, non-zero for success.
107 
108 	In the non-zero case:
109 		bit 0 set => no password required (result & 1)
110 		bit 1 set => user password authenticated (result & 2)
111 		bit 2 set => owner password authenticated (result & 4)
112 */
113 int pdf_authenticate_password(fz_context *ctx, pdf_document *doc, const char *pw);
114 
115 int pdf_has_permission(fz_context *ctx, pdf_document *doc, fz_permission p); /* NOTE(review): undocumented here; presumably non-zero when the document grants permission p - confirm. */
116 int pdf_lookup_metadata(fz_context *ctx, pdf_document *doc, const char *key, char *ptr, int size); /* NOTE(review): undocumented here; ptr/size look like a caller-supplied output buffer and its capacity. The meaning of the return value is not visible in this header - confirm. */
117 
118 fz_outline *pdf_load_outline(fz_context *ctx, pdf_document *doc); /* NOTE(review): undocumented here; presumably returns the document outline. Ownership of the result is not visible in this header - confirm. */
119 
120 /*
121 	Get the number of layer configurations defined in this document.
122 
123 	doc: The document in question.
124 */
125 int pdf_count_layer_configs(fz_context *ctx, pdf_document *doc);
126 
127 void pdf_invalidate_xfa(fz_context *ctx, pdf_document *doc); /* NOTE(review): undocumented here; presumably discards XFA data cached in the xfa member of pdf_document - confirm. */
128 
129 
130 typedef struct /* Result structure filled in by pdf_layer_config_info. */
131 {
132 	const char *name; /* Name of the layer config; may be set to NULL if no information is available. */
133 	const char *creator; /* Creator of the layer config; may be set to NULL if no information is available. */
134 } pdf_layer_config;
135 
136 /*
137 	Fetch the name (and optionally creator) of the given layer config.
138 
139 	doc: The document in question.
140 
141 	config_num: A value in the 0..n-1 range, where n is the
142 	value returned from pdf_count_layer_configs.
143 
144 	info: Pointer to structure to fill in. Pointers within
145 	this structure may be set to NULL if no information is
146 	available.
147 */
148 void pdf_layer_config_info(fz_context *ctx, pdf_document *doc, int config_num, pdf_layer_config *info);
149 
150 /*
151 	Set the current configuration.
152 	This updates the visibility of the optional content groups
153 	within the document.
154 
155 	doc: The document in question.
156 
157 	config_num: A value in the 0..n-1 range, where n is the
158 	value returned from pdf_count_layer_configs.
159 */
160 void pdf_select_layer_config(fz_context *ctx, pdf_document *doc, int config_num);
161 
162 /*
163 	Returns the number of entries in the 'UI' for this layer configuration.
164 
165 	doc: The document in question.
166 */
167 int pdf_count_layer_config_ui(fz_context *ctx, pdf_document *doc);
168 
169 /*
170 	Select a checkbox/radiobox within the 'UI' for this layer
171 	configuration.
172 
173 	Selecting a UI entry that is a radiobox may disable
174 	other UI entries.
175 
176 	doc: The document in question.
177 
178 	ui: A value in the 0..m-1 range, where m is the value
179 	returned by pdf_count_layer_config_ui.
180 */
181 void pdf_select_layer_config_ui(fz_context *ctx, pdf_document *doc, int ui);
182 
183 /*
184 	Deselect a checkbox/radiobox within the 'UI' for this layer configuration.
185 
186 	doc: The document in question.
187 
188 	ui: A value in the 0..m-1 range, where m is the value
189 	returned by pdf_count_layer_config_ui.
190 */
191 void pdf_deselect_layer_config_ui(fz_context *ctx, pdf_document *doc, int ui);
192 
193 /*
194 	Toggle a checkbox/radiobox within the 'UI' for this layer configuration.
195 
196 	Toggling a UI entry that is a radiobox may disable
197 	other UI entries.
198 
199 	doc: The document in question.
200 
201 	ui: A value in the 0..m-1 range, where m is the value
202 	returned by pdf_count_layer_config_ui.
203 */
204 void pdf_toggle_layer_config_ui(fz_context *ctx, pdf_document *doc, int ui);
205 
206 typedef enum /* Kind of an entry in the layer config UI; stored in the type member of pdf_layer_config_ui. */
207 {
208 	PDF_LAYER_UI_LABEL = 0, /* presumably a plain text label with no selectable state - confirm */
209 	PDF_LAYER_UI_CHECKBOX = 1, /* A checkbox entry. */
210 	PDF_LAYER_UI_RADIOBOX = 2 /* A radiobox entry; selecting or toggling one may disable other UI entries. */
211 } pdf_layer_config_ui_type;
212 
213 typedef struct /* Result structure filled in by pdf_layer_config_ui_info for one UI entry. */
214 {
215 	const char *text; /* presumably the text to display for the entry - confirm */
216 	int depth; /* presumably the nesting depth of the entry - confirm */
217 	pdf_layer_config_ui_type type; /* Label, checkbox or radiobox. */
218 	int selected; /* presumably non-zero when the entry is currently selected - confirm */
219 	int locked; /* presumably non-zero when the entry cannot be changed - confirm */
220 } pdf_layer_config_ui;
221 
222 /*
223 	Get the info for a given entry in the layer config ui.
224 
225 	doc: The document in question.
226 
227 	ui: A value in the 0..m-1 range, where m is the value
228 	returned by pdf_count_layer_config_ui.
229 
230 	info: Pointer to a structure to fill in with information
231 	about the requested ui entry.
232 */
233 void pdf_layer_config_ui_info(fz_context *ctx, pdf_document *doc, int ui, pdf_layer_config_ui *info);
234 
235 /*
236 	Write the current layer config back into the document as the default state.
237 */
238 void pdf_set_layer_config_as_default(fz_context *ctx, pdf_document *doc);
239 
240 /*
241 	Determine whether changes have been made since the
242 	document was opened or last saved.
243 */
244 int pdf_has_unsaved_changes(fz_context *ctx, pdf_document *doc);
245 
246 /*
247 	Determine if this PDF has been repaired since opening.
248 */
249 int pdf_was_repaired(fz_context *ctx, pdf_document *doc);
250 
251 /* Object that can perform the cryptographic operation necessary for document signing */
252 typedef struct pdf_pkcs7_signer pdf_pkcs7_signer;
253 
254 /* Unsaved signature fields, kept as a singly linked list (see next). */
255 typedef struct pdf_unsaved_sig
256 {
257 	pdf_obj *field; /* presumably the signature field object awaiting signing - confirm */
258 	size_t byte_range_start; /* NOTE(review): the four offsets below presumably locate the byte-range and contents entries to patch on save - confirm. */
259 	size_t byte_range_end;
260 	size_t contents_start;
261 	size_t contents_end;
262 	pdf_pkcs7_signer *signer; /* Signer object (see pdf_pkcs7_signer) associated with this entry. */
263 	struct pdf_unsaved_sig *next; /* Next entry in the list. */
264 } pdf_unsaved_sig;
265 
266 typedef struct /* Entry type of the rev_page_map member of pdf_document. */
267 {
268 	int page; /* presumably a page number - confirm */
269 	int object; /* presumably the object number of that page - confirm */
270 } pdf_rev_page_map;
271 
272 typedef struct
273 {
274 	int number; /* Page object number */
275 	int64_t offset; /* Offset of page object */
276 	int64_t index; /* Index into shared hint_shared_ref */
277 } pdf_hint_page;
278 
279 typedef struct
280 {
281 	int number; /* Object number of first object */
282 	int64_t offset; /* Offset of first object */
283 } pdf_hint_shared;
284 
285 typedef struct { /* One key/value pair; element type of pdf_xfa.entries. */
286 	char *key; /* NOTE(review): presumably the name of an XFA packet - confirm. */
287 	fz_xml_doc *value; /* XML document associated with key. */
288 } pdf_xfa_entry;
289 
290 typedef struct { /* Collection of pdf_xfa_entry; embedded in pdf_document as the xfa member. */
291 	int count; /* presumably the number of elements in entries - confirm */
292 	pdf_xfa_entry *entries;
293 } pdf_xfa;
294 
295 struct pdf_document
296 {
297 	fz_document super; /* Generic fitz document, embedded as the first member. NOTE(review): pdf_specifics / pdf_document_from_fz_document presumably rely on this placement - confirm. */
298 
299 	fz_stream *file; /* NOTE(review): presumably the stream the document is read from (see pdf_open_document_with_stream) - confirm. */
300 
301 	int version;
302 	int64_t startxref;
303 	int64_t file_size;
304 	pdf_crypt *crypt;
305 	pdf_ocg_descriptor *ocg;
306 	fz_colorspace *oi;
307 
308 	int max_xref_len;
309 	int num_xref_sections;
310 	int saved_num_xref_sections;
311 	int num_incremental_sections;
312 	int xref_base;
313 	int disallow_new_increments;
314 	pdf_xref *xref_sections;
315 	pdf_xref *saved_xref_sections;
316 	int *xref_index;
317 	int save_in_progress;
318 	int has_xref_streams;
319 	int has_old_style_xrefs;
320 	int has_linearization_object;
321 
322 	int rev_page_count; /* presumably the number of elements in rev_page_map - confirm */
323 	pdf_rev_page_map *rev_page_map;
324 
325 	int repair_attempted;
326 
327 	/* State indicating which file parsing method we are using */
328 	int file_reading_linearly;
329 	int64_t file_length;
330 
331 	int linear_page_count;
332 	pdf_obj *linear_obj; /* Linearized object (if used) */
333 	pdf_obj **linear_page_refs; /* Page objects for linear loading */
334 	int linear_page1_obj_num;
335 
336 	/* The state for the pdf_progressive_advance parser */
337 	int64_t linear_pos;
338 	int linear_page_num;
339 
340 	int hint_object_offset;
341 	int hint_object_length;
342 	int hints_loaded; /* Set to 1 after the hints loading has completed,
343 			   * whether successful or not! */
344 	/* Page n references shared object references:
345 	 *   hint_shared_ref[i]
346 	 * where
347 	 *      i = s to e-1
348 	 *	s = hint_page[n].index
349 	 *	e = hint_page[n+1].index
350 	 * Shared object reference r accesses objects:
351 	 *   rs to re-1
352 	 * where
353 	 *   rs = hint_shared[r].number
354 	 *   re = hint_shared[r].count + rs
355 	 * These are guaranteed to lie within the region starting at
356 	 * hint_shared[r].offset of length hint_shared[r].length
357 	 * NOTE(review): pdf_hint_shared as declared in this header has only number and offset; the count and length members named above are not visible here - confirm. */
358 	pdf_hint_page *hint_page;
359 	int *hint_shared_ref;
360 	pdf_hint_shared *hint_shared;
361 	int hint_obj_offsets_max;
362 	int64_t *hint_obj_offsets;
363 
364 	int resources_localised;
365 
366 	pdf_lexbuf_large lexbuf; /* Lexer buffer embedded by value (PDF_LEXBUF_LARGE bytes of buffer space). */
367 
368 	pdf_js *js;
369 
370 	int recalculate;
371 	int dirty; /* NOTE(review): presumably what pdf_has_unsaved_changes reports - confirm. */
372 	int redacted; /* NOTE(review): presumably consulted by pdf_can_be_saved_incrementally - confirm. */
373 
374 	pdf_doc_event_cb *event_cb; /* Document event callback. */
375 	void *event_cb_data; /* NOTE(review): presumably passed as the data argument of event_cb - confirm. */
376 
377 	int num_type3_fonts;
378 	int max_type3_fonts;
379 	fz_font **type3_fonts;
380 
381 	struct {
382 		fz_hash_table *fonts;
383 	} resources;
384 
385 	int orphans_max;
386 	int orphans_count;
387 	pdf_obj **orphans;
388 
389 	pdf_xfa xfa;
390 };
391 
392 pdf_document *pdf_create_document(fz_context *ctx); /* NOTE(review): undocumented here; presumably creates a new, empty PDF document to be released with pdf_drop_document - confirm. */
393 
394 typedef struct pdf_graft_map pdf_graft_map; /* Opaque in this header; created by pdf_new_graft_map and used by pdf_graft_mapped_object / pdf_graft_mapped_page. */
395 
396 /*
397 	Return a deep copied object equivalent to the
398 	supplied object, suitable for use within the given document.
399 
400 	dst: The document in which the returned object is to be used.
401 
402 	obj: The object to deep copy.
403 
404 	Note: If grafting multiple objects, you should use a pdf_graft_map
405 	to avoid potential duplication of target objects.
406 */
407 pdf_obj *pdf_graft_object(fz_context *ctx, pdf_document *dst, pdf_obj *obj);
408 
409 /*
410 	Prepare a graft map object to allow objects
411 	to be deep copied from one document to the given one, avoiding
412 	problems with duplicated child objects.
413 
414 	dst: The document to copy objects to.
415 
416 	Note: all the source objects must come from the same document.
417 */
418 pdf_graft_map *pdf_new_graft_map(fz_context *ctx, pdf_document *dst);
419 
420 pdf_graft_map *pdf_keep_graft_map(fz_context *ctx, pdf_graft_map *map); /* NOTE(review): undocumented here; by its name presumably takes an extra reference to map - confirm. */
421 void pdf_drop_graft_map(fz_context *ctx, pdf_graft_map *map); /* NOTE(review): undocumented here; by its name presumably releases a reference to map - confirm. */
422 
423 /*
424 	Return a deep copied object equivalent
425 	to the supplied object, suitable for use within the target
426 	document of the map.
427 
428 	map: A map targeted at the document in which the returned
429 	object is to be used.
430 
431 	obj: The object to be copied.
432 
433 	Note: Copying multiple objects via the same graft map ensures
434 	that any shared children are not copied more than once.
435 */
436 pdf_obj *pdf_graft_mapped_object(fz_context *ctx, pdf_graft_map *map, pdf_obj *obj);
437 
438 /*
439 	Graft a page (and its resources) from the src document to the
440 	destination document of the graft. This involves a deep copy
441 	of the objects in question.
442 
443 	map: A map targeted at the document into which the page should
444 	be inserted (pdf_graft_mapped_page only; pdf_graft_page takes the destination document dst directly).
445 
446 	page_to: The position within the destination document at which
447 	the page should be inserted (pages numbered from 0, with -1
448 	meaning "at the end").
449 
450 	src: The document from which the page should be copied.
451 
452 	page_from: The page number which should be copied from the src
453 	document (pages numbered from 0, with -1 meaning "at the end").
454 */
455 void pdf_graft_page(fz_context *ctx, pdf_document *dst, int page_to, pdf_document *src, int page_from);
456 void pdf_graft_mapped_page(fz_context *ctx, pdf_graft_map *map, int page_to, pdf_document *src, int page_from);
457 
458 /*
459 	Create a device that will record the
460 	graphical operations given to it into a sequence of
461 	pdf operations, together with a set of resources. This
462 	sequence/set pair can then be used as the basis for
463 	adding a page to the document (see pdf_add_page).
464 
465 	doc: The document for which these are intended.
466 
467 	mediabox: The bbox for the created page.
468 
469 	presources: Pointer to a place to put the created
470 	resources dictionary.
471 
472 	pcontents: Pointer to a place to put the created
473 	contents buffer.
474 */
475 fz_device *pdf_page_write(fz_context *ctx, pdf_document *doc, fz_rect mediabox, pdf_obj **presources, fz_buffer **pcontents);
476 
477 /*
478 	Create a pdf_obj within a document that
479 	represents a page, from a previously created resources
480 	dictionary and page content stream. This should then be
481 	inserted into the document using pdf_insert_page.
482 
483 	After this call the page exists within the document
484 	structure, but is not actually ever displayed as it is
485 	not linked into the PDF page tree.
486 
487 	doc: The document to which to add the page.
488 
489 	mediabox: The mediabox for the page (should be identical
490 	to that used when creating the resources/contents).
491 
492 	rotate: 0, 90, 180 or 270. The rotation to use for the
493 	page.
494 
495 	resources: The resources dictionary for the new page
496 	(typically created by pdf_page_write).
497 
498 	contents: The page contents for the new page (typically
499 	created by pdf_page_write).
500 */
501 pdf_obj *pdf_add_page(fz_context *ctx, pdf_document *doc, fz_rect mediabox, int rotate, pdf_obj *resources, fz_buffer *contents);
502 
503 /*
504 	Insert a page previously created by
505 	pdf_add_page into the pages tree of the document.
506 
507 	doc: The document to insert into.
508 
509 	at: The page number to insert at. 0 inserts at the start.
510 	negative numbers, or INT_MAX insert at the end. Otherwise
511 	n inserts after page n.
512 
513 	page: The page to insert.
514 */
515 void pdf_insert_page(fz_context *ctx, pdf_document *doc, int at, pdf_obj *page);
516 
517 /*
518 	Delete a page from the page tree of
519 	a document. This does not remove the page contents
520 	or resources from the file.
521 
522 	doc: The document to operate on.
523 
524 	number: The page to remove (numbered from 0)
525 */
526 void pdf_delete_page(fz_context *ctx, pdf_document *doc, int number);
527 
528 /*
529 	Delete a range of pages from the
530 	page tree of a document. This does not remove the page
531 	contents or resources from the file.
532 
533 	doc: The document to operate on.
534 
535 	start, end: The range of pages (numbered from 0)
536 	(inclusive, exclusive) to remove. If end is negative or
537 	greater than the number of pages in the document, it
538 	will be taken to be the end of the document.
539 */
540 void pdf_delete_page_range(fz_context *ctx, pdf_document *doc, int start, int end);
541 
542 fz_text_language pdf_document_language(fz_context *ctx, pdf_document *doc); /* NOTE(review): undocumented here; presumably returns the language recorded for the document - confirm. */
543 void pdf_set_document_language(fz_context *ctx, pdf_document *doc, fz_text_language lang); /* NOTE(review): undocumented here; presumably records lang as the document language - confirm. */
544 
545 /*
546 	In calls to pdf_save_document and pdf_write_document, the following options structure can be used
547 	to control aspects of the writing process. This structure may grow
548 	in the future, and should be zero-filled to allow forwards compatibility.
549 */
550 typedef struct
551 {
552 	int do_incremental; /* Write just the changed objects. */
553 	int do_pretty; /* Pretty-print dictionaries and arrays. */
554 	int do_ascii; /* ASCII hex encode binary streams. */
555 	int do_compress; /* Compress streams. */
556 	int do_compress_images; /* Compress (or leave compressed) image streams. */
557 	int do_compress_fonts; /* Compress (or leave compressed) font streams. */
558 	int do_decompress; /* Decompress streams (except when compressing images/fonts). */
559 	int do_garbage; /* Garbage collect objects before saving; 1=gc, 2=re-number, 3=de-duplicate. */
560 	int do_linear; /* Write linearised. */
561 	int do_clean; /* Clean content streams. */
562 	int do_sanitize; /* Sanitize content streams. */
563 	int do_appearance; /* (Re)create appearance streams. */
564 	int do_encrypt; /* Encryption method to use: keep, none, rc4-40, etc. */
565 	int permissions; /* Document encryption permissions. */
566 	char opwd_utf8[128]; /* Owner password. */
567 	char upwd_utf8[128]; /* User password. */
568 } pdf_write_options;
569 
570 extern const pdf_write_options pdf_default_write_options; /* Declared here and defined elsewhere; its field values are not visible in this header. */
571 
572 /*
573 	Parse the option string args into the pdf_write_options struct opts.
574 	Matches the command line options to 'mutool clean':
575 		g: garbage collect
576 		d, i, f: expand all, fonts, images
577 		l: linearize
578 		a: ascii hex encode
579 		z: deflate
580 		c: clean content streams
581 		s: sanitize content streams
582 */
583 pdf_write_options *pdf_parse_write_options(fz_context *ctx, pdf_write_options *opts, const char *args); /* NOTE(review): the returned pointer is presumably opts - confirm. */
584 
585 /*
586 	Returns true if there are digital signatures waiting
587 	to be updated on save.
588 */
589 int pdf_has_unsaved_sigs(fz_context *ctx, pdf_document *doc);
590 
591 /*
592 	Write out the document to an output stream with all changes finalised.
593 */
594 void pdf_write_document(fz_context *ctx, pdf_document *doc, fz_output *out, pdf_write_options *opts);
595 
596 /*
597 	Write out the document to a file with all changes finalised.
598 */
599 void pdf_save_document(fz_context *ctx, pdf_document *doc, const char *filename, pdf_write_options *opts);
600 
601 char *pdf_format_write_options(fz_context *ctx, char *buffer, size_t buffer_len, const pdf_write_options *opts); /* NOTE(review): undocumented here; presumably the inverse of pdf_parse_write_options, formatting opts as an option string into buffer (capacity buffer_len). Return value semantics are not visible in this header - confirm. */
602 
603 /*
604 	Return true if the document can be saved incrementally. Applying
605 	redactions or having a repaired document make incremental saving
606 	impossible.
607 */
608 int pdf_can_be_saved_incrementally(fz_context *ctx, pdf_document *doc);
609 
610 #endif
611