1 // Copyright (c) 2005-2021 Jay Berkenbilt
2 //
3 // This file is part of qpdf.
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License");
6 // you may not use this file except in compliance with the License.
7 // You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
17 // Versions of qpdf prior to version 7 were released under the terms
18 // of version 2.0 of the Artistic License. At your option, you may
19 // continue to consider qpdf to be licensed under those terms. Please
20 // see the manual for additional information.
21 
22 #ifndef QPDFACROFORMDOCUMENTHELPER_HH
23 #define QPDFACROFORMDOCUMENTHELPER_HH
24 
25 // This document helper is intended to help with operations on
26 // interactive forms. Here are the key things to know:
27 
28 // * The PDF specification talks about interactive forms and also
29 //   about form XObjects. While form XObjects appear in parts of
30 //   interactive forms, this class is concerned about interactive
31 //   forms, not form XObjects.
32 //
33 // * Interactive forms are discussed in the PDF Specification (ISO PDF
34 //   32000-1:2008) section 12.7. Also relevant is the section about
35 //   Widget annotations. Annotations are discussed in section 12.5
36 //   with annotation dictionaries discussed in 12.5.1. Widget
37 //   annotations are discussed specifically in section 12.5.6.19.
38 //
39 // * What you need to know about the structure of interactive forms in
40 //   PDF files:
41 //
42 //   - The document catalog contains the key "/AcroForm" which
43 //     contains a list of fields. Fields are represented as a tree
44 //     structure much like pages. Nodes in the fields tree may contain
45 //     other fields. Fields may inherit values of many of their
46 //     attributes from ancestors in the tree.
47 //
48 //   - Fields may also have children that are widget annotations. As a
49 //     special case, and a cause of considerable confusion, if a field
50 //     has a single annotation as a child, the annotation dictionary
51 //     may be merged with the field dictionary. In that case, the
52 //     field and the annotation are in the same object. Note that,
53 //     while field dictionary attributes are inherited, annotation
54 //     dictionary attributes are not.
55 //
//   - A page dictionary contains a key called "/Annots" which
//     contains a simple list of annotations. For any given annotation
//     of subtype "/Widget", you should encounter that annotation in
//     the "/Annots" array of a page, and you should also be able
//     to reach it by traversing through the "/AcroForm" dictionary
//     from the document catalog. In the simplest case (and also a
//     very common case), a form field's widget annotation will be
//     merged with the field object, and the object will appear
//     directly both under "/Annots" in the page dictionary and under
//     "/Fields" in the "/AcroForm" dictionary. In a more complex
//     case, you may have to trace through various "/Kids" elements in
//     the "/AcroForm" field entry until you find the annotation
//     dictionary.
69 
70 
71 #include <qpdf/QPDFDocumentHelper.hh>
72 
73 #include <qpdf/DLL.h>
74 
75 #include <qpdf/QPDFAnnotationObjectHelper.hh>
76 #include <qpdf/QPDFFormFieldObjectHelper.hh>
77 #include <qpdf/QPDFPageObjectHelper.hh>
78 
79 #include <map>
80 #include <set>
81 #include <vector>
82 
83 class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper
84 {
85   public:
86     QPDF_DLL
87     QPDFAcroFormDocumentHelper(QPDF&);
88     QPDF_DLL
~QPDFAcroFormDocumentHelper()89     virtual ~QPDFAcroFormDocumentHelper()
90     {
91     }
92 
93     // This class lazily creates an internal cache of the mapping
94     // among form fields, annotations, and pages. Methods within this
95     // class preserve the validity of this cache. However, if you
96     // modify pages' annotation dictionaries, the document's /AcroForm
97     // dictionary, or any form fields manually in a way that alters
98     // the association between forms, fields, annotations, and pages,
99     // it may cause this cache to become invalid. This method marks
100     // the cache invalid and forces it to be regenerated the next time
101     // it is needed.
102     QPDF_DLL
103     void invalidateCache();
104 
105     QPDF_DLL
106     bool hasAcroForm();
107 
108     // Add a form field, initializing the document's AcroForm
109     // dictionary if needed, updating the cache if necessary. Note
110     // that you are adding fields that are copies of other fields,
111     // this method may result in multiple fields existing with the
112     // same qualified name, which can have unexpected side effects. In
113     // that case, you should use addAndRenameFormFields() instead.
114     QPDF_DLL
115     void addFormField(QPDFFormFieldObjectHelper);
116 
117     // Add a collection of form fields making sure that their fully
118     // qualified names don't conflict with already present form
119     // fields. Fields within the collection of new fields that have
120     // the same name as each other will continue to do so.
121     QPDF_DLL
122     void addAndRenameFormFields(std::vector<QPDFObjectHandle> fields);
123 
124     // Remove fields from the fields array
125     QPDF_DLL
126     void removeFormFields(std::set<QPDFObjGen> const&);
127 
128     // Set the name of a field, updating internal records of field
129     // names. Name should be UTF-8 encoded.
130     QPDF_DLL
131     void setFormFieldName(QPDFFormFieldObjectHelper, std::string const& name);
132 
133     // Return a vector of all terminal fields in a document. Terminal
134     // fields are fields that have no children that are also fields.
135     // Terminal fields may still have children that are annotations.
136     // Intermediate nodes in the fields tree are not included in this
137     // list, but you can still reach them through the getParent method
138     // of the field object helper.
139     QPDF_DLL
140     std::vector<QPDFFormFieldObjectHelper> getFormFields();
141 
142     // Return all the form fields that have the given fully-qualified
143     // name and also have an explicit "/T" attribute. For this
144     // information to be accurate, any changes to field names must be
145     // done through setFormFieldName() above.
146     QPDF_DLL
147     std::set<QPDFObjGen>
148     getFieldsWithQualifiedName(std::string const& name);
149 
150     // Return the annotations associated with a terminal field. Note
151     // that in the case of a field having a single annotation, the
152     // underlying object will typically be the same as the underlying
153     // object for the field.
154     QPDF_DLL
155     std::vector<QPDFAnnotationObjectHelper>
156     getAnnotationsForField(QPDFFormFieldObjectHelper);
157 
158     // Return annotations of subtype /Widget for a page.
159     QPDF_DLL
160     std::vector<QPDFAnnotationObjectHelper>
161     getWidgetAnnotationsForPage(QPDFPageObjectHelper);
162 
163     // Return top-level form fields for a page.
164     QPDF_DLL
165     std::vector<QPDFFormFieldObjectHelper>
166     getFormFieldsForPage(QPDFPageObjectHelper);
167 
168     // Return the terminal field that is associated with this
169     // annotation. If the annotation dictionary is merged with the
170     // field dictionary, the underlying object will be the same, but
171     // this is not always the case. Note that if you call this method
172     // with an annotation that is not a widget annotation, there will
173     // not be an associated field, and this method will return a
174     // helper associated with a null object (isNull() == true).
175     QPDF_DLL
176     QPDFFormFieldObjectHelper
177     getFieldForAnnotation(QPDFAnnotationObjectHelper);
178 
179     // Return the current value of /NeedAppearances. If
180     // /NeedAppearances is missing, return false as that is how PDF
181     // viewers are supposed to interpret it.
182     QPDF_DLL
183     bool getNeedAppearances();
184 
185     // Indicate whether appearance streams must be regenerated. If you
186     // modify a field value, you should call setNeedAppearances(true)
187     // unless you also generate an appearance stream for the
188     // corresponding annotation at the same time. If you generate
189     // appearance streams for all fields, you can call
190     // setNeedAppearances(false). If you use
191     // QPDFFormFieldObjectHelper::setV, it will automatically call
192     // this method unless you tell it not to.
193     QPDF_DLL
194     void setNeedAppearances(bool);
195 
196     // If /NeedAppearances is false, do nothing. Otherwise generate
197     // appearance streams for all widget annotations that need them.
198     // See comments in QPDFFormFieldObjectHelper.hh for
199     // generateAppearance for limitations. For checkbox and radio
200     // button fields, this code ensures that appearance state is
201     // consistent with the field's value and uses any pre-existing
202     // appearance streams.
203     QPDF_DLL
204     void generateAppearancesIfNeeded();
205 
206     // Note: this method works on all annotations, not just ones with
207     // associated fields. For each annotation in old_annots, apply the
208     // given transformation matrix to create a new annotation. New
209     // annotations are appended to new_annots. If the annotation is
210     // associated with a form field, a new form field is created that
211     // points to the new annotation and is appended to new_fields, and
212     // the old field is added to old_fields.
213     //
214     // old_annots may belong to a different QPDF object. In that case,
215     // you should pass in from_qpdf, and copyForeignObject will be
216     // called automatically. If this is the case, for efficiency, you
217     // may pass in a QPDFAcroFormDocumentHelper for the other file to
218     // avoid the expensive process of creating one for each call to
219     // transformAnnotations. New fields and annotations are not added
220     // to the document or pages. You have to do that yourself after
221     // calling transformAnnotations. If this operation will leave
222     // orphaned fields behind, such as if you are replacing the old
223     // annotations with the new ones on the same page and the fields
224     // and annotations are not shared, you will also need to remove
225     // the old fields to prevent them from hanging round unreferenced.
226     QPDF_DLL
227     void transformAnnotations(
228         QPDFObjectHandle old_annots,
229         std::vector<QPDFObjectHandle>& new_annots,
230         std::vector<QPDFObjectHandle>& new_fields,
231         std::set<QPDFObjGen>& old_fields,
232         QPDFMatrix const& cm,
233         QPDF* from_qpdf = nullptr,
234         QPDFAcroFormDocumentHelper* from_afdh = nullptr);
235 
236     // Copy form fields and annotations from one page to another,
237     // allowing the from page to be in a different QPDF or in the same
238     // QPDF. This would typically be called after calling addPage to
239     // add field/annotation awareness. When just copying the page by
240     // itself, annotations end up being shared, and fields end up
241     // being omitted because there is no reference to the field from
242     // the page. This method ensures that each separate copy of a page
243     // has private annotations and that fields and annotations are
244     // properly updated to resolve conflicts that may occur from
245     // common resource and field names across documents. It is
246     // basically a wrapper around transformAnnotations that handles
247     // updating the receiving page. If new_fields is non-null, any
248     // newly created fields are added to it.
249     QPDF_DLL
250     void fixCopiedAnnotations(
251         QPDFObjectHandle to_page,
252         QPDFObjectHandle from_page,
253         QPDFAcroFormDocumentHelper& from_afdh,
254         std::set<QPDFObjGen>* new_fields = nullptr);
255 
256     // copyFieldsFromForeignPage was added in qpdf 10.2 and made to do
257     // nothing in 10.3. It wasn't actually doing the right thing and
258     // would result in broken files in all but the simplest case of a
259     // single page from one file being added to another file, as
260     // happens with qpdf --split-pages.
261     [[deprecated("Use fixCopiedAnnotations instead")]]
262     // ABI: delete this method
263     QPDF_DLL
264     void copyFieldsFromForeignPage(
265         QPDFPageObjectHelper foreign_page,
266         QPDFAcroFormDocumentHelper& foreign_afdh,
267         std::vector<QPDFObjectHandle>* copied_fields = nullptr);
268 
269   private:
270     void analyze();
271     void traverseField(QPDFObjectHandle field,
272                        QPDFObjectHandle parent,
273                        int depth, std::set<QPDFObjGen>& visited);
274     QPDFObjectHandle getOrCreateAcroForm();
275     void adjustInheritedFields(
276         QPDFObjectHandle obj,
277         bool override_da, std::string const& from_default_da,
278         bool override_q, int from_default_q);
279     void adjustDefaultAppearances(
280         QPDFObjectHandle obj,
281         std::map<std::string,
282                  std::map<std::string, std::string>> const& dr_map);
283     void adjustAppearanceStream(
284         QPDFObjectHandle stream,
285         std::map<std::string,
286                  std::map<std::string, std::string>> dr_map);
287 
288     class Members
289     {
290         friend class QPDFAcroFormDocumentHelper;
291 
292       public:
293         QPDF_DLL
294         ~Members();
295 
296       private:
297         Members();
298         Members(Members const&);
299 
300         bool cache_valid;
301         std::map<QPDFObjGen,
302                  std::vector<QPDFAnnotationObjectHelper>
303                  > field_to_annotations;
304         std::map<QPDFObjGen, QPDFFormFieldObjectHelper> annotation_to_field;
305         std::map<QPDFObjGen, std::string> field_to_name;
306         std::map<std::string, std::set<QPDFObjGen>> name_to_fields;
307     };
308 
309     PointerHolder<Members> m;
310 };
311 
312 #endif // QPDFACROFORMDOCUMENTHELPER_HH
313