// Copyright (c) 2005-2021 Jay Berkenbilt
//
// This file is part of qpdf.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Versions of qpdf prior to version 7 were released under the terms
// of version 2.0 of the Artistic License. At your option, you may
// continue to consider qpdf to be licensed under those terms. Please
// see the manual for additional information.

#ifndef QPDFACROFORMDOCUMENTHELPER_HH
#define QPDFACROFORMDOCUMENTHELPER_HH

// This document helper is intended to help with operations on
// interactive forms. Here are the key things to know:

// * The PDF specification talks about interactive forms and also
//   about form XObjects. While form XObjects appear in parts of
//   interactive forms, this class is concerned about interactive
//   forms, not form XObjects.
//
// * Interactive forms are discussed in the PDF Specification (ISO PDF
//   32000-1:2008) section 12.7. Also relevant is the section about
//   Widget annotations. Annotations are discussed in section 12.5
//   with annotation dictionaries discussed in 12.5.1. Widget
//   annotations are discussed specifically in section 12.5.6.19.
//
// * What you need to know about the structure of interactive forms in
//   PDF files:
//
//   - The document catalog contains the key "/AcroForm" which
//     contains a list of fields. Fields are represented as a tree
//     structure much like pages. Nodes in the fields tree may contain
//     other fields. Fields may inherit values of many of their
//     attributes from ancestors in the tree.
//
//   - Fields may also have children that are widget annotations. As a
//     special case, and a cause of considerable confusion, if a field
//     has a single annotation as a child, the annotation dictionary
//     may be merged with the field dictionary. In that case, the
//     field and the annotation are in the same object. Note that,
//     while field dictionary attributes are inherited, annotation
//     dictionary attributes are not.
//
//   - A page dictionary contains a key called "/Annots" which
//     contains a simple list of annotations. For any given annotation
//     of subtype "/Widget", you should encounter that annotation in
//     the "/Annots" array of a page, and you should also be able
//     to reach it by traversing through the "/AcroForm" dictionary
//     from the document catalog. In the simplest case (and also a
//     very common case), a form field's widget annotation will be
//     merged with the field object, and the object will appear
//     directly both under "/Annots" in the page dictionary and under
//     "/Fields" in the "/AcroForm" dictionary. In a more complex
//     case, you may have to trace through various "/Kids" elements in
//     the "/AcroForm" field entry until you find the annotation
//     dictionary.
69 70 71 #include <qpdf/QPDFDocumentHelper.hh> 72 73 #include <qpdf/DLL.h> 74 75 #include <qpdf/QPDFAnnotationObjectHelper.hh> 76 #include <qpdf/QPDFFormFieldObjectHelper.hh> 77 #include <qpdf/QPDFPageObjectHelper.hh> 78 79 #include <map> 80 #include <set> 81 #include <vector> 82 83 class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper 84 { 85 public: 86 QPDF_DLL 87 QPDFAcroFormDocumentHelper(QPDF&); 88 QPDF_DLL ~QPDFAcroFormDocumentHelper()89 virtual ~QPDFAcroFormDocumentHelper() 90 { 91 } 92 93 // This class lazily creates an internal cache of the mapping 94 // among form fields, annotations, and pages. Methods within this 95 // class preserve the validity of this cache. However, if you 96 // modify pages' annotation dictionaries, the document's /AcroForm 97 // dictionary, or any form fields manually in a way that alters 98 // the association between forms, fields, annotations, and pages, 99 // it may cause this cache to become invalid. This method marks 100 // the cache invalid and forces it to be regenerated the next time 101 // it is needed. 102 QPDF_DLL 103 void invalidateCache(); 104 105 QPDF_DLL 106 bool hasAcroForm(); 107 108 // Add a form field, initializing the document's AcroForm 109 // dictionary if needed, updating the cache if necessary. Note 110 // that you are adding fields that are copies of other fields, 111 // this method may result in multiple fields existing with the 112 // same qualified name, which can have unexpected side effects. In 113 // that case, you should use addAndRenameFormFields() instead. 114 QPDF_DLL 115 void addFormField(QPDFFormFieldObjectHelper); 116 117 // Add a collection of form fields making sure that their fully 118 // qualified names don't conflict with already present form 119 // fields. Fields within the collection of new fields that have 120 // the same name as each other will continue to do so. 
121 QPDF_DLL 122 void addAndRenameFormFields(std::vector<QPDFObjectHandle> fields); 123 124 // Remove fields from the fields array 125 QPDF_DLL 126 void removeFormFields(std::set<QPDFObjGen> const&); 127 128 // Set the name of a field, updating internal records of field 129 // names. Name should be UTF-8 encoded. 130 QPDF_DLL 131 void setFormFieldName(QPDFFormFieldObjectHelper, std::string const& name); 132 133 // Return a vector of all terminal fields in a document. Terminal 134 // fields are fields that have no children that are also fields. 135 // Terminal fields may still have children that are annotations. 136 // Intermediate nodes in the fields tree are not included in this 137 // list, but you can still reach them through the getParent method 138 // of the field object helper. 139 QPDF_DLL 140 std::vector<QPDFFormFieldObjectHelper> getFormFields(); 141 142 // Return all the form fields that have the given fully-qualified 143 // name and also have an explicit "/T" attribute. For this 144 // information to be accurate, any changes to field names must be 145 // done through setFormFieldName() above. 146 QPDF_DLL 147 std::set<QPDFObjGen> 148 getFieldsWithQualifiedName(std::string const& name); 149 150 // Return the annotations associated with a terminal field. Note 151 // that in the case of a field having a single annotation, the 152 // underlying object will typically be the same as the underlying 153 // object for the field. 154 QPDF_DLL 155 std::vector<QPDFAnnotationObjectHelper> 156 getAnnotationsForField(QPDFFormFieldObjectHelper); 157 158 // Return annotations of subtype /Widget for a page. 159 QPDF_DLL 160 std::vector<QPDFAnnotationObjectHelper> 161 getWidgetAnnotationsForPage(QPDFPageObjectHelper); 162 163 // Return top-level form fields for a page. 164 QPDF_DLL 165 std::vector<QPDFFormFieldObjectHelper> 166 getFormFieldsForPage(QPDFPageObjectHelper); 167 168 // Return the terminal field that is associated with this 169 // annotation. 
If the annotation dictionary is merged with the 170 // field dictionary, the underlying object will be the same, but 171 // this is not always the case. Note that if you call this method 172 // with an annotation that is not a widget annotation, there will 173 // not be an associated field, and this method will return a 174 // helper associated with a null object (isNull() == true). 175 QPDF_DLL 176 QPDFFormFieldObjectHelper 177 getFieldForAnnotation(QPDFAnnotationObjectHelper); 178 179 // Return the current value of /NeedAppearances. If 180 // /NeedAppearances is missing, return false as that is how PDF 181 // viewers are supposed to interpret it. 182 QPDF_DLL 183 bool getNeedAppearances(); 184 185 // Indicate whether appearance streams must be regenerated. If you 186 // modify a field value, you should call setNeedAppearances(true) 187 // unless you also generate an appearance stream for the 188 // corresponding annotation at the same time. If you generate 189 // appearance streams for all fields, you can call 190 // setNeedAppearances(false). If you use 191 // QPDFFormFieldObjectHelper::setV, it will automatically call 192 // this method unless you tell it not to. 193 QPDF_DLL 194 void setNeedAppearances(bool); 195 196 // If /NeedAppearances is false, do nothing. Otherwise generate 197 // appearance streams for all widget annotations that need them. 198 // See comments in QPDFFormFieldObjectHelper.hh for 199 // generateAppearance for limitations. For checkbox and radio 200 // button fields, this code ensures that appearance state is 201 // consistent with the field's value and uses any pre-existing 202 // appearance streams. 203 QPDF_DLL 204 void generateAppearancesIfNeeded(); 205 206 // Note: this method works on all annotations, not just ones with 207 // associated fields. For each annotation in old_annots, apply the 208 // given transformation matrix to create a new annotation. New 209 // annotations are appended to new_annots. 
If the annotation is 210 // associated with a form field, a new form field is created that 211 // points to the new annotation and is appended to new_fields, and 212 // the old field is added to old_fields. 213 // 214 // old_annots may belong to a different QPDF object. In that case, 215 // you should pass in from_qpdf, and copyForeignObject will be 216 // called automatically. If this is the case, for efficiency, you 217 // may pass in a QPDFAcroFormDocumentHelper for the other file to 218 // avoid the expensive process of creating one for each call to 219 // transformAnnotations. New fields and annotations are not added 220 // to the document or pages. You have to do that yourself after 221 // calling transformAnnotations. If this operation will leave 222 // orphaned fields behind, such as if you are replacing the old 223 // annotations with the new ones on the same page and the fields 224 // and annotations are not shared, you will also need to remove 225 // the old fields to prevent them from hanging round unreferenced. 226 QPDF_DLL 227 void transformAnnotations( 228 QPDFObjectHandle old_annots, 229 std::vector<QPDFObjectHandle>& new_annots, 230 std::vector<QPDFObjectHandle>& new_fields, 231 std::set<QPDFObjGen>& old_fields, 232 QPDFMatrix const& cm, 233 QPDF* from_qpdf = nullptr, 234 QPDFAcroFormDocumentHelper* from_afdh = nullptr); 235 236 // Copy form fields and annotations from one page to another, 237 // allowing the from page to be in a different QPDF or in the same 238 // QPDF. This would typically be called after calling addPage to 239 // add field/annotation awareness. When just copying the page by 240 // itself, annotations end up being shared, and fields end up 241 // being omitted because there is no reference to the field from 242 // the page. 
This method ensures that each separate copy of a page 243 // has private annotations and that fields and annotations are 244 // properly updated to resolve conflicts that may occur from 245 // common resource and field names across documents. It is 246 // basically a wrapper around transformAnnotations that handles 247 // updating the receiving page. If new_fields is non-null, any 248 // newly created fields are added to it. 249 QPDF_DLL 250 void fixCopiedAnnotations( 251 QPDFObjectHandle to_page, 252 QPDFObjectHandle from_page, 253 QPDFAcroFormDocumentHelper& from_afdh, 254 std::set<QPDFObjGen>* new_fields = nullptr); 255 256 // copyFieldsFromForeignPage was added in qpdf 10.2 and made to do 257 // nothing in 10.3. It wasn't actually doing the right thing and 258 // would result in broken files in all but the simplest case of a 259 // single page from one file being added to another file, as 260 // happens with qpdf --split-pages. 261 [[deprecated("Use fixCopiedAnnotations instead")]] 262 // ABI: delete this method 263 QPDF_DLL 264 void copyFieldsFromForeignPage( 265 QPDFPageObjectHelper foreign_page, 266 QPDFAcroFormDocumentHelper& foreign_afdh, 267 std::vector<QPDFObjectHandle>* copied_fields = nullptr); 268 269 private: 270 void analyze(); 271 void traverseField(QPDFObjectHandle field, 272 QPDFObjectHandle parent, 273 int depth, std::set<QPDFObjGen>& visited); 274 QPDFObjectHandle getOrCreateAcroForm(); 275 void adjustInheritedFields( 276 QPDFObjectHandle obj, 277 bool override_da, std::string const& from_default_da, 278 bool override_q, int from_default_q); 279 void adjustDefaultAppearances( 280 QPDFObjectHandle obj, 281 std::map<std::string, 282 std::map<std::string, std::string>> const& dr_map); 283 void adjustAppearanceStream( 284 QPDFObjectHandle stream, 285 std::map<std::string, 286 std::map<std::string, std::string>> dr_map); 287 288 class Members 289 { 290 friend class QPDFAcroFormDocumentHelper; 291 292 public: 293 QPDF_DLL 294 ~Members(); 295 296 
private: 297 Members(); 298 Members(Members const&); 299 300 bool cache_valid; 301 std::map<QPDFObjGen, 302 std::vector<QPDFAnnotationObjectHelper> 303 > field_to_annotations; 304 std::map<QPDFObjGen, QPDFFormFieldObjectHelper> annotation_to_field; 305 std::map<QPDFObjGen, std::string> field_to_name; 306 std::map<std::string, std::set<QPDFObjGen>> name_to_fields; 307 }; 308 309 PointerHolder<Members> m; 310 }; 311 312 #endif // QPDFACROFORMDOCUMENTHELPER_HH 313