1 /*
2 * Copyright 2018 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "src/pdf/SkPDFDocumentPriv.h"
9 #include "src/pdf/SkPDFTag.h"
10
11 // Table 333 in PDF 32000-1:2008
tag_name_from_type(SkPDF::DocumentStructureType type)12 static const char* tag_name_from_type(SkPDF::DocumentStructureType type) {
13 switch (type) {
14 #define M(X) case SkPDF::DocumentStructureType::k ## X: return #X
15 M(Document);
16 M(Part);
17 M(Art);
18 M(Sect);
19 M(Div);
20 M(BlockQuote);
21 M(Caption);
22 M(TOC);
23 M(TOCI);
24 M(Index);
25 M(NonStruct);
26 M(Private);
27 M(H);
28 M(H1);
29 M(H2);
30 M(H3);
31 M(H4);
32 M(H5);
33 M(H6);
34 M(P);
35 M(L);
36 M(LI);
37 M(Lbl);
38 M(LBody);
39 M(Table);
40 M(TR);
41 M(TH);
42 M(TD);
43 M(THead);
44 M(TBody);
45 M(TFoot);
46 M(Span);
47 M(Quote);
48 M(Note);
49 M(Reference);
50 M(BibEntry);
51 M(Code);
52 M(Link);
53 M(Annot);
54 M(Ruby);
55 M(RB);
56 M(RT);
57 M(RP);
58 M(Warichu);
59 M(WT);
60 M(WP);
61 M(Figure);
62 M(Formula);
63 M(Form);
64 #undef M
65 }
66 SK_ABORT("bad tag");
67 }
68
69 struct SkPDFTagNode {
70 SkPDFTagNode* fChildren = nullptr;
71 size_t fChildCount = 0;
72 struct MarkedContentInfo {
73 unsigned fPageIndex;
74 int fMarkId;
75 };
76 SkTArray<MarkedContentInfo> fMarkedContent;
77 int fNodeId;
78 SkPDF::DocumentStructureType fType;
79 SkPDFIndirectReference fRef;
80 enum State {
81 kUnknown,
82 kYes,
83 kNo,
84 } fCanDiscard = kUnknown;
85 };
86
SkPDFTagTree()87 SkPDFTagTree::SkPDFTagTree() : fArena(4 * sizeof(SkPDFTagNode)) {}
88
89 SkPDFTagTree::~SkPDFTagTree() = default;
90
copy(const SkPDF::StructureElementNode & node,SkPDFTagNode * dst,SkArenaAlloc * arena,SkTHashMap<int,SkPDFTagNode * > * nodeMap)91 static void copy(const SkPDF::StructureElementNode& node,
92 SkPDFTagNode* dst,
93 SkArenaAlloc* arena,
94 SkTHashMap<int, SkPDFTagNode*>* nodeMap) {
95 nodeMap->set(node.fNodeId, dst);
96 size_t childCount = node.fChildCount;
97 SkPDFTagNode* children = arena->makeArray<SkPDFTagNode>(childCount);
98 dst->fChildCount = childCount;
99 dst->fNodeId = node.fNodeId;
100 dst->fType = node.fType;
101 dst->fChildren = children;
102 for (size_t i = 0; i < childCount; ++i) {
103 copy(node.fChildren[i], &children[i], arena, nodeMap);
104 }
105 }
106
init(const SkPDF::StructureElementNode * node)107 void SkPDFTagTree::init(const SkPDF::StructureElementNode* node) {
108 if (node) {
109 fRoot = fArena.make<SkPDFTagNode>();
110 copy(*node, fRoot, &fArena, &fNodeMap);
111 }
112 }
113
reset()114 void SkPDFTagTree::reset() {
115 fArena.reset();
116 fNodeMap.reset();
117 fMarksPerPage.reset();
118 fRoot = nullptr;
119 }
120
getMarkIdForNodeId(int nodeId,unsigned pageIndex)121 int SkPDFTagTree::getMarkIdForNodeId(int nodeId, unsigned pageIndex) {
122 if (!fRoot) {
123 return -1;
124 }
125 SkPDFTagNode** tagPtr = fNodeMap.find(nodeId);
126 if (!tagPtr) {
127 return -1;
128 }
129 SkPDFTagNode* tag = *tagPtr;
130 SkASSERT(tag);
131 while (fMarksPerPage.size() < pageIndex + 1) {
132 fMarksPerPage.push_back();
133 }
134 SkTArray<SkPDFTagNode*>& pageMarks = fMarksPerPage[pageIndex];
135 int markId = pageMarks.count();
136 tag->fMarkedContent.push_back({pageIndex, markId});
137 pageMarks.push_back(tag);
138 return markId;
139 }
140
can_discard(SkPDFTagNode * node)141 static bool can_discard(SkPDFTagNode* node) {
142 if (node->fCanDiscard == SkPDFTagNode::kYes) {
143 return true;
144 }
145 if (node->fCanDiscard == SkPDFTagNode::kNo) {
146 return false;
147 }
148 if (!node->fMarkedContent.empty()) {
149 node->fCanDiscard = SkPDFTagNode::kNo;
150 return false;
151 }
152 for (size_t i = 0; i < node->fChildCount; ++i) {
153 if (!can_discard(&node->fChildren[i])) {
154 node->fCanDiscard = SkPDFTagNode::kNo;
155 return false;
156 }
157 }
158 node->fCanDiscard = SkPDFTagNode::kYes;
159 return true;
160 }
161
162
prepare_tag_tree_to_emit(SkPDFIndirectReference parent,SkPDFTagNode * node,SkPDFDocument * doc)163 SkPDFIndirectReference prepare_tag_tree_to_emit(SkPDFIndirectReference parent,
164 SkPDFTagNode* node,
165 SkPDFDocument* doc) {
166 SkPDFIndirectReference ref = doc->reserveRef();
167 std::unique_ptr<SkPDFArray> kids = SkPDFMakeArray();
168 SkPDFTagNode* children = node->fChildren;
169 size_t childCount = node->fChildCount;
170 for (size_t i = 0; i < childCount; ++i) {
171 SkPDFTagNode* child = &children[i];
172 if (!(can_discard(child))) {
173 kids->appendRef(prepare_tag_tree_to_emit(ref, child, doc));
174 }
175 }
176 for (const SkPDFTagNode::MarkedContentInfo& info : node->fMarkedContent) {
177 std::unique_ptr<SkPDFDict> mcr = SkPDFMakeDict("MCR");
178 mcr->insertRef("Pg", doc->getPage(info.fPageIndex));
179 mcr->insertInt("MCID", info.fMarkId);
180 kids->appendObject(std::move(mcr));
181 }
182 node->fRef = ref;
183 SkPDFDict dict("StructElem");
184 dict.insertName("S", tag_name_from_type(node->fType));
185 dict.insertRef("P", parent);
186 dict.insertObject("K", std::move(kids));
187 return doc->emit(dict, ref);
188 }
189
makeStructTreeRoot(SkPDFDocument * doc)190 SkPDFIndirectReference SkPDFTagTree::makeStructTreeRoot(SkPDFDocument* doc) {
191 if (!fRoot) {
192 return SkPDFIndirectReference();
193 }
194 if (can_discard(fRoot)) {
195 SkDEBUGFAIL("PDF has tag tree but no marked content.");
196 }
197 SkPDFIndirectReference ref = doc->reserveRef();
198
199 unsigned pageCount = SkToUInt(doc->pageCount());
200
201 // Build the StructTreeRoot.
202 SkPDFDict structTreeRoot("StructTreeRoot");
203 structTreeRoot.insertRef("K", prepare_tag_tree_to_emit(ref, fRoot, doc));
204 structTreeRoot.insertInt("ParentTreeNextKey", SkToInt(pageCount));
205
206 // Build the parent tree, which is a mapping from the marked
207 // content IDs on each page to their corressponding tags.
208 SkPDFDict parentTree("ParentTree");
209 auto parentTreeNums = SkPDFMakeArray();
210
211 SkASSERT(fMarksPerPage.size() <= pageCount);
212 for (size_t j = 0; j < fMarksPerPage.size(); ++j) {
213 const SkTArray<SkPDFTagNode*>& pageMarks = fMarksPerPage[j];
214 SkPDFArray markToTagArray;
215 for (SkPDFTagNode* mark : pageMarks) {
216 SkASSERT(mark->fRef);
217 markToTagArray.appendRef(mark->fRef);
218 }
219 parentTreeNums->appendInt(j);
220 parentTreeNums->appendRef(doc->emit(markToTagArray));
221 }
222 parentTree.insertObject("Nums", std::move(parentTreeNums));
223 structTreeRoot.insertRef("ParentTree", doc->emit(parentTree));
224 return doc->emit(structTreeRoot, ref);
225 }
226
227