1 #include "mupdf/fitz.h"
2 #include "mupdf/pdf.h"
3
4 #include <string.h>
5
6 static pdf_obj *
pdf_lookup_name_imp(fz_context * ctx,pdf_obj * node,pdf_obj * needle)7 pdf_lookup_name_imp(fz_context *ctx, pdf_obj *node, pdf_obj *needle)
8 {
9 pdf_obj *kids = pdf_dict_get(ctx, node, PDF_NAME(Kids));
10 pdf_obj *names = pdf_dict_get(ctx, node, PDF_NAME(Names));
11
12 if (pdf_is_array(ctx, kids))
13 {
14 int l = 0;
15 int r = pdf_array_len(ctx, kids) - 1;
16
17 while (l <= r)
18 {
19 int m = (l + r) >> 1;
20 pdf_obj *kid = pdf_array_get(ctx, kids, m);
21 pdf_obj *limits = pdf_dict_get(ctx, kid, PDF_NAME(Limits));
22 pdf_obj *first = pdf_array_get(ctx, limits, 0);
23 pdf_obj *last = pdf_array_get(ctx, limits, 1);
24
25 if (pdf_objcmp(ctx, needle, first) < 0)
26 r = m - 1;
27 else if (pdf_objcmp(ctx, needle, last) > 0)
28 l = m + 1;
29 else
30 {
31 pdf_obj *obj;
32
33 if (pdf_mark_obj(ctx, node))
34 break;
35 fz_try(ctx)
36 obj = pdf_lookup_name_imp(ctx, kid, needle);
37 fz_always(ctx)
38 pdf_unmark_obj(ctx, node);
39 fz_catch(ctx)
40 fz_rethrow(ctx);
41 return obj;
42 }
43 }
44 }
45
46 if (pdf_is_array(ctx, names))
47 {
48 int l = 0;
49 int r = (pdf_array_len(ctx, names) / 2) - 1;
50
51 while (l <= r)
52 {
53 int m = (l + r) >> 1;
54 int c;
55 pdf_obj *key = pdf_array_get(ctx, names, m * 2);
56 pdf_obj *val = pdf_array_get(ctx, names, m * 2 + 1);
57
58 c = pdf_objcmp(ctx, needle, key);
59 if (c < 0)
60 r = m - 1;
61 else if (c > 0)
62 l = m + 1;
63 else
64 return val;
65 }
66
67 /* Spec says names should be sorted (hence the binary search,
68 * above), but Acrobat copes with non-sorted. Drop back to a
69 * simple search if the binary search fails. */
70 r = pdf_array_len(ctx, names)/2;
71 for (l = 0; l < r; l++)
72 if (!pdf_objcmp(ctx, needle, pdf_array_get(ctx, names, l * 2)))
73 return pdf_array_get(ctx, names, l * 2 + 1);
74 }
75
76 return NULL;
77 }
78
79 pdf_obj *
pdf_lookup_name(fz_context * ctx,pdf_document * doc,pdf_obj * which,pdf_obj * needle)80 pdf_lookup_name(fz_context *ctx, pdf_document *doc, pdf_obj *which, pdf_obj *needle)
81 {
82 pdf_obj *root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
83 pdf_obj *names = pdf_dict_get(ctx, root, PDF_NAME(Names));
84 pdf_obj *tree = pdf_dict_get(ctx, names, which);
85 return pdf_lookup_name_imp(ctx, tree, needle);
86 }
87
88 pdf_obj *
pdf_lookup_dest(fz_context * ctx,pdf_document * doc,pdf_obj * needle)89 pdf_lookup_dest(fz_context *ctx, pdf_document *doc, pdf_obj *needle)
90 {
91 pdf_obj *root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
92 pdf_obj *dests = pdf_dict_get(ctx, root, PDF_NAME(Dests));
93 pdf_obj *names = pdf_dict_get(ctx, root, PDF_NAME(Names));
94 pdf_obj *dest = NULL;
95
96 /* PDF 1.1 has destinations in a dictionary */
97 if (dests)
98 {
99 if (pdf_is_name(ctx, needle))
100 return pdf_dict_get(ctx, dests, needle);
101 else
102 return pdf_dict_gets(ctx, dests, pdf_to_str_buf(ctx, needle));
103 }
104
105 /* PDF 1.2 has destinations in a name tree */
106 if (names && !dest)
107 {
108 pdf_obj *tree = pdf_dict_get(ctx, names, PDF_NAME(Dests));
109 return pdf_lookup_name_imp(ctx, tree, needle);
110 }
111
112 return NULL;
113 }
114
115 static void
pdf_load_name_tree_imp(fz_context * ctx,pdf_obj * dict,pdf_document * doc,pdf_obj * node)116 pdf_load_name_tree_imp(fz_context *ctx, pdf_obj *dict, pdf_document *doc, pdf_obj *node)
117 {
118 pdf_obj *kids = pdf_dict_get(ctx, node, PDF_NAME(Kids));
119 pdf_obj *names = pdf_dict_get(ctx, node, PDF_NAME(Names));
120 int i;
121
122 if (kids && !pdf_mark_obj(ctx, node))
123 {
124 fz_try(ctx)
125 {
126 int len = pdf_array_len(ctx, kids);
127 for (i = 0; i < len; i++)
128 pdf_load_name_tree_imp(ctx, dict, doc, pdf_array_get(ctx, kids, i));
129 }
130 fz_always(ctx)
131 pdf_unmark_obj(ctx, node);
132 fz_catch(ctx)
133 fz_rethrow(ctx);
134 }
135
136 if (names)
137 {
138 int len = pdf_array_len(ctx, names);
139 for (i = 0; i + 1 < len; i += 2)
140 {
141 pdf_obj *key = pdf_array_get(ctx, names, i);
142 pdf_obj *val = pdf_array_get(ctx, names, i + 1);
143 if (pdf_is_string(ctx, key))
144 {
145 key = pdf_new_name(ctx, pdf_to_text_string(ctx, key));
146 fz_try(ctx)
147 pdf_dict_put(ctx, dict, key, val);
148 fz_always(ctx)
149 pdf_drop_obj(ctx, key);
150 fz_catch(ctx)
151 fz_rethrow(ctx);
152 }
153 else if (pdf_is_name(ctx, key))
154 {
155 pdf_dict_put(ctx, dict, key, val);
156 }
157 }
158 }
159 }
160
161 /* FIXME: fz_try/fz_catch needed here */
162 pdf_obj *
pdf_load_name_tree(fz_context * ctx,pdf_document * doc,pdf_obj * which)163 pdf_load_name_tree(fz_context *ctx, pdf_document *doc, pdf_obj *which)
164 {
165 pdf_obj *root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
166 pdf_obj *names = pdf_dict_get(ctx, root, PDF_NAME(Names));
167 pdf_obj *tree = pdf_dict_get(ctx, names, which);
168 if (pdf_is_dict(ctx, tree))
169 {
170 pdf_obj *dict = pdf_new_dict(ctx, doc, 100);
171 pdf_load_name_tree_imp(ctx, dict, doc, tree);
172 return dict;
173 }
174 return NULL;
175 }
176
177 pdf_obj *
pdf_lookup_number(fz_context * ctx,pdf_obj * node,int needle)178 pdf_lookup_number(fz_context *ctx, pdf_obj *node, int needle)
179 {
180 pdf_obj *kids = pdf_dict_get(ctx, node, PDF_NAME(Kids));
181 pdf_obj *nums = pdf_dict_get(ctx, node, PDF_NAME(Nums));
182
183 if (pdf_is_array(ctx, kids))
184 {
185 int l = 0;
186 int r = pdf_array_len(ctx, kids) - 1;
187
188 while (l <= r)
189 {
190 int m = (l + r) >> 1;
191 pdf_obj *kid = pdf_array_get(ctx, kids, m);
192 pdf_obj *limits = pdf_dict_get(ctx, kid, PDF_NAME(Limits));
193 int first = pdf_to_int(ctx, pdf_array_get(ctx, limits, 0));
194 int last = pdf_to_int(ctx, pdf_array_get(ctx, limits, 1));
195
196 if (needle < first)
197 r = m - 1;
198 else if (needle > last)
199 l = m + 1;
200 else
201 {
202 pdf_obj *obj;
203
204 if (pdf_mark_obj(ctx, node))
205 break;
206 fz_try(ctx)
207 obj = pdf_lookup_number(ctx, kid, needle);
208 fz_always(ctx)
209 pdf_unmark_obj(ctx, node);
210 fz_catch(ctx)
211 fz_rethrow(ctx);
212 return obj;
213 }
214 }
215 }
216
217 if (pdf_is_array(ctx, nums))
218 {
219 pdf_obj *nums = pdf_dict_get(ctx, node, PDF_NAME(Nums));
220 int l = 0;
221 int r = (pdf_array_len(ctx, nums) / 2) - 1;
222
223 while (l <= r)
224 {
225 int m = (l + r) >> 1;
226 int key = pdf_to_int(ctx, pdf_array_get(ctx, nums, m * 2));
227 pdf_obj *val = pdf_array_get(ctx, nums, m * 2 + 1);
228
229 if (needle < key)
230 r = m - 1;
231 else if (needle > key)
232 l = m + 1;
233 else
234 return val;
235 }
236
237 /* Parallel the nametree lookup above by allowing for non-sorted lists. */
238 r = pdf_array_len(ctx, nums)/2;
239 for (l = 0; l < r; l++)
240 if (needle == pdf_to_int(ctx, pdf_array_get(ctx, nums, l * 2)))
241 return pdf_array_get(ctx, nums, l * 2 + 1);
242 }
243
244 return NULL;
245 }
246
247 static void
pdf_walk_tree_kid(fz_context * ctx,pdf_obj * obj,pdf_obj * kid_name,void (* arrive)(fz_context *,pdf_obj *,void *,pdf_obj **),void (* leave)(fz_context *,pdf_obj *,void *),void * arg,pdf_obj ** inherit_names,pdf_obj ** inherit_vals)248 pdf_walk_tree_kid(fz_context *ctx,
249 pdf_obj *obj,
250 pdf_obj *kid_name,
251 void (*arrive)(fz_context *, pdf_obj *, void *, pdf_obj **),
252 void (*leave)(fz_context *, pdf_obj *, void *),
253 void *arg,
254 pdf_obj **inherit_names,
255 pdf_obj **inherit_vals)
256 {
257 pdf_obj **new_vals = NULL;
258
259 if (obj == NULL || pdf_mark_obj(ctx, obj))
260 return;
261
262 fz_var(new_vals);
263
264 fz_try(ctx)
265 {
266 /* First we run through the names we've been asked to collect
267 * inherited values for updating the values. */
268 if (inherit_names != NULL)
269 {
270 int i, n;
271
272 for (n = 0; inherit_names[n] != NULL; n++);
273
274 for (i = 0; i < n; i++)
275 {
276 pdf_obj *v = pdf_dict_get(ctx, obj, inherit_names[i]);
277 if (v != NULL)
278 {
279 if (new_vals == NULL)
280 {
281 new_vals = fz_malloc_array(ctx, n, pdf_obj *);
282 memcpy(new_vals, inherit_vals, n*sizeof(pdf_obj *));
283 inherit_vals = new_vals;
284 }
285 inherit_vals[i] = v;
286 }
287 }
288 }
289
290 if (arrive)
291 arrive(ctx, obj, arg, inherit_vals);
292 pdf_walk_tree(ctx, pdf_dict_get(ctx, obj, kid_name), kid_name, arrive, leave, arg, inherit_names, inherit_vals);
293 if (leave)
294 leave(ctx, obj, arg);
295 }
296 fz_always(ctx)
297 {
298 fz_free(ctx, new_vals);
299 pdf_unmark_obj(ctx, obj);
300 }
301 fz_catch(ctx)
302 fz_rethrow(ctx);
303 }
304
pdf_walk_tree(fz_context * ctx,pdf_obj * obj,pdf_obj * kid_name,void (* arrive)(fz_context *,pdf_obj *,void *,pdf_obj **),void (* leave)(fz_context *,pdf_obj *,void *),void * arg,pdf_obj ** inherit_names,pdf_obj ** inherit_vals)305 void pdf_walk_tree(fz_context *ctx, pdf_obj *obj, pdf_obj *kid_name,
306 void (*arrive)(fz_context *, pdf_obj *, void *, pdf_obj **),
307 void (*leave)(fz_context *, pdf_obj *, void *),
308 void *arg,
309 pdf_obj **inherit_names,
310 pdf_obj **inherit_vals)
311 {
312 if (obj == NULL || pdf_mark_obj(ctx, obj))
313 return;
314
315 fz_try(ctx)
316 {
317 if (pdf_is_array(ctx, obj))
318 {
319 int i, n = pdf_array_len(ctx, obj);
320 for (i = 0; i < n; i++)
321 pdf_walk_tree_kid(ctx, pdf_array_get(ctx, obj, i), kid_name, arrive, leave, arg, inherit_names, inherit_vals);
322 }
323 else
324 {
325 pdf_walk_tree_kid(ctx, obj, kid_name, arrive, leave, arg, inherit_names, inherit_vals);
326 }
327 }
328 fz_always(ctx)
329 pdf_unmark_obj(ctx, obj);
330 fz_catch(ctx)
331 fz_rethrow(ctx);
332 }
333