1 #include "mupdf/fitz.h"
2 #include "mupdf/pdf.h"
3 
4 #include <string.h>
5 
6 static pdf_obj *
pdf_lookup_name_imp(fz_context * ctx,pdf_obj * node,pdf_obj * needle)7 pdf_lookup_name_imp(fz_context *ctx, pdf_obj *node, pdf_obj *needle)
8 {
9 	pdf_obj *kids = pdf_dict_get(ctx, node, PDF_NAME(Kids));
10 	pdf_obj *names = pdf_dict_get(ctx, node, PDF_NAME(Names));
11 
12 	if (pdf_is_array(ctx, kids))
13 	{
14 		int l = 0;
15 		int r = pdf_array_len(ctx, kids) - 1;
16 
17 		while (l <= r)
18 		{
19 			int m = (l + r) >> 1;
20 			pdf_obj *kid = pdf_array_get(ctx, kids, m);
21 			pdf_obj *limits = pdf_dict_get(ctx, kid, PDF_NAME(Limits));
22 			pdf_obj *first = pdf_array_get(ctx, limits, 0);
23 			pdf_obj *last = pdf_array_get(ctx, limits, 1);
24 
25 			if (pdf_objcmp(ctx, needle, first) < 0)
26 				r = m - 1;
27 			else if (pdf_objcmp(ctx, needle, last) > 0)
28 				l = m + 1;
29 			else
30 			{
31 				pdf_obj *obj;
32 
33 				if (pdf_mark_obj(ctx, node))
34 					break;
35 				fz_try(ctx)
36 					obj = pdf_lookup_name_imp(ctx, kid, needle);
37 				fz_always(ctx)
38 					pdf_unmark_obj(ctx, node);
39 				fz_catch(ctx)
40 					fz_rethrow(ctx);
41 				return obj;
42 			}
43 		}
44 	}
45 
46 	if (pdf_is_array(ctx, names))
47 	{
48 		int l = 0;
49 		int r = (pdf_array_len(ctx, names) / 2) - 1;
50 
51 		while (l <= r)
52 		{
53 			int m = (l + r) >> 1;
54 			int c;
55 			pdf_obj *key = pdf_array_get(ctx, names, m * 2);
56 			pdf_obj *val = pdf_array_get(ctx, names, m * 2 + 1);
57 
58 			c = pdf_objcmp(ctx, needle, key);
59 			if (c < 0)
60 				r = m - 1;
61 			else if (c > 0)
62 				l = m + 1;
63 			else
64 				return val;
65 		}
66 
67 		/* Spec says names should be sorted (hence the binary search,
68 		 * above), but Acrobat copes with non-sorted. Drop back to a
69 		 * simple search if the binary search fails. */
70 		r = pdf_array_len(ctx, names)/2;
71 		for (l = 0; l < r; l++)
72 			if (!pdf_objcmp(ctx, needle, pdf_array_get(ctx, names, l * 2)))
73 				return pdf_array_get(ctx, names, l * 2 + 1);
74 	}
75 
76 	return NULL;
77 }
78 
79 pdf_obj *
pdf_lookup_name(fz_context * ctx,pdf_document * doc,pdf_obj * which,pdf_obj * needle)80 pdf_lookup_name(fz_context *ctx, pdf_document *doc, pdf_obj *which, pdf_obj *needle)
81 {
82 	pdf_obj *root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
83 	pdf_obj *names = pdf_dict_get(ctx, root, PDF_NAME(Names));
84 	pdf_obj *tree = pdf_dict_get(ctx, names, which);
85 	return pdf_lookup_name_imp(ctx, tree, needle);
86 }
87 
88 pdf_obj *
pdf_lookup_dest(fz_context * ctx,pdf_document * doc,pdf_obj * needle)89 pdf_lookup_dest(fz_context *ctx, pdf_document *doc, pdf_obj *needle)
90 {
91 	pdf_obj *root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
92 	pdf_obj *dests = pdf_dict_get(ctx, root, PDF_NAME(Dests));
93 	pdf_obj *names = pdf_dict_get(ctx, root, PDF_NAME(Names));
94 	pdf_obj *dest = NULL;
95 
96 	/* PDF 1.1 has destinations in a dictionary */
97 	if (dests)
98 	{
99 		if (pdf_is_name(ctx, needle))
100 			return pdf_dict_get(ctx, dests, needle);
101 		else
102 			return pdf_dict_gets(ctx, dests, pdf_to_str_buf(ctx, needle));
103 	}
104 
105 	/* PDF 1.2 has destinations in a name tree */
106 	if (names && !dest)
107 	{
108 		pdf_obj *tree = pdf_dict_get(ctx, names, PDF_NAME(Dests));
109 		return pdf_lookup_name_imp(ctx, tree, needle);
110 	}
111 
112 	return NULL;
113 }
114 
115 static void
pdf_load_name_tree_imp(fz_context * ctx,pdf_obj * dict,pdf_document * doc,pdf_obj * node)116 pdf_load_name_tree_imp(fz_context *ctx, pdf_obj *dict, pdf_document *doc, pdf_obj *node)
117 {
118 	pdf_obj *kids = pdf_dict_get(ctx, node, PDF_NAME(Kids));
119 	pdf_obj *names = pdf_dict_get(ctx, node, PDF_NAME(Names));
120 	int i;
121 
122 	if (kids && !pdf_mark_obj(ctx, node))
123 	{
124 		fz_try(ctx)
125 		{
126 			int len = pdf_array_len(ctx, kids);
127 			for (i = 0; i < len; i++)
128 				pdf_load_name_tree_imp(ctx, dict, doc, pdf_array_get(ctx, kids, i));
129 		}
130 		fz_always(ctx)
131 			pdf_unmark_obj(ctx, node);
132 		fz_catch(ctx)
133 			fz_rethrow(ctx);
134 	}
135 
136 	if (names)
137 	{
138 		int len = pdf_array_len(ctx, names);
139 		for (i = 0; i + 1 < len; i += 2)
140 		{
141 			pdf_obj *key = pdf_array_get(ctx, names, i);
142 			pdf_obj *val = pdf_array_get(ctx, names, i + 1);
143 			if (pdf_is_string(ctx, key))
144 			{
145 				key = pdf_new_name(ctx, pdf_to_text_string(ctx, key));
146 				fz_try(ctx)
147 					pdf_dict_put(ctx, dict, key, val);
148 				fz_always(ctx)
149 					pdf_drop_obj(ctx, key);
150 				fz_catch(ctx)
151 					fz_rethrow(ctx);
152 			}
153 			else if (pdf_is_name(ctx, key))
154 			{
155 				pdf_dict_put(ctx, dict, key, val);
156 			}
157 		}
158 	}
159 }
160 
161 /* FIXME: fz_try/fz_catch needed here */
162 pdf_obj *
pdf_load_name_tree(fz_context * ctx,pdf_document * doc,pdf_obj * which)163 pdf_load_name_tree(fz_context *ctx, pdf_document *doc, pdf_obj *which)
164 {
165 	pdf_obj *root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
166 	pdf_obj *names = pdf_dict_get(ctx, root, PDF_NAME(Names));
167 	pdf_obj *tree = pdf_dict_get(ctx, names, which);
168 	if (pdf_is_dict(ctx, tree))
169 	{
170 		pdf_obj *dict = pdf_new_dict(ctx, doc, 100);
171 		pdf_load_name_tree_imp(ctx, dict, doc, tree);
172 		return dict;
173 	}
174 	return NULL;
175 }
176 
177 pdf_obj *
pdf_lookup_number(fz_context * ctx,pdf_obj * node,int needle)178 pdf_lookup_number(fz_context *ctx, pdf_obj *node, int needle)
179 {
180 	pdf_obj *kids = pdf_dict_get(ctx, node, PDF_NAME(Kids));
181 	pdf_obj *nums = pdf_dict_get(ctx, node, PDF_NAME(Nums));
182 
183 	if (pdf_is_array(ctx, kids))
184 	{
185 		int l = 0;
186 		int r = pdf_array_len(ctx, kids) - 1;
187 
188 		while (l <= r)
189 		{
190 			int m = (l + r) >> 1;
191 			pdf_obj *kid = pdf_array_get(ctx, kids, m);
192 			pdf_obj *limits = pdf_dict_get(ctx, kid, PDF_NAME(Limits));
193 			int first = pdf_to_int(ctx, pdf_array_get(ctx, limits, 0));
194 			int last = pdf_to_int(ctx, pdf_array_get(ctx, limits, 1));
195 
196 			if (needle < first)
197 				r = m - 1;
198 			else if (needle > last)
199 				l = m + 1;
200 			else
201 			{
202 				pdf_obj *obj;
203 
204 				if (pdf_mark_obj(ctx, node))
205 					break;
206 				fz_try(ctx)
207 					obj = pdf_lookup_number(ctx, kid, needle);
208 				fz_always(ctx)
209 					pdf_unmark_obj(ctx, node);
210 				fz_catch(ctx)
211 					fz_rethrow(ctx);
212 				return obj;
213 			}
214 		}
215 	}
216 
217 	if (pdf_is_array(ctx, nums))
218 	{
219 		pdf_obj *nums = pdf_dict_get(ctx, node, PDF_NAME(Nums));
220 		int l = 0;
221 		int r = (pdf_array_len(ctx, nums) / 2) - 1;
222 
223 		while (l <= r)
224 		{
225 			int m = (l + r) >> 1;
226 			int key = pdf_to_int(ctx, pdf_array_get(ctx, nums, m * 2));
227 			pdf_obj *val = pdf_array_get(ctx, nums, m * 2 + 1);
228 
229 			if (needle < key)
230 				r = m - 1;
231 			else if (needle > key)
232 				l = m + 1;
233 			else
234 				return val;
235 		}
236 
237 		/* Parallel the nametree lookup above by allowing for non-sorted lists. */
238 		r = pdf_array_len(ctx, nums)/2;
239 		for (l = 0; l < r; l++)
240 			if (needle == pdf_to_int(ctx, pdf_array_get(ctx, nums, l * 2)))
241 				return pdf_array_get(ctx, nums, l * 2 + 1);
242 	}
243 
244 	return NULL;
245 }
246 
247 static void
pdf_walk_tree_kid(fz_context * ctx,pdf_obj * obj,pdf_obj * kid_name,void (* arrive)(fz_context *,pdf_obj *,void *,pdf_obj **),void (* leave)(fz_context *,pdf_obj *,void *),void * arg,pdf_obj ** inherit_names,pdf_obj ** inherit_vals)248 pdf_walk_tree_kid(fz_context *ctx,
249 			pdf_obj *obj,
250 			pdf_obj *kid_name,
251 			void (*arrive)(fz_context *, pdf_obj *, void *, pdf_obj **),
252 			void (*leave)(fz_context *, pdf_obj *, void *),
253 			void *arg,
254 			pdf_obj **inherit_names,
255 			pdf_obj **inherit_vals)
256 {
257 	pdf_obj **new_vals = NULL;
258 
259 	if (obj == NULL || pdf_mark_obj(ctx, obj))
260 		return;
261 
262 	fz_var(new_vals);
263 
264 	fz_try(ctx)
265 	{
266 		/* First we run through the names we've been asked to collect
267 		 * inherited values for updating the values. */
268 		if (inherit_names != NULL)
269 		{
270 			int i, n;
271 
272 			for (n = 0; inherit_names[n] != NULL; n++);
273 
274 			for (i = 0; i < n; i++)
275 			{
276 				pdf_obj *v = pdf_dict_get(ctx, obj, inherit_names[i]);
277 				if (v != NULL)
278 				{
279 					if (new_vals == NULL)
280 					{
281 						new_vals = fz_malloc_array(ctx, n, pdf_obj *);
282 						memcpy(new_vals, inherit_vals, n*sizeof(pdf_obj *));
283 						inherit_vals = new_vals;
284 					}
285 					inherit_vals[i] = v;
286 				}
287 			}
288 		}
289 
290 		if (arrive)
291 			arrive(ctx, obj, arg, inherit_vals);
292 		pdf_walk_tree(ctx, pdf_dict_get(ctx, obj, kid_name), kid_name, arrive, leave, arg, inherit_names, inherit_vals);
293 		if (leave)
294 			leave(ctx, obj, arg);
295 	}
296 	fz_always(ctx)
297 	{
298 		fz_free(ctx, new_vals);
299 		pdf_unmark_obj(ctx, obj);
300 	}
301 	fz_catch(ctx)
302 		fz_rethrow(ctx);
303 }
304 
pdf_walk_tree(fz_context * ctx,pdf_obj * obj,pdf_obj * kid_name,void (* arrive)(fz_context *,pdf_obj *,void *,pdf_obj **),void (* leave)(fz_context *,pdf_obj *,void *),void * arg,pdf_obj ** inherit_names,pdf_obj ** inherit_vals)305 void pdf_walk_tree(fz_context *ctx, pdf_obj *obj, pdf_obj *kid_name,
306 			void (*arrive)(fz_context *, pdf_obj *, void *, pdf_obj **),
307 			void (*leave)(fz_context *, pdf_obj *, void *),
308 			void *arg,
309 			pdf_obj **inherit_names,
310 			pdf_obj **inherit_vals)
311 {
312 	if (obj == NULL || pdf_mark_obj(ctx, obj))
313 		return;
314 
315 	fz_try(ctx)
316 	{
317 		if (pdf_is_array(ctx, obj))
318 		{
319 			int i, n = pdf_array_len(ctx, obj);
320 			for (i = 0; i < n; i++)
321 				pdf_walk_tree_kid(ctx, pdf_array_get(ctx, obj, i), kid_name, arrive, leave, arg, inherit_names, inherit_vals);
322 		}
323 		else
324 		{
325 			pdf_walk_tree_kid(ctx, obj, kid_name, arrive, leave, arg, inherit_names, inherit_vals);
326 		}
327 	}
328 	fz_always(ctx)
329 		pdf_unmark_obj(ctx, obj);
330 	fz_catch(ctx)
331 		fz_rethrow(ctx);
332 }
333