1 #include "mupdf/fitz.h"
2 #include "mupdf/pdf.h"
3 
4 #include <assert.h>
5 #include <string.h>
6 
7 #undef CHECK_SPLAY
8 #undef DUMP_SPLAY
9 
10 /*
11  * Allocate, destroy and simple parameters.
12  */
13 
14 void
pdf_drop_cmap_imp(fz_context * ctx,fz_storable * cmap_)15 pdf_drop_cmap_imp(fz_context *ctx, fz_storable *cmap_)
16 {
17 	pdf_cmap *cmap = (pdf_cmap *)cmap_;
18 	pdf_drop_cmap(ctx, cmap->usecmap);
19 	fz_free(ctx, cmap->ranges);
20 	fz_free(ctx, cmap->xranges);
21 	fz_free(ctx, cmap->mranges);
22 	fz_free(ctx, cmap->dict);
23 	fz_free(ctx, cmap->tree);
24 	fz_free(ctx, cmap);
25 }
26 
27 pdf_cmap *
pdf_new_cmap(fz_context * ctx)28 pdf_new_cmap(fz_context *ctx)
29 {
30 	pdf_cmap *cmap = fz_malloc_struct(ctx, pdf_cmap);
31 	FZ_INIT_STORABLE(cmap, 1, pdf_drop_cmap_imp);
32 	return cmap;
33 }
34 
35 /* Could be a macro for speed */
36 pdf_cmap *
pdf_keep_cmap(fz_context * ctx,pdf_cmap * cmap)37 pdf_keep_cmap(fz_context *ctx, pdf_cmap *cmap)
38 {
39 	return fz_keep_storable(ctx, &cmap->storable);
40 }
41 
42 /* Could be a macro for speed */
43 void
pdf_drop_cmap(fz_context * ctx,pdf_cmap * cmap)44 pdf_drop_cmap(fz_context *ctx, pdf_cmap *cmap)
45 {
46 	fz_drop_storable(ctx, &cmap->storable);
47 }
48 
49 void
pdf_set_usecmap(fz_context * ctx,pdf_cmap * cmap,pdf_cmap * usecmap)50 pdf_set_usecmap(fz_context *ctx, pdf_cmap *cmap, pdf_cmap *usecmap)
51 {
52 	int i;
53 
54 	pdf_drop_cmap(ctx, cmap->usecmap);
55 	cmap->usecmap = pdf_keep_cmap(ctx, usecmap);
56 
57 	if (cmap->codespace_len == 0)
58 	{
59 		cmap->codespace_len = usecmap->codespace_len;
60 		for (i = 0; i < usecmap->codespace_len; i++)
61 			cmap->codespace[i] = usecmap->codespace[i];
62 	}
63 }
64 
65 int
pdf_cmap_wmode(fz_context * ctx,pdf_cmap * cmap)66 pdf_cmap_wmode(fz_context *ctx, pdf_cmap *cmap)
67 {
68 	return cmap->wmode;
69 }
70 
71 void
pdf_set_cmap_wmode(fz_context * ctx,pdf_cmap * cmap,int wmode)72 pdf_set_cmap_wmode(fz_context *ctx, pdf_cmap *cmap, int wmode)
73 {
74 	cmap->wmode = wmode;
75 }
76 
77 void
pdf_add_codespace(fz_context * ctx,pdf_cmap * cmap,unsigned int low,unsigned int high,size_t n)78 pdf_add_codespace(fz_context *ctx, pdf_cmap *cmap, unsigned int low, unsigned int high, size_t n)
79 {
80 	if (cmap->codespace_len + 1 == nelem(cmap->codespace))
81 	{
82 		fz_warn(ctx, "assert: too many code space ranges");
83 		return;
84 	}
85 
86 	if ((uint32_t)n != n)
87 	{
88 		fz_warn(ctx, "assert: code space range too large");
89 		return;
90 	}
91 
92 	cmap->codespace[cmap->codespace_len].n = (int)n;
93 	cmap->codespace[cmap->codespace_len].low = low;
94 	cmap->codespace[cmap->codespace_len].high = high;
95 	cmap->codespace_len ++;
96 }
97 
98 struct cmap_splay {
99 	unsigned int low;
100 	unsigned int high;
101 	unsigned int out;
102 	unsigned int left;
103 	unsigned int right;
104 	unsigned int parent : 31;
105 	unsigned int many : 1;
106 };
107 
108 #define EMPTY ((unsigned int)0x40000000)
109 
110 /*
111 	The splaying steps used:
112 
113 	Case 1:	|              z              x
114 		|          y       D  =>  A       y
115 		|        x   C                  B   z
116 		|       A B                        C D
117 
118 	Case 2:	|         z              x
119 		|     y       D  =>   y     z
120 		|   A   x            A B   C D
121 		|  B C
122 
123 	Case 3:	|     y                 x
124 		|  x     C       =>   A     y
125 		| A B                      B C
126 */
127 
128 static void
move_to_root(cmap_splay * tree,unsigned int x)129 move_to_root(cmap_splay *tree, unsigned int x)
130 {
131 	if (x == EMPTY)
132 		return;
133 	do
134 	{
135 		unsigned int z, zp;
136 		unsigned int y = tree[x].parent;
137 		if (y == EMPTY)
138 			break;
139 		z = tree[y].parent;
140 		if (z == EMPTY)
141 		{
142 			/* Case 3 */
143 			tree[x].parent = EMPTY;
144 			tree[y].parent = x;
145 			if (tree[y].left == x)
146 			{
147 				/* Case 3 */
148 				tree[y].left = tree[x].right;
149 				if (tree[y].left != EMPTY)
150 					tree[tree[y].left].parent = y;
151 				tree[x].right = y;
152 			}
153 			else
154 			{
155 				/* Case 3 - reflected */
156 				assert(tree[y].right == x);
157 				tree[y].right = tree[x].left;
158 				if (tree[y].right != EMPTY)
159 					tree[tree[y].right].parent = y;
160 				tree[x].left = y;
161 			}
162 			break;
163 		}
164 
165 		zp = tree[z].parent;
166 		tree[x].parent = zp;
167 		if (zp != EMPTY) {
168 			if (tree[zp].left == z)
169 				tree[zp].left = x;
170 			else
171 			{
172 				assert(tree[zp].right == z);
173 				tree[zp].right = x;
174 			}
175 		}
176 		tree[y].parent = x;
177 		if (tree[y].left == x)
178 		{
179 			tree[y].left = tree[x].right;
180 			if (tree[y].left != EMPTY)
181 				tree[tree[y].left].parent = y;
182 			tree[x].right = y;
183 			if (tree[z].left == y)
184 			{
185 				/* Case 1 */
186 				tree[z].parent = y;
187 				tree[z].left = tree[y].right;
188 				if (tree[z].left != EMPTY)
189 					tree[tree[z].left].parent = z;
190 				tree[y].right = z;
191 			}
192 			else
193 			{
194 				/* Case 2 - reflected */
195 				assert(tree[z].right == y);
196 				tree[z].parent = x;
197 				tree[z].right = tree[x].left;
198 				if (tree[z].right != EMPTY)
199 					tree[tree[z].right].parent = z;
200 				tree[x].left = z;
201 			}
202 		}
203 		else
204 		{
205 			assert(tree[y].right == x);
206 			tree[y].right = tree[x].left;
207 			if (tree[y].right != EMPTY)
208 				tree[tree[y].right].parent = y;
209 			tree[x].left = y;
210 			if (tree[z].left == y)
211 			{
212 				/* Case 2 */
213 				tree[z].parent = x;
214 				tree[z].left = tree[x].right;
215 				if (tree[z].left != EMPTY)
216 					tree[tree[z].left].parent = z;
217 				tree[x].right = z;
218 			}
219 			else
220 			{
221 				/* Case 1 - reflected */
222 				assert(tree[z].right == y);
223 				tree[z].parent = y;
224 				tree[z].right = tree[y].left;
225 				if (tree[z].right != EMPTY)
226 					tree[tree[z].right].parent = z;
227 				tree[y].left = z;
228 			}
229 		}
230 	} while (1);
231 }
232 
delete_node(pdf_cmap * cmap,unsigned int current)233 static unsigned int delete_node(pdf_cmap *cmap, unsigned int current)
234 {
235 	cmap_splay *tree = cmap->tree;
236 	unsigned int parent;
237 	unsigned int replacement;
238 
239 	assert(current != EMPTY);
240 
241 	parent = tree[current].parent;
242 	if (tree[current].right == EMPTY)
243 	{
244 		if (parent == EMPTY)
245 		{
246 			replacement = cmap->ttop = tree[current].left;
247 		}
248 		else if (tree[parent].left == current)
249 		{
250 			replacement = tree[parent].left = tree[current].left;
251 		}
252 		else
253 		{
254 			assert(tree[parent].right == current);
255 			replacement = tree[parent].right = tree[current].left;
256 		}
257 		if (replacement != EMPTY)
258 			tree[replacement].parent = parent;
259 		else
260 			replacement = parent;
261 	}
262 	else if (tree[current].left == EMPTY)
263 	{
264 		if (parent == EMPTY)
265 		{
266 			replacement = cmap->ttop = tree[current].right;
267 		}
268 		else if (tree[parent].left == current)
269 		{
270 			replacement = tree[parent].left = tree[current].right;
271 		}
272 		else
273 		{
274 			assert(tree[parent].right == current);
275 			replacement = tree[parent].right = tree[current].right;
276 		}
277 		if (replacement != EMPTY)
278 			tree[replacement].parent = parent;
279 		else
280 			replacement = parent;
281 	}
282 	else
283 	{
284 		/* Hard case, find the in-order predecessor of current */
285 		unsigned int amputee = current;
286 		replacement = tree[current].left;
287 		while (tree[replacement].right != EMPTY) {
288 			amputee = replacement;
289 			replacement = tree[replacement].right;
290 		}
291 		/* Remove replacement from the tree */
292 		if (amputee == current)
293 		{
294 			tree[amputee].left = tree[replacement].left;
295 			if (tree[amputee].left != EMPTY)
296 				tree[tree[amputee].left].parent = amputee;
297 		}
298 		else
299 		{
300 			tree[amputee].right = tree[replacement].left;
301 			if (tree[amputee].right != EMPTY)
302 				tree[tree[amputee].right].parent = amputee;
303 		}
304 		/* Insert replacement in place of current */
305 		tree[replacement].parent = parent;
306 		if (parent == EMPTY)
307 		{
308 			tree[replacement].parent = EMPTY;
309 			cmap->ttop = replacement;
310 		}
311 		else if (tree[parent].left == current)
312 			tree[parent].left = replacement;
313 		else
314 		{
315 			assert(tree[parent].right == current);
316 			tree[parent].right = replacement;
317 		}
318 		tree[replacement].left = tree[current].left;
319 		if (tree[replacement].left != EMPTY)
320 			tree[tree[replacement].left].parent = replacement;
321 		tree[replacement].right = tree[current].right;
322 		if (tree[replacement].right != EMPTY)
323 			tree[tree[replacement].right].parent = replacement;
324 	}
325 
326 	/* current is now unlinked. We need to remove it from our array. */
327 	cmap->tlen--;
328 	if (current != (unsigned int) cmap->tlen)
329 	{
330 		if (replacement == (unsigned int) cmap->tlen)
331 			replacement = current;
332 		tree[current] = tree[cmap->tlen];
333 		parent = tree[current].parent;
334 		if (parent == EMPTY)
335 			cmap->ttop = current;
336 		else if (tree[parent].left == (unsigned int) cmap->tlen)
337 			tree[parent].left = current;
338 		else
339 		{
340 			assert(tree[parent].right == (unsigned int) cmap->tlen);
341 			tree[parent].right = current;
342 		}
343 		if (tree[current].left != EMPTY)
344 		{
345 			assert(tree[tree[current].left].parent == (unsigned int) cmap->tlen);
346 			tree[tree[current].left].parent = current;
347 		}
348 		if (tree[current].right != EMPTY)
349 		{
350 			assert(tree[tree[current].right].parent == (unsigned int) cmap->tlen);
351 			tree[tree[current].right].parent = current;
352 		}
353 	}
354 
355 	/* Return the node that we should continue searching from */
356 	return replacement;
357 }
358 
359 #ifdef DUMP_SPLAY
360 static void
dump_splay(cmap_splay * tree,unsigned int node,int depth,const char * pre)361 dump_splay(cmap_splay *tree, unsigned int node, int depth, const char *pre)
362 {
363 	int i;
364 
365 	if (tree == NULL || node == EMPTY)
366 		return;
367 
368 	for (i = 0; i < depth; i++)
369 		fprintf(stderr, " ");
370 	fprintf(stderr, "%s%d:", pre, node);
371 	if (tree[node].parent == EMPTY)
372 		fprintf(stderr, "^EMPTY");
373 	else
374 		fprintf(stderr, "^%d", tree[node].parent);
375 	if (tree[node].left == EMPTY)
376 		fprintf(stderr, "<EMPTY");
377 	else
378 		fprintf(stderr, "<%d", tree[node].left);
379 	if (tree[node].right == EMPTY)
380 		fprintf(stderr, ">EMPTY");
381 	else
382 		fprintf(stderr, ">%d", tree[node].right);
383 	fprintf(stderr, "(%x,%x,%x,%d)\n", tree[node].low, tree[node].high, tree[node].out, tree[node].many);
384 	assert(tree[node].parent == EMPTY || depth);
385 	assert(tree[node].left == EMPTY || tree[tree[node].left].parent == node);
386 	assert(tree[node].right == EMPTY || tree[tree[node].right].parent == node);
387 	dump_splay(tree, tree[node].left, depth+1, "L");
388 	dump_splay(tree, tree[node].right, depth+1, "R");
389 }
390 #endif
391 
392 enum
393 {
394 	TOP = 0,
395 	LEFT = 1,
396 	RIGHT = 2
397 };
398 
walk_splay(cmap_splay * tree,unsigned int node,void (* fn)(cmap_splay *,void *),void * arg)399 static void walk_splay(cmap_splay *tree, unsigned int node, void (*fn)(cmap_splay *, void *), void *arg)
400 {
401 	int from = TOP;
402 
403 	while (node != EMPTY)
404 	{
405 		switch (from)
406 		{
407 		case TOP:
408 			if (tree[node].left != EMPTY)
409 			{
410 				node = tree[node].left;
411 				from = TOP;
412 				break;
413 			}
414 			/* fallthrough */
415 		case LEFT:
416 			fn(&tree[node], arg);
417 			if (tree[node].right != EMPTY)
418 			{
419 				node = tree[node].right;
420 				from = TOP;
421 				break;
422 			}
423 			/* fallthrough */
424 		case RIGHT:
425 			{
426 				unsigned int parent = tree[node].parent;
427 				if (parent == EMPTY)
428 					return;
429 				if (tree[parent].left == node)
430 					from = LEFT;
431 				else
432 				{
433 					assert(tree[parent].right == node);
434 					from = RIGHT;
435 				}
436 				node = parent;
437 			}
438 		}
439 	}
440 }
441 
442 #ifdef CHECK_SPLAY
443 
444 static int
tree_has_overlap(cmap_splay * tree,int node,int low,int high)445 tree_has_overlap(cmap_splay *tree, int node, int low, int high)
446 {
447 	if (tree[node].left != EMPTY)
448 		if (tree_has_overlap(tree, tree[node].left, low, high))
449 			return 1;
450 	if (tree[node].right != EMPTY)
451 		if (tree_has_overlap(tree, tree[node].right, low, high))
452 			return 1;
453 	return (tree[node].low < low && low < tree[node].high) || (tree[node].low < high && high < tree[node].high);
454 }
455 
456 static void
do_check(cmap_splay * node,void * arg)457 do_check(cmap_splay *node, void *arg)
458 {
459 	cmap_splay *tree = arg;
460 	unsigned int num = node - tree;
461 	assert(!node->many || node->low == node->high);
462 	assert(node->low <= node->high);
463 	assert((node->left == EMPTY) || (tree[node->left].parent == num &&
464 		tree[node->left].high < node->low));
465 	assert(node->right == EMPTY || (tree[node->right].parent == num &&
466 		node->high < tree[node->right].low));
467 	assert(!tree_has_overlap(tree, num, node->low, node->high));
468 }
469 
470 static void
check_splay(cmap_splay * tree,unsigned int node,int depth)471 check_splay(cmap_splay *tree, unsigned int node, int depth)
472 {
473 	if (node == EMPTY)
474 		return;
475 	assert(tree[node].parent == EMPTY);
476 	walk_splay(tree, node, do_check, tree);
477 }
478 #endif
479 
480 /*
481  * Add a range.
482  */
483 static void
add_range(fz_context * ctx,pdf_cmap * cmap,unsigned int low,unsigned int high,unsigned int out,int check_for_overlap,int many)484 add_range(fz_context *ctx, pdf_cmap *cmap, unsigned int low, unsigned int high, unsigned int out, int check_for_overlap, int many)
485 {
486 	int current;
487 	cmap_splay *tree;
488 	int i;
489 	int inrange = 0;
490 	unsigned int k, count;
491 
492 	if (low > high)
493 	{
494 		fz_warn(ctx, "range limits out of range in cmap %s", cmap->cmap_name);
495 		return;
496 	}
497 
498 	count = high - low + 1;
499 	for (k = 0; k < count; k++) {
500 		unsigned int c = low + k;
501 
502 		inrange = 0;
503 		for (i = 0; i < cmap->codespace_len; i++) {
504 			if (cmap->codespace[i].low <= c && c <= cmap->codespace[i].high)
505 				inrange = 1;
506 		}
507 		if (!inrange)
508 		{
509 			fz_warn(ctx, "ignoring CMap range (%u-%u) that is outside of the codespace", low, high);
510 			return;
511 		}
512 	}
513 
514 	tree = cmap->tree;
515 
516 	if (cmap->tlen)
517 	{
518 		unsigned int move = cmap->ttop;
519 		unsigned int gt = EMPTY;
520 		unsigned int lt = EMPTY;
521 		if (check_for_overlap)
522 		{
523 			/* Check for collision with the current node */
524 			do
525 			{
526 				current = move;
527 				/* Cases we might meet:
528 				 * tree[i]:        <----->
529 				 * case 0:     <->
530 				 * case 1:     <------->
531 				 * case 2:     <------------->
532 				 * case 3:           <->
533 				 * case 4:           <------->
534 				 * case 5:                 <->
535 				 */
536 				if (low <= tree[current].low && tree[current].low <= high)
537 				{
538 					/* case 1, reduces to case 0 */
539 					/* or case 2, deleting the node */
540 					tree[current].out += high + 1 - tree[current].low;
541 					tree[current].low = high + 1;
542 					if (tree[current].low > tree[current].high)
543 					{
544 						/* update lt/gt references that will be moved/stale after deleting current */
545 						if (gt == (unsigned int) cmap->tlen - 1)
546 							gt = current;
547 						if (lt == (unsigned int) cmap->tlen - 1)
548 							lt = current;
549 						/* delete_node() moves the element at cmap->tlen-1 into current */
550 						move = delete_node(cmap, current);
551 						current = EMPTY;
552 						continue;
553 					}
554 				}
555 				else if (low <= tree[current].high && tree[current].high <= high)
556 				{
557 					/* case 4, reduces to case 5 */
558 					tree[current].high = low - 1;
559 					assert(tree[current].low <= tree[current].high);
560 				}
561 				else if (tree[current].low < low && high < tree[current].high)
562 				{
563 					/* case 3, reduces to case 5 */
564 					int new_high = tree[current].high;
565 					tree[current].high = low-1;
566 					add_range(ctx, cmap, high+1, new_high, tree[current].out + high + 1 - tree[current].low, 0, tree[current].many);
567 					tree = cmap->tree;
568 				}
569 				/* Now look for where to move to next (left for case 0, right for case 5) */
570 				if (tree[current].low > high) {
571 					move = tree[current].left;
572 					gt = current;
573 				}
574 				else
575 				{
576 					move = tree[current].right;
577 					lt = current;
578 				}
579 			}
580 			while (move != EMPTY);
581 		}
582 		else
583 		{
584 			do
585 			{
586 				current = move;
587 				if (tree[current].low > high)
588 				{
589 					move = tree[current].left;
590 					gt = current;
591 				}
592 				else
593 				{
594 					move = tree[current].right;
595 					lt = current;
596 				}
597 			} while (move != EMPTY);
598 		}
599 		/* current is now the node to which we would be adding the new node */
600 		/* lt is the last node we traversed which is lt the new node. */
601 		/* gt is the last node we traversed which is gt the new node. */
602 
603 		if (!many)
604 		{
605 			/* Check for the 'merge' cases. */
606 			if (lt != EMPTY && !tree[lt].many && tree[lt].high == low-1 && tree[lt].out - tree[lt].low == out - low)
607 			{
608 				tree[lt].high = high;
609 				if (gt != EMPTY && !tree[gt].many && tree[gt].low == high+1 && tree[gt].out - tree[gt].low == out - low)
610 				{
611 					tree[lt].high = tree[gt].high;
612 					delete_node(cmap, gt);
613 				}
614 				goto exit;
615 			}
616 			if (gt != EMPTY && !tree[gt].many && tree[gt].low == high+1 && tree[gt].out - tree[gt].low == out - low)
617 			{
618 				tree[gt].low = low;
619 				tree[gt].out = out;
620 				goto exit;
621 			}
622 		}
623 	}
624 	else
625 		current = EMPTY;
626 
627 	if (cmap->tlen == cmap->tcap)
628 	{
629 		int new_cap = cmap->tcap ? cmap->tcap * 2 : 256;
630 		tree = cmap->tree = fz_realloc_array(ctx, cmap->tree, new_cap, cmap_splay);
631 		cmap->tcap = new_cap;
632 	}
633 	tree[cmap->tlen].low = low;
634 	tree[cmap->tlen].high = high;
635 	tree[cmap->tlen].out = out;
636 	tree[cmap->tlen].parent = current;
637 	tree[cmap->tlen].left = EMPTY;
638 	tree[cmap->tlen].right = EMPTY;
639 	tree[cmap->tlen].many = many;
640 	cmap->tlen++;
641 	if (current == EMPTY)
642 		cmap->ttop = 0;
643 	else if (tree[current].low > high)
644 		tree[current].left = cmap->tlen-1;
645 	else
646 	{
647 		assert(tree[current].high < low);
648 		tree[current].right = cmap->tlen-1;
649 	}
650 	move_to_root(tree, cmap->tlen-1);
651 	cmap->ttop = cmap->tlen-1;
652 exit:
653 	{}
654 #ifdef CHECK_SPLAY
655 	check_splay(cmap->tree, cmap->ttop, 0);
656 #endif
657 #ifdef DUMP_SPLAY
658 	dump_splay(cmap->tree, cmap->ttop, 0, "");
659 #endif
660 }
661 
662 /*
663  * Add a one-to-many mapping.
664  */
665 static void
add_mrange(fz_context * ctx,pdf_cmap * cmap,unsigned int low,int * out,int len)666 add_mrange(fz_context *ctx, pdf_cmap *cmap, unsigned int low, int *out, int len)
667 {
668 	int out_pos;
669 
670 	if (cmap->dlen + len + 1 > cmap->dcap)
671 	{
672 		int new_cap = cmap->dcap ? cmap->dcap * 2 : 256;
673 		cmap->dict = fz_realloc_array(ctx, cmap->dict, new_cap, int);
674 		cmap->dcap = new_cap;
675 	}
676 	out_pos = cmap->dlen;
677 	cmap->dict[out_pos] = len;
678 	memcpy(&cmap->dict[out_pos+1], out, sizeof(int)*len);
679 	cmap->dlen += len + 1;
680 
681 	add_range(ctx, cmap, low, low, out_pos, 1, 1);
682 }
683 
684 void
pdf_map_range_to_range(fz_context * ctx,pdf_cmap * cmap,unsigned int low,unsigned int high,int out)685 pdf_map_range_to_range(fz_context *ctx, pdf_cmap *cmap, unsigned int low, unsigned int high, int out)
686 {
687 	add_range(ctx, cmap, low, high, out, 1, 0);
688 }
689 
690 void
pdf_map_one_to_many(fz_context * ctx,pdf_cmap * cmap,unsigned int low,int * values,size_t len)691 pdf_map_one_to_many(fz_context *ctx, pdf_cmap *cmap, unsigned int low, int *values, size_t len)
692 {
693 	if (len == 1)
694 	{
695 		add_range(ctx, cmap, low, low, values[0], 1, 0);
696 		return;
697 	}
698 
699 	/* Decode unicode surrogate pairs. */
700 	/* Only the *-UCS2 CMaps use one-to-many mappings, so assuming unicode should be safe. */
701 	if (len == 2 &&
702 		values[0] >= 0xD800 && values[0] <= 0xDBFF &&
703 		values[1] >= 0xDC00 && values[1] <= 0xDFFF)
704 	{
705 		int rune = ((values[0] - 0xD800) << 10) + (values[1] - 0xDC00) + 0x10000;
706 		add_range(ctx, cmap, low, low, rune, 1, 0);
707 		return;
708 	}
709 
710 	if (len > PDF_MRANGE_CAP)
711 	{
712 		fz_warn(ctx, "ignoring one to many mapping in cmap %s", cmap->cmap_name);
713 		return;
714 	}
715 
716 	add_mrange(ctx, cmap, low, values, (int)len);
717 }
718 
719 static void
count_node_types(cmap_splay * node,void * arg)720 count_node_types(cmap_splay *node, void *arg)
721 {
722 	int *counts = (int *)arg;
723 
724 	if (node->many)
725 		counts[2]++;
726 	else if (node->low <= 0xffff && node->high <= 0xFFFF && node->out <= 0xFFFF)
727 		counts[0]++;
728 	else
729 		counts[1]++;
730 }
731 
732 static void
copy_node_types(cmap_splay * node,void * arg)733 copy_node_types(cmap_splay *node, void *arg)
734 {
735 	pdf_cmap *cmap = (pdf_cmap *)arg;
736 
737 	if (node->many)
738 	{
739 		assert(node->low == node->high);
740 		cmap->mranges[cmap->mlen].low = node->low;
741 		cmap->mranges[cmap->mlen].out = node->out;
742 		cmap->mlen++;
743 	}
744 	else if (node->low <= 0xffff && node->high <= 0xFFFF && node->out <= 0xFFFF)
745 	{
746 		cmap->ranges[cmap->rlen].low = node->low;
747 		cmap->ranges[cmap->rlen].high = node->high;
748 		cmap->ranges[cmap->rlen].out = node->out;
749 		cmap->rlen++;
750 	}
751 	else
752 	{
753 		cmap->xranges[cmap->xlen].low = node->low;
754 		cmap->xranges[cmap->xlen].high = node->high;
755 		cmap->xranges[cmap->xlen].out = node->out;
756 		cmap->xlen++;
757 	}
758 }
759 
760 void
pdf_sort_cmap(fz_context * ctx,pdf_cmap * cmap)761 pdf_sort_cmap(fz_context *ctx, pdf_cmap *cmap)
762 {
763 	int counts[3];
764 
765 	if (cmap->tree == NULL)
766 		return;
767 
768 	counts[0] = 0;
769 	counts[1] = 0;
770 	counts[2] = 0;
771 	walk_splay(cmap->tree, cmap->ttop, count_node_types, &counts);
772 
773 	cmap->ranges = Memento_label(fz_malloc_array(ctx, counts[0], pdf_range), "cmap_range");
774 	cmap->rcap = counts[0];
775 	cmap->xranges = Memento_label(fz_malloc_array(ctx, counts[1], pdf_xrange), "cmap_xrange");
776 	cmap->xcap = counts[1];
777 	cmap->mranges = Memento_label(fz_malloc_array(ctx, counts[2], pdf_mrange), "cmap_mrange");
778 	cmap->mcap = counts[2];
779 
780 	walk_splay(cmap->tree, cmap->ttop, copy_node_types, cmap);
781 
782 	fz_free(ctx, cmap->tree);
783 	cmap->tree = NULL;
784 }
785 
786 int
pdf_lookup_cmap(pdf_cmap * cmap,unsigned int cpt)787 pdf_lookup_cmap(pdf_cmap *cmap, unsigned int cpt)
788 {
789 	pdf_range *ranges = cmap->ranges;
790 	pdf_xrange *xranges = cmap->xranges;
791 	int l, r, m;
792 
793 	l = 0;
794 	r = cmap->rlen - 1;
795 	while (l <= r)
796 	{
797 		m = (l + r) >> 1;
798 		if (cpt < ranges[m].low)
799 			r = m - 1;
800 		else if (cpt > ranges[m].high)
801 			l = m + 1;
802 		else
803 			return cpt - ranges[m].low + ranges[m].out;
804 	}
805 
806 	l = 0;
807 	r = cmap->xlen - 1;
808 	while (l <= r)
809 	{
810 		m = (l + r) >> 1;
811 		if (cpt < xranges[m].low)
812 			r = m - 1;
813 		else if (cpt > xranges[m].high)
814 			l = m + 1;
815 		else
816 			return cpt - xranges[m].low + xranges[m].out;
817 	}
818 
819 	if (cmap->usecmap)
820 		return pdf_lookup_cmap(cmap->usecmap, cpt);
821 
822 	return -1;
823 }
824 
825 int
pdf_lookup_cmap_full(pdf_cmap * cmap,unsigned int cpt,int * out)826 pdf_lookup_cmap_full(pdf_cmap *cmap, unsigned int cpt, int *out)
827 {
828 	pdf_range *ranges = cmap->ranges;
829 	pdf_xrange *xranges = cmap->xranges;
830 	pdf_mrange *mranges = cmap->mranges;
831 	unsigned int i;
832 	int l, r, m;
833 
834 	l = 0;
835 	r = cmap->rlen - 1;
836 	while (l <= r)
837 	{
838 		m = (l + r) >> 1;
839 		if (cpt < ranges[m].low)
840 			r = m - 1;
841 		else if (cpt > ranges[m].high)
842 			l = m + 1;
843 		else
844 		{
845 			out[0] = cpt - ranges[m].low + ranges[m].out;
846 			return 1;
847 		}
848 	}
849 
850 	l = 0;
851 	r = cmap->xlen - 1;
852 	while (l <= r)
853 	{
854 		m = (l + r) >> 1;
855 		if (cpt < xranges[m].low)
856 			r = m - 1;
857 		else if (cpt > xranges[m].high)
858 			l = m + 1;
859 		else
860 		{
861 			out[0] = cpt - xranges[m].low + xranges[m].out;
862 			return 1;
863 		}
864 	}
865 
866 	l = 0;
867 	r = cmap->mlen - 1;
868 	while (l <= r)
869 	{
870 		m = (l + r) >> 1;
871 		if (cpt < mranges[m].low)
872 			r = m - 1;
873 		else if (cpt > mranges[m].low)
874 			l = m + 1;
875 		else
876 		{
877 			int *ptr = &cmap->dict[cmap->mranges[m].out];
878 			unsigned int len = (unsigned int)*ptr++;
879 			for (i = 0; i < len; ++i)
880 				out[i] = *ptr++;
881 			return len;
882 		}
883 	}
884 
885 	if (cmap->usecmap)
886 		return pdf_lookup_cmap_full(cmap->usecmap, cpt, out);
887 
888 	return 0;
889 }
890 
891 int
pdf_decode_cmap(pdf_cmap * cmap,unsigned char * buf,unsigned char * end,unsigned int * cpt)892 pdf_decode_cmap(pdf_cmap *cmap, unsigned char *buf, unsigned char *end, unsigned int *cpt)
893 {
894 	unsigned int c;
895 	int k, n;
896 	int len = end - buf;
897 
898 	if (len > 4)
899 		len = 4;
900 
901 	c = 0;
902 	for (n = 0; n < len; n++)
903 	{
904 		c = (c << 8) | buf[n];
905 		for (k = 0; k < cmap->codespace_len; k++)
906 		{
907 			if (cmap->codespace[k].n == n + 1)
908 			{
909 				if (c >= cmap->codespace[k].low && c <= cmap->codespace[k].high)
910 				{
911 					*cpt = c;
912 					return n + 1;
913 				}
914 			}
915 		}
916 	}
917 
918 	*cpt = 0;
919 	return 1;
920 }
921 
922 size_t
pdf_cmap_size(fz_context * ctx,pdf_cmap * cmap)923 pdf_cmap_size(fz_context *ctx, pdf_cmap *cmap)
924 {
925 	if (cmap == NULL)
926 		return 0;
927 	if (cmap->storable.refs < 0)
928 		return 0;
929 
930 	return pdf_cmap_size(ctx, cmap->usecmap) +
931 		cmap->rcap * sizeof *cmap->ranges +
932 		cmap->xcap * sizeof *cmap->xranges +
933 		cmap->mcap * sizeof *cmap->mranges +
934 		cmap->tcap * sizeof *cmap->tree +
935 		sizeof(*cmap);
936 }
937