1 /* This is dvipdfmx, an eXtended version of dvipdfm by Mark A. Wicks.
2
3 Copyright (C) 2007-2014 by Jin-Hwan Cho and Shunsaku Hirata,
4 the dvipdfmx project team.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
19 */
20
/*
 * A large part of this code is taken from ttfdump-0.5.5.
 */
24
25 #ifdef HAVE_CONFIG_H
26 #include <config.h>
27 #endif
28
29 #include "system.h"
30 #include "mem.h"
31 #include "error.h"
32
33 #include "sfnt.h"
34
35
36 /* Sorry for placing this here.
37 * We need to rewrite TrueType font support code...
38 */
39 #include "cmap.h"
40 #include "cmap_write.h"
41
42 #include "tt_aux.h"
43 #include "tt_gsub.h"
44 #include "tt_post.h"
45
46 #include "unicode.h"
47 #include "agl.h"
48 #include "pdfparse.h"
49 #include "pdfresource.h"
50 #include "otl_conf.h"
51
52 #include "dpxfile.h"
53
54 /* Hash */
55 #include "dpxutil.h"
56
57 #include "tt_cmap.h"
58
/* Diagnostics in this file are printed only while
 * verbose > VERBOSE_LEVEL_MIN.
 */
#define VERBOSE_LEVEL_MIN 0
static int verbose = 0;

/* Raise the verbosity of this module by one step, after forwarding the
 * request to the GSUB code through otl_gsub_set_verbose().
 */
void
otf_cmap_set_verbose (void)
{
  otl_gsub_set_verbose();
  verbose += 1;
}
67
68 /* format 0: byte encoding table */
69 struct cmap0
70 {
71 BYTE glyphIndexArray[256];
72 };
73
74 static struct cmap0 *
read_cmap0(sfnt * sfont,ULONG len)75 read_cmap0 (sfnt *sfont, ULONG len)
76 {
77 struct cmap0 *map;
78 int i;
79
80 if (len < 256)
81 ERROR("invalid cmap subtable");
82
83 map = NEW(1, struct cmap0);
84
85 for (i = 0; i < 256; i++)
86 map->glyphIndexArray[i] = sfnt_get_byte(sfont);
87
88 return map;
89 }
90
/* Free a table obtained from read_cmap0(); a NULL pointer is ignored. */
static void
release_cmap0 (struct cmap0 *map)
{
  if (!map)
    return;

  RELEASE(map);
}
97
98 static USHORT
lookup_cmap0(struct cmap0 * map,USHORT cc)99 lookup_cmap0 (struct cmap0 *map, USHORT cc)
100 {
101 return ((cc > 255) ? 0 : map->glyphIndexArray[cc]);
102 }
103
104 /* format 2: high-byte mapping through table */
105 struct SubHeader
106 {
107 USHORT firstCode;
108 USHORT entryCount;
109 SHORT idDelta;
110 USHORT idRangeOffset;
111 };
112
113 struct cmap2
114 {
115 USHORT subHeaderKeys[256];
116 struct SubHeader *subHeaders;
117 USHORT *glyphIndexArray;
118 };
119
120 static struct cmap2 *
read_cmap2(sfnt * sfont,ULONG len)121 read_cmap2 (sfnt *sfont, ULONG len)
122 {
123 struct cmap2 *map;
124 USHORT i, n;
125
126 if (len < 512)
127 ERROR("invalid cmap subtable");
128
129 map = NEW(1, struct cmap2);
130
131 for (i = 0; i < 256; i++)
132 map->subHeaderKeys[i] = sfnt_get_ushort(sfont);
133
134 for (n = 0, i = 0; i < 256; i++) {
135 map->subHeaderKeys[i] /= 8;
136 if (n < map->subHeaderKeys[i])
137 n = map->subHeaderKeys[i];
138 }
139 n += 1; /* the number of subHeaders is one plus the max of subHeaderKeys */
140
141 map->subHeaders = NEW(n, struct SubHeader);
142 for (i = 0; i < n; i++) {
143 map->subHeaders[i].firstCode = sfnt_get_ushort(sfont);
144 map->subHeaders[i].entryCount = sfnt_get_ushort(sfont);
145 map->subHeaders[i].idDelta = sfnt_get_short(sfont);
146 map->subHeaders[i].idRangeOffset = sfnt_get_ushort(sfont);
147
148 /* It makes things easier to let the offset starts from
149 * the beginning of glyphIndexArray.
150 */
151 if (map->subHeaders[i].idRangeOffset != 0)
152 map->subHeaders[i].idRangeOffset -= (2 + (n - i - 1) * 8);
153 }
154
155 /* Caculate the length of glyphIndexArray, this is ugly,
156 * there should be a better way to get this information.
157 */
158 n = (USHORT) (len - 518 - n * 8) / 2;
159
160 map->glyphIndexArray = NEW(n, USHORT);
161 for (i = 0; i < n; i++)
162 map->glyphIndexArray[i] = sfnt_get_ushort(sfont);
163
164 return map;
165 }
166
167 static void
release_cmap2(struct cmap2 * map)168 release_cmap2 (struct cmap2 *map)
169 {
170 if (map) {
171 if (map->subHeaders)
172 RELEASE(map->subHeaders);
173 if (map->glyphIndexArray)
174 RELEASE(map->glyphIndexArray);
175 RELEASE(map);
176 }
177 }
178
179 static USHORT
lookup_cmap2(struct cmap2 * map,USHORT cc)180 lookup_cmap2 (struct cmap2 *map, USHORT cc)
181 {
182 USHORT idx = 0;
183 SHORT idDelta;
184 USHORT firstCode, entryCount, idRangeOffset;
185 int hi, lo;
186 USHORT i;
187
188 hi = (cc >> 8) & 0xff;
189 lo = cc & 0xff;
190
191 /* select which subHeader to use */
192 i = map->subHeaderKeys[hi];
193
194 firstCode = map->subHeaders[i].firstCode;
195 entryCount = map->subHeaders[i].entryCount;
196 idDelta = map->subHeaders[i].idDelta;
197 idRangeOffset = map->subHeaders[i].idRangeOffset / 2;
198
199 if (lo >= firstCode &&
200 lo < firstCode + entryCount) {
201 idRangeOffset += lo - firstCode;
202 idx = map->glyphIndexArray[idRangeOffset];
203 if (idx != 0)
204 idx = (idx + idDelta) & 0xffff;
205 }
206
207 return idx;
208 }
209
210 /*
211 * format 4: segment mapping to delta values
212 * - Microsoft standard character to glyph index mapping table
213 */
214 struct cmap4
215 {
216 USHORT segCountX2;
217 USHORT searchRange;
218 USHORT entrySelector;
219 USHORT rangeShift;
220 USHORT *endCount;
221 USHORT reservedPad;
222 USHORT *startCount;
223 USHORT *idDelta;
224 USHORT *idRangeOffset;
225 USHORT *glyphIndexArray;
226 };
227
228 static struct cmap4 *
read_cmap4(sfnt * sfont,ULONG len)229 read_cmap4(sfnt *sfont, ULONG len)
230 {
231 struct cmap4 *map;
232 USHORT i, n, segCount;
233
234 if (len < 8)
235 ERROR("invalid cmap subtable");
236
237 map = NEW(1, struct cmap4);
238
239 map->segCountX2 = segCount = sfnt_get_ushort(sfont);
240 map->searchRange = sfnt_get_ushort(sfont);
241 map->entrySelector = sfnt_get_ushort(sfont);
242 map->rangeShift = sfnt_get_ushort(sfont);
243
244 segCount /= 2;
245
246 map->endCount = NEW(segCount, USHORT);
247 for (i = 0; i < segCount; i++)
248 map->endCount[i] = sfnt_get_ushort(sfont);
249
250 map->reservedPad = sfnt_get_ushort(sfont);
251
252 map->startCount = NEW(segCount, USHORT);
253 for (i = 0; i < segCount; i++)
254 map->startCount[i] = sfnt_get_ushort(sfont);
255
256 map->idDelta = NEW(segCount, USHORT);
257 for (i = 0; i < segCount; i++)
258 map->idDelta[i] = sfnt_get_ushort(sfont);
259
260 map->idRangeOffset = NEW(segCount, USHORT);
261 for (i = 0; i < segCount; i++)
262 map->idRangeOffset[i] = sfnt_get_ushort(sfont);
263
264 n = (len - 16 - 8 * segCount) / 2;
265 if (n == 0)
266 map->glyphIndexArray = NULL;
267 else {
268 map->glyphIndexArray = NEW(n, USHORT);
269 for (i = 0; i < n; i++)
270 map->glyphIndexArray[i] = sfnt_get_ushort(sfont);
271 }
272
273 return map;
274 }
275
276 static void
release_cmap4(struct cmap4 * map)277 release_cmap4 (struct cmap4 *map)
278 {
279 if (map) {
280 if (map->endCount) RELEASE(map->endCount);
281 if (map->startCount) RELEASE(map->startCount);
282 if (map->idDelta) RELEASE(map->idDelta);
283 if (map->idRangeOffset) RELEASE(map->idRangeOffset);
284 if (map->glyphIndexArray) RELEASE(map->glyphIndexArray);
285 RELEASE(map);
286 }
287 }
288
289 static USHORT
lookup_cmap4(struct cmap4 * map,USHORT cc)290 lookup_cmap4 (struct cmap4 *map, USHORT cc)
291 {
292 USHORT gid = 0;
293 USHORT i, j, segCount;
294
295 /*
296 * Segments are sorted in order of increasing endCode values.
297 * Last segment maps 0xffff to gid 0 (?)
298 */
299 i = segCount = map->segCountX2 / 2;
300 while (i-- > 0 && cc <= map->endCount[i]) {
301 if (cc >= map->startCount[i]) {
302 if (map->idRangeOffset[i] == 0) {
303 gid = (cc + map->idDelta[i]) & 0xffff;
304 } else if (cc == 0xffff && map->idRangeOffset[i] == 0xffff) {
305 /* this is for protection against some old broken fonts... */
306 gid = 0;
307 } else {
308 j = map->idRangeOffset[i] - (segCount - i) * 2;
309 j = (cc - map->startCount[i]) + (j / 2);
310 gid = map->glyphIndexArray[j];
311 if (gid != 0)
312 gid = (gid + map->idDelta[i]) & 0xffff;
313 }
314 break;
315 }
316 }
317
318 return gid;
319 }
320
321 /* format 6: trimmed table mapping */
322 struct cmap6
323 {
324 USHORT firstCode;
325 USHORT entryCount;
326 USHORT *glyphIndexArray;
327 };
328
329 static struct cmap6 *
read_cmap6(sfnt * sfont,ULONG len)330 read_cmap6 (sfnt *sfont, ULONG len)
331 {
332 struct cmap6 *map;
333 USHORT i;
334
335 if (len < 4)
336 ERROR("invalid cmap subtable");
337
338 map = NEW(1, struct cmap6);
339 map->firstCode = sfnt_get_ushort(sfont);
340 map->entryCount = sfnt_get_ushort(sfont);
341 map->glyphIndexArray = NEW(map->entryCount, USHORT);
342
343 for (i = 0; i < map->entryCount; i++)
344 map->glyphIndexArray[i] = sfnt_get_ushort(sfont);
345
346 return map;
347 }
348
349 static void
release_cmap6(struct cmap6 * map)350 release_cmap6 (struct cmap6 *map)
351 {
352 if (map) {
353 if (map->glyphIndexArray)
354 RELEASE(map->glyphIndexArray);
355 RELEASE(map);
356 }
357 }
358
359 static USHORT
lookup_cmap6(struct cmap6 * map,USHORT cc)360 lookup_cmap6 (struct cmap6 *map, USHORT cc)
361 {
362 USHORT idx;
363
364 idx = cc - map->firstCode;
365 if (idx < map->entryCount)
366 return map->glyphIndexArray[idx];
367 return 0;
368 }
369
370 /* Format 8 and 10 not supported...
371 *
372 * format 8: mixed 16-bit and 32-bit coverage
373 * format 10: trimmed array
374 */
375
376 /*
377 * format 12: segmented coverage
378 *
379 * startGlyphID is 32-bit long, however, GlyphID is still 16-bit long !
380 */
381
382 struct charGroup
383 {
384 ULONG startCharCode;
385 ULONG endCharCode;
386 ULONG startGlyphID;
387 };
388
389 struct cmap12
390 {
391 ULONG nGroups;
392 struct charGroup *groups;
393 };
394
395 /* ULONG length */
396 static struct cmap12 *
read_cmap12(sfnt * sfont,ULONG len)397 read_cmap12 (sfnt *sfont, ULONG len)
398 {
399 struct cmap12 *map;
400 ULONG i;
401
402 if (len < 4)
403 ERROR("invalid cmap subtable");
404
405 map = NEW(1, struct cmap12);
406 map->nGroups = sfnt_get_ulong(sfont);
407 map->groups = NEW(map->nGroups, struct charGroup);
408
409 for (i = 0; i < map->nGroups; i++) {
410 map->groups[i].startCharCode = sfnt_get_ulong(sfont);
411 map->groups[i].endCharCode = sfnt_get_ulong(sfont);
412 map->groups[i].startGlyphID = sfnt_get_ulong(sfont);
413 }
414
415 return map;
416 }
417
418 static void
release_cmap12(struct cmap12 * map)419 release_cmap12 (struct cmap12 *map)
420 {
421 if (map) {
422 if (map->groups)
423 RELEASE(map->groups);
424 RELEASE(map);
425 }
426 }
427
428 static USHORT
lookup_cmap12(struct cmap12 * map,ULONG cccc)429 lookup_cmap12 (struct cmap12 *map, ULONG cccc)
430 {
431 USHORT gid = 0;
432 int i;
433
434 i = map->nGroups;
435 while (i-- >= 0 &&
436 cccc <= map->groups[i].endCharCode) {
437 if (cccc >= map->groups[i].startCharCode) {
438 gid = (USHORT) ((cccc -
439 map->groups[i].startCharCode +
440 map->groups[i].startGlyphID) & 0xffff);
441 break;
442 }
443 }
444
445 return gid;
446 }
447
448 /* read cmap */
449 tt_cmap *
tt_cmap_read(sfnt * sfont,USHORT platform,USHORT encoding)450 tt_cmap_read (sfnt *sfont, USHORT platform, USHORT encoding)
451 {
452 tt_cmap *cmap = NULL;
453 ULONG offset, length = 0;
454 USHORT p_id, e_id;
455 USHORT i, n_subtabs;
456
457 ASSERT(sfont);
458
459 offset = sfnt_locate_table(sfont, "cmap");
460 (void) sfnt_get_ushort(sfont);
461 n_subtabs = sfnt_get_ushort(sfont);
462
463 for (i = 0; i < n_subtabs; i++) {
464 p_id = sfnt_get_ushort(sfont);
465 e_id = sfnt_get_ushort(sfont);
466 if (p_id != platform || e_id != encoding)
467 sfnt_get_ulong(sfont);
468 else {
469 offset += sfnt_get_ulong(sfont);
470 break;
471 }
472 }
473
474 if (i == n_subtabs)
475 return NULL;
476
477 cmap = NEW(1, tt_cmap);
478 cmap->map = NULL;
479 cmap->platform = platform;
480 cmap->encoding = encoding;
481
482 sfnt_seek_set(sfont, offset);
483 cmap->format = sfnt_get_ushort(sfont);
484 /* Length and version (language) is ULONG for
485 * format 8, 10, 12 !
486 */
487 if (cmap->format <= 6) {
488 length = sfnt_get_ushort(sfont);
489 cmap->language = sfnt_get_ushort(sfont); /* language (Mac) */
490 } else {
491 if (sfnt_get_ushort(sfont) != 0) { /* reverved - 0 */
492 WARN("Unrecognized cmap subtable format.");
493 tt_cmap_release(cmap);
494 return NULL;
495 } else {
496 length = sfnt_get_ulong(sfont);
497 cmap->language = sfnt_get_ulong(sfont);
498 }
499 }
500
501 switch(cmap->format) {
502 case 0:
503 cmap->map = read_cmap0(sfont, length);
504 break;
505 case 2:
506 cmap->map = read_cmap2(sfont, length);
507 break;
508 case 4:
509 cmap->map = read_cmap4(sfont, length);
510 break;
511 case 6:
512 cmap->map = read_cmap6(sfont, length);
513 break;
514 case 12:
515 /* WARN("UCS-4 TrueType cmap table..."); */
516 cmap->map = read_cmap12(sfont, length);
517 break;
518 default:
519 WARN("Unrecognized OpenType/TrueType cmap format.");
520 tt_cmap_release(cmap);
521 return NULL;
522 }
523
524 if (!cmap->map) {
525 tt_cmap_release(cmap);
526 cmap = NULL;
527 }
528
529 return cmap;
530 }
531
532 void
tt_cmap_release(tt_cmap * cmap)533 tt_cmap_release (tt_cmap *cmap)
534 {
535
536 if (cmap) {
537 if (cmap->map) {
538 switch(cmap->format) {
539 case 0:
540 release_cmap0(cmap->map);
541 break;
542 case 2:
543 release_cmap2(cmap->map);
544 break;
545 case 4:
546 release_cmap4(cmap->map);
547 break;
548 case 6:
549 release_cmap6(cmap->map);
550 break;
551 case 12:
552 release_cmap12(cmap->map);
553 break;
554 default:
555 ERROR("Unrecognized OpenType/TrueType cmap format.");
556 }
557 }
558 RELEASE(cmap);
559 }
560
561 return;
562 }
563
564
565 USHORT
tt_cmap_lookup(tt_cmap * cmap,long cc)566 tt_cmap_lookup (tt_cmap *cmap, long cc)
567 {
568 USHORT gid = 0;
569
570 ASSERT(cmap);
571
572 if (cc > 0xffffL && cmap->format < 12) {
573 WARN("Four bytes charcode not supported in OpenType/TrueType cmap format 0...6.");
574 return 0;
575 }
576
577 switch (cmap->format) {
578 case 0:
579 gid = lookup_cmap0(cmap->map, (USHORT) cc);
580 break;
581 case 2:
582 gid = lookup_cmap2(cmap->map, (USHORT) cc);
583 break;
584 case 4:
585 gid = lookup_cmap4(cmap->map, (USHORT) cc);
586 break;
587 case 6:
588 gid = lookup_cmap6(cmap->map, (USHORT) cc);
589 break;
590 case 12:
591 gid = lookup_cmap12(cmap->map, (ULONG) cc);
592 break;
593 default:
594 ERROR("Unrecognized OpenType/TrueType cmap subtable format");
595 break;
596 }
597
598 return gid;
599 }
600
601 /* Sorry for placing this here.
602 * We need to rewrite TrueType font support code...
603 */
604
/* Scratch buffer shared by the CMap builders below: the source code is
 * written at the front and, for bfchar entries, the destination bytes
 * follow it.
 */
#define WBUF_SIZE 1024
static unsigned char wbuf[WBUF_SIZE];

/* Bounds of the 2-byte and 4-byte code space ranges. */
static unsigned char srange_min[2] = {0x00, 0x00};
static unsigned char srange_max[2] = {0xff, 0xff};
static unsigned char lrange_min[4] = {0x00, 0x00, 0x00, 0x00};
static unsigned char lrange_max[4] = {0x7f, 0xff, 0xff, 0xff};
612
613 static void
load_cmap4(struct cmap4 * map,unsigned char * GIDToCIDMap,CMap * cmap)614 load_cmap4 (struct cmap4 *map,
615 unsigned char *GIDToCIDMap, CMap *cmap)
616 {
617 USHORT c0, c1, gid, cid;
618 USHORT j, d, segCount;
619 USHORT ch;
620 long i;
621
622 segCount = map->segCountX2 / 2;
623 for (i = segCount - 1; i >= 0 ; i--) {
624 c0 = map->startCount[i];
625 c1 = map->endCount[i];
626 d = map->idRangeOffset[i] / 2 - (segCount - i);
627 for (j = 0; j <= c1 - c0; j++) {
628 ch = c0 + j;
629 if (map->idRangeOffset[i] == 0) {
630 gid = (ch + map->idDelta[i]) & 0xffff;
631 } else if (c0 == 0xffff && c1 == 0xffff && map->idRangeOffset[i] == 0xffff) {
632 /* this is for protection against some old broken fonts... */
633 gid = 0;
634 } else {
635 gid = (map->glyphIndexArray[j+d] +
636 map->idDelta[i]) & 0xffff;
637 }
638 if (gid != 0 && gid != 0xffff) {
639 if (GIDToCIDMap) {
640 cid = ((GIDToCIDMap[2*gid] << 8)|GIDToCIDMap[2*gid+1]);
641 if (cid == 0)
642 WARN("GID %u does not have corresponding CID %u.",
643 gid, cid);
644 } else {
645 cid = gid;
646 }
647 wbuf[0] = 0;
648 wbuf[1] = 0;
649 wbuf[2] = (ch >> 8) & 0xff;
650 wbuf[3] = ch & 0xff;
651 CMap_add_cidchar(cmap, wbuf, 4, cid);
652 }
653 }
654 }
655
656 return;
657 }
658
659 static void
load_cmap12(struct cmap12 * map,unsigned char * GIDToCIDMap,CMap * cmap)660 load_cmap12 (struct cmap12 *map,
661 unsigned char *GIDToCIDMap, CMap *cmap)
662 {
663 ULONG i, ch; /* LONG ? */
664 USHORT gid, cid;
665
666 for (i = 0; i < map->nGroups; i++) {
667 for (ch = map->groups[i].startCharCode;
668 ch <= map->groups[i].endCharCode;
669 ch++) {
670 long d = ch - map->groups[i].startCharCode;
671 gid = (USHORT) ((map->groups[i].startGlyphID + d) & 0xffff);
672 if (GIDToCIDMap) {
673 cid = ((GIDToCIDMap[2*gid] << 8)|GIDToCIDMap[2*gid+1]);
674 if (cid == 0)
675 WARN("GID %u does not have corresponding CID %u.", gid, cid);
676 } else {
677 cid = gid;
678 }
679 wbuf[0] = (ch >> 24) & 0xff;
680 wbuf[1] = (ch >> 16) & 0xff;
681 wbuf[2] = (ch >> 8) & 0xff;
682 wbuf[3] = ch & 0xff;
683 CMap_add_cidchar(cmap, wbuf, 4, cid);
684 }
685 }
686
687 return;
688 }
689
690 /* OpenType CIDFont:
691 *
692 * We don't use GID for them. OpenType cmap table is for
693 * charcode to GID mapping rather than to-CID mapping.
694 */
695 #include "cid.h"
696
697 #include "tt_table.h"
698 #include "cff_types.h"
699 #include "cff_dict.h"
700 #include "cff.h"
701
702 static int
handle_CIDFont(sfnt * sfont,unsigned char ** GIDToCIDMap,CIDSysInfo * csi)703 handle_CIDFont (sfnt *sfont,
704 unsigned char **GIDToCIDMap, CIDSysInfo *csi)
705 {
706 cff_font *cffont;
707 long offset, i;
708 card16 num_glyphs, gid;
709 cff_charsets *charset;
710 unsigned char *map;
711 struct tt_maxp_table *maxp;
712
713 ASSERT(csi);
714
715 offset = sfnt_find_table_pos(sfont, "CFF ");
716 if (offset == 0) {
717 csi->registry = NULL;
718 csi->ordering = NULL;
719 *GIDToCIDMap = NULL;
720 return 0;
721 }
722
723 maxp = tt_read_maxp_table(sfont);
724 num_glyphs = (card16) maxp->numGlyphs;
725 RELEASE(maxp);
726 if (num_glyphs < 1)
727 ERROR("No glyph contained in this font...");
728
729 cffont = cff_open(sfont->stream, offset, 0);
730 if (!cffont)
731 ERROR("Could not open CFF font...");
732
733
734 if (!(cffont->flag & FONTTYPE_CIDFONT)) {
735 cff_close(cffont);
736 csi->registry = NULL;
737 csi->ordering = NULL;
738 *GIDToCIDMap = NULL;
739 return 0;
740 }
741
742 if (!cff_dict_known(cffont->topdict, "ROS")) {
743 ERROR("No CIDSystemInfo???");
744 } else {
745 card16 reg, ord;
746
747 reg = (card16) cff_dict_get(cffont->topdict, "ROS", 0);
748 ord = (card16) cff_dict_get(cffont->topdict, "ROS", 1);
749
750 csi->registry = cff_get_string(cffont, reg);
751 csi->ordering = cff_get_string(cffont, ord);
752 csi->supplement = (int) cff_dict_get(cffont->topdict, "ROS", 2);
753 }
754
755 cff_read_charsets(cffont);
756 charset = cffont->charsets;
757 if (!charset) {
758 ERROR("No CFF charset data???");
759 }
760
761 map = NEW(num_glyphs * 2, unsigned char);
762 memset(map, 0, num_glyphs * 2);
763 switch (charset->format) {
764 case 0:
765 {
766 s_SID *cids; /* CID... */
767
768 cids = charset->data.glyphs;
769 for (gid = 1, i = 0;
770 i < charset->num_entries; i++) {
771 map[2*gid ] = (cids[i] >> 8) & 0xff;
772 map[2*gid+1] = cids[i] & 0xff;
773 gid++;
774 }
775 }
776 break;
777 case 1:
778 {
779 cff_range1 *ranges;
780 card16 cid, count;
781
782 ranges = charset->data.range1;
783 for (gid = 1, i = 0;
784 i < charset->num_entries; i++) {
785 cid = ranges[i].first;
786 count = ranges[i].n_left + 1; /* card8 */
787 while (count-- > 0 &&
788 gid <= num_glyphs) {
789 map[2*gid ] = (cid >> 8) & 0xff;
790 map[2*gid + 1] = cid & 0xff;
791 gid++; cid++;
792 }
793 }
794 }
795 break;
796 case 2:
797 {
798 cff_range2 *ranges;
799 card16 cid, count;
800
801 ranges = charset->data.range2;
802 if (charset->num_entries == 1 &&
803 ranges[0].first == 1) {
804 /* "Complete" CIDFont */
805 RELEASE(map); map = NULL;
806 } else {
807 /* Not trivial mapping */
808 for (gid = 1, i = 0;
809 i < charset->num_entries; i++) {
810 cid = ranges[i].first;
811 count = ranges[i].n_left + 1;
812 while (count-- > 0 &&
813 gid <= num_glyphs) {
814 map[gid] = (cid >> 8) & 0xff;
815 map[gid] = cid & 0xff;
816 gid++; cid++;
817 }
818 }
819
820 }
821 }
822 break;
823 default:
824 RELEASE(map); map = NULL;
825 ERROR("Unknown CFF charset format...: %d", charset->format);
826 break;
827 }
828 cff_close(cffont);
829
830 *GIDToCIDMap = map;
831 return 1;
832 }
833
/* Return 1 when `uni` lies in one of the ranges U+E000..U+F8FF,
 * U+FB00..U+FB4F, U+F0000..U+FFFFD or U+100000..U+10FFFD, and 0
 * otherwise.
 */
static int
is_PUA_or_presentation (unsigned int uni)
{
  if (uni >= 0xE000 && uni <= 0xF8FF)
    return 1;
  if (uni >= 0xFB00 && uni <= 0xFB4F)
    return 1;
  if (uni >= 0xF0000 && uni <= 0xFFFFD)
    return 1;
  if (uni >= 0x100000 && uni <= 0x10FFFD)
    return 1;

  return 0;
}
839
840 static char*
sfnt_get_glyphname(struct tt_post_table * post,cff_font * cffont,USHORT gid)841 sfnt_get_glyphname(struct tt_post_table *post, cff_font *cffont, USHORT gid)
842 {
843 char* name = NULL;
844
845 if (post)
846 name = tt_get_glyphname(post, gid);
847
848 if (!name && cffont)
849 name = cff_get_glyphname(cffont, gid);
850
851 return name;
852 }
853
854 /*
855 * Substituted glyphs:
856 *
857 * Mapping information stored in cmap_add.
858 */
#ifndef is_used_char2
/* Non-zero when bit `c` is set in bitmap `b`; bit 0 is the most
 * significant bit of b[0].  `c` is expected to be non-negative.
 */
#define is_used_char2(b,c) (((b)[(c) / 8]) & (0x80 >> ((c) % 8)))
#endif
862
863 static USHORT
handle_subst_glyphs(CMap * cmap,CMap * cmap_add,const char * used_glyphs,sfnt * sfont,cff_font * cffont)864 handle_subst_glyphs (CMap *cmap,
865 CMap *cmap_add,
866 const char *used_glyphs,
867 sfnt *sfont,
868 cff_font *cffont)
869 {
870 USHORT count;
871 USHORT i;
872 struct tt_post_table *post = NULL;
873
874 if (!cmap_add)
875 post = tt_read_post_table(sfont);
876
877 for (count = 0, i = 0; i < 8192; i++) {
878 int j;
879 long len, inbytesleft, outbytesleft;
880 const unsigned char *inbuf;
881 unsigned char *outbuf;
882
883 if (used_glyphs[i] == 0)
884 continue;
885
886 for (j = 0; j < 8; j++) {
887 USHORT gid = 8 * i + j;
888
889 if (!is_used_char2(used_glyphs, gid))
890 continue;
891
892 if (!cmap_add) {
893 #define MAX_UNICODES 16
894 /* try to look up Unicode values from the glyph name... */
895 char* name;
896 long unicodes[MAX_UNICODES];
897 int unicode_count = -1;
898 name = sfnt_get_glyphname(post, cffont, gid);
899 if (name) {
900 unicode_count = agl_get_unicodes(name, unicodes, MAX_UNICODES);
901 }
902 #undef MAX_UNICODES
903 if (unicode_count == -1) {
904 if (name)
905 MESG("No Unicode mapping available: GID=%u, name=%s\n", gid, name);
906 else
907 MESG("No Unicode mapping available: GID=%u\n", gid);
908 } else {
909 /* the Unicode characters go into wbuf[2] and following, in UTF16BE */
910 /* we rely on WBUF_SIZE being more than adequate for MAX_UNICODES */
911 unsigned char* p = wbuf + 2;
912 int k;
913 len = 0;
914 for (k = 0; k < unicode_count; ++k) {
915 len += UC_sput_UTF16BE(unicodes[k], &p, wbuf+WBUF_SIZE);
916 }
917 wbuf[0] = (gid >> 8) & 0xff;
918 wbuf[1] = gid & 0xff;
919 CMap_add_bfchar(cmap, wbuf, 2, wbuf + 2, len);
920 }
921 RELEASE(name);
922 } else {
923 wbuf[0] = (gid >> 8) & 0xff;
924 wbuf[1] = gid & 0xff;
925
926 inbuf = wbuf;
927 inbytesleft = 2;
928 outbuf = wbuf + 2;
929 outbytesleft = WBUF_SIZE - 2;
930 CMap_decode(cmap_add, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
931
932 if (inbytesleft != 0) {
933 WARN("CMap conversion failed...");
934 } else {
935 len = WBUF_SIZE - 2 - outbytesleft;
936 CMap_add_bfchar(cmap, wbuf, 2, wbuf + 2, len);
937 count++;
938
939 if (verbose > VERBOSE_LEVEL_MIN) {
940 long _i;
941
942 MESG("otf_cmap>> Additional ToUnicode mapping: <%04X> <", gid);
943 for (_i = 0; _i < len; _i++) {
944 MESG("%02X", wbuf[2 + _i]);
945 }
946 MESG(">\n");
947 }
948 }
949 }
950 }
951 }
952
953 if (post)
954 tt_release_post_table(post);
955
956 return count;
957 }
958
959 static cff_font *
prepare_CIDFont_from_sfnt(sfnt * sfont)960 prepare_CIDFont_from_sfnt(sfnt* sfont)
961 {
962 cff_font *cffont;
963 unsigned long offset = 0;
964
965 if (sfont->type != SFNT_TYPE_POSTSCRIPT ||
966 sfnt_read_table_directory(sfont, 0) < 0 ||
967 (offset = sfnt_find_table_pos(sfont, "CFF ")) == 0) {
968 return NULL;
969 }
970
971 cffont = cff_open(sfont->stream, offset, 0);
972 if (!cffont)
973 return NULL;
974
975 cff_read_charsets(cffont);
976 return cffont;
977 }
978
979 static USHORT
add_to_cmap_if_used(CMap * cmap,cff_font * cffont,char * used_chars,USHORT gid,ULONG ch)980 add_to_cmap_if_used (CMap *cmap,
981 cff_font *cffont,
982 char *used_chars,
983 USHORT gid,
984 ULONG ch)
985 {
986 USHORT count = 0;
987 USHORT cid = cffont ? cff_charsets_lookup_inverse(cffont, gid) : gid;
988 if (is_used_char2(used_chars, cid)) {
989 int len;
990 unsigned char *p = wbuf + 2;
991
992 count++;
993
994 wbuf[0] = (cid >> 8) & 0xff;
995 wbuf[1] = (cid & 0xff);
996 len = UC_sput_UTF16BE((long) ch, &p, wbuf + WBUF_SIZE);
997 CMap_add_bfchar(cmap, wbuf, 2, wbuf + 2, len);
998
999 /* Skip PUA characters and alphabetic presentation forms, allowing
1000 * handle_subst_glyphs() as it might find better mapping. Fixes the
1001 * mapping of ligatures encoded in PUA in fonts like Linux Libertine
1002 * and old Adobe fonts.
1003 */
1004 if (!is_PUA_or_presentation(ch)) {
1005 /* Avoid duplicate entry
1006 * There are problem when two Unicode code is mapped to
1007 * single glyph...
1008 */
1009 used_chars[cid / 8] &= ~(1 << (7 - (cid % 8)));
1010 }
1011 }
1012
1013 return count;
1014 }
1015
1016 static USHORT
create_ToUnicode_cmap4(CMap * cmap,struct cmap4 * map,char * used_chars,cff_font * cffont)1017 create_ToUnicode_cmap4 (CMap *cmap,
1018 struct cmap4 *map,
1019 char *used_chars,
1020 cff_font *cffont)
1021 {
1022 USHORT count = 0, segCount = map->segCountX2 / 2;
1023 USHORT i, j;
1024
1025 for (i = 0; i < segCount; i++) {
1026 USHORT c0 = map->startCount[i];
1027 USHORT c1 = map->endCount[i];
1028 USHORT d = map->idRangeOffset[i] / 2 - (segCount - i);
1029 for (j = 0; j <= c1 - c0; j++) {
1030 USHORT ch = c0 + j;
1031 USHORT gid;
1032
1033 if (map->idRangeOffset[i] == 0) {
1034 gid = (ch + map->idDelta[i]) & 0xffff;
1035 } else if (c0 == 0xffff && c1 == 0xffff && map->idRangeOffset[i] == 0xffff) {
1036 /* this is for protection against some old broken fonts... */
1037 gid = 0;
1038 } else {
1039 gid = (map->glyphIndexArray[j + d] + map->idDelta[i]) & 0xffff;
1040 }
1041
1042 count += add_to_cmap_if_used(cmap, cffont, used_chars, gid, ch);
1043 }
1044 }
1045
1046 return count;
1047 }
1048
1049 static USHORT
create_ToUnicode_cmap12(CMap * cmap,struct cmap12 * map,char * used_chars,cff_font * cffont)1050 create_ToUnicode_cmap12 (CMap *cmap,
1051 struct cmap12 *map,
1052 char *used_chars,
1053 cff_font *cffont)
1054 {
1055 ULONG i, ch, count = 0;
1056
1057 for (i = 0; i < map->nGroups; i++) {
1058 for (ch = map->groups[i].startCharCode;
1059 ch <= map->groups[i].endCharCode; ch++) {
1060 long d = ch - map->groups[i].startCharCode;
1061 USHORT gid = (USHORT) ((map->groups[i].startGlyphID + d) & 0xffff);
1062 count += add_to_cmap_if_used(cmap, cffont, used_chars, gid, ch);
1063 }
1064 }
1065
1066 return count;
1067 }
1068
1069 static pdf_obj *
create_ToUnicode_cmap(tt_cmap * ttcmap,const char * cmap_name,CMap * cmap_add,const char * used_chars,sfnt * sfont,CMap * code_to_cid_cmap)1070 create_ToUnicode_cmap (tt_cmap *ttcmap,
1071 const char *cmap_name,
1072 CMap *cmap_add,
1073 const char *used_chars,
1074 sfnt *sfont,
1075 CMap *code_to_cid_cmap)
1076 {
1077 pdf_obj *stream = NULL;
1078 CMap *cmap;
1079 USHORT count = 0;
1080 cff_font *cffont = prepare_CIDFont_from_sfnt(sfont);
1081 char is_cidfont = cffont && (cffont->flag & FONTTYPE_CIDFONT);
1082
1083 cmap = CMap_new();
1084 CMap_set_name (cmap, cmap_name);
1085 CMap_set_wmode(cmap, 0);
1086 CMap_set_type (cmap, CMAP_TYPE_TO_UNICODE);
1087 CMap_set_CIDSysInfo(cmap, &CSI_UNICODE);
1088 CMap_add_codespacerange(cmap, srange_min, srange_max, 2);
1089
1090 if (code_to_cid_cmap && cffont && is_cidfont) {
1091 USHORT i;
1092 for (i = 0; i < 8192; i++) {
1093 int j;
1094
1095 if (used_chars[i] == 0)
1096 continue;
1097
1098 for (j = 0; j < 8; j++) {
1099 USHORT cid = 8 * i + j;
1100 int ch;
1101
1102 if (!is_used_char2(used_chars, cid))
1103 continue;
1104
1105 ch = CMap_reverse_decode(code_to_cid_cmap, cid);
1106 if (ch >= 0) {
1107 long len;
1108 unsigned char *p = wbuf + 2;
1109 wbuf[0] = (cid >> 8) & 0xff;
1110 wbuf[1] = cid & 0xff;
1111 len = UC_sput_UTF16BE((long)ch, &p, wbuf + WBUF_SIZE);
1112 CMap_add_bfchar(cmap, wbuf, 2, wbuf + 2, len);
1113 count++;
1114 }
1115 }
1116 }
1117 } else {
1118 char used_chars_copy[8192];
1119 memcpy(used_chars_copy, used_chars, 8192);
1120
1121 /* For create_ToUnicode_cmap{4,12}(), cffont is for GID -> CID lookup,
1122 * so it is only needed for CID fonts. */
1123 switch (ttcmap->format) {
1124 case 4:
1125 count = create_ToUnicode_cmap4(cmap, ttcmap->map, used_chars_copy,
1126 is_cidfont ? cffont : NULL);
1127 break;
1128 case 12:
1129 count = create_ToUnicode_cmap12(cmap, ttcmap->map, used_chars_copy,
1130 is_cidfont ? cffont : NULL);
1131 break;
1132 }
1133
1134 /* For handle_subst_glyphs(), cffont is for GID -> glyph name lookup, so
1135 * it is only needed for non-CID fonts. */
1136 count += handle_subst_glyphs(cmap, cmap_add, used_chars_copy, sfont,
1137 is_cidfont ? NULL : cffont);
1138 }
1139
1140 if (count < 1)
1141 stream = NULL;
1142 else {
1143 stream = CMap_create_stream(cmap);
1144 }
1145 CMap_release(cmap);
1146
1147 if (cffont)
1148 cff_close(cffont);
1149
1150 return stream;
1151 }
1152
/* A (platform ID, encoding ID) pair identifying a cmap subtable. */
typedef struct {
  short platform;
  short encoding;
} cmap_plat_enc_rec;

/* Subtables tried, in this order, by otf_create_ToUnicode_stream(); the
 * first one found with format 4 or 12 is used.
 */
static cmap_plat_enc_rec cmap_plat_encs[] = {
  { 3, 10 },
  { 0,  3 },
  { 0,  0 },
  { 3,  1 },
  { 0,  1 }
};
1165
1166 pdf_obj *
otf_create_ToUnicode_stream(const char * font_name,int ttc_index,const char * used_chars,int cmap_id)1167 otf_create_ToUnicode_stream (const char *font_name,
1168 int ttc_index, /* 0 for non-TTC */
1169 const char *used_chars,
1170 int cmap_id)
1171 {
1172 pdf_obj *cmap_ref = NULL;
1173 long res_id;
1174 pdf_obj *cmap_obj = NULL;
1175 CMap *cmap_add, *code_to_cid_cmap;
1176 int cmap_add_id;
1177 tt_cmap *ttcmap;
1178 char *normalized_font_name;
1179 char *cmap_name;
1180 FILE *fp = NULL;
1181 sfnt *sfont;
1182 long offset = 0;
1183 int i, cmap_type;
1184
1185 /* replace slash in map name with dash to make the output cmap name valid,
1186 * happens when XeTeX embeds full font path
1187 * https://sourceforge.net/p/xetex/bugs/52/
1188 */
1189 normalized_font_name = NEW(strlen(font_name)+1, char);
1190 strcpy(normalized_font_name, font_name);
1191 for (i = 0; i < strlen(font_name); ++i) {
1192 if (normalized_font_name[i] == '/')
1193 normalized_font_name[i] = '-';
1194 }
1195
1196 cmap_name = NEW(strlen(font_name)+strlen("-UTF16")+5, char);
1197 sprintf(cmap_name, "%s,%03d-UTF16", normalized_font_name, ttc_index);
1198
1199 res_id = pdf_findresource("CMap", cmap_name);
1200 if (res_id >= 0) {
1201 RELEASE(cmap_name);
1202 cmap_ref = pdf_get_resource_reference(res_id);
1203 return cmap_ref;
1204 }
1205
1206 if (verbose > VERBOSE_LEVEL_MIN) {
1207 MESG("\n");
1208 MESG("otf_cmap>> Creating ToUnicode CMap for \"%s\"...\n", font_name);
1209 }
1210
1211
1212 if ((fp = DPXFOPEN(font_name, DPX_RES_TYPE_TTFONT)) ||
1213 (fp = DPXFOPEN(font_name, DPX_RES_TYPE_OTFONT))) {
1214 sfont = sfnt_open(fp);
1215 } else if ((fp = DPXFOPEN(font_name, DPX_RES_TYPE_DFONT))) {
1216 sfont = dfont_open(fp, ttc_index);
1217 } else {
1218 RELEASE(cmap_name);
1219 return NULL;
1220 }
1221
1222 if (!sfont) {
1223 ERROR("Could not open OpenType/TrueType font file \"%s\"", font_name);
1224 }
1225
1226 switch (sfont->type) {
1227 case SFNT_TYPE_DFONT:
1228 offset = sfont->offset;
1229 break;
1230 case SFNT_TYPE_TTC:
1231 offset = ttc_read_offset(sfont, ttc_index);
1232 if (offset == 0) {
1233 ERROR("Invalid TTC index");
1234 }
1235 break;
1236 default:
1237 offset = 0;
1238 break;
1239 }
1240
1241 if (sfnt_read_table_directory(sfont, offset) < 0) {
1242 ERROR("Could not read OpenType/TrueType table directory.");
1243 }
1244
1245 code_to_cid_cmap = CMap_cache_get(cmap_id);
1246 cmap_type = CMap_get_type(code_to_cid_cmap);
1247 if (cmap_type != CMAP_TYPE_CODE_TO_CID)
1248 code_to_cid_cmap = NULL;
1249
1250 cmap_add_id = CMap_cache_find(cmap_name);
1251 if (cmap_add_id < 0) {
1252 cmap_add = NULL;
1253 } else {
1254 cmap_add = CMap_cache_get(cmap_add_id);
1255 }
1256
1257 CMap_set_silent(1); /* many warnings without this... */
1258 for (i = 0; i < sizeof(cmap_plat_encs) / sizeof(cmap_plat_enc_rec); ++i) {
1259 ttcmap = tt_cmap_read(sfont, cmap_plat_encs[i].platform, cmap_plat_encs[i].encoding);
1260 if (!ttcmap)
1261 continue;
1262
1263 if (ttcmap->format == 4 || ttcmap->format == 12) {
1264 cmap_obj = create_ToUnicode_cmap(ttcmap, cmap_name, cmap_add, used_chars,
1265 sfont, code_to_cid_cmap);
1266 break;
1267 }
1268 }
1269 if (cmap_obj == NULL)
1270 WARN("Unable to read OpenType/TrueType Unicode cmap table.");
1271 tt_cmap_release(ttcmap);
1272 CMap_set_silent(0);
1273
1274 if (cmap_obj) {
1275 res_id = pdf_defineresource("CMap", cmap_name,
1276 cmap_obj, PDF_RES_FLUSH_IMMEDIATE);
1277 cmap_ref = pdf_get_resource_reference(res_id);
1278 } else {
1279 cmap_ref = NULL;
1280 }
1281 RELEASE(cmap_name);
1282
1283 sfnt_close(sfont);
1284 if (fp)
1285 DPXFCLOSE(fp);
1286
1287 return cmap_ref;
1288 }
1289
/* Must be smaller than (WBUF_SIZE-2)/8 */
/* Capacity of gent.unicodes[] and of the per-ligature work arrays in
 * handle_assign().  create_cmaps() serialises up to this many code points
 * as UTF-16BE into wbuf after a 2-byte CID prefix, hence the bound above.
 */
#define MAX_UNICODES 16

/*
 * Value stored in the "unencoded" hash table for a glyph that was reached
 * through a GSUB substitution or ligature.  The table key is the 4-byte
 * big-endian code point the glyph is assigned to (see add_glyph()).
 */
struct gent
{
  USHORT gid;   /* glyph index in the font */
  long   ucv;   /* assigned PUA unicode */

  int    num_unicodes;            /* number of entries used in unicodes[] */
  long   unicodes[MAX_UNICODES];  /* source code points; written to the
                                   * ToUnicode CMap by create_cmaps() */
};
1301
1302 static void
create_cmaps(CMap * cmap,CMap * tounicode,struct ht_table * unencoded,unsigned char * GIDToCIDMap)1303 create_cmaps (CMap *cmap, CMap *tounicode,
1304 struct ht_table *unencoded, unsigned char *GIDToCIDMap)
1305 {
1306 struct ht_iter iter;
1307
1308 ASSERT(cmap && unencoded);
1309
1310 if (ht_set_iter(unencoded, &iter) < 0)
1311 return;
1312
1313 CMap_set_silent(1); /* many warnings without this... */
1314
1315 do {
1316 struct gent *glyph;
1317 unsigned char *ucv;
1318 int i, len;
1319 unsigned char *p, *endptr;
1320 CID cid;
1321
1322 glyph = (struct gent *) ht_iter_getval(&iter);
1323 ucv = (unsigned char *) ht_iter_getkey(&iter, &len);
1324
1325 if (GIDToCIDMap) {
1326 cid = ((GIDToCIDMap[2 * glyph->gid] << 8)|GIDToCIDMap[2 * glyph->gid + 1]);
1327 if (cid == 0)
1328 WARN("Glyph gid=%u does not have corresponding CID.", glyph->gid);
1329 } else {
1330 cid = glyph->gid;
1331 }
1332
1333 CMap_add_cidchar(cmap, ucv, 4, cid);
1334
1335 if (tounicode) {
1336 wbuf[0] = (cid >> 8) & 0xff;
1337 wbuf[1] = cid & 0xff;
1338 p = wbuf + 2;
1339 endptr = wbuf + WBUF_SIZE;
1340 len = 0;
1341 for (i = 0; i < glyph->num_unicodes; i++) {
1342 len += UC_sput_UTF16BE(glyph->unicodes[i], &p, endptr);
1343 }
1344 CMap_add_bfchar(tounicode, wbuf, 2, wbuf + 2, len);
1345 }
1346 } while (ht_iter_next(&iter) >= 0);
1347
1348 CMap_set_silent(0);
1349
1350 ht_clear_iter(&iter);
1351 }
1352
1353 static void
add_glyph(struct ht_table * unencoded,USHORT gid,long ucv,int num_unicodes,long * unicodes)1354 add_glyph (struct ht_table *unencoded,
1355 USHORT gid, long ucv, int num_unicodes, long *unicodes)
1356 {
1357 struct gent *glyph;
1358 int i;
1359
1360 ASSERT(unencoded);
1361
1362 if (gid == 0 || num_unicodes < 1) {
1363 return;
1364 }
1365
1366 wbuf[0] = (ucv >> 24) & 0xff;
1367 wbuf[1] = (ucv >> 16) & 0xff;
1368 wbuf[2] = (ucv >> 8) & 0xff;
1369 wbuf[3] = ucv & 0xff;
1370
1371 glyph = NEW(1, struct gent);
1372 glyph->gid = gid;
1373 glyph->num_unicodes = num_unicodes;
1374 for (i = 0;
1375 i < num_unicodes && i < MAX_UNICODES; i++) {
1376 glyph->unicodes[i] = unicodes[i];
1377 }
1378
1379 ht_append_table(unencoded, wbuf, 4, glyph);
1380 }
1381
1382 /* This seriously affects speed... */
1383 static struct gent *
find_glyph(struct ht_table * unencoded,long ucv)1384 find_glyph (struct ht_table *unencoded, long ucv)
1385 {
1386 ASSERT(unencoded);
1387
1388 wbuf[0] = (ucv >> 24) & 0xff;
1389 wbuf[1] = (ucv >> 16) & 0xff;
1390 wbuf[2] = (ucv >> 8) & 0xff;
1391 wbuf[3] = ucv & 0xff;
1392
1393 return (struct gent *) ht_lookup_table(unencoded, wbuf, 4);
1394 }
1395
1396 static void
handle_subst(pdf_obj * dst_obj,pdf_obj * src_obj,int flag,otl_gsub * gsub_list,tt_cmap * ttcmap,struct ht_table * unencoded)1397 handle_subst (pdf_obj *dst_obj, pdf_obj *src_obj, int flag,
1398 otl_gsub *gsub_list, tt_cmap *ttcmap,
1399 struct ht_table *unencoded)
1400 {
1401 pdf_obj *tmp;
1402 long i, j, src_size, dst_size;
1403 long src, dst;
1404 long src_start, src_end, dst_start, dst_end;
1405
1406 src_size = pdf_array_length(src_obj);
1407 dst_size = pdf_array_length(dst_obj);
1408
1409 dst_start = dst_end = -1; dst = 0;
1410 src_start = src_end = -1; src = 0;
1411 for (i = 0, j = 0;
1412 i < src_size && j < dst_size; i++) {
1413 USHORT gid;
1414 int rv;
1415 struct gent *glyph;
1416
1417 tmp = pdf_get_array(src_obj, i);
1418 if (PDF_OBJ_ARRAYTYPE(tmp)) {
1419 src_start = (long) pdf_number_value(pdf_get_array(tmp, 0));
1420 src_end = (long) pdf_number_value(pdf_get_array(tmp, 1));
1421 } else {
1422 src_start = src_end = (long) pdf_number_value(tmp);
1423 }
1424 for (src = src_start; src <= src_end; src++) {
1425 glyph = find_glyph(unencoded, src);
1426 if (glyph)
1427 gid = glyph->gid;
1428 else {
1429 gid = tt_cmap_lookup(ttcmap, src);
1430 }
1431 dst++;
1432 if (dst > dst_end) {
1433 tmp = pdf_get_array(dst_obj, j++);
1434 if (PDF_OBJ_ARRAYTYPE(tmp)) {
1435 dst_start = (long) pdf_number_value(pdf_get_array(tmp, 0));
1436 dst_end = (long) pdf_number_value(pdf_get_array(tmp, 1));
1437 } else {
1438 dst_start = dst_end = (long) pdf_number_value(tmp);
1439 }
1440 dst = dst_start;
1441 }
1442 if (gid == 0) {
1443 if (flag == 'r' || flag == 'p') {
1444 if (src < 0x10000) {
1445 WARN("Font does not have glyph for U+%04X.", src);
1446 } else {
1447 WARN("Font does not have glyph for U+%06X.", src);
1448 }
1449 }
1450 if (flag == 'r') {
1451 ERROR("Missing glyph found...");
1452 }
1453 continue;
1454 }
1455 rv = otl_gsub_apply(gsub_list, &gid);
1456 if (rv < 0) {
1457 if (flag == 'p' || flag == 'r') {
1458 if (src < 0x10000) {
1459 WARN("No substituted glyph for U+%04X.", src);
1460 } else {
1461 WARN("No substituted glyph for U+%06X.", src);
1462 }
1463 }
1464 if (flag == 'r') {
1465 ERROR("Missing glyph found...");
1466 }
1467 continue;
1468 }
1469
1470 if (glyph) {
1471 glyph->gid = gid;
1472 } else {
1473 add_glyph(unencoded, gid, dst, 1, &src);
1474 }
1475
1476 if (verbose > VERBOSE_LEVEL_MIN) {
1477 if (dst < 0x10000) {
1478 MESG("otf_cmap>> Substituted glyph gid=%u assigned to U+%04X\n",
1479 gid, dst);
1480 } else {
1481 MESG("otf_cmap>> Substituted glyph gid=%u assigned to U+%06X\n",
1482 gid, dst);
1483 }
1484 }
1485
1486 }
1487 }
1488
1489 if (dst < dst_end || src < src_end ||
1490 i < src_size || j < dst_size) {
1491 WARN("Number of glyphs in left-side and right-side not equal...");
1492 WARN("Please check .otl file...");
1493 }
1494 }
1495
1496 static void
handle_assign(pdf_obj * dst,pdf_obj * src,int flag,otl_gsub * gsub_list,tt_cmap * ttcmap,struct ht_table * unencoded)1497 handle_assign (pdf_obj *dst, pdf_obj *src, int flag,
1498 otl_gsub *gsub_list, tt_cmap *ttcmap,
1499 struct ht_table *unencoded)
1500 {
1501 long unicodes[MAX_UNICODES], ucv;
1502 int i, n_unicodes, rv;
1503 USHORT gid_in[MAX_UNICODES], lig;
1504
1505 n_unicodes = pdf_array_length(src); /* FIXME */
1506 ucv = (long) pdf_number_value(pdf_get_array(dst, 0)); /* FIXME */
1507 if (!UC_is_valid(ucv)) {
1508 if (flag == 'r' || flag == 'p') {
1509 if (ucv < 0x10000) {
1510 WARN("Invalid Unicode in: %04X", ucv);
1511 } else {
1512 WARN("Invalid Unicode in: %06X", ucv);
1513 }
1514 }
1515 if (flag == 'r') {
1516 ERROR("Invalid Unicode code specified.", ucv);
1517 }
1518 return;
1519 }
1520
1521 if (verbose > VERBOSE_LEVEL_MIN) {
1522 MESG("otf_cmap>> Ligature component:");
1523 }
1524
1525 for (i = 0; i < n_unicodes; i++) {
1526 unicodes[i] =
1527 (long) pdf_number_value(pdf_get_array(src, i));
1528 gid_in[i] = tt_cmap_lookup(ttcmap, unicodes[i]);
1529
1530 if (verbose > VERBOSE_LEVEL_MIN) {
1531 if (unicodes[i] < 0x10000) {
1532 MESG(" U+%04X (gid=%u)", unicodes[i], gid_in[i]);
1533 } else {
1534 MESG(" U+%06X (gid=%u)", unicodes[i], gid_in[i]);
1535 }
1536 }
1537
1538 if (gid_in[i] == 0) {
1539 if (flag == 'r' || flag == 'p') {
1540 if (unicodes[i] < 0x10000) {
1541 WARN("Unicode char U+%04X not exist in font...", unicodes[i]);
1542 } else {
1543 WARN("Unicode char U+%06X not exist in font...", unicodes[i]);
1544 }
1545 }
1546 if (flag == 'r') {
1547 ERROR("Missing glyph found...");
1548 }
1549 return;
1550 }
1551
1552 }
1553
1554 if (verbose > VERBOSE_LEVEL_MIN) {
1555 MESG("\n");
1556 }
1557
1558 rv = otl_gsub_apply_lig(gsub_list,
1559 gid_in, (USHORT)n_unicodes, &lig);
1560 if (rv < 0) {
1561 if (flag == 'p')
1562 WARN("No ligature found...");
1563 else if (flag == 'r')
1564 ERROR("No ligature found...");
1565 return;
1566 }
1567
1568 add_glyph(unencoded, lig, ucv, n_unicodes, unicodes);
1569
1570 if (verbose > VERBOSE_LEVEL_MIN) {
1571 if (ucv < 0x10000) {
1572 MESG("otf_cmap>> Ligature glyph gid=%u assigned to U+%04X\n", lig, ucv);
1573 } else {
1574 MESG("otf_cmap>> Ligature glyph gid=%u assigned to U+%06X\n", lig, ucv);
1575 }
1576 }
1577
1578 return;
1579 }
1580
1581 static int
load_base_CMap(const char * cmap_name,int wmode,CIDSysInfo * csi,unsigned char * GIDToCIDMap,tt_cmap * ttcmap)1582 load_base_CMap (const char *cmap_name, int wmode,
1583 CIDSysInfo *csi, unsigned char *GIDToCIDMap,
1584 tt_cmap *ttcmap)
1585 {
1586 int cmap_id;
1587
1588 cmap_id = CMap_cache_find(cmap_name);
1589 if (cmap_id < 0) {
1590 CMap *cmap;
1591
1592 cmap = CMap_new();
1593 CMap_set_name (cmap, cmap_name);
1594 CMap_set_type (cmap, CMAP_TYPE_CODE_TO_CID);
1595 CMap_set_wmode(cmap, wmode);
1596 CMap_add_codespacerange(cmap, lrange_min, lrange_max, 4);
1597
1598 if (csi) { /* CID */
1599 CMap_set_CIDSysInfo(cmap, csi);
1600 } else {
1601 CMap_set_CIDSysInfo(cmap, &CSI_IDENTITY);
1602 }
1603
1604 if (ttcmap->format == 12) {
1605 load_cmap12(ttcmap->map, GIDToCIDMap, cmap);
1606 } else if (ttcmap->format == 4) {
1607 load_cmap4(ttcmap->map, GIDToCIDMap, cmap);
1608 }
1609
1610 cmap_id = CMap_cache_add(cmap);
1611 }
1612
1613 return cmap_id;
1614 }
1615
1616 static void
load_gsub(pdf_obj * conf,otl_gsub * gsub_list,sfnt * sfont)1617 load_gsub (pdf_obj *conf, otl_gsub *gsub_list, sfnt *sfont)
1618 {
1619 pdf_obj *rule;
1620 char *script, *language, *feature;
1621 long i, size;
1622
1623 rule = otl_conf_get_rule(conf);
1624 if (!rule)
1625 return;
1626
1627 script = otl_conf_get_script (conf);
1628 language = otl_conf_get_language(conf);
1629
1630 size = pdf_array_length(rule);
1631 for (i = 0; i < size; i += 2) {
1632 pdf_obj *tmp, *commands;
1633 int flag;
1634 long j, num_comms;
1635
1636 tmp = pdf_get_array(rule, i);
1637 flag = (int) pdf_number_value(tmp);
1638
1639 commands = pdf_get_array(rule, i+1);
1640 num_comms = pdf_array_length(commands);
1641
1642 /* (assign|substitute) tag dst src */
1643 for (j = 0 ; j < num_comms; j += 4) {
1644 tmp = pdf_get_array(commands, 1);
1645 if (PDF_OBJ_STRINGTYPE(tmp)) {
1646 feature = pdf_string_value(tmp);
1647 if (otl_gsub_add_feat(gsub_list,
1648 script, language, feature, sfont) < 0) {
1649 if (flag == 'p')
1650 WARN("No OTL feature matches \"%s.%s.%s\" found.",
1651 script, language, feature);
1652 else if (flag == 'r')
1653 ERROR("No OTL feature matches \"%s.%s.%s\" found.",
1654 script, language, feature);
1655 }
1656 }
1657
1658 }
1659 }
1660
1661 }
1662
1663 static void
handle_gsub(pdf_obj * conf,tt_cmap * ttcmap,otl_gsub * gsub_list,struct ht_table * unencoded)1664 handle_gsub (pdf_obj *conf,
1665 tt_cmap *ttcmap, otl_gsub *gsub_list,
1666 struct ht_table *unencoded)
1667 {
1668 pdf_obj *rule;
1669 char *script, *language, *feature;
1670 long i, size;
1671
1672 if (!conf)
1673 return;
1674
1675 rule = otl_conf_get_rule(conf);
1676 if (!rule) {
1677 return;
1678 }
1679
1680 if (!PDF_OBJ_ARRAYTYPE(rule)) {
1681 WARN("Not arraytype?");
1682 return;
1683 }
1684 script = otl_conf_get_script (conf);
1685 language = otl_conf_get_language(conf);
1686
1687 size = pdf_array_length(rule);
1688 for (i = 0; i < size; i += 2) {
1689 pdf_obj *tmp, *commands;
1690 long j, num_comms;
1691 int flag;
1692
1693 tmp = pdf_get_array(rule, i);
1694 flag = (int) pdf_number_value(tmp);
1695
1696 commands = pdf_get_array (rule, i+1);
1697 num_comms = pdf_array_length(commands);
1698
1699 for (j = 0; j < num_comms; j += 4) {
1700 pdf_obj *operator;
1701 pdf_obj *src, *dst, *feat;
1702 int rv;
1703
1704 /* (assing|substitute) tag dst src */
1705 operator = pdf_get_array(commands, j);
1706
1707 feat = pdf_get_array(commands, j+1);
1708 if (PDF_OBJ_STRINGTYPE(feat))
1709 feature = pdf_string_value(feat);
1710 else
1711 feature = NULL;
1712
1713 dst = pdf_get_array(commands, j+2);
1714 src = pdf_get_array(commands, j+3);
1715
1716 rv = otl_gsub_select(gsub_list, script, language, feature);
1717 if (rv < 0) {
1718 if (flag == 'p') {
1719 WARN("No GSUB feature %s.%s.%s loaded...",
1720 script, language, feature);
1721 } else if (flag == 'r') {
1722 ERROR("No GSUB feature %s.%s.%s loaded...",
1723 script, language, feature);
1724 }
1725 } else {
1726
1727 if (verbose > VERBOSE_LEVEL_MIN) {
1728 MESG("otf_cmap>> %s:\n", pdf_name_value(operator));
1729 }
1730
1731 if (!strcmp(pdf_name_value(operator), "assign")) {
1732 handle_assign(dst, src, flag,
1733 gsub_list, ttcmap, unencoded);
1734 } else if (!strcmp(pdf_name_value(operator), "substitute")) {
1735 handle_subst(dst, src, flag,
1736 gsub_list, ttcmap, unencoded);
1737 }
1738 }
1739
1740 }
1741
1742 }
1743
1744 }
1745
/*
 * Value destructor passed to ht_init_table() for the `unencoded' table in
 * otf_load_Unicode_CMap(): releases one stored value, i.e. a struct gent
 * that add_glyph() allocated with NEW().
 */
static void CDECL
hval_free (void *hval)
{
  RELEASE(hval);
}
1751
1752 int
otf_load_Unicode_CMap(const char * map_name,int ttc_index,const char * otl_tags,int wmode)1753 otf_load_Unicode_CMap (const char *map_name, int ttc_index, /* 0 for non-TTC font */
1754 const char *otl_tags, int wmode)
1755 {
1756 int cmap_id = -1;
1757 int tounicode_id = -1, is_cidfont = 0;
1758 sfnt *sfont;
1759 unsigned long offset = 0;
1760 char *base_name = NULL, *cmap_name = NULL;
1761 char *tounicode_name = NULL;
1762 FILE *fp = NULL;
1763 otl_gsub *gsub_list = NULL;
1764 tt_cmap *ttcmap;
1765 CMap *cmap, *base, *tounicode = NULL;
1766 CIDSysInfo csi = {NULL, NULL, 0};
1767 unsigned char *GIDToCIDMap = NULL;
1768
1769 if (!map_name)
1770 return -1;
1771
1772 if (ttc_index > 999 || ttc_index < 0) {
1773 return -1; /* Sorry for this... */
1774 }
1775
1776 fp = DPXFOPEN(map_name, DPX_RES_TYPE_TTFONT);
1777 if (!fp) {
1778 fp = DPXFOPEN(map_name, DPX_RES_TYPE_OTFONT);
1779 }
1780 if (!fp) {
1781 fp = DPXFOPEN(map_name, DPX_RES_TYPE_DFONT);
1782 if (!fp) return -1;
1783 sfont = dfont_open(fp, ttc_index);
1784 } else {
1785 sfont = sfnt_open(fp);
1786 }
1787
1788 if (!sfont) {
1789 ERROR("Could not open OpenType/TrueType/dfont font file \"%s\"", map_name);
1790 }
1791 switch (sfont->type) {
1792 case SFNT_TYPE_TTC:
1793 offset = ttc_read_offset(sfont, ttc_index);
1794 if (offset == 0) {
1795 ERROR("Invalid TTC index");
1796 }
1797 break;
1798 case SFNT_TYPE_TRUETYPE:
1799 case SFNT_TYPE_POSTSCRIPT:
1800 offset = 0;
1801 break;
1802 case SFNT_TYPE_DFONT:
1803 offset = sfont->offset;
1804 break;
1805 default:
1806 ERROR("Not a OpenType/TrueType/TTC font?: %s", map_name);
1807 break;
1808 }
1809
1810 if (sfnt_read_table_directory(sfont, offset) < 0)
1811 ERROR("Could not read OpenType/TrueType table directory.");
1812
1813 base_name = NEW(strlen(map_name)+strlen("-UCS4-H")+5, char);
1814 if (wmode)
1815 sprintf(base_name, "%s,%03d-UCS4-V", map_name, ttc_index);
1816 else {
1817 sprintf(base_name, "%s,%03d-UCS4-H", map_name, ttc_index);
1818 }
1819
1820 if (otl_tags) {
1821 cmap_name = NEW(strlen(map_name)+strlen(otl_tags)+strlen("-UCS4-H")+6, char);
1822 if (wmode)
1823 sprintf(cmap_name, "%s,%03d,%s-UCS4-V", map_name, ttc_index, otl_tags);
1824 else
1825 sprintf(cmap_name, "%s,%03d,%s-UCS4-H", map_name, ttc_index, otl_tags);
1826 } else {
1827 cmap_name = NEW(strlen(base_name)+1, char);
1828 strcpy(cmap_name, base_name);
1829 }
1830
1831 if (sfont->type == SFNT_TYPE_POSTSCRIPT) {
1832 is_cidfont = handle_CIDFont(sfont, &GIDToCIDMap, &csi);
1833 } else {
1834 is_cidfont = 0;
1835 }
1836
1837 if (is_cidfont) {
1838 tounicode_name = NULL;
1839 } else {
1840 tounicode_name = NEW(strlen(map_name)+strlen("-UTF16")+5, char);
1841 sprintf(tounicode_name, "%s,%03d-UTF16", map_name, ttc_index);
1842 }
1843
1844 if (verbose > VERBOSE_LEVEL_MIN) {
1845 MESG("\n");
1846 MESG("otf_cmap>> Unicode charmap for font=\"%s\" layout=\"%s\"\n",
1847 map_name, (otl_tags ? otl_tags : "none"));
1848 }
1849
1850 cmap_id = CMap_cache_find(cmap_name);
1851 if (cmap_id >= 0) {
1852 RELEASE(cmap_name);
1853 RELEASE(base_name);
1854 if (GIDToCIDMap)
1855 RELEASE(GIDToCIDMap);
1856 if (tounicode_name)
1857 RELEASE(tounicode_name);
1858
1859 sfnt_close(sfont);
1860 DPXFCLOSE(fp);
1861
1862 if (verbose > VERBOSE_LEVEL_MIN)
1863 MESG("otf_cmap>> Found at cmap_id=%d.\n", cmap_id);
1864
1865 return cmap_id;
1866 }
1867
1868 ttcmap = tt_cmap_read(sfont, 3, 10); /* Microsoft UCS4 */
1869 if (!ttcmap) {
1870 ttcmap = tt_cmap_read(sfont, 3, 1); /* Microsoft UCS2 */
1871 if (!ttcmap) {
1872 ttcmap = tt_cmap_read(sfont, 0, 3); /* Unicode 2.0 or later */
1873 if (!ttcmap) {
1874 ERROR("Unable to read OpenType/TrueType Unicode cmap table.");
1875 }
1876 }
1877 }
1878 cmap_id = load_base_CMap(base_name, wmode,
1879 (is_cidfont ? &csi : NULL),
1880 GIDToCIDMap, ttcmap);
1881 if (cmap_id < 0)
1882 ERROR("Failed to read OpenType/TrueType cmap table.");
1883
1884 if (!otl_tags) {
1885 RELEASE(cmap_name);
1886 RELEASE(base_name);
1887 if (GIDToCIDMap)
1888 RELEASE(GIDToCIDMap);
1889 if (tounicode_name)
1890 RELEASE(tounicode_name);
1891 if (is_cidfont) {
1892 if (csi.registry)
1893 RELEASE(csi.registry);
1894 if (csi.ordering)
1895 RELEASE(csi.ordering);
1896 }
1897 tt_cmap_release(ttcmap);
1898 sfnt_close(sfont);
1899 DPXFCLOSE(fp);
1900
1901 return cmap_id;
1902 }
1903
1904 base = CMap_cache_get(cmap_id);
1905
1906 cmap = CMap_new();
1907 CMap_set_name (cmap, cmap_name);
1908 CMap_set_type (cmap, CMAP_TYPE_CODE_TO_CID);
1909 CMap_set_wmode(cmap, wmode);
1910 /* CMap_add_codespacerange(cmap, lrange_min, lrange_max, 4); */
1911 CMap_set_usecmap(cmap, base);
1912 CMap_add_cidchar(cmap, lrange_max, 4, 0); /* FIXME */
1913
1914 if (is_cidfont) {
1915 CMap_set_CIDSysInfo(cmap, &csi);
1916 if (csi.registry)
1917 RELEASE(csi.registry);
1918 if (csi.ordering)
1919 RELEASE(csi.ordering);
1920 } else {
1921 CMap_set_CIDSysInfo(cmap, &CSI_IDENTITY);
1922 }
1923
1924 gsub_list = otl_gsub_new();
1925
1926 {
1927 struct ht_table unencoded;
1928 char *conf_name, *opt_tag;
1929 pdf_obj *conf, *opt_conf;
1930
1931 conf_name = NEW(strlen(otl_tags)+1, char);
1932 memset (conf_name, 0, strlen(otl_tags)+1);
1933 opt_tag = strchr(otl_tags, ':');
1934 if (opt_tag) {
1935 opt_tag++;
1936 strncpy(conf_name, otl_tags,
1937 strlen(otl_tags) - strlen(opt_tag) - 1);
1938 } else {
1939 strcpy(conf_name, otl_tags);
1940 }
1941
1942 if (verbose > VERBOSE_LEVEL_MIN) {
1943 MESG("otf_cmap>> Read layout config. \"%s\"\n", conf_name);
1944 }
1945
1946 conf = otl_find_conf(conf_name);
1947 if (!conf)
1948 ERROR("Layout file \"%s\" not found...", conf_name);
1949
1950 load_gsub(conf, gsub_list, sfont);
1951 if (opt_tag) {
1952 if (verbose > VERBOSE_LEVEL_MIN) {
1953 MESG("otf_cmap>> Layout option \"%s\" enabled\n", opt_tag);
1954 }
1955 opt_conf = otl_conf_find_opt(conf, opt_tag);
1956 if (!opt_conf)
1957 ERROR("There is no option \"%s\" in \"%s\".",
1958 opt_tag, conf_name);
1959 load_gsub(opt_conf, gsub_list, sfont);
1960 }
1961
1962 ht_init_table(&unencoded, hval_free);
1963
1964 handle_gsub(conf, ttcmap, gsub_list, &unencoded);
1965 if (opt_tag) {
1966 opt_conf = otl_conf_find_opt(conf, opt_tag);
1967 if (!opt_conf)
1968 ERROR("There is no option \"%s\" in \"%s\".",
1969 opt_tag, conf_name);
1970 handle_gsub(opt_conf, ttcmap, gsub_list, &unencoded);
1971 }
1972 if (is_cidfont) {
1973 tounicode_id = -1;
1974 tounicode = NULL;
1975 } else {
1976 tounicode_id = CMap_cache_find(tounicode_name);
1977 if (tounicode_id >= 0)
1978 tounicode = CMap_cache_get(tounicode_id);
1979 else {
1980 tounicode = CMap_new();
1981 CMap_set_name (tounicode, tounicode_name);
1982 CMap_set_type (tounicode, CMAP_TYPE_TO_UNICODE);
1983 CMap_set_wmode(tounicode, 0);
1984 CMap_add_codespacerange(tounicode, srange_min, srange_max, 2);
1985 CMap_set_CIDSysInfo(tounicode, &CSI_UNICODE);
1986 /* FIXME */
1987 CMap_add_bfchar(tounicode, srange_min, 2, srange_max, 2);
1988 }
1989 }
1990 create_cmaps(cmap, tounicode, &unencoded, GIDToCIDMap);
1991
1992 ht_clear_table(&unencoded);
1993 RELEASE(conf_name);
1994 }
1995
1996 cmap_id = CMap_cache_add(cmap);
1997 if (!is_cidfont && tounicode_id < 0) /* New */
1998 CMap_cache_add(tounicode);
1999
2000 tt_cmap_release(ttcmap);
2001 if (gsub_list)
2002 otl_gsub_release(gsub_list);
2003
2004 if (verbose > VERBOSE_LEVEL_MIN) {
2005 MESG("otf_cmap>> Overwrite CMap \"%s\" by \"%s\" with usecmap\n",
2006 base_name, cmap_name);
2007 }
2008
2009 if (GIDToCIDMap)
2010 RELEASE(GIDToCIDMap);
2011 if (base_name)
2012 RELEASE(base_name);
2013 if (cmap_name)
2014 RELEASE(cmap_name);
2015 if (tounicode_name)
2016 RELEASE(tounicode_name);
2017
2018 sfnt_close(sfont);
2019 DPXFCLOSE(fp);
2020
2021 return cmap_id;
2022 }
2023