1 /* This is dvipdfmx, an eXtended version of dvipdfm by Mark A. Wicks.
2
3 Copyright (C) 2007-2014 by Jin-Hwan Cho and Shunsaku Hirata,
4 the dvipdfmx project team.
5
6 Copyright (C) 1998, 1999 by Mark A. Wicks <mwicks@kettering.edu>
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
21 */
22
23 /*
24 * References:
25 *
26 * Unicode and Glyph Names, ver. 2.3., Adobe Solution Network
27 * http://partners.adobe.com/asn/tech/type/unicodegn.jsp
28 */
29
30 #ifdef HAVE_CONFIG_H
31 #include <config.h>
32 #endif
33
34 #include <string.h>
35 #include <ctype.h>
36
37 #include "system.h"
38 #include "mem.h"
39 #include "error.h"
40
41 #include "mfileio.h"
42 #include "pdfparse.h"
43
44 /* Hash */
45 #include "dpxutil.h"
46
47 #include "dpxfile.h"
48
49 #include "unicode.h"
50
51 #include "agl.h"
52
/* Verbosity level of this module; 0 means quiet. */
static int verbose = 0;

/*
 * Raise the verbosity level of the AGL module by one step.
 * Each call makes the loader and the converters report more detail.
 */
void
agl_set_verbose (void)
{
  verbose += 1;
}
60
61 static agl_name *
agl_new_name(void)62 agl_new_name (void)
63 {
64 agl_name *agln;
65
66 agln = NEW(1, agl_name);
67 agln->name = NULL;
68 agln->suffix = NULL;
69 agln->n_components = 0;
70 agln->alternate = NULL;
71 agln->is_predef = 0;
72
73 return agln;
74 }
75
76 static void
agl_release_name(agl_name * agln)77 agl_release_name (agl_name *agln)
78 {
79 agl_name *next;
80
81 while (agln) {
82 next = agln->alternate;
83 if (agln->name)
84 RELEASE(agln->name);
85 if (agln->suffix)
86 RELEASE(agln->suffix);
87 agln->name = NULL;
88 RELEASE(agln);
89 agln = next;
90 }
91 }
92
93 char *
agl_chop_suffix(const char * glyphname,char ** suffix)94 agl_chop_suffix (const char *glyphname, char **suffix)
95 {
96 char *name, *p;
97 int len;
98
99 ASSERT(glyphname && suffix);
100
101 p = strchr(glyphname, '.');
102 if (p) {
103 len = strlen(glyphname) - strlen(p);
104 if (len < 1) {
105 name = NULL;
106 *suffix = NEW(strlen(glyphname), char);
107 strcpy(*suffix, glyphname+1);
108 } else {
109 p++;
110 name = NEW(len + 1, char);
111 strncpy(name, glyphname, len);
112 name[len] = '\0';
113 if (p[0] == '\0') {
114 *suffix = NULL;
115 } else {
116 *suffix = NEW(strlen(p) + 1, char);
117 strcpy(*suffix, p);
118 }
119 }
120 } else {
121 name = NEW(strlen(glyphname) + 1, char);
122 strcpy(name, glyphname);
123 *suffix = NULL;
124 }
125
126 return name;
127 }
128
/*
 * NULL-terminated list of name fragments that skip_modifier() recognizes
 * inside "...small" glyph names.
 */
static const char * const modifiers[] = {
  /* accents and similar marks */
  "acute",
  "breve",
  "caron",
  "cedilla",
  "circumflex",
  "dieresis",
  "dotaccent",
  "grave",
  "hungarumlaut",
  "macron",
  "ogonek",
  "ring",
  "tilde",
  "commaaccent",
  "slash",

  /* The following entries are not accents, but a PS font may
   * carry a "small" version of them as well.
   */
  "ampersand",
  "exclam",
  "exclamdown",
  "question",
  "questiondown",

  NULL
};
142
/*
 * Skip one "capital" at *p: one of the multi-letter capitals AE, OE,
 * Eth or Thorn, or otherwise a single ASCII letter 'A'..'Z'.
 *
 *   p       in/out cursor; advanced past the capital when one is found.
 *   endptr  end of the readable range; nothing at or after it is examined.
 *
 * Returns the number of characters skipped (0 when *p does not start
 * with a capital).
 */
static int
skip_capital (const char **p, const char *endptr)
{
  /* Multi-letter capitals, tried in this order before the single-letter case. */
  static const struct {
    const char *text;
    long        size;
  } multi[] = {
    { "AE", 2 }, { "OE", 2 }, { "Eth", 3 }, { "Thorn", 5 }
  };
  const char *cur   = *p;
  long        avail = (long) (endptr - cur);
  long        taken = 0;
  size_t      k;

  for (k = 0; k < sizeof(multi) / sizeof(multi[0]); k++) {
    if (avail >= multi[k].size &&
        memcmp(cur, multi[k].text, (size_t) multi[k].size) == 0) {
      taken = multi[k].size;
      break;
    }
  }

  if (taken == 0 && avail >= 1 && *cur >= 'A' && *cur <= 'Z')
    taken = 1;

  *p = cur + taken;
  return (int) taken;
}
177
178 static int
skip_modifier(const char ** p,const char * endptr)179 skip_modifier (const char **p, const char *endptr)
180 {
181 long slen = 0, len;
182 int i;
183
184 len = (long) (endptr - (*p));
185
186 for (i = 0; modifiers[i] != NULL; i++) {
187 if ((len >= strlen(modifiers[i]) &&
188 !memcmp(*p, modifiers[i], len))) {
189 slen = strlen(modifiers[i]);
190 *p += slen;
191 break;
192 }
193 }
194
195 return slen;
196 }
197
/*
 * Decide whether a glyph name follows the legacy small-capital naming
 * pattern "<stem>small".
 *
 * The stem (everything before the trailing "small") must be either
 *   - exactly one modifier name      (Acutesmall-like names), or
 *   - one capital as accepted by skip_capital(), optionally followed by
 *     one or more modifier names     (Asmall, AEsmall, ...).
 *
 * Returns 1 when the name matches, 0 otherwise (including NULL input).
 */
static int
is_smallcap (const char *glyphname)
{
  const char *cursor, *stem_end;
  long        total, remaining, eaten;

  if (glyphname == NULL)
    return 0;

  total = strlen(glyphname);
  if (total < 6)
    return 0;   /* needs at least one stem character plus "small" */

  stem_end = glyphname + total - 5;
  if (strcmp(stem_end, "small") != 0)
    return 0;

  cursor    = glyphname;
  remaining = total - 5;

  /* A stem that is a single modifier and nothing else. */
  eaten = skip_modifier(&cursor, stem_end);
  if (eaten == remaining)
    return 1;
  if (eaten > 0)
    return 0;   /* modifier followed by something else: not recognized */

  /* Otherwise the stem has to begin with a capital ... */
  remaining -= skip_capital(&cursor, stem_end);
  if (remaining == 0)
    return 1;

  /* ... and whatever follows must consist of modifiers only. */
  while (remaining > 0) {
    eaten = skip_modifier(&cursor, stem_end);
    if (eaten == 0)
      return 0;
    remaining -= eaten;
  }

  return 1;
}
237
/* Capacity of the per-variant suffix list (including the NULL terminator). */
#define SUFFIX_LIST_MAX  16
/* Index of the small-capital entry in var_list[]. */
#define AGL_VAR_SMCP_IDX 0

/*
 * Known glyph variants.
 *
 *   key       trailing word used in legacy glyph names (e.g. "Asmall")
 *   otl_tag   matching OpenType Layout feature tag, or NULL if none
 *   suffixes  NULL-terminated list of additional suffix spellings
 *
 * The table ends with an entry whose key is NULL.
 */
static struct {
  const char *key;
  const char *otl_tag;
  const char *suffixes[SUFFIX_LIST_MAX];
} var_list[] = {
  { "small",       "smcp", { "sc", NULL } },
  { "swash",       "swsh", { NULL } },
  { "superior",    "sups", { NULL } },
  { "inferior",    "sinf", { NULL } },
  { "numerator",   "numr", { NULL } },
  { "denominator", "dnom", { NULL } },
  { "oldstyle",    "onum", { NULL } },

  /* The following are only used by TeX; there is no
   * corresponding OTL feature tag.
   */
  { "display",     NULL,   { NULL } },
  { "text",        NULL,   { NULL } },
  { "big",         NULL,   { NULL } },
  { "bigg",        NULL,   { NULL } },
  { "Big",         NULL,   { NULL } },
  { "Bigg",        NULL,   { NULL } },

  { NULL,          NULL,   { NULL } }
};
264
265 const char *
agl_suffix_to_otltag(const char * suffix)266 agl_suffix_to_otltag (const char *suffix)
267 {
268 int i, j;
269
270 for (i = 0; var_list[i].key; i++) {
271 for (j = 0; var_list[i].suffixes[j]; j++) {
272 if (!strcmp(suffix, var_list[i].suffixes[j]))
273 return var_list[i].otl_tag;
274 }
275 if (!strcmp(suffix, var_list[i].key))
276 return var_list[i].otl_tag;
277 if (var_list[i].otl_tag &&
278 !strcmp(suffix, var_list[i].otl_tag))
279 return var_list[i].otl_tag;
280 }
281
282 return NULL;
283 }
284
285 static int
agl_guess_name(const char * glyphname)286 agl_guess_name (const char *glyphname)
287 {
288 int i, len;
289
290 if (is_smallcap(glyphname))
291 return AGL_VAR_SMCP_IDX;
292
293 len = strlen(glyphname);
294 for (i = 1; var_list[i].key != NULL; i++) {
295 if (len > strlen(var_list[i].key) &&
296 !strcmp(glyphname+len-strlen(var_list[i].key), var_list[i].key)
297 ) {
298 return i;
299 }
300 }
301
302 return -1;
303 }
304
305 static agl_name *
agl_normalized_name(char * glyphname)306 agl_normalized_name (char *glyphname)
307 {
308 agl_name *agln;
309 char *suffix;
310 int i, n;
311
312 if (!glyphname)
313 return NULL;
314
315 agln = agl_new_name();
316 suffix = strchr(glyphname, '.');
317 if (suffix) {
318 n = strlen(glyphname) - strlen(suffix);
319 if (suffix[1] != '\0') {
320 agln->suffix = NEW(strlen(suffix), char);
321 strcpy(agln->suffix, suffix+1);
322 }
323 agln->name = NEW(n+1, char);
324 memcpy(agln->name, glyphname, n);
325 agln->name[n] = '\0';
326 } else if (is_smallcap(glyphname)) {
327 n = strlen(glyphname) - 5;
328 agln->suffix = NEW(3, char);
329 strcpy(agln->suffix, "sc");
330 agln->name = NEW(n+1, char);
331 for (i = 0; i < n; i++) {
332 agln->name[i] = isupper((unsigned char)glyphname[i]) ?
333 (glyphname[i] + 32) : glyphname[i];
334 }
335 agln->name[n] = '\0';
336 } else {
337 int var_idx;
338
339 #define SET_STRING(p,s) do {\
340 (p) = NEW(strlen((s))+1, char);\
341 strcpy((p),(s));\
342 } while (0)
343 var_idx = agl_guess_name(glyphname);
344 if (var_idx < 0 ||
345 !var_list[var_idx].key) {
346 n = strlen(glyphname);
347 } else {
348 n = strlen(glyphname) - strlen(var_list[var_idx].key);
349 if (var_list[var_idx].suffixes[0])
350 SET_STRING(agln->suffix, var_list[var_idx].suffixes[0]);
351 else {
352 SET_STRING(agln->suffix, var_list[var_idx].key);
353 }
354 }
355 agln->name = NEW(n+1, char);
356 memcpy(agln->name, glyphname, n);
357 agln->name[n] = '\0';
358 }
359
360 return agln;
361 }
362
363 static struct ht_table aglmap;
364
365 static void CDECL
hval_free(void * hval)366 hval_free (void *hval)
367 {
368 agl_release_name((struct agl_name *) hval);
369 }
370
371 void
agl_init_map(void)372 agl_init_map (void)
373 {
374 ht_init_table(&aglmap, hval_free);
375 agl_load_listfile(AGL_EXTRA_LISTFILE, 0);
376 if (agl_load_listfile(AGL_PREDEF_LISTFILE, 1) < 0) {
377 WARN("Failed to load AGL file \"%s\"...", AGL_PREDEF_LISTFILE);
378 }
379 if (agl_load_listfile(AGL_DEFAULT_LISTFILE, 0) < 0) {
380 WARN("Failed to load AGL file \"%s\"...", AGL_DEFAULT_LISTFILE);
381 }
382 }
383
384 void
agl_close_map(void)385 agl_close_map (void)
386 {
387 ht_clear_table(&aglmap);
388 }
389
390 #define WBUF_SIZE 1024
391
392 int
agl_load_listfile(const char * filename,int is_predef)393 agl_load_listfile (const char *filename, int is_predef)
394 {
395 int count = 0;
396 const char *p, *endptr;
397 char *nextptr;
398 char wbuf[WBUF_SIZE];
399 FILE *fp;
400
401 if (!filename)
402 return -1;
403
404 fp = DPXFOPEN(filename, DPX_RES_TYPE_AGL);
405 if (!fp) {
406 return -1;
407 }
408
409 if (verbose)
410 MESG("<AGL:%s", filename);
411
412 while ((p = mfgets(wbuf, WBUF_SIZE, fp)) != NULL) {
413 agl_name *agln, *duplicate;
414 char *name;
415 int n_unicodes, i;
416 long unicodes[AGL_MAX_UNICODES];
417
418 endptr = p + strlen(p);
419 skip_white(&p, endptr);
420
421 /* Need table version check. */
422 if (!p || p[0] == '#' || p >= endptr)
423 continue;
424 nextptr = strchr(p, ';');
425 if (!nextptr || nextptr == p)
426 continue;
427
428 name = parse_ident(&p, nextptr);
429
430 skip_white(&p, endptr);
431 if (!name || p[0] != ';') {
432 WARN("Invalid AGL entry: %s", wbuf);
433 if (name)
434 RELEASE(name);
435 continue;
436 }
437
438 p++;
439 skip_white(&p, endptr);
440
441 n_unicodes = 0;
442 while (p < endptr &&
443 ((p[0] >= '0' && p[0] <= '9') ||
444 (p[0] >= 'A' && p[0] <= 'F'))
445 ) {
446
447 if (n_unicodes >= AGL_MAX_UNICODES) {
448 WARN("Too many Unicode values");
449 break;
450 }
451 unicodes[n_unicodes++] = strtol(p, &nextptr, 16);
452
453 p = nextptr;
454 skip_white(&p, endptr);
455 }
456
457 if (n_unicodes == 0) {
458 WARN("AGL entry ignored (no mapping): %s", wbuf);
459 RELEASE(name);
460 continue;
461 }
462
463 agln = agl_normalized_name(name);
464 agln->is_predef = is_predef;
465 agln->n_components = n_unicodes;
466 for (i = 0; i < n_unicodes; i++) {
467 agln->unicodes[i] = unicodes[i];
468 }
469
470 duplicate = ht_lookup_table(&aglmap, name, strlen(name));
471 if (!duplicate)
472 ht_append_table(&aglmap, name, strlen(name), agln);
473 else {
474 while (duplicate->alternate)
475 duplicate = duplicate->alternate;
476 duplicate->alternate = agln;
477 }
478
479 if (verbose > 3) {
480 if (agln->suffix)
481 MESG("agl: %s [%s.%s] -->", name, agln->name, agln->suffix);
482 else
483 MESG("agl: %s [%s] -->", name, agln->name);
484 for (i = 0; i < agln->n_components; i++) {
485 if (agln->unicodes[i] > 0xffff) {
486 MESG(" U+%06X", agln->unicodes[i]);
487 } else {
488 MESG(" U+%04X", agln->unicodes[i]);
489 }
490 }
491 MESG("\n");
492 }
493
494 RELEASE(name);
495 count++;
496 }
497 DPXFCLOSE(fp);
498
499 if (verbose)
500 MESG(">");
501
502 return count;
503 }
504
505 agl_name *
agl_lookup_list(const char * glyphname)506 agl_lookup_list (const char *glyphname)
507 {
508 agl_name *agln;
509
510 if (!glyphname)
511 return NULL;
512
513 agln = ht_lookup_table(&aglmap, glyphname, strlen(glyphname));
514
515 return agln;
516 }
517
/*
 * Tell whether a glyph name has the form of a Unicode glyph name,
 * i.e. "uniXXXX[XXXX...]" or "uXXXX".."uXXXXXX", optionally followed
 * by ".suffix".  Only the part before the first '.' is examined.
 *
 * For the "uni" form only the overall length and the first hex digit
 * are checked.  For the "u" form every character after the 'u' must be
 * an upper-case hexadecimal digit.
 *
 * Returns 1 if the name looks like a Unicode glyph name, 0 otherwise
 * (including NULL input).
 */
int
agl_name_is_unicode (const char *glyphname)
{
  const char *dot;
  char        c;
  int         i, len;

  if (!glyphname)
    return 0;

  dot = strchr(glyphname, '.');
  if (dot)
    len = (int) (dot - glyphname);
  else
    len = (int) strlen(glyphname);

  /*
   * uni02ac is an invalid glyph name and is mapped to the empty string.
   */
  if (len >= 7 && (len - 3) % 4 == 0 &&
      !strncmp(glyphname, "uni", 3)) {
    c = glyphname[3];
    /*
     * Check if the 4th character is an uppercase hexadecimal digit.
     * "union" should not be treated as a Unicode glyph name.
     */
    if (isdigit((unsigned char)c) || (c >= 'A' && c <= 'F'))
      return 1;
    else
      return 0;
  } else if (len <= 7 && len >= 5 &&
             glyphname[0] == 'u') {
    /* Validate every digit, including the last one. */
    for (i = 1; i < len; i++) {
      c = glyphname[i];
      if (!isdigit((unsigned char)c) && (c < 'A' || c > 'F'))
        return 0;
    }
    return 1;
  }

  return 0;
}
555
/*
 * Convert a Unicode glyph name ("uniXXXX" or "uXXXX".."uXXXXXX",
 * optionally followed by ".suffix") to its code value.
 *
 * Names that encode more than one character ("uniXXXXYYYY...") are not
 * supported.  The length test looks only at the part before the first
 * '.', so a suffix of any length after a short "uXXXX" name is accepted.
 *
 * Returns the code value, or -1 when the name is not a Unicode glyph
 * name, encodes several characters, contains a character that is not an
 * upper-case hex digit, or yields a value rejected by UC_is_valid().
 */
long
agl_name_convert_unicode (const char *glyphname)
{
  long        ucv = -1;
  const char *p, *dot;
  size_t      base_len;

  if (!agl_name_is_unicode(glyphname))
    return -1;

  dot      = strchr(glyphname, '.');
  base_len = dot ? (size_t) (dot - glyphname) : strlen(glyphname);

  /* "uni" + 4 digits and 'u' + up to 6 digits both fit in 7 characters. */
  if (base_len > 7) {
    WARN("Mapping to multiple Unicode characters not supported.");
    return -1;
  }

  if (glyphname[1] == 'n')
    p = glyphname + 3;   /* skip "uni" */
  else
    p = glyphname + 1;   /* skip "u" */
  ucv = 0;
  while (*p != '\0' && *p != '.') {
    if (!isdigit((unsigned char)*p) && (*p < 'A' || *p > 'F')) {
      WARN("Invalid char %c in Unicode glyph name %s.", *p, glyphname);
      return -1;
    }
    ucv <<= 4;
    ucv += isdigit((unsigned char)*p) ? *p - '0' : *p - 'A' + 10;
    p++;
  }

  if (!UC_is_valid(ucv)) {
    /* ucv is a long: print it with %lX, not %X. */
    if (ucv < 0x10000) {
      WARN("Invalid Unicode code value U+%04lX.", (unsigned long) ucv);
    } else {
      WARN("Invalid Unicode code value U+%06lX.", (unsigned long) ucv);
    }
    ucv = -1;
  }

  return ucv;
}
596
597
598
/*
 * Convert the first len characters at start, read as upper-case
 * hexadecimal digits, to a number.
 *
 * Returns the value (0 when len is not positive), or -1 as soon as a
 * character other than 0-9 / A-F is met -- which includes running into
 * the string terminator before len characters were read.
 */
static long
xtol (const char *start, int len)
{
  long        value = 0;
  const char *cur;
  int         left;

  for (cur = start, left = len; left > 0; cur++, left--) {
    int digit;

    if (isdigit((unsigned char) *cur))
      digit = *cur - '0';
    else if (*cur >= 'A' && *cur <= 'F')
      digit = *cur - 'A' + 10;
    else
      return -1;

    value = (value << 4) + digit;
  }

  return value;
}
618
/*
 * Non-zero when the code value u falls into one of the ranges
 * U+E000..U+F8FF, U+F0000..U+FFFFD or U+100000..U+10FFFD
 * (the Unicode private use areas).  The argument is evaluated
 * more than once.
 */
#define IS_PUA(u) ( \
  ((u) >= 0x00E000L && (u) <= 0x00F8FFL) || \
  ((u) >= 0x0F0000L && (u) <= 0x0FFFFDL) || \
  ((u) >= 0x100000L && (u) <= 0x10FFFDL)    \
)
623
/*
 * Write the character(s) encoded by a Unicode glyph name as UTF-16BE.
 *
 *   name    glyph name without suffix, either "uXXXX..." (one code
 *           value made of all digits after the 'u') or "uniXXXX..."
 *           (one code value per group of four digits).
 *   dstpp   output cursor, passed on to UC_sput_UTF16BE().
 *   limptr  output limit, passed on to UC_sput_UTF16BE().
 *
 * Returns the sum of the values returned by UC_sput_UTF16BE().
 */
static long
put_unicode_glyph (const char *name,
                   unsigned char **dstpp, unsigned char *limptr)
{
  long        written = 0;
  const char *hex;

  if (name[1] == 'n') {
    /* "uni" form: consecutive groups of four hex digits. */
    for (hex = name + 3; *hex != '\0'; hex += 4) {
      written += UC_sput_UTF16BE (xtol(hex, 4), dstpp, limptr);
    }
  } else {
    /* "u" form: a single code value. */
    hex = name + 1;
    written += UC_sput_UTF16BE (xtol(hex, strlen(hex)), dstpp, limptr);
  }

  return written;
}
649
650 long
agl_sput_UTF16BE(const char * glyphstr,unsigned char ** dstpp,unsigned char * limptr,int * fail_count)651 agl_sput_UTF16BE (const char *glyphstr,
652 unsigned char **dstpp, unsigned char *limptr,
653 int *fail_count)
654 {
655 long len = 0;
656 int count = 0;
657 const char *p, *endptr;
658
659 ASSERT(glyphstr && dstpp);
660
661 p = glyphstr;
662 endptr = strchr(p, '.');
663 if (!endptr)
664 endptr = p + strlen(p);
665
666 while (p < endptr) {
667 char *name;
668 const char *delim;
669 long sub_len;
670 int i;
671 agl_name *agln0, *agln1 = NULL;
672
673 delim = strchr(p, '_');
674 if (delim == p) {
675 /*
676 * Glyph names starting with a underscore or two subsequent
677 * underscore in glyph name not allowed?
678 */
679 WARN("Invalid glyph name component in \"%s\".", glyphstr);
680 count++;
681 if (fail_count)
682 *fail_count = count;
683 return len; /* Cannot continue */
684 } else if (!delim || delim > endptr) {
685 delim = endptr;
686 }
687 sub_len = (long) (delim - p);
688
689 name = NEW(sub_len+1, char);
690 memcpy(name, p, sub_len);
691 name[sub_len] = '\0';
692
693 if (agl_name_is_unicode(name)) {
694 sub_len = put_unicode_glyph(name, dstpp, limptr);
695 if (sub_len > 0)
696 len += sub_len;
697 else {
698 count++;
699 }
700 } else {
701 agln1 = agl_lookup_list(name);
702 if (!agln1 || (agln1->n_components == 1 &&
703 IS_PUA(agln1->unicodes[0]))) {
704 agln0 = agl_normalized_name(name);
705 if (agln0) {
706 if (verbose > 1 && agln0->suffix) {
707 WARN("agl: fix %s --> %s.%s",
708 name, agln0->name, agln0->suffix);
709 }
710 agln1 = agl_lookup_list(agln0->name);
711 agl_release_name(agln0);
712 }
713 }
714 if (agln1) {
715 for (i = 0; i < agln1->n_components; i++) {
716 len += UC_sput_UTF16BE (agln1->unicodes[i], dstpp, limptr);
717 }
718 } else {
719 if (verbose) {
720 WARN("No Unicode mapping for glyph name \"%s\" found.", name);
721 }
722 count++;
723 }
724 }
725 RELEASE(name);
726 p = delim + 1;
727 }
728
729 if (fail_count)
730 *fail_count = count;
731 return len;
732 }
733
734 int
agl_get_unicodes(const char * glyphstr,long * unicodes,int max_unicodes)735 agl_get_unicodes (const char *glyphstr,
736 long *unicodes, int max_unicodes)
737 {
738 int count = 0;
739 const char *p, *endptr;
740
741 p = glyphstr;
742 endptr = strchr(p, '.');
743 if (!endptr)
744 endptr = p + strlen(p);
745
746 while (p < endptr) {
747 char *name;
748 const char *delim;
749 long sub_len;
750 int i;
751 agl_name *agln0, *agln1 = NULL;
752
753 delim = strchr(p, '_');
754 if (delim == p) {
755 /*
756 * Glyph names starting with a underscore or two subsequent
757 * underscore in glyph name not allowed?
758 */
759 WARN("Invalid glyph name component in \"%s\".", glyphstr);
760 return -1; /* Cannot continue */
761 } else if (!delim || delim > endptr) {
762 delim = endptr;
763 }
764 sub_len = (long) (delim - p);
765
766 name = NEW(sub_len+1, char);
767 memcpy(name, p, sub_len);
768 name[sub_len] = '\0';
769
770 if (agl_name_is_unicode(name)) {
771 p = name;
772 if (p[1] != 'n') { /* uXXXXXXXX */
773 if (count >= max_unicodes) {
774 RELEASE(name);
775 return -1;
776 }
777 p++;
778 unicodes[count++] = xtol(p, strlen(p));
779 } else {
780 p += 3;
781 while (*p != '\0') {
782 if (count >= max_unicodes) {
783 RELEASE(name);
784 return -1;
785 }
786 unicodes[count++] = xtol(p, 4);
787 p += 4;
788 }
789 }
790 } else {
791 agln1 = agl_lookup_list(name);
792 if (!agln1 || (agln1->n_components == 1 &&
793 IS_PUA(agln1->unicodes[0]))) {
794 agln0 = agl_normalized_name(name);
795 if (agln0) {
796 if (verbose > 1 && agln0->suffix) {
797 WARN("agl: fix %s --> %s.%s",
798 name, agln0->name, agln0->suffix);
799 }
800 agln1 = agl_lookup_list(agln0->name);
801 agl_release_name(agln0);
802 }
803 }
804 if (agln1) {
805 if (count + agln1->n_components > max_unicodes) {
806 RELEASE(name);
807 return -1;
808 }
809 for (i = 0; i < agln1->n_components; i++) {
810 unicodes[count++] = agln1->unicodes[i];
811 }
812 } else {
813 if (verbose > 1)
814 WARN("No Unicode mapping for glyph name \"%s\" found.", name);
815 RELEASE(name);
816 return -1;
817 }
818 }
819 RELEASE(name);
820 p = delim + 1;
821 }
822
823 return count;
824 }
825