1 /* This is dvipdfmx, an eXtended version of dvipdfm by Mark A. Wicks.
2
3 Copyright (C) 2008-2014 by Jin-Hwan Cho, Matthias Franz, and Shunsaku Hirata,
4 the dvipdfmx project team.
5
6 Copyright (C) 1998, 1999 by Mark A. Wicks <mwicks@kettering.edu>
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
21 */
22
23 #ifdef HAVE_CONFIG_H
24 #include <config.h>
25 #endif
26
27 #include <string.h>
28
29 #include "system.h"
30 #include "mem.h"
31 #include "error.h"
32 #include "dpxutil.h"
33
34 #include "pdfparse.h"
35 #include "pdfobj.h"
36
37 #include "dpxfile.h"
38
39 #include "pdfencoding.h"
40
41 static int is_similar_charset (char **encoding, const char **encoding2);
42 static pdf_obj *make_encoding_differences (char **encoding, char **baseenc,
43 const char *is_used);
44
45 static unsigned char verbose = 0;
46
47 static const char *MacRomanEncoding[256];
48 static const char *MacExpertEncoding[256];
49 static const char *WinAnsiEncoding[256];
50 #if 0
51 static const char *StandardEncoding[256];
52 static const char *ISOLatin1Encoding[256];
53 #endif
54
55 void
pdf_encoding_set_verbose(void)56 pdf_encoding_set_verbose (void)
57 {
58 verbose++;
59 }
60
61 /*
62 * ident: File name, e.g., 8a.enc.
63 * name: Name of encoding, StandardEncoding, TeXBase1Encoding, ...
64 * glyphs: List of encoded glyphs (name).
65 * flags:
66 * IS_PREDEFINED:
67 * Encoding is one of the MacRomanEncoding, MacExpertEncoding, and
68 * WinAnsiEncoding.
69 * FLAG_USED_BY_TYPE3:
70 * Encoding is used by a Type 3 font.
71 */
72 #define FLAG_IS_PREDEFINED (1 << 0)
73 #define FLAG_USED_BY_TYPE3 (1 << 1)
74
75 typedef struct pdf_encoding
76 {
77 char *ident;
78
79 char *enc_name;
80 int flags;
81 char *glyphs[256]; /* ".notdef" must be represented as NULL */
82 char is_used[256];
83
84 struct pdf_encoding *baseenc;
85 pdf_obj *tounicode;
86
87 pdf_obj *resource;
88 } pdf_encoding;
89
/* Adds a new entry to the encoding cache and returns its id.
 * The definition follows later in this file.
 */
static int pdf_encoding_new_encoding (const char  *enc_name,
                                      const char  *ident,
                                      const char **encoding_vec,
                                      const char  *baseenc_name,
                                      int          flags);
95
96 static void
pdf_init_encoding_struct(pdf_encoding * encoding)97 pdf_init_encoding_struct (pdf_encoding *encoding)
98 {
99 ASSERT(encoding);
100
101 encoding->ident = NULL;
102
103 encoding->enc_name = NULL;
104
105 memset(encoding->glyphs, 0, 256*sizeof(char *));
106 memset(encoding->is_used, 0, 256);
107
108 encoding->tounicode = NULL;
109
110 encoding->baseenc = NULL;
111 encoding->resource = NULL;
112
113 encoding->flags = 0;
114
115 return;
116 }
117
118 /* Creates the PDF Encoding entry for the encoding.
119 * If baseenc is non-null, it is used as BaseEncoding entry.
120 */
121 static pdf_obj *
create_encoding_resource(pdf_encoding * encoding,pdf_encoding * baseenc)122 create_encoding_resource (pdf_encoding *encoding, pdf_encoding *baseenc)
123 {
124 pdf_obj *differences;
125 ASSERT(encoding);
126 ASSERT(!encoding->resource);
127
128 differences = make_encoding_differences(encoding->glyphs,
129 baseenc ? baseenc->glyphs : NULL,
130 encoding->is_used);
131
132 if (differences) {
133 pdf_obj *resource = pdf_new_dict();
134 if (baseenc)
135 pdf_add_dict(resource, pdf_new_name("BaseEncoding"),
136 pdf_link_obj(baseenc->resource));
137 pdf_add_dict(resource, pdf_new_name("Differences"), differences);
138 return resource;
139 } else {
140 /* Fix a bug with the MinionPro package using MnSymbol fonts
141 * in its virtual fonts:
142 *
143 * Some font may have font_id even if no character is used.
144 * For example, suppose that a virtual file A.vf uses two
145 * other fonts, B and C. Even if only characters of B are used
146 * in a DVI document, C will have font_id too.
147 * In this case, both baseenc and differences can be NULL.
148 *
149 * Actually these fonts will be ignored in pdffont.c.
150 */
151 return baseenc ? pdf_link_obj(baseenc->resource) : NULL;
152 }
153 }
154
155 static void
pdf_flush_encoding(pdf_encoding * encoding)156 pdf_flush_encoding (pdf_encoding *encoding)
157 {
158 ASSERT(encoding);
159
160 if (encoding->resource) {
161 pdf_release_obj(encoding->resource);
162 encoding->resource = NULL;
163 }
164 if (encoding->tounicode) {
165 pdf_release_obj(encoding->tounicode);
166 encoding->tounicode = NULL;
167 }
168
169 return;
170 }
171
172 static void
pdf_clean_encoding_struct(pdf_encoding * encoding)173 pdf_clean_encoding_struct (pdf_encoding *encoding)
174 {
175 int code;
176
177 ASSERT(encoding);
178
179 if (encoding->resource)
180 ERROR("Object not flushed.");
181
182 if (encoding->tounicode)
183 pdf_release_obj(encoding->tounicode);
184 if (encoding->ident)
185 RELEASE(encoding->ident);
186 if (encoding->enc_name)
187 RELEASE(encoding->enc_name);
188
189 encoding->ident = NULL;
190 encoding->enc_name = NULL;
191
192 for (code = 0; code < 256; code++) {
193 if (encoding->glyphs[code])
194 RELEASE(encoding->glyphs[code]);
195 encoding->glyphs[code] = NULL;
196 }
197 encoding->ident = NULL;
198 encoding->enc_name = NULL;
199
200 return;
201 }
202
#if 0
/* Comparison callback (currently compiled out): pv1 is taken as a
 * string, pv2 as a pointer to a string; both strings are compared
 * with strcmp().
 */
static int CDECL
glycmp (const void *pv1, const void *pv2)
{
  char *lhs = (char *) pv1;
  char *rhs = *((char **) pv2);

  return strcmp(lhs, rhs);
}
#endif
215
/* Tells whether two encoding vectors are similar.
 *
 * A character code counts as matching when its entry in enc_vec is
 * NULL or names the same glyph as the entry in enc_vec2.  Returns 1 as
 * soon as 64 matching codes have been seen, 0 otherwise.
 */
static int
is_similar_charset (char **enc_vec, const char **enc_vec2)
{
  int matches = 0;
  int i;

  for (i = 0; i < 256; i++) {
    if (enc_vec[i] != NULL && strcmp(enc_vec[i], enc_vec2[i]) != 0)
      continue; /* defined in enc_vec and different from enc_vec2 */

    matches++;
    if (matches >= 64) /* is 64 a good level? */
      return 1;
  }

  return 0;
}
229
230 /* Creates a PDF Differences array for the encoding, based on the
231 * base encoding baseenc (if not NULL). Only character codes which
232 * are actually used in the document are considered.
233 */
234 static pdf_obj *
make_encoding_differences(char ** enc_vec,char ** baseenc,const char * is_used)235 make_encoding_differences (char **enc_vec, char **baseenc, const char *is_used)
236 {
237 pdf_obj *differences = NULL;
238 int code, count = 0;
239 int skipping = 1;
240
241 ASSERT(enc_vec);
242
243 /*
244 * Write all entries (except .notdef) if baseenc is unknown.
245 * If is_used is given, write only used entries.
246 */
247 differences = pdf_new_array();
248 for (code = 0; code < 256; code++) {
249 /* We skip NULL (= ".notdef"). Any character code mapped to ".notdef"
250 * glyph should not be used in the document.
251 */
252 if ((is_used && !is_used[code]) || !enc_vec[code])
253 skipping = 1;
254 else if (!baseenc || !baseenc[code] ||
255 strcmp(baseenc[code], enc_vec[code]) != 0) {
256 /*
257 * Difference found.
258 */
259 if (skipping)
260 pdf_add_array(differences, pdf_new_number(code));
261 pdf_add_array(differences, pdf_new_name(enc_vec[code]));
262 skipping = 0;
263 count++;
264 } else
265 skipping = 1;
266 }
267
268 /*
269 * No difference found. Some PDF viewers can't handle differences without
270 * any differences. We return NULL.
271 */
272 if (count == 0) {
273 pdf_release_obj(differences);
274 differences = NULL;
275 }
276
277 return differences;
278 }
279
280 static int
load_encoding_file(const char * filename)281 load_encoding_file (const char *filename)
282 {
283 FILE *fp;
284 pdf_obj *enc_name = NULL;
285 pdf_obj *encoding_array = NULL;
286 char *wbuf;
287 const char *p, *endptr;
288 const char *enc_vec[256];
289 int code, fsize, enc_id;
290
291 if (!filename)
292 return -1;
293
294 if (verbose) {
295 MESG("(Encoding:%s", filename);
296 }
297
298 fp = DPXFOPEN(filename, DPX_RES_TYPE_ENC);
299 if (!fp)
300 return -1;
301 /*
302 * file_size do seek_end witout saving current position and
303 * do rewind.
304 */
305 fsize = file_size(fp);
306
307 wbuf = NEW(fsize + 1, char);
308 wbuf[fsize] = '\0';
309 fread(wbuf, sizeof(char), fsize, fp);
310 DPXFCLOSE(fp);
311
312 p = wbuf;
313 endptr = wbuf + fsize;
314
315 skip_white(&p, endptr);
316
317 /*
318 * Skip comment lines.
319 */
320 while (p < endptr && p[0] == '%') {
321 skip_line (&p, endptr);
322 skip_white(&p, endptr);
323 }
324 if (p[0] == '/')
325 enc_name = parse_pdf_name(&p, endptr);
326
327 skip_white(&p, endptr);
328 encoding_array = parse_pdf_array(&p, endptr, NULL);
329 RELEASE(wbuf);
330 if (!encoding_array) {
331 if (enc_name)
332 pdf_release_obj(enc_name);
333 return -1;
334 }
335
336 for (code = 0; code < 256; code++) {
337 enc_vec[code] = pdf_name_value(pdf_get_array(encoding_array, code));
338 }
339 enc_id = pdf_encoding_new_encoding(enc_name ? pdf_name_value(enc_name) : NULL,
340 filename, enc_vec, NULL, 0);
341
342 if (enc_name) {
343 if (verbose > 1)
344 MESG("[%s]", pdf_name_value(enc_name));
345 pdf_release_obj(enc_name);
346 }
347 pdf_release_obj(encoding_array);
348
349 if (verbose) MESG(")");
350
351 return enc_id;
352 }
353
354 #define CHECK_ID(n) do { \
355 if ((n) < 0 || (n) >= enc_cache.count) { \
356 ERROR("Invalid encoding id: %d", (n)); \
357 } \
358 } while (0)
359
360 #define CACHE_ALLOC_SIZE 16u
361
362 struct {
363 int count;
364 int capacity;
365 pdf_encoding *encodings;
366 } enc_cache = {
367 0, 0, NULL
368 };
369
370 void
pdf_init_encodings(void)371 pdf_init_encodings (void)
372 {
373 enc_cache.count = 0;
374 enc_cache.capacity = 3;
375 enc_cache.encodings = NEW(enc_cache.capacity, pdf_encoding);
376
377 /*
378 * PDF Predefined Encodings
379 */
380 pdf_encoding_new_encoding("WinAnsiEncoding", "WinAnsiEncoding",
381 WinAnsiEncoding, NULL, FLAG_IS_PREDEFINED);
382 pdf_encoding_new_encoding("MacRomanEncoding", "MacRomanEncoding",
383 MacRomanEncoding, NULL, FLAG_IS_PREDEFINED);
384 pdf_encoding_new_encoding("MacExpertEncoding", "MacExpertEncoding",
385 MacExpertEncoding, NULL, FLAG_IS_PREDEFINED);
386
387 return;
388 }
389
/*
 * The original dvipdfm source contains the following remark:
 *
 *  Some software doesn't like BaseEncoding key (e.g., FastLane)
 *  so this code is commented out for the moment.  It may reemerge in the
 *  future
 *
 * and the line that writes the BaseEncoding key is commented out there.
 *
 * I'm not sure why this happens, but maybe the BaseEncoding key causes
 * problems when the font is a Symbol font or a TrueType font.
 */
402
403 static int
pdf_encoding_new_encoding(const char * enc_name,const char * ident,const char ** encoding_vec,const char * baseenc_name,int flags)404 pdf_encoding_new_encoding (const char *enc_name, const char *ident,
405 const char **encoding_vec,
406 const char *baseenc_name, int flags)
407 {
408 int enc_id, code;
409
410 pdf_encoding *encoding;
411
412 enc_id = enc_cache.count;
413 if (enc_cache.count++ >= enc_cache.capacity) {
414 enc_cache.capacity += 16;
415 enc_cache.encodings = RENEW(enc_cache.encodings,
416 enc_cache.capacity, pdf_encoding);
417 }
418 encoding = &enc_cache.encodings[enc_id];
419
420 pdf_init_encoding_struct(encoding);
421
422 encoding->ident = NEW(strlen(ident)+1, char);
423 strcpy(encoding->ident, ident);
424 encoding->enc_name = NEW(strlen(enc_name)+1, char);
425 strcpy(encoding->enc_name, enc_name);
426
427 encoding->flags = flags;
428
429 for (code = 0; code < 256; code++)
430 if (encoding_vec[code] && strcmp(encoding_vec[code], ".notdef")) {
431 encoding->glyphs[code] = NEW(strlen(encoding_vec[code])+1, char);
432 strcpy(encoding->glyphs[code], encoding_vec[code]);
433 }
434
435 if (!baseenc_name && !(flags & FLAG_IS_PREDEFINED)
436 && is_similar_charset(encoding->glyphs, WinAnsiEncoding)) {
437 /* Dvipdfmx default setting. */
438 baseenc_name = "WinAnsiEncoding";
439 }
440
441 /* TODO: make base encoding configurable */
442 if (baseenc_name) {
443 int baseenc_id = pdf_encoding_findresource(baseenc_name);
444 if (baseenc_id < 0 || !pdf_encoding_is_predefined(baseenc_id))
445 ERROR("Illegal base encoding %s for encoding %s\n",
446 baseenc_name, encoding->enc_name);
447 encoding->baseenc = &enc_cache.encodings[baseenc_id];
448 }
449
450 if (flags & FLAG_IS_PREDEFINED)
451 encoding->resource = pdf_new_name(encoding->enc_name);
452
453 return enc_id;
454 }
455
456 /* Creates Encoding resource and ToUnicode CMap
457 * for all non-predefined encodings.
458 */
pdf_encoding_complete(void)459 void pdf_encoding_complete (void)
460 {
461 int enc_id;
462
463 for (enc_id = 0; enc_id < enc_cache.count; enc_id++) {
464 if (!pdf_encoding_is_predefined(enc_id)) {
465 pdf_encoding *encoding = &enc_cache.encodings[enc_id];
466 /* Section 5.5.4 of the PDF 1.5 reference says that the encoding
467 * of a Type 3 font must be completely described by a Differences
468 * array, but implementation note 56 explains that this is rather
469 * an incorrect implementation in Acrobat 4 and earlier. Hence,
470 * we do use a base encodings for PDF versions >= 1.3.
471 */
472 int with_base = !(encoding->flags & FLAG_USED_BY_TYPE3)
473 || pdf_get_version() >= 4;
474 ASSERT(!encoding->resource);
475 encoding->resource = create_encoding_resource(encoding,
476 with_base ? encoding->baseenc : NULL);
477 ASSERT(!encoding->tounicode);
478 encoding->tounicode = pdf_create_ToUnicode_CMap(encoding->enc_name,
479 encoding->glyphs,
480 encoding->is_used);
481 }
482 }
483 }
484
485 void
pdf_close_encodings(void)486 pdf_close_encodings (void)
487 {
488 int enc_id;
489
490 if (enc_cache.encodings) {
491 for (enc_id = 0; enc_id < enc_cache.count; enc_id++) {
492 pdf_encoding *encoding;
493
494 encoding = &enc_cache.encodings[enc_id];
495 if (encoding) {
496 pdf_flush_encoding(encoding);
497 pdf_clean_encoding_struct(encoding);
498 }
499 }
500 RELEASE(enc_cache.encodings);
501 }
502 enc_cache.encodings = NULL;
503 enc_cache.count = 0;
504 enc_cache.capacity = 0;
505 }
506
507 int
pdf_encoding_findresource(const char * enc_name)508 pdf_encoding_findresource (const char *enc_name)
509 {
510 int enc_id;
511 pdf_encoding *encoding;
512
513 ASSERT(enc_name);
514 for (enc_id = 0; enc_id < enc_cache.count; enc_id++) {
515 encoding = &enc_cache.encodings[enc_id];
516 if (encoding->ident &&
517 !strcmp(enc_name, encoding->ident))
518 return enc_id;
519 else if (encoding->enc_name &&
520 !strcmp(enc_name, encoding->enc_name))
521 return enc_id;
522 }
523
524 return load_encoding_file(enc_name);
525 }
526
527
528 /*
529 * Pointer will change if other encoding is loaded...
530 */
531
532 char **
pdf_encoding_get_encoding(int enc_id)533 pdf_encoding_get_encoding (int enc_id)
534 {
535 pdf_encoding *encoding;
536
537 CHECK_ID(enc_id);
538
539 encoding = &enc_cache.encodings[enc_id];
540
541 return encoding->glyphs;
542 }
543
544 pdf_obj *
pdf_get_encoding_obj(int enc_id)545 pdf_get_encoding_obj (int enc_id)
546 {
547 pdf_encoding *encoding;
548
549 CHECK_ID(enc_id);
550
551 encoding = &enc_cache.encodings[enc_id];
552
553 return encoding->resource;
554 }
555
556 int
pdf_encoding_is_predefined(int enc_id)557 pdf_encoding_is_predefined (int enc_id)
558 {
559 pdf_encoding *encoding;
560
561 CHECK_ID(enc_id);
562
563 encoding = &enc_cache.encodings[enc_id];
564
565 return (encoding->flags & FLAG_IS_PREDEFINED) ? 1 : 0;
566 }
567
568 void
pdf_encoding_used_by_type3(int enc_id)569 pdf_encoding_used_by_type3 (int enc_id)
570 {
571 pdf_encoding *encoding;
572
573 CHECK_ID(enc_id);
574
575 encoding = &enc_cache.encodings[enc_id];
576
577 encoding->flags |= FLAG_USED_BY_TYPE3;
578 }
579
580
581 char *
pdf_encoding_get_name(int enc_id)582 pdf_encoding_get_name (int enc_id)
583 {
584 pdf_encoding *encoding;
585
586 CHECK_ID(enc_id);
587
588 encoding = &enc_cache.encodings[enc_id];
589
590 return encoding->enc_name;
591 }
592
593 /* CSI_UNICODE */
594 #include "cid.h"
595
596 #include "cmap.h"
597 #include "cmap_read.h"
598 #include "cmap_write.h"
599
600 #include "agl.h"
601
/* Work buffer used while building ToUnicode CMap entries. */
#define WBUF_SIZE 1024
static unsigned char wbuf[WBUF_SIZE];

/* Lower and upper bound of the one-byte code space 0x00 .. 0xFF. */
static unsigned char range_min[1] = { 0x00u };
static unsigned char range_max[1] = { 0xFFu };
606
607 void
pdf_encoding_add_usedchars(int encoding_id,const char * is_used)608 pdf_encoding_add_usedchars (int encoding_id, const char *is_used)
609 {
610 pdf_encoding *encoding;
611 int code;
612
613 CHECK_ID(encoding_id);
614
615 if (!is_used || pdf_encoding_is_predefined(encoding_id))
616 return;
617
618 encoding = &enc_cache.encodings[encoding_id];
619
620 for (code = 0; code <= 0xff; code++)
621 encoding->is_used[code] |= is_used[code];
622 }
623
624 pdf_obj *
pdf_encoding_get_tounicode(int encoding_id)625 pdf_encoding_get_tounicode (int encoding_id)
626 {
627 CHECK_ID(encoding_id);
628
629 return enc_cache.encodings[encoding_id].tounicode;
630 }
631
632
633 /* Creates a ToUnicode CMap. An empty CMap is replaced by NULL.
634 *
635 * For PDF <= 1.4 a complete CMap is created unless all character codes
636 * are predefined in PDF. For PDF >= 1.5 only those character codes which
637 * are not predefined appear in the CMap.
638 *
639 * Note: The PDF 1.4 reference is not consistent: Section 5.9 describes
640 * the Unicode mapping of PDF 1.3 and Section 9.7.2 (in the context of
641 * Tagged PDF) the one of PDF 1.5.
642 */
643 pdf_obj *
pdf_create_ToUnicode_CMap(const char * enc_name,char ** enc_vec,const char * is_used)644 pdf_create_ToUnicode_CMap (const char *enc_name,
645 char **enc_vec, const char *is_used)
646 {
647 pdf_obj *stream;
648 CMap *cmap;
649 int code, all_predef;
650 char *cmap_name;
651 unsigned char *p, *endptr;
652
653 ASSERT(enc_name && enc_vec);
654
655 cmap_name = NEW(strlen(enc_name)+strlen("-UTF16")+1, char);
656 sprintf(cmap_name, "%s-UTF16", enc_name);
657
658 cmap = CMap_new();
659 CMap_set_name (cmap, cmap_name);
660 CMap_set_type (cmap, CMAP_TYPE_TO_UNICODE);
661 CMap_set_wmode(cmap, 0);
662
663 CMap_set_CIDSysInfo(cmap, &CSI_UNICODE);
664
665 CMap_add_codespacerange(cmap, range_min, range_max, 1);
666
667 all_predef = 1;
668 for (code = 0; code <= 0xff; code++) {
669 if (is_used && !is_used[code])
670 continue;
671
672 if (enc_vec[code]) {
673 long len;
674 int fail_count = 0;
675 agl_name *agln = agl_lookup_list(enc_vec[code]);
676 /* Adobe glyph naming conventions are not used by viewers,
677 * hence even ligatures (e.g, "f_i") must be explicitly defined
678 */
679 if (pdf_get_version() < 5 || !agln || !agln->is_predef) {
680 wbuf[0] = (code & 0xff);
681 p = wbuf + 1;
682 endptr = wbuf + WBUF_SIZE;
683 len = agl_sput_UTF16BE(enc_vec[code], &p, endptr, &fail_count);
684 if (len >= 1 && !fail_count) {
685 CMap_add_bfchar(cmap, wbuf, 1, wbuf + 1, len);
686 all_predef &= agln && agln->is_predef;
687 }
688 }
689 }
690 }
691
692 stream = all_predef ? NULL : CMap_create_stream(cmap);
693
694 CMap_release(cmap);
695 RELEASE(cmap_name);
696
697 return stream;
698 }
699
700
701 pdf_obj *
pdf_load_ToUnicode_stream(const char * ident)702 pdf_load_ToUnicode_stream (const char *ident)
703 {
704 pdf_obj *stream = NULL;
705 CMap *cmap;
706 FILE *fp;
707
708 if (!ident)
709 return NULL;
710
711 fp = DPXFOPEN(ident, DPX_RES_TYPE_CMAP);
712 if (!fp)
713 return NULL;
714 else if (CMap_parse_check_sig(fp) < 0) {
715 DPXFCLOSE(fp);
716 return NULL;
717 }
718
719 cmap = CMap_new();
720 if (CMap_parse(cmap, fp) < 0) {
721 WARN("Reading CMap file \"%s\" failed.", ident);
722 } else {
723 if (verbose) {
724 MESG("(CMap:%s)", ident);
725 }
726 stream = CMap_create_stream(cmap);
727 if (!stream) {
728 WARN("Failed to creat ToUnicode CMap stream for \"%s\".", ident);
729 }
730 }
731 CMap_release(cmap);
732 DPXFCLOSE(fp);
733
734 return stream;
735 }
736
737
/* Glyph names of MacRomanEncoding, indexed by character code.
 * Four codes per row; the comment gives the code of the first entry.
 */
static const char *MacRomanEncoding[256] = {
  /* 0x00 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x04 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x08 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x0C */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x10 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x14 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x18 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x1C */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x20 */ "space", "exclam", "quotedbl", "numbersign",
  /* 0x24 */ "dollar", "percent", "ampersand", "quotesingle",
  /* 0x28 */ "parenleft", "parenright", "asterisk", "plus",
  /* 0x2C */ "comma", "hyphen", "period", "slash",
  /* 0x30 */ "zero", "one", "two", "three",
  /* 0x34 */ "four", "five", "six", "seven",
  /* 0x38 */ "eight", "nine", "colon", "semicolon",
  /* 0x3C */ "less", "equal", "greater", "question",
  /* 0x40 */ "at", "A", "B", "C",
  /* 0x44 */ "D", "E", "F", "G",
  /* 0x48 */ "H", "I", "J", "K",
  /* 0x4C */ "L", "M", "N", "O",
  /* 0x50 */ "P", "Q", "R", "S",
  /* 0x54 */ "T", "U", "V", "W",
  /* 0x58 */ "X", "Y", "Z", "bracketleft",
  /* 0x5C */ "backslash", "bracketright", "asciicircum", "underscore",
  /* 0x60 */ "grave", "a", "b", "c",
  /* 0x64 */ "d", "e", "f", "g",
  /* 0x68 */ "h", "i", "j", "k",
  /* 0x6C */ "l", "m", "n", "o",
  /* 0x70 */ "p", "q", "r", "s",
  /* 0x74 */ "t", "u", "v", "w",
  /* 0x78 */ "x", "y", "z", "braceleft",
  /* 0x7C */ "bar", "braceright", "asciitilde", ".notdef",
  /* 0x80 */ "Adieresis", "Aring", "Ccedilla", "Eacute",
  /* 0x84 */ "Ntilde", "Odieresis", "Udieresis", "aacute",
  /* 0x88 */ "agrave", "acircumflex", "adieresis", "atilde",
  /* 0x8C */ "aring", "ccedilla", "eacute", "egrave",
  /* 0x90 */ "ecircumflex", "edieresis", "iacute", "igrave",
  /* 0x94 */ "icircumflex", "idieresis", "ntilde", "oacute",
  /* 0x98 */ "ograve", "ocircumflex", "odieresis", "otilde",
  /* 0x9C */ "uacute", "ugrave", "ucircumflex", "udieresis",
  /* 0xA0 */ "dagger", "degree", "cent", "sterling",
  /* 0xA4 */ "section", "bullet", "paragraph", "germandbls",
  /* 0xA8 */ "registered", "copyright", "trademark", "acute",
  /* 0xAC */ "dieresis", "notequal", "AE", "Oslash",
  /* 0xB0 */ "infinity", "plusminus", "lessequal", "greaterequal",
  /* 0xB4 */ "yen", "mu", "partialdiff", "summation",
  /* 0xB8 */ "product", "pi", "integral", "ordfeminine",
  /* 0xBC */ "ordmasculine", "Omega", "ae", "oslash",
  /* 0xC0 */ "questiondown", "exclamdown", "logicalnot", "radical",
  /* 0xC4 */ "florin", "approxequal", "Delta", "guillemotleft",
  /* 0xC8 */ "guillemotright", "ellipsis", "space", "Agrave",
  /* 0xCC */ "Atilde", "Otilde", "OE", "oe",
  /* 0xD0 */ "endash", "emdash", "quotedblleft", "quotedblright",
  /* 0xD4 */ "quoteleft", "quoteright", "divide", "lozenge",
  /* 0xD8 */ "ydieresis", "Ydieresis", "fraction", "currency",
  /* 0xDC */ "guilsinglleft", "guilsinglright", "fi", "fl",
  /* 0xE0 */ "daggerdbl", "periodcentered", "quotesinglbase", "quotedblbase",
  /* 0xE4 */ "perthousand", "Acircumflex", "Ecircumflex", "Aacute",
  /* 0xE8 */ "Edieresis", "Egrave", "Iacute", "Icircumflex",
  /* 0xEC */ "Idieresis", "Igrave", "Oacute", "Ocircumflex",
  /* 0xF0 */ "apple", "Ograve", "Uacute", "Ucircumflex",
  /* 0xF4 */ "Ugrave", "dotlessi", "circumflex", "tilde",
  /* 0xF8 */ "macron", "breve", "dotaccent", "ring",
  /* 0xFC */ "cedilla", "hungarumlaut", "ogonek", "caron"
};
805
/* Glyph names of MacExpertEncoding, indexed by character code.
 * Four codes per row; the comment gives the code of the first entry.
 */
static const char *MacExpertEncoding[256] = {
  /* 0x00 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x04 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x08 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x0C */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x10 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x14 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x18 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x1C */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x20 */ "space", "exclamsmall", "Hungarumlautsmall", "centoldstyle",
  /* 0x24 */ "dollaroldstyle", "dollarsuperior", "ampersandsmall", "Acutesmall",
  /* 0x28 */ "parenleftsuperior", "parenrightsuperior", "twodotenleader", "onedotenleader",
  /* 0x2C */ "comma", "hyphen", "period", "fraction",
  /* 0x30 */ "zerooldstyle", "oneoldstyle", "twooldstyle", "threeoldstyle",
  /* 0x34 */ "fouroldstyle", "fiveoldstyle", "sixoldstyle", "sevenoldstyle",
  /* 0x38 */ "eightoldstyle", "nineoldstyle", "colon", "semicolon",
  /* 0x3C */ ".notdef", "threequartersemdash", ".notdef", "questionsmall",
  /* 0x40 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x44 */ "Ethsmall", ".notdef", ".notdef", "onequarter",
  /* 0x48 */ "onehalf", "threequarters", "oneeighth", "threeeighths",
  /* 0x4C */ "fiveeighths", "seveneighths", "onethird", "twothirds",
  /* 0x50 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x54 */ ".notdef", ".notdef", "ff", "fi",
  /* 0x58 */ "fl", "ffi", "ffl", "parenleftinferior",
  /* 0x5C */ ".notdef", "parenrightinferior", "Circumflexsmall", "hypheninferior",
  /* 0x60 */ "Gravesmall", "Asmall", "Bsmall", "Csmall",
  /* 0x64 */ "Dsmall", "Esmall", "Fsmall", "Gsmall",
  /* 0x68 */ "Hsmall", "Ismall", "Jsmall", "Ksmall",
  /* 0x6C */ "Lsmall", "Msmall", "Nsmall", "Osmall",
  /* 0x70 */ "Psmall", "Qsmall", "Rsmall", "Ssmall",
  /* 0x74 */ "Tsmall", "Usmall", "Vsmall", "Wsmall",
  /* 0x78 */ "Xsmall", "Ysmall", "Zsmall", "colonmonetary",
  /* 0x7C */ "onefitted", "rupiah", "Tildesmall", ".notdef",
  /* 0x80 */ ".notdef", "asuperior", "centsuperior", ".notdef",
  /* 0x84 */ ".notdef", ".notdef", ".notdef", "Aacutesmall",
  /* 0x88 */ "Agravesmall", "Acircumflexsmall", "Adieresissmall", "Atildesmall",
  /* 0x8C */ "Aringsmall", "Ccedillasmall", "Eacutesmall", "Egravesmall",
  /* 0x90 */ "Ecircumflexsmall", "Edieresissmall", "Iacutesmall", "Igravesmall",
  /* 0x94 */ "Icircumflexsmall", "Idieresissmall", "Ntildesmall", "Oacutesmall",
  /* 0x98 */ "Ogravesmall", "Ocircumflexsmall", "Odieresissmall", "Otildesmall",
  /* 0x9C */ "Uacutesmall", "Ugravesmall", "Ucircumflexsmall", "Udieresissmall",
  /* 0xA0 */ ".notdef", "eightsuperior", "fourinferior", "threeinferior",
  /* 0xA4 */ "sixinferior", "eightinferior", "seveninferior", "Scaronsmall",
  /* 0xA8 */ ".notdef", "centinferior", "twoinferior", ".notdef",
  /* 0xAC */ "Dieresissmall", ".notdef", "Caronsmall", "osuperior",
  /* 0xB0 */ "fiveinferior", ".notdef", "commainferior", "periodinferior",
  /* 0xB4 */ "Yacutesmall", ".notdef", "dollarinferior", ".notdef",
  /* 0xB8 */ ".notdef", "Thornsmall", ".notdef", "nineinferior",
  /* 0xBC */ "zeroinferior", "Zcaronsmall", "AEsmall", "Oslashsmall",
  /* 0xC0 */ "questiondownsmall", "oneinferior", "Lslashsmall", ".notdef",
  /* 0xC4 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0xC8 */ ".notdef", "Cedillasmall", ".notdef", ".notdef",
  /* 0xCC */ ".notdef", ".notdef", ".notdef", "OEsmall",
  /* 0xD0 */ "figuredash", "hyphensuperior", ".notdef", ".notdef",
  /* 0xD4 */ ".notdef", ".notdef", "exclamdownsmall", ".notdef",
  /* 0xD8 */ "Ydieresissmall", ".notdef", "onesuperior", "twosuperior",
  /* 0xDC */ "threesuperior", "foursuperior", "fivesuperior", "sixsuperior",
  /* 0xE0 */ "sevensuperior", "ninesuperior", "zerosuperior", ".notdef",
  /* 0xE4 */ "esuperior", "rsuperior", "tsuperior", ".notdef",
  /* 0xE8 */ ".notdef", "isuperior", "ssuperior", "dsuperior",
  /* 0xEC */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0xF0 */ ".notdef", "lsuperior", "Ogoneksmall", "Brevesmall",
  /* 0xF4 */ "Macronsmall", "bsuperior", "nsuperior", "msuperior",
  /* 0xF8 */ "commasuperior", "periodsuperior", "Dotaccentsmall", "Ringsmall",
  /* 0xFC */ ".notdef", ".notdef", ".notdef", ".notdef"
};
873
/* Glyph names of WinAnsiEncoding, indexed by character code.
 * Four codes per row; the comment gives the code of the first entry.
 */
static const char *WinAnsiEncoding[256] = {
  /* 0x00 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x04 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x08 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x0C */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x10 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x14 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x18 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x1C */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x20 */ "space", "exclam", "quotedbl", "numbersign",
  /* 0x24 */ "dollar", "percent", "ampersand", "quotesingle",
  /* 0x28 */ "parenleft", "parenright", "asterisk", "plus",
  /* 0x2C */ "comma", "hyphen", "period", "slash",
  /* 0x30 */ "zero", "one", "two", "three",
  /* 0x34 */ "four", "five", "six", "seven",
  /* 0x38 */ "eight", "nine", "colon", "semicolon",
  /* 0x3C */ "less", "equal", "greater", "question",
  /* 0x40 */ "at", "A", "B", "C",
  /* 0x44 */ "D", "E", "F", "G",
  /* 0x48 */ "H", "I", "J", "K",
  /* 0x4C */ "L", "M", "N", "O",
  /* 0x50 */ "P", "Q", "R", "S",
  /* 0x54 */ "T", "U", "V", "W",
  /* 0x58 */ "X", "Y", "Z", "bracketleft",
  /* 0x5C */ "backslash", "bracketright", "asciicircum", "underscore",
  /* 0x60 */ "grave", "a", "b", "c",
  /* 0x64 */ "d", "e", "f", "g",
  /* 0x68 */ "h", "i", "j", "k",
  /* 0x6C */ "l", "m", "n", "o",
  /* 0x70 */ "p", "q", "r", "s",
  /* 0x74 */ "t", "u", "v", "w",
  /* 0x78 */ "x", "y", "z", "braceleft",
  /* 0x7C */ "bar", "braceright", "asciitilde", "bullet",
  /* 0x80 */ "Euro", "bullet", "quotesinglbase", "florin",
  /* 0x84 */ "quotedblbase", "ellipsis", "dagger", "daggerdbl",
  /* 0x88 */ "circumflex", "perthousand", "Scaron", "guilsinglleft",
  /* 0x8C */ "OE", "bullet", "Zcaron", "bullet",
  /* 0x90 */ "bullet", "quoteleft", "quoteright", "quotedblleft",
  /* 0x94 */ "quotedblright", "bullet", "endash", "emdash",
  /* 0x98 */ "tilde", "trademark", "scaron", "guilsinglright",
  /* 0x9C */ "oe", "bullet", "zcaron", "Ydieresis",
  /* 0xA0 */ "space", "exclamdown", "cent", "sterling",
  /* 0xA4 */ "currency", "yen", "brokenbar", "section",
  /* 0xA8 */ "dieresis", "copyright", "ordfeminine", "guillemotleft",
  /* 0xAC */ "logicalnot", "hyphen", "registered", "macron",
  /* 0xB0 */ "degree", "plusminus", "twosuperior", "threesuperior",
  /* 0xB4 */ "acute", "mu", "paragraph", "periodcentered",
  /* 0xB8 */ "cedilla", "onesuperior", "ordmasculine", "guillemotright",
  /* 0xBC */ "onequarter", "onehalf", "threequarters", "questiondown",
  /* 0xC0 */ "Agrave", "Aacute", "Acircumflex", "Atilde",
  /* 0xC4 */ "Adieresis", "Aring", "AE", "Ccedilla",
  /* 0xC8 */ "Egrave", "Eacute", "Ecircumflex", "Edieresis",
  /* 0xCC */ "Igrave", "Iacute", "Icircumflex", "Idieresis",
  /* 0xD0 */ "Eth", "Ntilde", "Ograve", "Oacute",
  /* 0xD4 */ "Ocircumflex", "Otilde", "Odieresis", "multiply",
  /* 0xD8 */ "Oslash", "Ugrave", "Uacute", "Ucircumflex",
  /* 0xDC */ "Udieresis", "Yacute", "Thorn", "germandbls",
  /* 0xE0 */ "agrave", "aacute", "acircumflex", "atilde",
  /* 0xE4 */ "adieresis", "aring", "ae", "ccedilla",
  /* 0xE8 */ "egrave", "eacute", "ecircumflex", "edieresis",
  /* 0xEC */ "igrave", "iacute", "icircumflex", "idieresis",
  /* 0xF0 */ "eth", "ntilde", "ograve", "oacute",
  /* 0xF4 */ "ocircumflex", "otilde", "odieresis", "divide",
  /* 0xF8 */ "oslash", "ugrave", "uacute", "ucircumflex",
  /* 0xFC */ "udieresis", "yacute", "thorn", "ydieresis"
};
941
#if 0
/* Glyph names of StandardEncoding, indexed by character code
 * (currently compiled out).  Four codes per row; the comment gives the
 * code of the first entry.
 */
static const char *StandardEncoding[256] = {
  /* 0x00 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x04 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x08 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x0C */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x10 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x14 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x18 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x1C */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x20 */ "space", "exclam", "quotedbl", "numbersign",
  /* 0x24 */ "dollar", "percent", "ampersand", "quoteright",
  /* 0x28 */ "parenleft", "parenright", "asterisk", "plus",
  /* 0x2C */ "comma", "hyphen", "period", "slash",
  /* 0x30 */ "zero", "one", "two", "three",
  /* 0x34 */ "four", "five", "six", "seven",
  /* 0x38 */ "eight", "nine", "colon", "semicolon",
  /* 0x3C */ "less", "equal", "greater", "question",
  /* 0x40 */ "at", "A", "B", "C",
  /* 0x44 */ "D", "E", "F", "G",
  /* 0x48 */ "H", "I", "J", "K",
  /* 0x4C */ "L", "M", "N", "O",
  /* 0x50 */ "P", "Q", "R", "S",
  /* 0x54 */ "T", "U", "V", "W",
  /* 0x58 */ "X", "Y", "Z", "bracketleft",
  /* 0x5C */ "backslash", "bracketright", "asciicircum", "underscore",
  /* 0x60 */ "quoteleft", "a", "b", "c",
  /* 0x64 */ "d", "e", "f", "g",
  /* 0x68 */ "h", "i", "j", "k",
  /* 0x6C */ "l", "m", "n", "o",
  /* 0x70 */ "p", "q", "r", "s",
  /* 0x74 */ "t", "u", "v", "w",
  /* 0x78 */ "x", "y", "z", "braceleft",
  /* 0x7C */ "bar", "braceright", "asciitilde", ".notdef",
  /* 0x80 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x84 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x88 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x8C */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x90 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x94 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x98 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x9C */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0xA0 */ ".notdef", "exclamdown", "cent", "sterling",
  /* 0xA4 */ "fraction", "yen", "florin", "section",
  /* 0xA8 */ "currency", "quotesingle", "quotedblleft", "guillemotleft",
  /* 0xAC */ "guilsinglleft", "guilsinglright", "fi", "fl",
  /* 0xB0 */ ".notdef", "endash", "dagger", "daggerdbl",
  /* 0xB4 */ "periodcentered", ".notdef", "paragraph", "bullet",
  /* 0xB8 */ "quotesinglbase", "quotedblbase", "quotedblright", "guillemotright",
  /* 0xBC */ "ellipsis", "perthousand", ".notdef", "questiondown",
  /* 0xC0 */ ".notdef", "grave", "acute", "circumflex",
  /* 0xC4 */ "tilde", "macron", "breve", "dotaccent",
  /* 0xC8 */ "dieresis", ".notdef", "ring", "cedilla",
  /* 0xCC */ ".notdef", "hungarumlaut", "ogonek", "caron",
  /* 0xD0 */ "emdash", ".notdef", ".notdef", ".notdef",
  /* 0xD4 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0xD8 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0xDC */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0xE0 */ ".notdef", "AE", ".notdef", "ordfeminine",
  /* 0xE4 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0xE8 */ "Lslash", "Oslash", "OE", "ordmasculine",
  /* 0xEC */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0xF0 */ ".notdef", "ae", ".notdef", ".notdef",
  /* 0xF4 */ ".notdef", "dotlessi", ".notdef", ".notdef",
  /* 0xF8 */ "lslash", "oslash", "oe", "germandbls",
  /* 0xFC */ ".notdef", ".notdef", ".notdef", ".notdef"
};

/* Glyph names of ISOLatin1Encoding, indexed by character code
 * (currently compiled out).  Same layout as above.
 */
static const char *ISOLatin1Encoding[256] = {
  /* 0x00 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x04 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x08 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x0C */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x10 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x14 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x18 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x1C */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x20 */ "space", "exclam", "quotedbl", "numbersign",
  /* 0x24 */ "dollar", "percent", "ampersand", "quotesingle",
  /* 0x28 */ "parenleft", "parenright", "asterisk", "plus",
  /* 0x2C */ "comma", "hyphen", "period", "slash",
  /* 0x30 */ "zero", "one", "two", "three",
  /* 0x34 */ "four", "five", "six", "seven",
  /* 0x38 */ "eight", "nine", "colon", "semicolon",
  /* 0x3C */ "less", "equal", "greater", "question",
  /* 0x40 */ "at", "A", "B", "C",
  /* 0x44 */ "D", "E", "F", "G",
  /* 0x48 */ "H", "I", "J", "K",
  /* 0x4C */ "L", "M", "N", "O",
  /* 0x50 */ "P", "Q", "R", "S",
  /* 0x54 */ "T", "U", "V", "W",
  /* 0x58 */ "X", "Y", "Z", "bracketleft",
  /* 0x5C */ "backslash", "bracketright", "asciicircum", "underscore",
  /* 0x60 */ "grave", "a", "b", "c",
  /* 0x64 */ "d", "e", "f", "g",
  /* 0x68 */ "h", "i", "j", "k",
  /* 0x6C */ "l", "m", "n", "o",
  /* 0x70 */ "p", "q", "r", "s",
  /* 0x74 */ "t", "u", "v", "w",
  /* 0x78 */ "x", "y", "z", "braceleft",
  /* 0x7C */ "bar", "braceright", "asciitilde", ".notdef",
  /* 0x80 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x84 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x88 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x8C */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x90 */ "dotlessi", "quoteleft", "quoteright", "circumflex",
  /* 0x94 */ "tilde", "macron", "breve", "dotaccent",
  /* 0x98 */ "dieresis", ".notdef", "ring", "cedilla",
  /* 0x9C */ ".notdef", "hungarumlaut", "ogonek", "caron",
  /* 0xA0 */ "space", "exclamdown", "cent", "sterling",
  /* 0xA4 */ "currency", "yen", "brokenbar", "section",
  /* 0xA8 */ "dieresis", "copyright", "ordfeminine", "guillemotleft",
  /* 0xAC */ "logicalnot", "hyphen", "registered", "macron",
  /* 0xB0 */ "degree", "plusminus", "twosuperior", "threesuperior",
  /* 0xB4 */ "acute", "mu", "paragraph", "periodcentered",
  /* 0xB8 */ "cedilla", "onesuperior", "ordmasculine", "guillemotright",
  /* 0xBC */ "onequarter", "onehalf", "threequarters", "questiondown",
  /* 0xC0 */ "Agrave", "Aacute", "Acircumflex", "Atilde",
  /* 0xC4 */ "Adieresis", "Aring", "AE", "Ccedilla",
  /* 0xC8 */ "Egrave", "Eacute", "Ecircumflex", "Edieresis",
  /* 0xCC */ "Igrave", "Iacute", "Icircumflex", "Idieresis",
  /* 0xD0 */ "Eth", "Ntilde", "Ograve", "Oacute",
  /* 0xD4 */ "Ocircumflex", "Otilde", "Odieresis", "multiply",
  /* 0xD8 */ "Oslash", "Ugrave", "Uacute", "Ucircumflex",
  /* 0xDC */ "Udieresis", "Yacute", "Thorn", "germandbls",
  /* 0xE0 */ "agrave", "aacute", "acircumflex", "atilde",
  /* 0xE4 */ "adieresis", "aring", "ae", "ccedilla",
  /* 0xE8 */ "egrave", "eacute", "ecircumflex", "edieresis",
  /* 0xEC */ "igrave", "iacute", "icircumflex", "idieresis",
  /* 0xF0 */ "eth", "ntilde", "ograve", "oacute",
  /* 0xF4 */ "ocircumflex", "otilde", "odieresis", "divide",
  /* 0xF8 */ "oslash", "ugrave", "uacute", "ucircumflex",
  /* 0xFC */ "udieresis", "yacute", "thorn", "ydieresis"
};
#endif
1079