1 /* This is dvipdfmx, an eXtended version of dvipdfm by Mark A. Wicks.
2 
3     Copyright (C) 2008-2014 by Jin-Hwan Cho, Matthias Franz, and Shunsaku Hirata,
4     the dvipdfmx project team.
5 
6     Copyright (C) 1998, 1999 by Mark A. Wicks <mwicks@kettering.edu>
7 
8     This program is free software; you can redistribute it and/or modify
9     it under the terms of the GNU General Public License as published by
10     the Free Software Foundation; either version 2 of the License, or
11     (at your option) any later version.
12 
13     This program is distributed in the hope that it will be useful,
14     but WITHOUT ANY WARRANTY; without even the implied warranty of
15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16     GNU General Public License for more details.
17 
18     You should have received a copy of the GNU General Public License
19     along with this program; if not, write to the Free Software
20     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
21 */
22 
23 #ifdef HAVE_CONFIG_H
24 #include <config.h>
25 #endif
26 
27 #include <string.h>
28 
29 #include "system.h"
30 #include "mem.h"
31 #include "error.h"
32 #include "dpxutil.h"
33 
34 #include "pdfparse.h"
35 #include "pdfobj.h"
36 
37 #include "dpxfile.h"
38 
39 #include "pdfencoding.h"
40 
/* Forward declarations of file-local helpers that are defined further
 * down in this file.
 */
static int      is_similar_charset (char **encoding, const char **encoding2);
static pdf_obj *make_encoding_differences (char **encoding, char **baseenc,
					   const char *is_used);

/* Verbosity level of this module; raised by pdf_encoding_set_verbose(). */
static unsigned char verbose = 0;

/* Glyph-name tables of the predefined encodings, indexed by character
 * code.  These are only declarations; the initialized definitions appear
 * later in this file.
 */
static const char *MacRomanEncoding[256];
static const char *MacExpertEncoding[256];
static const char *WinAnsiEncoding[256];
#if 0
static const char *StandardEncoding[256];
static const char *ISOLatin1Encoding[256];
#endif
54 
55 void
pdf_encoding_set_verbose(void)56 pdf_encoding_set_verbose (void)
57 {
58   verbose++;
59 }
60 
/*
 * Fields of struct pdf_encoding:
 *
 * ident:    File name, e.g., 8a.enc.
 * enc_name: Name of encoding, StandardEncoding, TeXBase1Encoding, ...
 * glyphs:   List of encoded glyphs (name).
 * flags:
 *   FLAG_IS_PREDEFINED:
 *     Encoding is one of MacRomanEncoding, MacExpertEncoding, and
 *     WinAnsiEncoding.
 *   FLAG_USED_BY_TYPE3:
 *     Encoding is used by a Type 3 font.
 */
/* Bit flags stored in pdf_encoding.flags. */
#define FLAG_IS_PREDEFINED  (1 << 0)
#define FLAG_USED_BY_TYPE3  (1 << 1)

/* One entry of the encoding cache (enc_cache). */
typedef struct pdf_encoding
{
  char     *ident;           /* private copy of the identifier given at creation */

  char     *enc_name;        /* private copy of the encoding name */
  int       flags;           /* bitwise OR of the FLAG_* values above */
  char     *glyphs[256];     /* ".notdef" must be represented as NULL */
  char      is_used[256];    /* non-zero for codes reported through
                              * pdf_encoding_add_usedchars() */

  struct pdf_encoding *baseenc; /* base encoding, or NULL; points at
                                 * another entry of the cache */
  pdf_obj  *tounicode;       /* ToUnicode CMap, set by pdf_encoding_complete() */

  pdf_obj  *resource;        /* PDF Encoding object; a name object for
                              * predefined encodings */
} pdf_encoding;
89 
/* Registers a new encoding in the cache and returns its id; the
 * definition appears further down in this file.
 */
static int      pdf_encoding_new_encoding (const char *enc_name,
					   const char *ident,
					   const char **encoding_vec,
					   const char *baseenc_name,
					   int flags);
95 
96 static void
pdf_init_encoding_struct(pdf_encoding * encoding)97 pdf_init_encoding_struct (pdf_encoding *encoding)
98 {
99   ASSERT(encoding);
100 
101   encoding->ident    = NULL;
102 
103   encoding->enc_name = NULL;
104 
105   memset(encoding->glyphs,  0, 256*sizeof(char *));
106   memset(encoding->is_used, 0, 256);
107 
108   encoding->tounicode = NULL;
109 
110   encoding->baseenc   = NULL;
111   encoding->resource  = NULL;
112 
113   encoding->flags     = 0;
114 
115   return;
116 }
117 
118 /* Creates the PDF Encoding entry for the encoding.
119  * If baseenc is non-null, it is used as BaseEncoding entry.
120  */
121 static pdf_obj *
create_encoding_resource(pdf_encoding * encoding,pdf_encoding * baseenc)122 create_encoding_resource (pdf_encoding *encoding, pdf_encoding *baseenc)
123 {
124   pdf_obj *differences;
125   ASSERT(encoding);
126   ASSERT(!encoding->resource);
127 
128   differences = make_encoding_differences(encoding->glyphs,
129 					  baseenc ? baseenc->glyphs : NULL,
130 					  encoding->is_used);
131 
132   if (differences) {
133     pdf_obj *resource = pdf_new_dict();
134     if (baseenc)
135       pdf_add_dict(resource, pdf_new_name("BaseEncoding"),
136 		   pdf_link_obj(baseenc->resource));
137     pdf_add_dict(resource, pdf_new_name("Differences"),  differences);
138     return resource;
139   } else {
140     /* Fix a bug with the MinionPro package using MnSymbol fonts
141      * in its virtual fonts:
142      *
143      * Some font may have font_id even if no character is used.
144      * For example, suppose that a virtual file A.vf uses two
145      * other fonts, B and C. Even if only characters of B are used
146      * in a DVI document, C will have font_id too.
147      * In this case, both baseenc and differences can be NULL.
148      *
149      * Actually these fonts will be ignored in pdffont.c.
150      */
151     return baseenc ? pdf_link_obj(baseenc->resource) : NULL;
152   }
153 }
154 
155 static void
pdf_flush_encoding(pdf_encoding * encoding)156 pdf_flush_encoding (pdf_encoding *encoding)
157 {
158   ASSERT(encoding);
159 
160   if (encoding->resource) {
161     pdf_release_obj(encoding->resource);
162     encoding->resource  = NULL;
163   }
164   if (encoding->tounicode) {
165     pdf_release_obj(encoding->tounicode);
166     encoding->tounicode = NULL;
167   }
168 
169   return;
170 }
171 
172 static void
pdf_clean_encoding_struct(pdf_encoding * encoding)173 pdf_clean_encoding_struct (pdf_encoding *encoding)
174 {
175   int   code;
176 
177   ASSERT(encoding);
178 
179   if (encoding->resource)
180     ERROR("Object not flushed.");
181 
182   if (encoding->tounicode)
183     pdf_release_obj(encoding->tounicode);
184   if (encoding->ident)
185     RELEASE(encoding->ident);
186   if (encoding->enc_name)
187     RELEASE(encoding->enc_name);
188 
189   encoding->ident    = NULL;
190   encoding->enc_name = NULL;
191 
192   for (code = 0; code < 256; code++) {
193     if (encoding->glyphs[code])
194       RELEASE(encoding->glyphs[code]);
195     encoding->glyphs[code] = NULL;
196   }
197   encoding->ident    = NULL;
198   encoding->enc_name = NULL;
199 
200   return;
201 }
202 
/* Disabled code (compiled out by "#if 0"); no caller is visible in
 * this file.
 *
 * Comparison callback: pv1 is used directly as a C string, pv2 is
 * dereferenced as a pointer to a C string, and the strcmp() result of
 * the two is returned.  Presumably meant as a bsearch()-style callback
 * (key versus array element) -- to be confirmed before re-enabling.
 */
#if 0
static int CDECL
glycmp (const void *pv1, const void *pv2)
{
  char *v1, *v2;

  v1 = (char *) pv1;
  v2 = *((char **) pv2);

  return strcmp(v1, v2);
}
#endif
215 
/* Tells whether the glyph table `enc_vec' is "similar" to `enc_vec2'.
 *
 * A character code counts as agreeing when `enc_vec' has no glyph
 * there (NULL) or when both tables name the same glyph.  As soon as
 * 64 agreeing codes have been seen the tables are considered similar
 * and 1 is returned; otherwise the result is 0.
 *
 * Every entry of `enc_vec2' that is compared must be non-NULL.
 */
static int
is_similar_charset (char **enc_vec, const char **enc_vec2)
{
  int   matches = 0;
  int   idx;

  for (idx = 0; idx < 256; idx++) {
    const char *glyph = enc_vec[idx];

    /* A defined glyph with a different name is a real conflict. */
    if (glyph != NULL && strcmp(glyph, enc_vec2[idx]) != 0)
      continue;

    matches++;
    /* is 64 a good level? */
    if (matches >= 64)
      return 1;
  }

  return 0;
}
229 
230 /* Creates a PDF Differences array for the encoding, based on the
231  * base encoding baseenc (if not NULL). Only character codes which
232  * are actually used in the document are considered.
233  */
234 static pdf_obj *
make_encoding_differences(char ** enc_vec,char ** baseenc,const char * is_used)235 make_encoding_differences (char **enc_vec, char **baseenc, const char *is_used)
236 {
237   pdf_obj *differences = NULL;
238   int      code, count = 0;
239   int      skipping = 1;
240 
241   ASSERT(enc_vec);
242 
243   /*
244    *  Write all entries (except .notdef) if baseenc is unknown.
245    *  If is_used is given, write only used entries.
246    */
247   differences = pdf_new_array();
248   for (code = 0; code < 256; code++) {
249     /* We skip NULL (= ".notdef"). Any character code mapped to ".notdef"
250      * glyph should not be used in the document.
251      */
252     if ((is_used && !is_used[code]) || !enc_vec[code])
253       skipping = 1;
254     else if (!baseenc || !baseenc[code] ||
255              strcmp(baseenc[code], enc_vec[code]) != 0) {
256       /*
257        * Difference found.
258        */
259       if (skipping)
260         pdf_add_array(differences, pdf_new_number(code));
261       pdf_add_array(differences,   pdf_new_name(enc_vec[code]));
262       skipping = 0;
263       count++;
264     } else
265       skipping = 1;
266   }
267 
268   /*
269    * No difference found. Some PDF viewers can't handle differences without
270    * any differences. We return NULL.
271    */
272   if (count == 0) {
273     pdf_release_obj(differences);
274     differences = NULL;
275   }
276 
277   return differences;
278 }
279 
280 static int
load_encoding_file(const char * filename)281 load_encoding_file (const char *filename)
282 {
283   FILE    *fp;
284   pdf_obj *enc_name = NULL;
285   pdf_obj *encoding_array = NULL;
286   char    *wbuf;
287   const char *p, *endptr;
288   const char *enc_vec[256];
289   int      code, fsize, enc_id;
290 
291   if (!filename)
292     return -1;
293 
294   if (verbose) {
295     MESG("(Encoding:%s", filename);
296   }
297 
298   fp = DPXFOPEN(filename, DPX_RES_TYPE_ENC);
299   if (!fp)
300     return -1;
301   /*
302    * file_size do seek_end witout saving current position and
303    * do rewind.
304    */
305   fsize = file_size(fp);
306 
307   wbuf = NEW(fsize + 1, char);
308   wbuf[fsize] = '\0';
309   fread(wbuf, sizeof(char), fsize, fp);
310   DPXFCLOSE(fp);
311 
312   p        = wbuf;
313   endptr   = wbuf + fsize;
314 
315   skip_white(&p, endptr);
316 
317   /*
318    * Skip comment lines.
319    */
320   while (p < endptr && p[0] == '%') {
321     skip_line (&p, endptr);
322     skip_white(&p, endptr);
323   }
324   if (p[0] == '/')
325     enc_name = parse_pdf_name(&p, endptr);
326 
327   skip_white(&p, endptr);
328   encoding_array = parse_pdf_array(&p, endptr, NULL);
329   RELEASE(wbuf);
330   if (!encoding_array) {
331     if (enc_name)
332       pdf_release_obj(enc_name);
333     return -1;
334   }
335 
336   for (code = 0; code < 256; code++) {
337     enc_vec[code] = pdf_name_value(pdf_get_array(encoding_array, code));
338   }
339   enc_id = pdf_encoding_new_encoding(enc_name ? pdf_name_value(enc_name) : NULL,
340 				     filename, enc_vec, NULL, 0);
341 
342   if (enc_name) {
343     if (verbose > 1)
344       MESG("[%s]", pdf_name_value(enc_name));
345     pdf_release_obj(enc_name);
346   }
347   pdf_release_obj(encoding_array);
348 
349   if (verbose) MESG(")");
350 
351   return enc_id;
352 }
353 
/* Reports an encoding id outside of [0, enc_cache.count) through
 * ERROR().  The argument is evaluated more than once.
 */
#define CHECK_ID(n) do { \
  if ((n) < 0 || (n) >= enc_cache.count) { \
     ERROR("Invalid encoding id: %d", (n)); \
  } \
} while (0)

/* Presumably the number of slots by which the cache is meant to grow. */
#define CACHE_ALLOC_SIZE 16u

/* Cache of all known encodings; an encoding id is an index into
 * `encodings'.
 */
struct {
  int           count;      /* number of slots in use */
  int           capacity;   /* number of slots allocated */
  pdf_encoding *encodings;  /* slot array, NULL until allocated */
} enc_cache = {
  0, 0, NULL
};
369 
370 void
pdf_init_encodings(void)371 pdf_init_encodings (void)
372 {
373   enc_cache.count     = 0;
374   enc_cache.capacity  = 3;
375   enc_cache.encodings = NEW(enc_cache.capacity, pdf_encoding);
376 
377   /*
378    * PDF Predefined Encodings
379    */
380   pdf_encoding_new_encoding("WinAnsiEncoding", "WinAnsiEncoding",
381 			    WinAnsiEncoding, NULL, FLAG_IS_PREDEFINED);
382   pdf_encoding_new_encoding("MacRomanEncoding", "MacRomanEncoding",
383 			    MacRomanEncoding, NULL, FLAG_IS_PREDEFINED);
384   pdf_encoding_new_encoding("MacExpertEncoding", "MacExpertEncoding",
385 			    MacExpertEncoding, NULL, FLAG_IS_PREDEFINED);
386 
387   return;
388 }
389 
390 /*
391  * The original dvipdfm describes as:
392  *
393  *  Some software doesn't like BaseEncoding key (e.g., FastLane)
394  *  so this code is commented out for the moment.  It may reemerge in the
395  *  future
396  *
397  * and the line for BaseEncoding is commented out.
398  *
399  * I'm not sure why this happens. But maybe BaseEncoding key causes problems
400  * when the font is Symbol font or TrueType font.
401  */
402 
403 static int
pdf_encoding_new_encoding(const char * enc_name,const char * ident,const char ** encoding_vec,const char * baseenc_name,int flags)404 pdf_encoding_new_encoding (const char *enc_name, const char *ident,
405 			   const char **encoding_vec,
406 			   const char *baseenc_name, int flags)
407 {
408   int      enc_id, code;
409 
410   pdf_encoding *encoding;
411 
412   enc_id   = enc_cache.count;
413   if (enc_cache.count++ >= enc_cache.capacity) {
414     enc_cache.capacity += 16;
415     enc_cache.encodings = RENEW(enc_cache.encodings,
416                                 enc_cache.capacity,  pdf_encoding);
417   }
418   encoding = &enc_cache.encodings[enc_id];
419 
420   pdf_init_encoding_struct(encoding);
421 
422   encoding->ident = NEW(strlen(ident)+1, char);
423   strcpy(encoding->ident, ident);
424   encoding->enc_name  = NEW(strlen(enc_name)+1, char);
425   strcpy(encoding->enc_name, enc_name);
426 
427   encoding->flags = flags;
428 
429   for (code = 0; code < 256; code++)
430     if (encoding_vec[code] && strcmp(encoding_vec[code], ".notdef")) {
431       encoding->glyphs[code] = NEW(strlen(encoding_vec[code])+1, char);
432       strcpy(encoding->glyphs[code], encoding_vec[code]);
433     }
434 
435   if (!baseenc_name && !(flags & FLAG_IS_PREDEFINED)
436       && is_similar_charset(encoding->glyphs, WinAnsiEncoding)) {
437     /* Dvipdfmx default setting. */
438     baseenc_name = "WinAnsiEncoding";
439   }
440 
441   /* TODO: make base encoding configurable */
442   if (baseenc_name) {
443     int baseenc_id = pdf_encoding_findresource(baseenc_name);
444     if (baseenc_id < 0 || !pdf_encoding_is_predefined(baseenc_id))
445       ERROR("Illegal base encoding %s for encoding %s\n",
446 	    baseenc_name, encoding->enc_name);
447     encoding->baseenc = &enc_cache.encodings[baseenc_id];
448   }
449 
450   if (flags & FLAG_IS_PREDEFINED)
451     encoding->resource = pdf_new_name(encoding->enc_name);
452 
453   return enc_id;
454 }
455 
456 /* Creates Encoding resource and ToUnicode CMap
457  * for all non-predefined encodings.
458  */
pdf_encoding_complete(void)459 void pdf_encoding_complete (void)
460 {
461   int  enc_id;
462 
463   for (enc_id = 0; enc_id < enc_cache.count; enc_id++) {
464     if (!pdf_encoding_is_predefined(enc_id)) {
465       pdf_encoding *encoding = &enc_cache.encodings[enc_id];
466       /* Section 5.5.4 of the PDF 1.5 reference says that the encoding
467        * of a Type 3 font must be completely described by a Differences
468        * array, but implementation note 56 explains that this is rather
469        * an incorrect implementation in Acrobat 4 and earlier. Hence,
470        * we do use a base encodings for PDF versions >= 1.3.
471        */
472       int with_base = !(encoding->flags & FLAG_USED_BY_TYPE3)
473 	              || pdf_get_version() >= 4;
474       ASSERT(!encoding->resource);
475       encoding->resource = create_encoding_resource(encoding,
476 						    with_base ? encoding->baseenc : NULL);
477       ASSERT(!encoding->tounicode);
478       encoding->tounicode = pdf_create_ToUnicode_CMap(encoding->enc_name,
479 						      encoding->glyphs,
480 						      encoding->is_used);
481     }
482   }
483 }
484 
485 void
pdf_close_encodings(void)486 pdf_close_encodings (void)
487 {
488   int  enc_id;
489 
490   if (enc_cache.encodings) {
491     for (enc_id = 0; enc_id < enc_cache.count; enc_id++) {
492       pdf_encoding *encoding;
493 
494       encoding = &enc_cache.encodings[enc_id];
495       if (encoding) {
496         pdf_flush_encoding(encoding);
497         pdf_clean_encoding_struct(encoding);
498       }
499     }
500     RELEASE(enc_cache.encodings);
501   }
502   enc_cache.encodings = NULL;
503   enc_cache.count     = 0;
504   enc_cache.capacity  = 0;
505 }
506 
507 int
pdf_encoding_findresource(const char * enc_name)508 pdf_encoding_findresource (const char *enc_name)
509 {
510   int           enc_id;
511   pdf_encoding *encoding;
512 
513   ASSERT(enc_name);
514   for (enc_id = 0; enc_id < enc_cache.count; enc_id++) {
515     encoding = &enc_cache.encodings[enc_id];
516     if (encoding->ident &&
517         !strcmp(enc_name, encoding->ident))
518       return enc_id;
519     else if (encoding->enc_name &&
520              !strcmp(enc_name, encoding->enc_name))
521       return enc_id;
522   }
523 
524   return load_encoding_file(enc_name);
525 }
526 
527 
528 /*
529  * Pointer will change if other encoding is loaded...
530  */
531 
532 char **
pdf_encoding_get_encoding(int enc_id)533 pdf_encoding_get_encoding (int enc_id)
534 {
535   pdf_encoding *encoding;
536 
537   CHECK_ID(enc_id);
538 
539   encoding = &enc_cache.encodings[enc_id];
540 
541   return encoding->glyphs;
542 }
543 
544 pdf_obj *
pdf_get_encoding_obj(int enc_id)545 pdf_get_encoding_obj (int enc_id)
546 {
547   pdf_encoding *encoding;
548 
549   CHECK_ID(enc_id);
550 
551   encoding = &enc_cache.encodings[enc_id];
552 
553   return encoding->resource;
554 }
555 
556 int
pdf_encoding_is_predefined(int enc_id)557 pdf_encoding_is_predefined (int enc_id)
558 {
559   pdf_encoding *encoding;
560 
561   CHECK_ID(enc_id);
562 
563   encoding = &enc_cache.encodings[enc_id];
564 
565   return (encoding->flags & FLAG_IS_PREDEFINED) ? 1 : 0;
566 }
567 
568 void
pdf_encoding_used_by_type3(int enc_id)569 pdf_encoding_used_by_type3 (int enc_id)
570 {
571   pdf_encoding *encoding;
572 
573   CHECK_ID(enc_id);
574 
575   encoding = &enc_cache.encodings[enc_id];
576 
577   encoding->flags |= FLAG_USED_BY_TYPE3;
578 }
579 
580 
581 char *
pdf_encoding_get_name(int enc_id)582 pdf_encoding_get_name (int enc_id)
583 {
584   pdf_encoding *encoding;
585 
586   CHECK_ID(enc_id);
587 
588   encoding = &enc_cache.encodings[enc_id];
589 
590   return encoding->enc_name;
591 }
592 
593 /* CSI_UNICODE */
594 #include "cid.h"
595 
596 #include "cmap.h"
597 #include "cmap_read.h"
598 #include "cmap_write.h"
599 
600 #include "agl.h"
601 
/* Scratch buffer used by pdf_create_ToUnicode_CMap(): byte 0 receives
 * the character code, the UTF-16BE text is written after it.
 */
#define WBUF_SIZE 1024
static unsigned char wbuf[WBUF_SIZE];
/* Bounds of the one-byte codespace range (0x00 .. 0xFF) of the CMap. */
static unsigned char range_min[1] = {0x00u};
static unsigned char range_max[1] = {0xFFu};
606 
607 void
pdf_encoding_add_usedchars(int encoding_id,const char * is_used)608 pdf_encoding_add_usedchars (int encoding_id, const char *is_used)
609 {
610   pdf_encoding *encoding;
611   int code;
612 
613   CHECK_ID(encoding_id);
614 
615   if (!is_used || pdf_encoding_is_predefined(encoding_id))
616     return;
617 
618   encoding = &enc_cache.encodings[encoding_id];
619 
620   for (code = 0; code <= 0xff; code++)
621     encoding->is_used[code] |= is_used[code];
622 }
623 
624 pdf_obj *
pdf_encoding_get_tounicode(int encoding_id)625 pdf_encoding_get_tounicode (int encoding_id)
626 {
627   CHECK_ID(encoding_id);
628 
629   return enc_cache.encodings[encoding_id].tounicode;
630 }
631 
632 
633 /* Creates a ToUnicode CMap. An empty CMap is replaced by NULL.
634  *
635  * For PDF <= 1.4 a complete CMap is created unless all character codes
636  * are predefined in PDF. For PDF >= 1.5 only those character codes which
637  * are not predefined appear in the CMap.
638  *
639  * Note: The PDF 1.4 reference is not consistent: Section 5.9 describes
640  * the Unicode mapping of PDF 1.3 and Section 9.7.2 (in the context of
641  * Tagged PDF) the one of PDF 1.5.
642  */
643 pdf_obj *
pdf_create_ToUnicode_CMap(const char * enc_name,char ** enc_vec,const char * is_used)644 pdf_create_ToUnicode_CMap (const char *enc_name,
645                            char **enc_vec, const char *is_used)
646 {
647   pdf_obj  *stream;
648   CMap     *cmap;
649   int       code, all_predef;
650   char     *cmap_name;
651   unsigned char *p, *endptr;
652 
653   ASSERT(enc_name && enc_vec);
654 
655   cmap_name = NEW(strlen(enc_name)+strlen("-UTF16")+1, char);
656   sprintf(cmap_name, "%s-UTF16", enc_name);
657 
658   cmap = CMap_new();
659   CMap_set_name (cmap, cmap_name);
660   CMap_set_type (cmap, CMAP_TYPE_TO_UNICODE);
661   CMap_set_wmode(cmap, 0);
662 
663   CMap_set_CIDSysInfo(cmap, &CSI_UNICODE);
664 
665   CMap_add_codespacerange(cmap, range_min, range_max, 1);
666 
667   all_predef = 1;
668   for (code = 0; code <= 0xff; code++) {
669     if (is_used && !is_used[code])
670       continue;
671 
672     if (enc_vec[code]) {
673       long   len;
674       int    fail_count = 0;
675       agl_name *agln = agl_lookup_list(enc_vec[code]);
676       /* Adobe glyph naming conventions are not used by viewers,
677        * hence even ligatures (e.g, "f_i") must be explicitly defined
678        */
679       if (pdf_get_version() < 5 || !agln || !agln->is_predef) {
680         wbuf[0] = (code & 0xff);
681         p      = wbuf + 1;
682         endptr = wbuf + WBUF_SIZE;
683         len = agl_sput_UTF16BE(enc_vec[code], &p, endptr, &fail_count);
684         if (len >= 1 && !fail_count) {
685           CMap_add_bfchar(cmap, wbuf, 1, wbuf + 1, len);
686 	  all_predef &= agln && agln->is_predef;
687         }
688       }
689     }
690   }
691 
692   stream = all_predef ? NULL : CMap_create_stream(cmap);
693 
694   CMap_release(cmap);
695   RELEASE(cmap_name);
696 
697   return stream;
698 }
699 
700 
701 pdf_obj *
pdf_load_ToUnicode_stream(const char * ident)702 pdf_load_ToUnicode_stream (const char *ident)
703 {
704   pdf_obj *stream = NULL;
705   CMap    *cmap;
706   FILE    *fp;
707 
708   if (!ident)
709     return NULL;
710 
711   fp = DPXFOPEN(ident, DPX_RES_TYPE_CMAP);
712   if (!fp)
713     return NULL;
714   else if (CMap_parse_check_sig(fp) < 0) {
715     DPXFCLOSE(fp);
716     return NULL;
717   }
718 
719   cmap = CMap_new();
720   if (CMap_parse(cmap, fp) < 0) {
721     WARN("Reading CMap file \"%s\" failed.", ident);
722   } else {
723     if (verbose) {
724       MESG("(CMap:%s)", ident);
725     }
726     stream = CMap_create_stream(cmap);
727     if (!stream) {
728       WARN("Failed to creat ToUnicode CMap stream for \"%s\".", ident);
729     }
730   }
731   CMap_release(cmap);
732   DPXFCLOSE(fp);
733 
734   return  stream;
735 }
736 
737 
/* Glyph names registered as "MacRomanEncoding" by pdf_init_encodings(),
 * indexed by character code (256 entries).  ".notdef" marks a code
 * without a glyph.
 *
 * Most rows hold four names; the block starting at 0x40 has one row of
 * five and one row of three, which still adds up to 32 names.
 */
static const char *
MacRomanEncoding[256] = {
  /* 0x00 - 0x1F */
  ".notdef", ".notdef", ".notdef", ".notdef",
  ".notdef", ".notdef", ".notdef", ".notdef",
  ".notdef", ".notdef", ".notdef", ".notdef",
  ".notdef", ".notdef", ".notdef", ".notdef",
  ".notdef", ".notdef", ".notdef", ".notdef",
  ".notdef", ".notdef", ".notdef", ".notdef",
  ".notdef", ".notdef", ".notdef", ".notdef",
  ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x20 - 0x3F */
  "space", "exclam",  "quotedbl", "numbersign",
  "dollar", "percent", "ampersand", "quotesingle",
  "parenleft", "parenright", "asterisk", "plus",
  "comma", "hyphen", "period", "slash",
  "zero", "one", "two", "three",
  "four", "five", "six", "seven",
  "eight", "nine", "colon", "semicolon",
  "less", "equal", "greater", "question",
  /* 0x40 - 0x5F */
  "at", "A", "B", "C",
  "D", "E", "F", "G", "H",
  "I", "J", "K", "L",
  "M", "N", "O", "P",
  "Q", "R", "S", "T",
  "U", "V", "W", "X",
  "Y", "Z", "bracketleft", "backslash",
  "bracketright", "asciicircum", "underscore",
  /* 0x60 - 0x7F */
  "grave", "a", "b", "c",
  "d", "e", "f", "g",
  "h", "i", "j", "k",
  "l", "m", "n", "o",
  "p", "q", "r", "s",
  "t", "u", "v", "w",
  "x", "y", "z", "braceleft",
  "bar", "braceright", "asciitilde", ".notdef",
  /* 0x80 - 0x9F */
  "Adieresis", "Aring", "Ccedilla", "Eacute",
  "Ntilde", "Odieresis", "Udieresis", "aacute",
  "agrave", "acircumflex", "adieresis", "atilde",
  "aring", "ccedilla", "eacute", "egrave",
  "ecircumflex", "edieresis", "iacute", "igrave",
  "icircumflex", "idieresis", "ntilde", "oacute",
  "ograve", "ocircumflex", "odieresis", "otilde",
  "uacute", "ugrave", "ucircumflex", "udieresis",
  /* 0xA0 - 0xBF */
  "dagger", "degree", "cent", "sterling",
  "section", "bullet", "paragraph", "germandbls",
  "registered", "copyright", "trademark", "acute",
  "dieresis", "notequal", "AE", "Oslash",
  "infinity", "plusminus", "lessequal", "greaterequal",
  "yen", "mu", "partialdiff", "summation",
  "product", "pi", "integral", "ordfeminine",
  "ordmasculine", "Omega", "ae", "oslash",
  /* 0xC0 - 0xDF */
  "questiondown", "exclamdown", "logicalnot", "radical",
  "florin", "approxequal", "Delta", "guillemotleft",
  "guillemotright", "ellipsis", "space", "Agrave",
  "Atilde", "Otilde", "OE", "oe",
  "endash", "emdash", "quotedblleft", "quotedblright",
  "quoteleft", "quoteright", "divide", "lozenge",
  "ydieresis", "Ydieresis", "fraction", "currency",
  "guilsinglleft", "guilsinglright", "fi", "fl",
  /* 0xE0 - 0xFF */
  "daggerdbl", "periodcentered", "quotesinglbase", "quotedblbase",
  "perthousand", "Acircumflex", "Ecircumflex", "Aacute",
  "Edieresis", "Egrave", "Iacute", "Icircumflex",
  "Idieresis", "Igrave", "Oacute", "Ocircumflex",
  "apple", "Ograve", "Uacute", "Ucircumflex",
  "Ugrave", "dotlessi", "circumflex", "tilde",
  "macron", "breve", "dotaccent", "ring",
  "cedilla", "hungarumlaut", "ogonek", "caron"
};
805 
/* Glyph names registered as "MacExpertEncoding" by
 * pdf_init_encodings(), indexed by character code (256 entries, four
 * per row).  ".notdef" marks a code without a glyph.
 */
static const char *
MacExpertEncoding[256] = {
  /* 0x00 - 0x1F */
  ".notdef", ".notdef", ".notdef", ".notdef",
  ".notdef", ".notdef", ".notdef", ".notdef",
  ".notdef", ".notdef", ".notdef", ".notdef",
  ".notdef", ".notdef", ".notdef", ".notdef",
  ".notdef", ".notdef", ".notdef", ".notdef",
  ".notdef", ".notdef", ".notdef", ".notdef",
  ".notdef", ".notdef", ".notdef", ".notdef",
  ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x20 - 0x3F */
  "space", "exclamsmall", "Hungarumlautsmall", "centoldstyle",
  "dollaroldstyle", "dollarsuperior", "ampersandsmall", "Acutesmall",
  "parenleftsuperior", "parenrightsuperior", "twodotenleader", "onedotenleader",
  "comma", "hyphen", "period", "fraction",
  "zerooldstyle", "oneoldstyle", "twooldstyle", "threeoldstyle",
  "fouroldstyle", "fiveoldstyle", "sixoldstyle", "sevenoldstyle",
  "eightoldstyle", "nineoldstyle", "colon", "semicolon",
  ".notdef", "threequartersemdash", ".notdef", "questionsmall",
  /* 0x40 - 0x5F */
  ".notdef", ".notdef", ".notdef", ".notdef",
  "Ethsmall", ".notdef", ".notdef", "onequarter",
  "onehalf", "threequarters", "oneeighth", "threeeighths",
  "fiveeighths", "seveneighths", "onethird", "twothirds",
  ".notdef", ".notdef", ".notdef", ".notdef",
  ".notdef", ".notdef", "ff", "fi",
  "fl", "ffi", "ffl", "parenleftinferior",
  ".notdef", "parenrightinferior", "Circumflexsmall", "hypheninferior",
  /* 0x60 - 0x7F */
  "Gravesmall", "Asmall", "Bsmall", "Csmall",
  "Dsmall", "Esmall", "Fsmall", "Gsmall",
  "Hsmall", "Ismall", "Jsmall", "Ksmall",
  "Lsmall", "Msmall", "Nsmall", "Osmall",
  "Psmall", "Qsmall", "Rsmall", "Ssmall",
  "Tsmall", "Usmall", "Vsmall", "Wsmall",
  "Xsmall", "Ysmall", "Zsmall", "colonmonetary",
  "onefitted", "rupiah", "Tildesmall", ".notdef",
  /* 0x80 - 0x9F */
  ".notdef", "asuperior", "centsuperior", ".notdef",
  ".notdef", ".notdef", ".notdef", "Aacutesmall",
  "Agravesmall", "Acircumflexsmall", "Adieresissmall", "Atildesmall",
  "Aringsmall", "Ccedillasmall", "Eacutesmall", "Egravesmall",
  "Ecircumflexsmall", "Edieresissmall", "Iacutesmall", "Igravesmall",
  "Icircumflexsmall", "Idieresissmall", "Ntildesmall", "Oacutesmall",
  "Ogravesmall", "Ocircumflexsmall", "Odieresissmall", "Otildesmall",
  "Uacutesmall", "Ugravesmall", "Ucircumflexsmall", "Udieresissmall",
  /* 0xA0 - 0xBF */
  ".notdef", "eightsuperior", "fourinferior", "threeinferior",
  "sixinferior", "eightinferior", "seveninferior", "Scaronsmall",
  ".notdef", "centinferior", "twoinferior", ".notdef",
  "Dieresissmall", ".notdef", "Caronsmall", "osuperior",
  "fiveinferior", ".notdef", "commainferior", "periodinferior",
  "Yacutesmall", ".notdef", "dollarinferior", ".notdef",
  ".notdef", "Thornsmall", ".notdef", "nineinferior",
  "zeroinferior", "Zcaronsmall", "AEsmall", "Oslashsmall",
  /* 0xC0 - 0xDF */
  "questiondownsmall", "oneinferior", "Lslashsmall", ".notdef",
  ".notdef", ".notdef", ".notdef", ".notdef",
  ".notdef", "Cedillasmall", ".notdef", ".notdef",
  ".notdef", ".notdef", ".notdef", "OEsmall",
  "figuredash", "hyphensuperior", ".notdef", ".notdef",
  ".notdef", ".notdef", "exclamdownsmall", ".notdef",
  "Ydieresissmall", ".notdef", "onesuperior", "twosuperior",
  "threesuperior", "foursuperior", "fivesuperior", "sixsuperior",
  /* 0xE0 - 0xFF */
  "sevensuperior", "ninesuperior", "zerosuperior", ".notdef",
  "esuperior", "rsuperior", "tsuperior", ".notdef",
  ".notdef", "isuperior", "ssuperior", "dsuperior",
  ".notdef", ".notdef", ".notdef", ".notdef",
  ".notdef", "lsuperior", "Ogoneksmall", "Brevesmall",
  "Macronsmall", "bsuperior", "nsuperior", "msuperior",
  "commasuperior", "periodsuperior", "Dotaccentsmall", "Ringsmall",
  ".notdef", ".notdef", ".notdef", ".notdef"
};
873 
/* Glyph names registered as "WinAnsiEncoding" by pdf_init_encodings(),
 * indexed by character code (256 entries, four per row).  ".notdef"
 * marks a code without a glyph.
 *
 * pdf_encoding_new_encoding() also compares new encodings against
 * this table to decide whether WinAnsiEncoding becomes their base
 * encoding.
 */
static const char *
WinAnsiEncoding[256] = {
  /* 0x00 - 0x1F */
  ".notdef", ".notdef", ".notdef", ".notdef",
  ".notdef", ".notdef", ".notdef", ".notdef",
  ".notdef", ".notdef", ".notdef", ".notdef",
  ".notdef", ".notdef", ".notdef", ".notdef",
  ".notdef", ".notdef", ".notdef", ".notdef",
  ".notdef", ".notdef", ".notdef", ".notdef",
  ".notdef", ".notdef", ".notdef", ".notdef",
  ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x20 - 0x3F */
  "space", "exclam", "quotedbl", "numbersign",
  "dollar", "percent", "ampersand", "quotesingle",
  "parenleft", "parenright", "asterisk", "plus",
  "comma", "hyphen", "period", "slash",
  "zero", "one", "two", "three",
  "four", "five", "six", "seven",
  "eight", "nine", "colon", "semicolon",
  "less", "equal", "greater", "question",
  /* 0x40 - 0x5F */
  "at", "A", "B", "C",
  "D", "E", "F", "G",
  "H", "I", "J", "K",
  "L", "M", "N", "O",
  "P", "Q", "R", "S",
  "T", "U", "V", "W",
  "X", "Y", "Z", "bracketleft",
  "backslash", "bracketright", "asciicircum", "underscore",
  /* 0x60 - 0x7F */
  "grave", "a", "b", "c",
  "d", "e", "f", "g",
  "h", "i", "j", "k",
  "l", "m", "n", "o",
  "p", "q", "r", "s",
  "t", "u", "v", "w",
  "x", "y", "z", "braceleft",
  "bar", "braceright", "asciitilde", "bullet",
  /* 0x80 - 0x9F */
  "Euro", "bullet", "quotesinglbase", "florin",
  "quotedblbase", "ellipsis", "dagger", "daggerdbl",
  "circumflex", "perthousand", "Scaron", "guilsinglleft",
  "OE", "bullet", "Zcaron", "bullet",
  "bullet", "quoteleft", "quoteright", "quotedblleft",
  "quotedblright", "bullet", "endash", "emdash",
  "tilde", "trademark", "scaron", "guilsinglright",
  "oe", "bullet", "zcaron", "Ydieresis",
  /* 0xA0 - 0xBF */
  "space", "exclamdown", "cent", "sterling",
  "currency", "yen", "brokenbar", "section",
  "dieresis", "copyright", "ordfeminine", "guillemotleft",
  "logicalnot", "hyphen", "registered", "macron",
  "degree", "plusminus", "twosuperior", "threesuperior",
  "acute", "mu", "paragraph", "periodcentered",
  "cedilla", "onesuperior", "ordmasculine", "guillemotright",
  "onequarter", "onehalf", "threequarters", "questiondown",
  /* 0xC0 - 0xDF */
  "Agrave", "Aacute", "Acircumflex", "Atilde",
  "Adieresis", "Aring", "AE", "Ccedilla",
  "Egrave", "Eacute", "Ecircumflex", "Edieresis",
  "Igrave", "Iacute", "Icircumflex", "Idieresis",
  "Eth", "Ntilde", "Ograve", "Oacute",
  "Ocircumflex", "Otilde", "Odieresis", "multiply",
  "Oslash", "Ugrave", "Uacute", "Ucircumflex",
  "Udieresis", "Yacute", "Thorn", "germandbls",
  /* 0xE0 - 0xFF */
  "agrave", "aacute", "acircumflex", "atilde",
  "adieresis", "aring", "ae", "ccedilla",
  "egrave", "eacute", "ecircumflex", "edieresis",
  "igrave", "iacute", "icircumflex", "idieresis",
  "eth", "ntilde", "ograve", "oacute",
  "ocircumflex", "otilde", "odieresis", "divide",
  "oslash", "ugrave", "uacute", "ucircumflex",
  "udieresis", "yacute", "thorn", "ydieresis"
};
941 
942 #if 0
/*
 * StandardEncoding: glyph name for each of the 256 character codes,
 * indexed by code.  Codes without an assigned glyph hold ".notdef".
 * Eight entries per row; the leading comment gives the code of the
 * first entry in that row.
 */
static const char *StandardEncoding[256] = {
  /* 0x00 */ ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x08 */ ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x10 */ ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x18 */ ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x20 */ "space", "exclam", "quotedbl", "numbersign", "dollar", "percent", "ampersand", "quoteright",
  /* 0x28 */ "parenleft", "parenright", "asterisk", "plus", "comma", "hyphen", "period", "slash",
  /* 0x30 */ "zero", "one", "two", "three", "four", "five", "six", "seven",
  /* 0x38 */ "eight", "nine", "colon", "semicolon", "less", "equal", "greater", "question",
  /* 0x40 */ "at", "A", "B", "C", "D", "E", "F", "G",
  /* 0x48 */ "H", "I", "J", "K", "L", "M", "N", "O",
  /* 0x50 */ "P", "Q", "R", "S", "T", "U", "V", "W",
  /* 0x58 */ "X", "Y", "Z", "bracketleft", "backslash", "bracketright", "asciicircum", "underscore",
  /* 0x60 */ "quoteleft", "a", "b", "c", "d", "e", "f", "g",
  /* 0x68 */ "h", "i", "j", "k", "l", "m", "n", "o",
  /* 0x70 */ "p", "q", "r", "s", "t", "u", "v", "w",
  /* 0x78 */ "x", "y", "z", "braceleft", "bar", "braceright", "asciitilde", ".notdef",
  /* 0x80 */ ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x88 */ ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x90 */ ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x98 */ ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0xA0 */ ".notdef", "exclamdown", "cent", "sterling", "fraction", "yen", "florin", "section",
  /* 0xA8 */ "currency", "quotesingle", "quotedblleft", "guillemotleft", "guilsinglleft", "guilsinglright", "fi", "fl",
  /* 0xB0 */ ".notdef", "endash", "dagger", "daggerdbl", "periodcentered", ".notdef", "paragraph", "bullet",
  /* 0xB8 */ "quotesinglbase", "quotedblbase", "quotedblright", "guillemotright", "ellipsis", "perthousand", ".notdef", "questiondown",
  /* 0xC0 */ ".notdef", "grave", "acute", "circumflex", "tilde", "macron", "breve", "dotaccent",
  /* 0xC8 */ "dieresis", ".notdef", "ring", "cedilla", ".notdef", "hungarumlaut", "ogonek", "caron",
  /* 0xD0 */ "emdash", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0xD8 */ ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0xE0 */ ".notdef", "AE", ".notdef", "ordfeminine", ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0xE8 */ "Lslash", "Oslash", "OE", "ordmasculine", ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0xF0 */ ".notdef", "ae", ".notdef", ".notdef", ".notdef", "dotlessi", ".notdef", ".notdef",
  /* 0xF8 */ "lslash", "oslash", "oe", "germandbls", ".notdef", ".notdef", ".notdef", ".notdef"
};
1010 
/*
 * ISOLatin1Encoding: glyph name for each of the 256 character codes,
 * indexed by code.  Codes without an assigned glyph hold ".notdef".
 * Eight entries per row; the leading comment gives the code of the
 * first entry in that row.
 *
 * NOTE(review): codes 0x27, 0x2D, 0x60, 0x91 and 0x92 appear to differ
 * from the ISOLatin1Encoding vector printed in the PostScript Language
 * Reference -- confirm against the specification before enabling this
 * table.
 */
static const char *ISOLatin1Encoding[256] = {
  /* 0x00 */ ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x08 */ ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x10 */ ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x18 */ ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x20 */ "space", "exclam", "quotedbl", "numbersign", "dollar", "percent", "ampersand", "quotesingle",
  /* 0x28 */ "parenleft", "parenright", "asterisk", "plus", "comma", "hyphen", "period", "slash",
  /* 0x30 */ "zero", "one", "two", "three", "four", "five", "six", "seven",
  /* 0x38 */ "eight", "nine", "colon", "semicolon", "less", "equal", "greater", "question",
  /* 0x40 */ "at", "A", "B", "C", "D", "E", "F", "G",
  /* 0x48 */ "H", "I", "J", "K", "L", "M", "N", "O",
  /* 0x50 */ "P", "Q", "R", "S", "T", "U", "V", "W",
  /* 0x58 */ "X", "Y", "Z", "bracketleft", "backslash", "bracketright", "asciicircum", "underscore",
  /* 0x60 */ "grave", "a", "b", "c", "d", "e", "f", "g",
  /* 0x68 */ "h", "i", "j", "k", "l", "m", "n", "o",
  /* 0x70 */ "p", "q", "r", "s", "t", "u", "v", "w",
  /* 0x78 */ "x", "y", "z", "braceleft", "bar", "braceright", "asciitilde", ".notdef",
  /* 0x80 */ ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x88 */ ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x90 */ "dotlessi", "quoteleft", "quoteright", "circumflex", "tilde", "macron", "breve", "dotaccent",
  /* 0x98 */ "dieresis", ".notdef", "ring", "cedilla", ".notdef", "hungarumlaut", "ogonek", "caron",
  /* 0xA0 */ "space", "exclamdown", "cent", "sterling", "currency", "yen", "brokenbar", "section",
  /* 0xA8 */ "dieresis", "copyright", "ordfeminine", "guillemotleft", "logicalnot", "hyphen", "registered", "macron",
  /* 0xB0 */ "degree", "plusminus", "twosuperior", "threesuperior", "acute", "mu", "paragraph", "periodcentered",
  /* 0xB8 */ "cedilla", "onesuperior", "ordmasculine", "guillemotright", "onequarter", "onehalf", "threequarters", "questiondown",
  /* 0xC0 */ "Agrave", "Aacute", "Acircumflex", "Atilde", "Adieresis", "Aring", "AE", "Ccedilla",
  /* 0xC8 */ "Egrave", "Eacute", "Ecircumflex", "Edieresis", "Igrave", "Iacute", "Icircumflex", "Idieresis",
  /* 0xD0 */ "Eth", "Ntilde", "Ograve", "Oacute", "Ocircumflex", "Otilde", "Odieresis", "multiply",
  /* 0xD8 */ "Oslash", "Ugrave", "Uacute", "Ucircumflex", "Udieresis", "Yacute", "Thorn", "germandbls",
  /* 0xE0 */ "agrave", "aacute", "acircumflex", "atilde", "adieresis", "aring", "ae", "ccedilla",
  /* 0xE8 */ "egrave", "eacute", "ecircumflex", "edieresis", "igrave", "iacute", "icircumflex", "idieresis",
  /* 0xF0 */ "eth", "ntilde", "ograve", "oacute", "ocircumflex", "otilde", "odieresis", "divide",
  /* 0xF8 */ "oslash", "ugrave", "uacute", "ucircumflex", "udieresis", "yacute", "thorn", "ydieresis"
};
1078 #endif
1079