1 /* This is dvipdfmx, an eXtended version of dvipdfm by Mark A. Wicks.
2 
3     Copyright (C) 2002-2014 by Jin-Hwan Cho and Shunsaku Hirata,
4     the dvipdfmx project team.
5 
6     This program is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10 
11     This program is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15 
16     You should have received a copy of the GNU General Public License
17     along with this program; if not, write to the Free Software
18     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
19 */
20 
21 /*
22  * Type0 font support:
23  *
24  * TODO:
25  *
26  *  Composite font (multiple descendants) - not supported in PDF
27  */
28 
29 #ifdef HAVE_CONFIG_H
30 #include <config.h>
31 #endif
32 
33 #include <string.h>
34 
35 #include "system.h"
36 #include "mem.h"
37 #include "error.h"
38 #include "dpxfile.h"
39 
40 #include "pdfobj.h"
41 #include "fontmap.h"
42 
43 #include "cmap.h"
44 #include "cid.h"
45 
46 #include "type0.h"
47 
48 
/* Prefix used in the diagnostic messages emitted by this file. */
#define TYPE0FONT_DEBUG_STR "Type0"
/* NOTE(review): not referenced in the visible part of this file -- confirm it is still needed. */
#define TYPE0FONT_DEBUG     3

/* Verbosity level of this module; incremented by Type0Font_set_verbose(). */
static int __verbose = 0;

/*
 * Defined in the COMPAT section at the end of this file.
 * NOTE(review): the definition sits inside #ifndef WITHOUT_COMPAT while the
 * callers do not -- confirm that building with WITHOUT_COMPAT is still expected to work.
 */
static pdf_obj *pdf_read_ToUnicode_file (const char *cmap_name);
55 
56 void
Type0Font_set_verbose(void)57 Type0Font_set_verbose(void)
58 {
59   __verbose++;
60 }
61 
/*
 * used_chars:
 *
 *  A single bit is used for each CID, so that used_chars can be reused
 *  directly as the stream content of a CIDSet. See cid.h for add_to_used()
 *  and is_used().
 */
69 
70 static char *
new_used_chars2(void)71 new_used_chars2(void)
72 {
73   char *used_chars;
74 
75   used_chars = NEW(8192, char);
76   memset(used_chars, 0, 8192);
77 
78   return used_chars;
79 }
80 
/* Bits stored in Type0Font.flags. */
#define FLAG_NONE              0
/* used_chars points at another Type0 font's bitmap and must not be released here. */
#define FLAG_USED_CHARS_SHARED (1 << 0)

/*
 * One Type0 (composite) font: a thin parent around a single descendant
 * CID-keyed font, plus the PDF objects that represent it.
 */
struct Type0Font {
  char    *fontname;   /* BaseFont; only set when the descendant is a CIDFontType 0 font */
  char    *encoding;   /* "Identity-H" or "Identity-V" (not ID) */
  char    *used_chars; /* Used chars (CIDs), one bit each; possibly shared, see flags */

  /*
   * Type0 only
   */
  CIDFont *descendant; /* Only single descendant is allowed. */
  int      flags;      /* FLAG_* bits */
  int      wmode;      /* Writing mode: 0 selects Identity-H, non-zero Identity-V; -1 until set */
  int      cmap_id;    /* CMap cache ID this font was created for; -1 until set */

  /*
   * PDF Font Resource
   */
  pdf_obj *indirect;   /* Reference to fontdict, created lazily by Type0Font_get_resource() */
  pdf_obj *fontdict;   /* The font dictionary itself */
  pdf_obj *descriptor; /* MUST BE NULL */
};
104 
105 static void
Type0Font_init_font_struct(Type0Font * font)106 Type0Font_init_font_struct (Type0Font *font)
107 {
108   ASSERT(font);
109 
110   font->fontname   = NULL;
111   font->fontdict   = NULL;
112   font->indirect   = NULL;
113   font->descriptor = NULL;
114   font->encoding   = NULL;
115   font->used_chars = NULL;
116   font->descendant = NULL;
117   font->wmode      = -1;
118   font->cmap_id    = -1;
119   font->flags      = FLAG_NONE;
120 
121   return;
122 }
123 
124 static void
Type0Font_clean(Type0Font * font)125 Type0Font_clean (Type0Font *font)
126 {
127   if (font) {
128     if (font->fontdict)
129       ERROR("%s: Object not flushed.", TYPE0FONT_DEBUG_STR);
130     if (font->indirect)
131       ERROR("%s: Object not flushed.", TYPE0FONT_DEBUG_STR);
132     if (font->descriptor)
133       ERROR("%s: FontDescriptor unexpected for Type0 font.", TYPE0FONT_DEBUG_STR);
134     if (!(font->flags & FLAG_USED_CHARS_SHARED) && font->used_chars)
135       RELEASE(font->used_chars);
136     if (font->encoding)
137       RELEASE(font->encoding);
138     if (font->fontname)
139       RELEASE(font->fontname);
140     font->fontdict   = NULL;
141     font->indirect   = NULL;
142     font->descriptor = NULL;
143     font->used_chars = NULL;
144     font->encoding   = NULL;
145     font->fontname   = NULL;
146   }
147 }
148 
149 /* PLEASE FIX THIS */
150 #include "tt_cmap.h"
151 
152 static pdf_obj *
Type0Font_create_ToUnicode_stream(Type0Font * font)153 Type0Font_create_ToUnicode_stream(Type0Font *font) {
154   CIDFont *cidfont = font->descendant;
155   return otf_create_ToUnicode_stream(CIDFont_get_ident(cidfont),
156                                      CIDFont_get_opt_index(cidfont),
157                                      Type0Font_get_usedchars(font),
158                                      font->cmap_id);
159 }
160 
161 /* Try to load ToUnicode CMap from file system first, if not found fallback to
162  * font CMap reverse lookup. */
163 static pdf_obj *
Type0Font_try_load_ToUnicode_stream(Type0Font * font,char * cmap_base)164 Type0Font_try_load_ToUnicode_stream(Type0Font *font, char *cmap_base) {
165   char *cmap_name = NEW(strlen(cmap_base) + strlen("-UTF-16"), char);
166   pdf_obj *tounicode;
167 
168   sprintf(cmap_name, "%s-UTF16", cmap_base);
169   tounicode = pdf_read_ToUnicode_file(cmap_name);
170   if (!tounicode) {
171     sprintf(cmap_name, "%s-UCS2", cmap_base);
172     tounicode = pdf_read_ToUnicode_file(cmap_name);
173   }
174 
175   RELEASE(cmap_name);
176 
177   if (!tounicode)
178     tounicode = Type0Font_create_ToUnicode_stream(font);
179 
180   return tounicode;
181 }
182 
183 static void
add_ToUnicode(Type0Font * font)184 add_ToUnicode (Type0Font *font)
185 {
186   pdf_obj    *tounicode;
187   CIDFont    *cidfont;
188   CIDSysInfo *csi;
189   char       *fontname;
190 
191   /*
192    * ToUnicode CMap:
193    *
194    *  ToUnicode CMaps are usually not required for standard character
195    *  collections such as Adobe-Japan1. Identity-H is used for UCS
196    *  ordering CID-keyed fonts. External resource must be loaded for
197    *  others.
198    */
199 
200   cidfont = font->descendant;
201   if (!cidfont) {
202     ERROR("%s: No descendant CID-keyed font.", TYPE0FONT_DEBUG_STR);
203     return;
204   }
205 
206   if (CIDFont_is_ACCFont(cidfont)) {
207     /* No need to embed ToUnicode */
208     return;
209   } else if (CIDFont_is_UCSFont(cidfont)) {
210     /*
211      * Old version of dvipdfmx mistakenly used Adobe-Identity as Unicode.
212      */
213     tounicode = pdf_read_ToUnicode_file("Adobe-Identity-UCS2");
214     if (!tounicode) { /* This should work */
215       tounicode = pdf_new_name("Identity-H");
216     }
217     pdf_add_dict(font->fontdict, pdf_new_name("ToUnicode"), tounicode);
218     return;
219   }
220 
221   tounicode = NULL;
222   csi       = CIDFont_get_CIDSysInfo(cidfont);
223   fontname  = CIDFont_get_fontname(cidfont);
224   if (CIDFont_get_embedding(cidfont)) {
225     fontname += 7; /* FIXME */
226   }
227 
228   if (!strcmp(csi->registry, "Adobe")    &&
229       !strcmp(csi->ordering, "Identity")) {
230     switch (CIDFont_get_subtype(cidfont)) {
231     case CIDFONT_TYPE2:
232       /* PLEASE FIX THIS */
233       tounicode = Type0Font_create_ToUnicode_stream(font);
234       break;
235     default:
236       if (CIDFont_get_flag(cidfont, CIDFONT_FLAG_TYPE1C)) { /* FIXME */
237         tounicode = Type0Font_create_ToUnicode_stream(font);
238       } else if (CIDFont_get_flag(cidfont, CIDFONT_FLAG_TYPE1)) { /* FIXME */
239 	/* Font loader will create ToUnicode and set. */
240 	return;
241       } else {
242         tounicode = Type0Font_try_load_ToUnicode_stream(font, fontname);
243       }
244       break;
245     }
246   } else {
247     char *cmap_base = NEW(strlen(csi->registry) + strlen(csi->ordering) + 2, char);
248     sprintf(cmap_base, "%s-%s", csi->registry, csi->ordering);
249     tounicode = Type0Font_try_load_ToUnicode_stream(font, cmap_base);
250     RELEASE(cmap_base);
251   }
252 
253   if (tounicode) {
254     pdf_add_dict(font->fontdict,
255 		 pdf_new_name("ToUnicode"), tounicode);
256   } else {
257     WARN("Failed to load ToUnicode CMap for font \"%s\"", fontname);
258   }
259 
260   return;
261 }
262 
263 void
Type0Font_set_ToUnicode(Type0Font * font,pdf_obj * cmap_ref)264 Type0Font_set_ToUnicode (Type0Font *font, pdf_obj *cmap_ref)
265 {
266   ASSERT(font);
267 
268   pdf_add_dict(font->fontdict,
269 	       pdf_new_name("ToUnicode"), cmap_ref);
270 }
271 
272 static void
Type0Font_dofont(Type0Font * font)273 Type0Font_dofont (Type0Font *font)
274 {
275   if (!font || !font->indirect)
276     return;
277 
278   if (!pdf_lookup_dict(font->fontdict, "ToUnicode")) { /* FIXME */
279     add_ToUnicode(font);
280   }
281 }
282 
283 static void
Type0Font_flush(Type0Font * font)284 Type0Font_flush (Type0Font *font)
285 {
286   if (font) {
287     if (font->fontdict)
288       pdf_release_obj(font->fontdict);
289     font->fontdict = NULL;
290     if (font->indirect)
291       pdf_release_obj(font->indirect);
292     font->indirect = NULL;
293     if (font->descriptor)
294       ERROR("%s: FontDescriptor unexpected for Type0 font.", TYPE0FONT_DEBUG_STR);
295     font->descriptor = NULL;
296   }
297 }
298 
299 int
Type0Font_get_wmode(Type0Font * font)300 Type0Font_get_wmode (Type0Font *font)
301 {
302   ASSERT(font);
303 
304   return font->wmode;
305 }
306 
307 #if 0
308 char *
309 Type0Font_get_encoding (Type0Font *font)
310 {
311   ASSERT(font);
312 
313   return font->encoding;
314 }
315 #endif
316 
317 char *
Type0Font_get_usedchars(Type0Font * font)318 Type0Font_get_usedchars (Type0Font *font)
319 {
320   ASSERT(font);
321 
322   return font->used_chars;
323 }
324 
325 pdf_obj *
Type0Font_get_resource(Type0Font * font)326 Type0Font_get_resource (Type0Font *font)
327 {
328   ASSERT(font);
329 
330   /*
331    * This looks somewhat strange.
332    */
333   if (!font->indirect) {
334     pdf_obj *array;
335 
336     array = pdf_new_array();
337     pdf_add_array(array, CIDFont_get_resource(font->descendant));
338     pdf_add_dict(font->fontdict, pdf_new_name("DescendantFonts"), array);
339     font->indirect = pdf_ref_obj(font->fontdict);
340   }
341 
342   return pdf_link_obj(font->indirect);
343 }
344 
345 /******************************** CACHE ********************************/
346 
347 #define CHECK_ID(n) do {\
348   if ((n) < 0 || (n) >= __cache.count)\
349     ERROR("%s: Invalid ID %d", TYPE0FONT_DEBUG_STR, (n));\
350 } while (0)
351 
352 #define CACHE_ALLOC_SIZE 16u
353 
354 static struct font_cache {
355   int        count;
356   int        capacity;
357   Type0Font *fonts;
358 } __cache = {
359   0, 0, NULL
360 };
361 
362 void
Type0Font_cache_init(void)363 Type0Font_cache_init (void)
364 {
365   if (__cache.fonts)
366     ERROR("%s: Already initialized.", TYPE0FONT_DEBUG_STR);
367   __cache.count    = 0;
368   __cache.capacity = 0;
369   __cache.fonts    = NULL;
370 }
371 
372 Type0Font *
Type0Font_cache_get(int id)373 Type0Font_cache_get (int id)
374 {
375   CHECK_ID(id);
376 
377   return &__cache.fonts[id];
378 }
379 
380 int
Type0Font_cache_find(const char * map_name,int cmap_id,fontmap_opt * fmap_opt)381 Type0Font_cache_find (const char *map_name, int cmap_id, fontmap_opt *fmap_opt)
382 {
383   int         font_id = -1;
384   Type0Font  *font;
385   CIDFont    *cidfont;
386   CMap       *cmap;
387   CIDSysInfo *csi;
388   char       *fontname = NULL;
389   int         cid_id = -1, parent_id = -1, wmode = 0;
390   int         pdf_ver;
391 
392   pdf_ver = pdf_get_version();
393   if (!map_name || cmap_id < 0 || pdf_ver < 2)
394     return -1;
395 
396   /*
397    * Encoding is Identity-H or Identity-V according as thier WMode value.
398    *
399    * We do not use match against the map_name since fonts (TrueType) covers
400    * characters across multiple character collection (eg, Adobe-Japan1 and
401    * Adobe-Japan2) must be splited into multiple CID-keyed fonts.
402    */
403 
404   cmap = CMap_cache_get(cmap_id);
405   csi  = (CMap_is_Identity(cmap)) ? NULL : CMap_get_CIDSysInfo(cmap) ;
406 
407   cid_id = CIDFont_cache_find(map_name, csi, fmap_opt);
408 
409   if (cid_id < 0)
410     return -1;
411 
412   /*
413    * The descendant CID-keyed font has already been registerd.
414    * If CID-keyed font with ID = cid_id is new font, then create new parent
415    * Type 0 font. Otherwise, there already exists parent Type 0 font and
416    * then we find him and return his ID. We must check against their WMode.
417    */
418 
419   cidfont = CIDFont_cache_get(cid_id);
420   wmode   = CMap_get_wmode(cmap);
421 
422   /* Does CID-keyed font already have parent ? */
423   parent_id = CIDFont_get_parent_id(cidfont, wmode);
424   if (parent_id >= 0)
425     return parent_id; /* If so, we don't need new one. */
426 
427   /*
428    * CIDFont does not have parent or his parent's WMode does not matched with
429    * wmode. Create new Type0 font.
430    */
431 
432   if (__cache.count >= __cache.capacity) {
433     __cache.capacity += CACHE_ALLOC_SIZE;
434     __cache.fonts     = RENEW(__cache.fonts, __cache.capacity, struct Type0Font);
435   }
436   font_id =  __cache.count;
437   font    = &__cache.fonts[font_id];
438 
439   Type0Font_init_font_struct(font);
440 
441   /*
442    * All CJK double-byte characters are mapped so that resulting
443    * character codes coincide with CIDs of given character collection.
444    * So, the Encoding is always Identity-H for horizontal fonts or
445    * Identity-V for vertical fonts.
446    */
447   if (wmode) {
448     font->encoding = NEW(strlen("Identity-V")+1, char);
449     strcpy(font->encoding, "Identity-V");
450   } else {
451     font->encoding = NEW(strlen("Identity-H")+1, char);
452     strcpy(font->encoding, "Identity-H");
453   }
454   font->wmode = wmode;
455   font->cmap_id = cmap_id;
456 
457   /*
458    * Now we start font dictionary.
459    */
460   font->fontdict = pdf_new_dict();
461   pdf_add_dict(font->fontdict, pdf_new_name ("Type"),    pdf_new_name ("Font"));
462   pdf_add_dict(font->fontdict, pdf_new_name ("Subtype"), pdf_new_name ("Type0"));
463 
464   /*
465    * Type0 font does not have FontDescriptor because it is not a simple font.
466    * Instead, DescendantFonts appears here.
467    *
468    * Up to PDF version 1.5, Type0 font must have single descendant font which
469    * is a CID-keyed font. Future PDF spec. will allow multiple desecendant
470    * fonts.
471    */
472   font->descendant = cidfont;
473   CIDFont_attach_parent(cidfont, font_id, wmode);
474 
475   /*
476    * PostScript Font name:
477    *
478    *  Type0 font's fontname is usually descendant CID-keyed font's font name
479    *  appended by -ENCODING.
480    */
481   fontname = CIDFont_get_fontname(cidfont);
482 
483   if (__verbose) {
484     if (CIDFont_get_embedding(cidfont) && strlen(fontname) > 7)
485       MESG("(CID:%s)", fontname+7); /* skip XXXXXX+ */
486     else
487       MESG("(CID:%s)", fontname);
488   }
489 
490   /*
491    * The difference between CID-keyed font and TrueType font appears here.
492    *
493    * Glyph substitution for vertical writing is done in CMap mapping process
494    * for CID-keyed fonts. But we must rely on OpenType layout table in the
495    * case of TrueType fonts. So, we must use different used_chars for each
496    * horizontal and vertical fonts in that case.
497    *
498    * In most PDF file, encoding name is not appended to fontname for Type0
499    * fonts having CIDFontType 2 font as their descendant.
500    */
501 
502   font->used_chars = NULL;
503   font->flags      = FLAG_NONE;
504 
505   switch (CIDFont_get_subtype(cidfont)) {
506   case CIDFONT_TYPE0:
507     font->fontname = NEW(strlen(fontname)+strlen(font->encoding)+2, char);
508     sprintf(font->fontname, "%s-%s", fontname, font->encoding);
509     pdf_add_dict(font->fontdict,
510                  pdf_new_name("BaseFont"), pdf_new_name(font->fontname));
511     /*
512      * Need used_chars to write W, W2.
513      */
514     if ((parent_id = CIDFont_get_parent_id(cidfont, wmode ? 0 : 1)) < 0) {
515       font->used_chars = new_used_chars2();
516     } else {
517       /* Don't allocate new one. */
518       font->used_chars = Type0Font_get_usedchars(Type0Font_cache_get(parent_id));
519       font->flags     |= FLAG_USED_CHARS_SHARED;
520     }
521     break;
522   case CIDFONT_TYPE2:
523     /*
524      * TrueType:
525      *
526      *  Use different used_chars for H and V.
527      */
528     pdf_add_dict(font->fontdict,
529                  pdf_new_name("BaseFont"), pdf_new_name(fontname));
530     font->used_chars = new_used_chars2();
531     break;
532   default:
533     ERROR("Unrecognized CIDFont Type");
534     break;
535   }
536 
537   pdf_add_dict(font->fontdict,
538                pdf_new_name("Encoding"), pdf_new_name(font->encoding));
539 
540   __cache.count++;
541 
542   return font_id;
543 }
544 
545 void
Type0Font_cache_close(void)546 Type0Font_cache_close (void)
547 {
548   int   font_id;
549 
550   /*
551    * This need to be fixed.
552    *
553    * CIDFont_cache_close() before Type0Font_release because of used_chars.
554    * ToUnicode support want descendant CIDFont's CSI and fontname.
555    */
556   if (__cache.fonts) {
557     for (font_id = 0; font_id < __cache.count; font_id++)
558       Type0Font_dofont(&__cache.fonts[font_id]);
559   }
560   CIDFont_cache_close();
561   if (__cache.fonts) {
562     for (font_id = 0; font_id < __cache.count; font_id++) {
563       Type0Font_flush(&__cache.fonts[font_id]);
564       Type0Font_clean(&__cache.fonts[font_id]);
565     }
566     RELEASE(__cache.fonts);
567   }
568   __cache.fonts    = NULL;
569   __cache.count    = 0;
570   __cache.capacity = 0;
571 }
572 
573 /******************************** COMPAT ********************************/
574 
575 #ifndef WITHOUT_COMPAT
576 
577 #include "cmap_read.h"
578 #include "cmap_write.h"
579 #include "pdfresource.h"
580 #include "pdfencoding.h"
581 
582 static pdf_obj *
create_dummy_CMap(void)583 create_dummy_CMap (void)
584 {
585   pdf_obj *stream;
586   char     buf[32];
587   int      i, n;
588 
589 #define CMAP_PART0 "\
590 %!PS-Adobe-3.0 Resource-CMap\n\
591 %%DocumentNeededResources: ProcSet (CIDInit)\n\
592 %%IncludeResource: ProcSet (CIDInit)\n\
593 %%BeginResource: CMap (Adobe-Identity-UCS2)\n\
594 %%Title: (Adobe-Identity-UCS2 Adobe UCS2 0)\n\
595 %%Version: 1.0\n\
596 %%Copyright:\n\
597 %% ---\n\
598 %%EndComments\n\n\
599 "
600 #define CMAP_PART1 "\
601 /CIDInit /ProcSet findresource begin\n\
602 \n\
603 12 dict begin\n\nbegincmap\n\n\
604 /CIDSystemInfo 3 dict dup begin\n\
605   /Registry (Adobe) def\n\
606   /Ordering (UCS2) def\n\
607   /Supplement 0 def\n\
608 end def\n\n\
609 /CMapName /Adobe-Identity-UCS2 def\n\
610 /CMapVersion 1.0 def\n\
611 /CMapType 2 def\n\n\
612 2 begincodespacerange\n\
613 <0000> <FFFF>\n\
614 endcodespacerange\n\
615 "
616 #define CMAP_PART3 "\
617 endcmap\n\n\
618 CMapName currentdict /CMap defineresource pop\n\n\
619 end\nend\n\n\
620 %%EndResource\n\
621 %%EOF\n\
622 "
623 
624   stream = pdf_new_stream(STREAM_COMPRESS);
625   pdf_add_stream(stream, CMAP_PART0, strlen(CMAP_PART0));
626   pdf_add_stream(stream, CMAP_PART1, strlen(CMAP_PART1));
627   pdf_add_stream(stream, "\n100 beginbfrange\n", strlen("\n100 beginbfrange\n"));
628   for (i = 0; i < 0x64; i++) {
629     n = sprintf(buf,
630                 "<%02X00> <%02XFF> <%02X00>\n", i, i, i);
631     pdf_add_stream(stream, buf, n);
632   }
633   pdf_add_stream(stream, "endbfrange\n\n", strlen("endbfrange\n\n"));
634 
635   pdf_add_stream(stream, "\n100 beginbfrange\n", strlen("\n100 beginbfrange\n"));
636   for (i = 0x64; i < 0xc8; i++) {
637     n = sprintf(buf,
638                 "<%02X00> <%02XFF> <%02X00>\n", i, i, i);
639     pdf_add_stream(stream, buf, n);
640   }
641   pdf_add_stream(stream, "endbfrange\n\n", strlen("endbfrange\n\n"));
642 
643   pdf_add_stream(stream, "\n48 beginbfrange\n", strlen("\n48 beginbfrange\n"));
644   for (i = 0xc8; i <= 0xd7; i++) {
645     n = sprintf(buf,
646                 "<%02X00> <%02XFF> <%02X00>\n", i, i, i);
647     pdf_add_stream(stream, buf, n);
648   }
649   for (i = 0xe0; i <= 0xff; i++) {
650     n = sprintf(buf,
651                 "<%02X00> <%02XFF> <%02X00>\n", i, i, i);
652     pdf_add_stream(stream, buf, n);
653   }
654   pdf_add_stream(stream, "endbfrange\n\n", strlen("endbfrange\n\n"));
655 
656   pdf_add_stream(stream, CMAP_PART3, strlen(CMAP_PART3));
657 
658   return  stream;
659 }
660 
661 static pdf_obj *
pdf_read_ToUnicode_file(const char * cmap_name)662 pdf_read_ToUnicode_file (const char *cmap_name)
663 {
664   pdf_obj *stream;
665   long     res_id = -1;
666 
667   ASSERT(cmap_name);
668 
669   res_id = pdf_findresource("CMap", cmap_name);
670   if (res_id < 0) {
671     if (!strcmp(cmap_name, "Adobe-Identity-UCS2"))
672       stream = create_dummy_CMap();
673     else {
674       stream = pdf_load_ToUnicode_stream(cmap_name);
675     }
676     if (stream) {
677       res_id   = pdf_defineresource("CMap",
678                                     cmap_name,
679                                     stream, PDF_RES_FLUSH_IMMEDIATE);
680     }
681   }
682 
683   return  (res_id < 0 ? NULL : pdf_get_resource_reference(res_id));
684 }
685 #endif /* !WITHOUT_COMPAT */
686