1 /* This is dvipdfmx, an eXtended version of dvipdfm by Mark A. Wicks.
2
3 Copyright (C) 2002-2014 by Jin-Hwan Cho and Shunsaku Hirata,
4 the dvipdfmx project team.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
19 */
20
21 /*
22 * Type0 font support:
23 *
24 * TODO:
25 *
26 * Composite font (multiple descendants) - not supported in PDF
27 */
28
29 #ifdef HAVE_CONFIG_H
30 #include <config.h>
31 #endif
32
33 #include <string.h>
34
35 #include "system.h"
36 #include "mem.h"
37 #include "error.h"
38 #include "dpxfile.h"
39
40 #include "pdfobj.h"
41 #include "fontmap.h"
42
43 #include "cmap.h"
44 #include "cid.h"
45
46 #include "type0.h"
47
48
49 #define TYPE0FONT_DEBUG_STR "Type0"
50 #define TYPE0FONT_DEBUG 3
51
52 static int __verbose = 0;
53
54 static pdf_obj *pdf_read_ToUnicode_file (const char *cmap_name);
55
56 void
Type0Font_set_verbose(void)57 Type0Font_set_verbose(void)
58 {
59 __verbose++;
60 }
61
/*
 * used_chars:
 *
 *  A single bit is used for each CID, so that used_chars can be reused
 *  as-is as the stream content of a CIDSet. See cid.h for add_to_used()
 *  and is_used().
 */
69
70 static char *
new_used_chars2(void)71 new_used_chars2(void)
72 {
73 char *used_chars;
74
75 used_chars = NEW(8192, char);
76 memset(used_chars, 0, 8192);
77
78 return used_chars;
79 }
80
81 #define FLAG_NONE 0
82 #define FLAG_USED_CHARS_SHARED (1 << 0)
83
84 struct Type0Font {
85 char *fontname; /* BaseFont */
86 char *encoding; /* "Identity-H" or "Identity-V" (not ID) */
87 char *used_chars; /* Used chars (CIDs) */
88
89 /*
90 * Type0 only
91 */
92 CIDFont *descendant; /* Only single descendant is allowed. */
93 int flags;
94 int wmode;
95 int cmap_id;
96
97 /*
98 * PDF Font Resource
99 */
100 pdf_obj *indirect;
101 pdf_obj *fontdict;
102 pdf_obj *descriptor; /* MUST BE NULL */
103 };
104
105 static void
Type0Font_init_font_struct(Type0Font * font)106 Type0Font_init_font_struct (Type0Font *font)
107 {
108 ASSERT(font);
109
110 font->fontname = NULL;
111 font->fontdict = NULL;
112 font->indirect = NULL;
113 font->descriptor = NULL;
114 font->encoding = NULL;
115 font->used_chars = NULL;
116 font->descendant = NULL;
117 font->wmode = -1;
118 font->cmap_id = -1;
119 font->flags = FLAG_NONE;
120
121 return;
122 }
123
124 static void
Type0Font_clean(Type0Font * font)125 Type0Font_clean (Type0Font *font)
126 {
127 if (font) {
128 if (font->fontdict)
129 ERROR("%s: Object not flushed.", TYPE0FONT_DEBUG_STR);
130 if (font->indirect)
131 ERROR("%s: Object not flushed.", TYPE0FONT_DEBUG_STR);
132 if (font->descriptor)
133 ERROR("%s: FontDescriptor unexpected for Type0 font.", TYPE0FONT_DEBUG_STR);
134 if (!(font->flags & FLAG_USED_CHARS_SHARED) && font->used_chars)
135 RELEASE(font->used_chars);
136 if (font->encoding)
137 RELEASE(font->encoding);
138 if (font->fontname)
139 RELEASE(font->fontname);
140 font->fontdict = NULL;
141 font->indirect = NULL;
142 font->descriptor = NULL;
143 font->used_chars = NULL;
144 font->encoding = NULL;
145 font->fontname = NULL;
146 }
147 }
148
149 /* PLEASE FIX THIS */
150 #include "tt_cmap.h"
151
152 static pdf_obj *
Type0Font_create_ToUnicode_stream(Type0Font * font)153 Type0Font_create_ToUnicode_stream(Type0Font *font) {
154 CIDFont *cidfont = font->descendant;
155 return otf_create_ToUnicode_stream(CIDFont_get_ident(cidfont),
156 CIDFont_get_opt_index(cidfont),
157 Type0Font_get_usedchars(font),
158 font->cmap_id);
159 }
160
161 /* Try to load ToUnicode CMap from file system first, if not found fallback to
162 * font CMap reverse lookup. */
163 static pdf_obj *
Type0Font_try_load_ToUnicode_stream(Type0Font * font,char * cmap_base)164 Type0Font_try_load_ToUnicode_stream(Type0Font *font, char *cmap_base) {
165 char *cmap_name = NEW(strlen(cmap_base) + strlen("-UTF-16"), char);
166 pdf_obj *tounicode;
167
168 sprintf(cmap_name, "%s-UTF16", cmap_base);
169 tounicode = pdf_read_ToUnicode_file(cmap_name);
170 if (!tounicode) {
171 sprintf(cmap_name, "%s-UCS2", cmap_base);
172 tounicode = pdf_read_ToUnicode_file(cmap_name);
173 }
174
175 RELEASE(cmap_name);
176
177 if (!tounicode)
178 tounicode = Type0Font_create_ToUnicode_stream(font);
179
180 return tounicode;
181 }
182
183 static void
add_ToUnicode(Type0Font * font)184 add_ToUnicode (Type0Font *font)
185 {
186 pdf_obj *tounicode;
187 CIDFont *cidfont;
188 CIDSysInfo *csi;
189 char *fontname;
190
191 /*
192 * ToUnicode CMap:
193 *
194 * ToUnicode CMaps are usually not required for standard character
195 * collections such as Adobe-Japan1. Identity-H is used for UCS
196 * ordering CID-keyed fonts. External resource must be loaded for
197 * others.
198 */
199
200 cidfont = font->descendant;
201 if (!cidfont) {
202 ERROR("%s: No descendant CID-keyed font.", TYPE0FONT_DEBUG_STR);
203 return;
204 }
205
206 if (CIDFont_is_ACCFont(cidfont)) {
207 /* No need to embed ToUnicode */
208 return;
209 } else if (CIDFont_is_UCSFont(cidfont)) {
210 /*
211 * Old version of dvipdfmx mistakenly used Adobe-Identity as Unicode.
212 */
213 tounicode = pdf_read_ToUnicode_file("Adobe-Identity-UCS2");
214 if (!tounicode) { /* This should work */
215 tounicode = pdf_new_name("Identity-H");
216 }
217 pdf_add_dict(font->fontdict, pdf_new_name("ToUnicode"), tounicode);
218 return;
219 }
220
221 tounicode = NULL;
222 csi = CIDFont_get_CIDSysInfo(cidfont);
223 fontname = CIDFont_get_fontname(cidfont);
224 if (CIDFont_get_embedding(cidfont)) {
225 fontname += 7; /* FIXME */
226 }
227
228 if (!strcmp(csi->registry, "Adobe") &&
229 !strcmp(csi->ordering, "Identity")) {
230 switch (CIDFont_get_subtype(cidfont)) {
231 case CIDFONT_TYPE2:
232 /* PLEASE FIX THIS */
233 tounicode = Type0Font_create_ToUnicode_stream(font);
234 break;
235 default:
236 if (CIDFont_get_flag(cidfont, CIDFONT_FLAG_TYPE1C)) { /* FIXME */
237 tounicode = Type0Font_create_ToUnicode_stream(font);
238 } else if (CIDFont_get_flag(cidfont, CIDFONT_FLAG_TYPE1)) { /* FIXME */
239 /* Font loader will create ToUnicode and set. */
240 return;
241 } else {
242 tounicode = Type0Font_try_load_ToUnicode_stream(font, fontname);
243 }
244 break;
245 }
246 } else {
247 char *cmap_base = NEW(strlen(csi->registry) + strlen(csi->ordering) + 2, char);
248 sprintf(cmap_base, "%s-%s", csi->registry, csi->ordering);
249 tounicode = Type0Font_try_load_ToUnicode_stream(font, cmap_base);
250 RELEASE(cmap_base);
251 }
252
253 if (tounicode) {
254 pdf_add_dict(font->fontdict,
255 pdf_new_name("ToUnicode"), tounicode);
256 } else {
257 WARN("Failed to load ToUnicode CMap for font \"%s\"", fontname);
258 }
259
260 return;
261 }
262
263 void
Type0Font_set_ToUnicode(Type0Font * font,pdf_obj * cmap_ref)264 Type0Font_set_ToUnicode (Type0Font *font, pdf_obj *cmap_ref)
265 {
266 ASSERT(font);
267
268 pdf_add_dict(font->fontdict,
269 pdf_new_name("ToUnicode"), cmap_ref);
270 }
271
272 static void
Type0Font_dofont(Type0Font * font)273 Type0Font_dofont (Type0Font *font)
274 {
275 if (!font || !font->indirect)
276 return;
277
278 if (!pdf_lookup_dict(font->fontdict, "ToUnicode")) { /* FIXME */
279 add_ToUnicode(font);
280 }
281 }
282
283 static void
Type0Font_flush(Type0Font * font)284 Type0Font_flush (Type0Font *font)
285 {
286 if (font) {
287 if (font->fontdict)
288 pdf_release_obj(font->fontdict);
289 font->fontdict = NULL;
290 if (font->indirect)
291 pdf_release_obj(font->indirect);
292 font->indirect = NULL;
293 if (font->descriptor)
294 ERROR("%s: FontDescriptor unexpected for Type0 font.", TYPE0FONT_DEBUG_STR);
295 font->descriptor = NULL;
296 }
297 }
298
299 int
Type0Font_get_wmode(Type0Font * font)300 Type0Font_get_wmode (Type0Font *font)
301 {
302 ASSERT(font);
303
304 return font->wmode;
305 }
306
307 #if 0
308 char *
309 Type0Font_get_encoding (Type0Font *font)
310 {
311 ASSERT(font);
312
313 return font->encoding;
314 }
315 #endif
316
317 char *
Type0Font_get_usedchars(Type0Font * font)318 Type0Font_get_usedchars (Type0Font *font)
319 {
320 ASSERT(font);
321
322 return font->used_chars;
323 }
324
325 pdf_obj *
Type0Font_get_resource(Type0Font * font)326 Type0Font_get_resource (Type0Font *font)
327 {
328 ASSERT(font);
329
330 /*
331 * This looks somewhat strange.
332 */
333 if (!font->indirect) {
334 pdf_obj *array;
335
336 array = pdf_new_array();
337 pdf_add_array(array, CIDFont_get_resource(font->descendant));
338 pdf_add_dict(font->fontdict, pdf_new_name("DescendantFonts"), array);
339 font->indirect = pdf_ref_obj(font->fontdict);
340 }
341
342 return pdf_link_obj(font->indirect);
343 }
344
345 /******************************** CACHE ********************************/
346
347 #define CHECK_ID(n) do {\
348 if ((n) < 0 || (n) >= __cache.count)\
349 ERROR("%s: Invalid ID %d", TYPE0FONT_DEBUG_STR, (n));\
350 } while (0)
351
352 #define CACHE_ALLOC_SIZE 16u
353
354 static struct font_cache {
355 int count;
356 int capacity;
357 Type0Font *fonts;
358 } __cache = {
359 0, 0, NULL
360 };
361
362 void
Type0Font_cache_init(void)363 Type0Font_cache_init (void)
364 {
365 if (__cache.fonts)
366 ERROR("%s: Already initialized.", TYPE0FONT_DEBUG_STR);
367 __cache.count = 0;
368 __cache.capacity = 0;
369 __cache.fonts = NULL;
370 }
371
372 Type0Font *
Type0Font_cache_get(int id)373 Type0Font_cache_get (int id)
374 {
375 CHECK_ID(id);
376
377 return &__cache.fonts[id];
378 }
379
380 int
Type0Font_cache_find(const char * map_name,int cmap_id,fontmap_opt * fmap_opt)381 Type0Font_cache_find (const char *map_name, int cmap_id, fontmap_opt *fmap_opt)
382 {
383 int font_id = -1;
384 Type0Font *font;
385 CIDFont *cidfont;
386 CMap *cmap;
387 CIDSysInfo *csi;
388 char *fontname = NULL;
389 int cid_id = -1, parent_id = -1, wmode = 0;
390 int pdf_ver;
391
392 pdf_ver = pdf_get_version();
393 if (!map_name || cmap_id < 0 || pdf_ver < 2)
394 return -1;
395
396 /*
397 * Encoding is Identity-H or Identity-V according as thier WMode value.
398 *
399 * We do not use match against the map_name since fonts (TrueType) covers
400 * characters across multiple character collection (eg, Adobe-Japan1 and
401 * Adobe-Japan2) must be splited into multiple CID-keyed fonts.
402 */
403
404 cmap = CMap_cache_get(cmap_id);
405 csi = (CMap_is_Identity(cmap)) ? NULL : CMap_get_CIDSysInfo(cmap) ;
406
407 cid_id = CIDFont_cache_find(map_name, csi, fmap_opt);
408
409 if (cid_id < 0)
410 return -1;
411
412 /*
413 * The descendant CID-keyed font has already been registerd.
414 * If CID-keyed font with ID = cid_id is new font, then create new parent
415 * Type 0 font. Otherwise, there already exists parent Type 0 font and
416 * then we find him and return his ID. We must check against their WMode.
417 */
418
419 cidfont = CIDFont_cache_get(cid_id);
420 wmode = CMap_get_wmode(cmap);
421
422 /* Does CID-keyed font already have parent ? */
423 parent_id = CIDFont_get_parent_id(cidfont, wmode);
424 if (parent_id >= 0)
425 return parent_id; /* If so, we don't need new one. */
426
427 /*
428 * CIDFont does not have parent or his parent's WMode does not matched with
429 * wmode. Create new Type0 font.
430 */
431
432 if (__cache.count >= __cache.capacity) {
433 __cache.capacity += CACHE_ALLOC_SIZE;
434 __cache.fonts = RENEW(__cache.fonts, __cache.capacity, struct Type0Font);
435 }
436 font_id = __cache.count;
437 font = &__cache.fonts[font_id];
438
439 Type0Font_init_font_struct(font);
440
441 /*
442 * All CJK double-byte characters are mapped so that resulting
443 * character codes coincide with CIDs of given character collection.
444 * So, the Encoding is always Identity-H for horizontal fonts or
445 * Identity-V for vertical fonts.
446 */
447 if (wmode) {
448 font->encoding = NEW(strlen("Identity-V")+1, char);
449 strcpy(font->encoding, "Identity-V");
450 } else {
451 font->encoding = NEW(strlen("Identity-H")+1, char);
452 strcpy(font->encoding, "Identity-H");
453 }
454 font->wmode = wmode;
455 font->cmap_id = cmap_id;
456
457 /*
458 * Now we start font dictionary.
459 */
460 font->fontdict = pdf_new_dict();
461 pdf_add_dict(font->fontdict, pdf_new_name ("Type"), pdf_new_name ("Font"));
462 pdf_add_dict(font->fontdict, pdf_new_name ("Subtype"), pdf_new_name ("Type0"));
463
464 /*
465 * Type0 font does not have FontDescriptor because it is not a simple font.
466 * Instead, DescendantFonts appears here.
467 *
468 * Up to PDF version 1.5, Type0 font must have single descendant font which
469 * is a CID-keyed font. Future PDF spec. will allow multiple desecendant
470 * fonts.
471 */
472 font->descendant = cidfont;
473 CIDFont_attach_parent(cidfont, font_id, wmode);
474
475 /*
476 * PostScript Font name:
477 *
478 * Type0 font's fontname is usually descendant CID-keyed font's font name
479 * appended by -ENCODING.
480 */
481 fontname = CIDFont_get_fontname(cidfont);
482
483 if (__verbose) {
484 if (CIDFont_get_embedding(cidfont) && strlen(fontname) > 7)
485 MESG("(CID:%s)", fontname+7); /* skip XXXXXX+ */
486 else
487 MESG("(CID:%s)", fontname);
488 }
489
490 /*
491 * The difference between CID-keyed font and TrueType font appears here.
492 *
493 * Glyph substitution for vertical writing is done in CMap mapping process
494 * for CID-keyed fonts. But we must rely on OpenType layout table in the
495 * case of TrueType fonts. So, we must use different used_chars for each
496 * horizontal and vertical fonts in that case.
497 *
498 * In most PDF file, encoding name is not appended to fontname for Type0
499 * fonts having CIDFontType 2 font as their descendant.
500 */
501
502 font->used_chars = NULL;
503 font->flags = FLAG_NONE;
504
505 switch (CIDFont_get_subtype(cidfont)) {
506 case CIDFONT_TYPE0:
507 font->fontname = NEW(strlen(fontname)+strlen(font->encoding)+2, char);
508 sprintf(font->fontname, "%s-%s", fontname, font->encoding);
509 pdf_add_dict(font->fontdict,
510 pdf_new_name("BaseFont"), pdf_new_name(font->fontname));
511 /*
512 * Need used_chars to write W, W2.
513 */
514 if ((parent_id = CIDFont_get_parent_id(cidfont, wmode ? 0 : 1)) < 0) {
515 font->used_chars = new_used_chars2();
516 } else {
517 /* Don't allocate new one. */
518 font->used_chars = Type0Font_get_usedchars(Type0Font_cache_get(parent_id));
519 font->flags |= FLAG_USED_CHARS_SHARED;
520 }
521 break;
522 case CIDFONT_TYPE2:
523 /*
524 * TrueType:
525 *
526 * Use different used_chars for H and V.
527 */
528 pdf_add_dict(font->fontdict,
529 pdf_new_name("BaseFont"), pdf_new_name(fontname));
530 font->used_chars = new_used_chars2();
531 break;
532 default:
533 ERROR("Unrecognized CIDFont Type");
534 break;
535 }
536
537 pdf_add_dict(font->fontdict,
538 pdf_new_name("Encoding"), pdf_new_name(font->encoding));
539
540 __cache.count++;
541
542 return font_id;
543 }
544
545 void
Type0Font_cache_close(void)546 Type0Font_cache_close (void)
547 {
548 int font_id;
549
550 /*
551 * This need to be fixed.
552 *
553 * CIDFont_cache_close() before Type0Font_release because of used_chars.
554 * ToUnicode support want descendant CIDFont's CSI and fontname.
555 */
556 if (__cache.fonts) {
557 for (font_id = 0; font_id < __cache.count; font_id++)
558 Type0Font_dofont(&__cache.fonts[font_id]);
559 }
560 CIDFont_cache_close();
561 if (__cache.fonts) {
562 for (font_id = 0; font_id < __cache.count; font_id++) {
563 Type0Font_flush(&__cache.fonts[font_id]);
564 Type0Font_clean(&__cache.fonts[font_id]);
565 }
566 RELEASE(__cache.fonts);
567 }
568 __cache.fonts = NULL;
569 __cache.count = 0;
570 __cache.capacity = 0;
571 }
572
573 /******************************** COMPAT ********************************/
574
575 #ifndef WITHOUT_COMPAT
576
577 #include "cmap_read.h"
578 #include "cmap_write.h"
579 #include "pdfresource.h"
580 #include "pdfencoding.h"
581
582 static pdf_obj *
create_dummy_CMap(void)583 create_dummy_CMap (void)
584 {
585 pdf_obj *stream;
586 char buf[32];
587 int i, n;
588
589 #define CMAP_PART0 "\
590 %!PS-Adobe-3.0 Resource-CMap\n\
591 %%DocumentNeededResources: ProcSet (CIDInit)\n\
592 %%IncludeResource: ProcSet (CIDInit)\n\
593 %%BeginResource: CMap (Adobe-Identity-UCS2)\n\
594 %%Title: (Adobe-Identity-UCS2 Adobe UCS2 0)\n\
595 %%Version: 1.0\n\
596 %%Copyright:\n\
597 %% ---\n\
598 %%EndComments\n\n\
599 "
600 #define CMAP_PART1 "\
601 /CIDInit /ProcSet findresource begin\n\
602 \n\
603 12 dict begin\n\nbegincmap\n\n\
604 /CIDSystemInfo 3 dict dup begin\n\
605 /Registry (Adobe) def\n\
606 /Ordering (UCS2) def\n\
607 /Supplement 0 def\n\
608 end def\n\n\
609 /CMapName /Adobe-Identity-UCS2 def\n\
610 /CMapVersion 1.0 def\n\
611 /CMapType 2 def\n\n\
612 2 begincodespacerange\n\
613 <0000> <FFFF>\n\
614 endcodespacerange\n\
615 "
616 #define CMAP_PART3 "\
617 endcmap\n\n\
618 CMapName currentdict /CMap defineresource pop\n\n\
619 end\nend\n\n\
620 %%EndResource\n\
621 %%EOF\n\
622 "
623
624 stream = pdf_new_stream(STREAM_COMPRESS);
625 pdf_add_stream(stream, CMAP_PART0, strlen(CMAP_PART0));
626 pdf_add_stream(stream, CMAP_PART1, strlen(CMAP_PART1));
627 pdf_add_stream(stream, "\n100 beginbfrange\n", strlen("\n100 beginbfrange\n"));
628 for (i = 0; i < 0x64; i++) {
629 n = sprintf(buf,
630 "<%02X00> <%02XFF> <%02X00>\n", i, i, i);
631 pdf_add_stream(stream, buf, n);
632 }
633 pdf_add_stream(stream, "endbfrange\n\n", strlen("endbfrange\n\n"));
634
635 pdf_add_stream(stream, "\n100 beginbfrange\n", strlen("\n100 beginbfrange\n"));
636 for (i = 0x64; i < 0xc8; i++) {
637 n = sprintf(buf,
638 "<%02X00> <%02XFF> <%02X00>\n", i, i, i);
639 pdf_add_stream(stream, buf, n);
640 }
641 pdf_add_stream(stream, "endbfrange\n\n", strlen("endbfrange\n\n"));
642
643 pdf_add_stream(stream, "\n48 beginbfrange\n", strlen("\n48 beginbfrange\n"));
644 for (i = 0xc8; i <= 0xd7; i++) {
645 n = sprintf(buf,
646 "<%02X00> <%02XFF> <%02X00>\n", i, i, i);
647 pdf_add_stream(stream, buf, n);
648 }
649 for (i = 0xe0; i <= 0xff; i++) {
650 n = sprintf(buf,
651 "<%02X00> <%02XFF> <%02X00>\n", i, i, i);
652 pdf_add_stream(stream, buf, n);
653 }
654 pdf_add_stream(stream, "endbfrange\n\n", strlen("endbfrange\n\n"));
655
656 pdf_add_stream(stream, CMAP_PART3, strlen(CMAP_PART3));
657
658 return stream;
659 }
660
661 static pdf_obj *
pdf_read_ToUnicode_file(const char * cmap_name)662 pdf_read_ToUnicode_file (const char *cmap_name)
663 {
664 pdf_obj *stream;
665 long res_id = -1;
666
667 ASSERT(cmap_name);
668
669 res_id = pdf_findresource("CMap", cmap_name);
670 if (res_id < 0) {
671 if (!strcmp(cmap_name, "Adobe-Identity-UCS2"))
672 stream = create_dummy_CMap();
673 else {
674 stream = pdf_load_ToUnicode_stream(cmap_name);
675 }
676 if (stream) {
677 res_id = pdf_defineresource("CMap",
678 cmap_name,
679 stream, PDF_RES_FLUSH_IMMEDIATE);
680 }
681 }
682
683 return (res_id < 0 ? NULL : pdf_get_resource_reference(res_id));
684 }
685 #endif /* !WITHOUT_COMPAT */
686