1 #include "mupdf/fitz.h"
2 #include "mupdf/pdf.h"
3
4 #include <assert.h>
5
6 #include <ft2build.h>
7 #include FT_FREETYPE_H
8 #include FT_ADVANCES_H
9 #ifdef FT_FONT_FORMATS_H
10 #include FT_FONT_FORMATS_H
11 #else
12 #include FT_XFREE86_H
13 #endif
14 #include FT_TRUETYPE_TABLES_H
15
16 #ifndef FT_SFNT_HEAD
17 #define FT_SFNT_HEAD ft_sfnt_head
18 #endif
19
20 void
pdf_load_encoding(const char ** estrings,const char * encoding)21 pdf_load_encoding(const char **estrings, const char *encoding)
22 {
23 const char * const *bstrings = NULL;
24 int i;
25
26 if (!strcmp(encoding, "StandardEncoding"))
27 bstrings = fz_glyph_name_from_adobe_standard;
28 if (!strcmp(encoding, "MacRomanEncoding"))
29 bstrings = fz_glyph_name_from_mac_roman;
30 if (!strcmp(encoding, "MacExpertEncoding"))
31 bstrings = fz_glyph_name_from_mac_expert;
32 if (!strcmp(encoding, "WinAnsiEncoding"))
33 bstrings = fz_glyph_name_from_win_ansi;
34
35 if (bstrings)
36 for (i = 0; i < 256; i++)
37 estrings[i] = bstrings[i];
38 }
39
40 static void pdf_load_font_descriptor(fz_context *ctx, pdf_document *doc, pdf_font_desc *fontdesc, pdf_obj *dict,
41 const char *collection, const char *basefont, int iscidfont);
42
/*
 * Alias table for the fourteen base fonts. Each row starts with the
 * canonical name, followed by alternative spellings, and is terminated
 * by NULL. pdf_clean_font_name() returns the first entry of the row
 * whose entries match a given name.
 */
static const char *base_font_names[][10] =
{
	/* Courier family */
	{ "Courier",
		"CourierNew", "CourierNewPSMT", NULL },
	{ "Courier-Bold",
		"CourierNew,Bold", "Courier,Bold",
		"CourierNewPS-BoldMT", "CourierNew-Bold", NULL },
	{ "Courier-Oblique",
		"CourierNew,Italic", "Courier,Italic",
		"CourierNewPS-ItalicMT", "CourierNew-Italic", NULL },
	{ "Courier-BoldOblique",
		"CourierNew,BoldItalic", "Courier,BoldItalic",
		"CourierNewPS-BoldItalicMT", "CourierNew-BoldItalic", NULL },

	/* Helvetica family */
	{ "Helvetica",
		"ArialMT", "Arial", NULL },
	{ "Helvetica-Bold",
		"Arial-BoldMT", "Arial,Bold", "Arial-Bold",
		"Helvetica,Bold", NULL },
	{ "Helvetica-Oblique",
		"Arial-ItalicMT", "Arial,Italic", "Arial-Italic",
		"Helvetica,Italic", "Helvetica-Italic", NULL },
	{ "Helvetica-BoldOblique",
		"Arial-BoldItalicMT", "Arial,BoldItalic", "Arial-BoldItalic",
		"Helvetica,BoldItalic", "Helvetica-BoldItalic", NULL },

	/* Times family */
	{ "Times-Roman",
		"TimesNewRomanPSMT", "TimesNewRoman", "TimesNewRomanPS", NULL },
	{ "Times-Bold",
		"TimesNewRomanPS-BoldMT", "TimesNewRoman,Bold",
		"TimesNewRomanPS-Bold", "TimesNewRoman-Bold", NULL },
	{ "Times-Italic",
		"TimesNewRomanPS-ItalicMT", "TimesNewRoman,Italic",
		"TimesNewRomanPS-Italic", "TimesNewRoman-Italic", NULL },
	{ "Times-BoldItalic",
		"TimesNewRomanPS-BoldItalicMT", "TimesNewRoman,BoldItalic",
		"TimesNewRomanPS-BoldItalic", "TimesNewRoman-BoldItalic", NULL },

	/* Symbol fonts */
	{ "Symbol",
		"Symbol,Italic", "Symbol,Bold", "Symbol,BoldItalic",
		"SymbolMT", "SymbolMT,Italic", "SymbolMT,Bold", "SymbolMT,BoldItalic", NULL },
	{ "ZapfDingbats", NULL }
};
73
74 const unsigned char *
pdf_lookup_substitute_font(fz_context * ctx,int mono,int serif,int bold,int italic,int * len)75 pdf_lookup_substitute_font(fz_context *ctx, int mono, int serif, int bold, int italic, int *len)
76 {
77 if (mono) {
78 if (bold) {
79 if (italic) return fz_lookup_base14_font(ctx, "Courier-BoldOblique", len);
80 else return fz_lookup_base14_font(ctx, "Courier-Bold", len);
81 } else {
82 if (italic) return fz_lookup_base14_font(ctx, "Courier-Oblique", len);
83 else return fz_lookup_base14_font(ctx, "Courier", len);
84 }
85 } else if (serif) {
86 if (bold) {
87 if (italic) return fz_lookup_base14_font(ctx, "Times-BoldItalic", len);
88 else return fz_lookup_base14_font(ctx, "Times-Bold", len);
89 } else {
90 if (italic) return fz_lookup_base14_font(ctx, "Times-Italic", len);
91 else return fz_lookup_base14_font(ctx, "Times-Roman", len);
92 }
93 } else {
94 if (bold) {
95 if (italic) return fz_lookup_base14_font(ctx, "Helvetica-BoldOblique", len);
96 else return fz_lookup_base14_font(ctx, "Helvetica-Bold", len);
97 } else {
98 if (italic) return fz_lookup_base14_font(ctx, "Helvetica-Oblique", len);
99 else return fz_lookup_base14_font(ctx, "Helvetica", len);
100 }
101 }
102 }
103
/*
 * Return 1 if the font name matches one of the DynaLab naming patterns
 * checked here, 0 otherwise: it contains "HuaTian" or "MingLi", or it
 * has "DF"/"DLC" either at the very start or right after a '+'.
 */
static int is_dynalab(char *name)
{
	return strstr(name, "HuaTian") != NULL
		|| strstr(name, "MingLi") != NULL
		|| strncmp(name, "DF", 2) == 0
		|| strstr(name, "+DF") != NULL
		|| strncmp(name, "DLC", 3) == 0
		|| strstr(name, "+DLC") != NULL;
}
116
/*
 * Compare two strings while ignoring every ' ' character in both.
 * Returns 0 if they are equal once spaces are removed, 1 otherwise
 * (this is an equality test, not an ordering).
 */
static int strcmp_ignore_space(const char *a, const char *b)
{
	for (;; a++, b++)
	{
		a += strspn(a, " ");
		b += strspn(b, " ");
		if (*a != *b)
			return 1;
		if (*a == '\0')
			return 0;
	}
}
135
pdf_clean_font_name(const char * fontname)136 const char *pdf_clean_font_name(const char *fontname)
137 {
138 int i, k;
139 for (i = 0; i < (int)nelem(base_font_names); i++)
140 for (k = 0; base_font_names[i][k]; k++)
141 if (!strcmp_ignore_space(base_font_names[i][k], fontname))
142 return base_font_names[i][0];
143 return fontname;
144 }
145
146 /*
147 * FreeType and Rendering glue
148 */
149
150 enum { UNKNOWN, TYPE1, TRUETYPE };
151
ft_kind(FT_Face face)152 static int ft_kind(FT_Face face)
153 {
154 #ifdef FT_FONT_FORMATS_H
155 const char *kind = FT_Get_Font_Format(face);
156 #else
157 const char *kind = FT_Get_X11_Font_Format(face);
158 #endif
159 if (!strcmp(kind, "TrueType")) return TRUETYPE;
160 if (!strcmp(kind, "Type 1")) return TYPE1;
161 if (!strcmp(kind, "CFF")) return TYPE1;
162 if (!strcmp(kind, "CID Type 1")) return TYPE1;
163 return UNKNOWN;
164 }
165
/*
 * Replace a code point by its vertical presentation form, for the
 * punctuation and brackets listed below. Any other value is returned
 * unchanged.
 */
static int ft_vertical_presentation_form(int ucs)
{
	switch (ucs)
	{
	case 0x0021: return 0xFE15; /* ! */
	case 0x0028: return 0xFE35; /* ( */
	case 0x0029: return 0xFE36; /* ) */
	case 0x002C: return 0xFE10; /* , */
	case 0x003A: return 0xFE13; /* : */
	case 0x003B: return 0xFE14; /* ; */
	case 0x003F: return 0xFE16; /* ? */
	case 0x005B: return 0xFE47; /* [ */
	case 0x005D: return 0xFE48; /* ] */
	case 0x005F: return 0xFE33; /* _ */
	case 0x007B: return 0xFE37; /* { */
	case 0x007D: return 0xFE38; /* } */
	case 0x2013: return 0xFE32; /* EN DASH */
	case 0x2014: return 0xFE31; /* EM DASH */
	case 0x2025: return 0xFE30; /* TWO DOT LEADER */
	case 0x2026: return 0xFE19; /* HORIZONTAL ELLIPSIS */
	case 0x3001: return 0xFE11; /* IDEOGRAPHIC COMMA */
	case 0x3002: return 0xFE12; /* IDEOGRAPHIC FULL STOP */
	case 0x3008: return 0xFE3F; /* OPENING ANGLE BRACKET */
	case 0x3009: return 0xFE40; /* CLOSING ANGLE BRACKET */
	case 0x300A: return 0xFE3D; /* LEFT DOUBLE ANGLE BRACKET */
	case 0x300B: return 0xFE3E; /* RIGHT DOUBLE ANGLE BRACKET */
	case 0x300C: return 0xFE41; /* LEFT CORNER BRACKET */
	case 0x300D: return 0xFE42; /* RIGHT CORNER BRACKET */
	case 0x300E: return 0xFE43; /* LEFT WHITE CORNER BRACKET */
	case 0x300F: return 0xFE44; /* RIGHT WHITE CORNER BRACKET */
	case 0x3010: return 0xFE3B; /* LEFT BLACK LENTICULAR BRACKET */
	case 0x3011: return 0xFE3C; /* RIGHT BLACK LENTICULAR BRACKET */
	case 0x3014: return 0xFE39; /* LEFT TORTOISE SHELL BRACKET */
	case 0x3015: return 0xFE3A; /* RIGHT TORTOISE SHELL BRACKET */
	case 0x3016: return 0xFE17; /* LEFT WHITE LENTICULAR BRACKET */
	case 0x3017: return 0xFE18; /* RIGHT WHITE LENTICULAR BRACKET */

	case 0xFF01: return 0xFE15; /* FULLWIDTH EXCLAMATION MARK */
	case 0xFF08: return 0xFE35; /* FULLWIDTH LEFT PARENTHESIS */
	case 0xFF09: return 0xFE36; /* FULLWIDTH RIGHT PARENTHESIS */
	case 0xFF0C: return 0xFE10; /* FULLWIDTH COMMA */
	case 0xFF1A: return 0xFE13; /* FULLWIDTH COLON */
	case 0xFF1B: return 0xFE14; /* FULLWIDTH SEMICOLON */
	case 0xFF1F: return 0xFE16; /* FULLWIDTH QUESTION MARK */
	case 0xFF3B: return 0xFE47; /* FULLWIDTH LEFT SQUARE BRACKET */
	case 0xFF3D: return 0xFE48; /* FULLWIDTH RIGHT SQUARE BRACKET */
	case 0xFF3F: return 0xFE33; /* FULLWIDTH LOW LINE */
	case 0xFF5B: return 0xFE37; /* FULLWIDTH LEFT CURLY BRACKET */
	case 0xFF5D: return 0xFE38; /* FULLWIDTH RIGHT CURLY BRACKET */

	case 0x30FC: return 0xFE31; /* KATAKANA-HIRAGANA PROLONGED SOUND MARK */
	case 0xFF0D: return 0xFE31; /* FULLWIDTH HYPHEN-MINUS */

	default: return ucs;
	}
}

/*
 * Translate a CID into a glyph index for a FreeType-backed font.
 *
 * Without a to_ttf_cmap, the cid_to_gid table is consulted when the CID
 * lies inside it; otherwise the CID is returned as-is. With a
 * to_ttf_cmap, the CID is mapped through that cmap, swapped for its
 * vertical presentation form when the font is a substitute in vertical
 * writing mode, and finally looked up in the face's charmap.
 */
static int ft_cid_to_gid(pdf_font_desc *fontdesc, int cid)
{
	if (!fontdesc->to_ttf_cmap)
	{
		if (fontdesc->cid_to_gid && (size_t)cid < fontdesc->cid_to_gid_len && cid >= 0)
			return fontdesc->cid_to_gid[cid];
		return cid;
	}

	cid = pdf_lookup_cmap(fontdesc->to_ttf_cmap, cid);

	/* vertical presentation forms */
	if (fontdesc->font->flags.ft_substitute && fontdesc->wmode)
		cid = ft_vertical_presentation_form(cid);

	return ft_char_index(fontdesc->font->ft_face, cid);
}
236
237 int
pdf_font_cid_to_gid(fz_context * ctx,pdf_font_desc * fontdesc,int cid)238 pdf_font_cid_to_gid(fz_context *ctx, pdf_font_desc *fontdesc, int cid)
239 {
240 if (fontdesc->font->ft_face)
241 return ft_cid_to_gid(fontdesc, cid);
242 return cid;
243 }
244
/*
 * Return the advance of the glyph for cid, scaled to 1000 units per em.
 *
 * The advance is requested unscaled and unhinted. FreeType errors other
 * than FT_Err_Invalid_Argument are reported as warnings; on any error
 * the advance stays 0. A face reporting 0 units per em is treated as
 * having 2048.
 */
static int ft_width(fz_context *ctx, pdf_font_desc *fontdesc, int cid)
{
	const int load_flags = FT_LOAD_NO_SCALE | FT_LOAD_NO_HINTING | FT_LOAD_NO_BITMAP | FT_LOAD_IGNORE_TRANSFORM;
	FT_Face face;
	FT_Fixed advance = 0;
	FT_UShort em;
	int gid;
	int err;

	gid = ft_cid_to_gid(fontdesc, cid);
	face = fontdesc->font->ft_face;

	err = FT_Get_Advance(face, gid, load_flags, &advance);
	if (err != 0 && err != FT_Err_Invalid_Argument)
		fz_warn(ctx, "FT_Get_Advance(%d): %s", gid, ft_error_string(err));

	em = face->units_per_EM;
	if (em == 0)
		em = 2048;

	return advance * 1000 / em;
}
264
265 static const struct { int code; const char *name; } mre_diff_table[] =
266 {
267 { 173, "notequal" },
268 { 176, "infinity" },
269 { 178, "lessequal" },
270 { 179, "greaterequal" },
271 { 182, "partialdiff" },
272 { 183, "summation" },
273 { 184, "product" },
274 { 185, "pi" },
275 { 186, "integral" },
276 { 189, "Omega" },
277 { 195, "radical" },
278 { 197, "approxequal" },
279 { 198, "Delta" },
280 { 215, "lozenge" },
281 { 219, "Euro" },
282 { 240, "apple" },
283 };
284
lookup_mre_code(const char * name)285 static int lookup_mre_code(const char *name)
286 {
287 int i;
288 for (i = 0; i < (int)nelem(mre_diff_table); ++i)
289 if (!strcmp(name, mre_diff_table[i].name))
290 return mre_diff_table[i].code;
291 for (i = 0; i < 256; i++)
292 if (fz_glyph_name_from_mac_roman[i] && !strcmp(name, fz_glyph_name_from_mac_roman[i]))
293 return i;
294 return -1;
295 }
296
/*
 * Resolve a glyph name to a glyph index in three attempts, returning
 * the first positive result:
 *   1. strict glyph-name-to-unicode lookup, then the face's charmap;
 *   2. the face's own glyph name table;
 *   3. fuzzy glyph-name-to-unicode lookup, then the face's charmap.
 * Returns 0 if the name cannot be resolved.
 */
static int ft_find_glyph_by_unicode_name(FT_Face face, const char *name)
{
	int ucs;
	int gid = 0;

	/* Prefer exact unicode match if available. */
	ucs = fz_unicode_from_glyph_name_strict(name);
	if (ucs > 0)
		gid = ft_char_index(face, ucs);

	/* Fall back to font glyph name if we can. */
	if (gid <= 0)
		gid = ft_name_index(face, name);
	if (gid > 0)
		return gid;

	/* Fuzzy unicode match as last attempt. */
	ucs = fz_unicode_from_glyph_name(name);
	return ucs > 0 ? ft_char_index(face, ucs) : 0;
}
323
324 /*
325 * Load font files.
326 */
327
328 static void
pdf_load_builtin_font(fz_context * ctx,pdf_font_desc * fontdesc,const char * fontname,int has_descriptor)329 pdf_load_builtin_font(fz_context *ctx, pdf_font_desc *fontdesc, const char *fontname, int has_descriptor)
330 {
331 FT_Face face;
332 const char *clean_name = pdf_clean_font_name(fontname);
333 if (clean_name == fontname)
334 clean_name = "Times-Roman";
335
336 fontdesc->font = fz_load_system_font(ctx, fontname, 0, 0, !has_descriptor);
337 if (!fontdesc->font)
338 {
339 const unsigned char *data;
340 int len;
341
342 data = fz_lookup_base14_font(ctx, clean_name, &len);
343 if (!data)
344 fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot find builtin font: '%s'", fontname);
345
346 fontdesc->font = fz_new_font_from_memory(ctx, fontname, data, len, 0, 1);
347 fontdesc->font->flags.is_serif = !!strstr(clean_name, "Times");
348 }
349
350 if (!strcmp(clean_name, "Symbol") || !strcmp(clean_name, "ZapfDingbats"))
351 fontdesc->flags |= PDF_FD_SYMBOLIC;
352
353 face = fontdesc->font->ft_face;
354 fontdesc->ascent = 1000.0f * face->ascender / face->units_per_EM;
355 fontdesc->descent = 1000.0f * face->descender / face->units_per_EM;
356 }
357
358 static void
pdf_load_substitute_font(fz_context * ctx,pdf_font_desc * fontdesc,const char * fontname,int mono,int serif,int bold,int italic)359 pdf_load_substitute_font(fz_context *ctx, pdf_font_desc *fontdesc, const char *fontname, int mono, int serif, int bold, int italic)
360 {
361 fontdesc->font = fz_load_system_font(ctx, fontname, bold, italic, 0);
362 if (!fontdesc->font)
363 {
364 const unsigned char *data;
365 int len;
366
367 data = pdf_lookup_substitute_font(ctx, mono, serif, bold, italic, &len);
368 if (!data)
369 fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot find substitute font");
370
371 fontdesc->font = fz_new_font_from_memory(ctx, fontname, data, len, 0, 1);
372 fontdesc->font->flags.fake_bold = bold && !fontdesc->font->flags.is_bold;
373 fontdesc->font->flags.fake_italic = italic && !fontdesc->font->flags.is_italic;
374
375 fontdesc->font->flags.is_mono = mono;
376 fontdesc->font->flags.is_serif = serif;
377 fontdesc->font->flags.is_bold = bold;
378 fontdesc->font->flags.is_italic = italic;
379 }
380
381 fontdesc->font->flags.ft_substitute = 1;
382 fontdesc->font->flags.ft_stretch = 1;
383 }
384
385 static void
pdf_load_substitute_cjk_font(fz_context * ctx,pdf_font_desc * fontdesc,const char * fontname,int ros,int serif)386 pdf_load_substitute_cjk_font(fz_context *ctx, pdf_font_desc *fontdesc, const char *fontname, int ros, int serif)
387 {
388 fontdesc->font = fz_load_system_cjk_font(ctx, fontname, ros, serif);
389 if (!fontdesc->font)
390 {
391 const unsigned char *data;
392 int size;
393 int subfont;
394
395 data = fz_lookup_cjk_font(ctx, ros, &size, &subfont);
396 if (!data)
397 fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot find builtin CJK font");
398
399 /* A glyph bbox cache is too big for CJK fonts. */
400 fontdesc->font = fz_new_font_from_memory(ctx, fontname, data, size, subfont, 0);
401 }
402
403 fontdesc->font->flags.ft_substitute = 1;
404 fontdesc->font->flags.ft_stretch = 0;
405 }
406
407 static void
pdf_load_system_font(fz_context * ctx,pdf_font_desc * fontdesc,const char * fontname,const char * collection)408 pdf_load_system_font(fz_context *ctx, pdf_font_desc *fontdesc, const char *fontname, const char *collection)
409 {
410 int bold = 0;
411 int italic = 0;
412 int serif = 0;
413 int mono = 0;
414
415 if (strstr(fontname, "Bold"))
416 bold = 1;
417 if (strstr(fontname, "Italic"))
418 italic = 1;
419 if (strstr(fontname, "Oblique"))
420 italic = 1;
421
422 if (fontdesc->flags & PDF_FD_FIXED_PITCH)
423 mono = 1;
424 if (fontdesc->flags & PDF_FD_SERIF)
425 serif = 1;
426 if (fontdesc->flags & PDF_FD_ITALIC)
427 italic = 1;
428 if (fontdesc->flags & PDF_FD_FORCE_BOLD)
429 bold = 1;
430
431 if (collection)
432 {
433 if (!strcmp(collection, "Adobe-CNS1"))
434 pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_CNS, serif);
435 else if (!strcmp(collection, "Adobe-GB1"))
436 pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_GB, serif);
437 else if (!strcmp(collection, "Adobe-Japan1"))
438 pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_JAPAN, serif);
439 else if (!strcmp(collection, "Adobe-Korea1"))
440 pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_KOREA, serif);
441 else
442 {
443 if (strcmp(collection, "Adobe-Identity") != 0)
444 fz_warn(ctx, "unknown cid collection: %s", collection);
445 pdf_load_substitute_font(ctx, fontdesc, fontname, mono, serif, bold, italic);
446 }
447 }
448 else
449 {
450 pdf_load_substitute_font(ctx, fontdesc, fontname, mono, serif, bold, italic);
451 }
452 }
453
454 static void
pdf_load_embedded_font(fz_context * ctx,pdf_document * doc,pdf_font_desc * fontdesc,const char * fontname,pdf_obj * stmref)455 pdf_load_embedded_font(fz_context *ctx, pdf_document *doc, pdf_font_desc *fontdesc, const char *fontname, pdf_obj *stmref)
456 {
457 fz_buffer *buf;
458
459 buf = pdf_load_stream(ctx, stmref);
460 fz_try(ctx)
461 fontdesc->font = fz_new_font_from_buffer(ctx, fontname, buf, 0, 1);
462 fz_always(ctx)
463 fz_drop_buffer(ctx, buf);
464 fz_catch(ctx)
465 fz_rethrow(ctx);
466
467 fontdesc->size += fz_buffer_storage(ctx, buf, NULL);
468 fontdesc->is_embedded = 1;
469 }
470
471 /*
472 * Create and destroy
473 */
474
475 pdf_font_desc *
pdf_keep_font(fz_context * ctx,pdf_font_desc * fontdesc)476 pdf_keep_font(fz_context *ctx, pdf_font_desc *fontdesc)
477 {
478 return fz_keep_storable(ctx, &fontdesc->storable);
479 }
480
481 void
pdf_drop_font(fz_context * ctx,pdf_font_desc * fontdesc)482 pdf_drop_font(fz_context *ctx, pdf_font_desc *fontdesc)
483 {
484 fz_drop_storable(ctx, &fontdesc->storable);
485 }
486
487 static void
pdf_drop_font_imp(fz_context * ctx,fz_storable * fontdesc_)488 pdf_drop_font_imp(fz_context *ctx, fz_storable *fontdesc_)
489 {
490 pdf_font_desc *fontdesc = (pdf_font_desc *)fontdesc_;
491
492 fz_drop_font(ctx, fontdesc->font);
493 pdf_drop_cmap(ctx, fontdesc->encoding);
494 pdf_drop_cmap(ctx, fontdesc->to_ttf_cmap);
495 pdf_drop_cmap(ctx, fontdesc->to_unicode);
496 fz_free(ctx, fontdesc->cid_to_gid);
497 fz_free(ctx, fontdesc->cid_to_ucs);
498 fz_free(ctx, fontdesc->hmtx);
499 fz_free(ctx, fontdesc->vmtx);
500 fz_free(ctx, fontdesc);
501 }
502
503 pdf_font_desc *
pdf_new_font_desc(fz_context * ctx)504 pdf_new_font_desc(fz_context *ctx)
505 {
506 pdf_font_desc *fontdesc;
507
508 fontdesc = fz_malloc_struct(ctx, pdf_font_desc);
509 FZ_INIT_STORABLE(fontdesc, 1, pdf_drop_font_imp);
510 fontdesc->size = sizeof(pdf_font_desc);
511
512 fontdesc->font = NULL;
513
514 fontdesc->flags = 0;
515 fontdesc->italic_angle = 0;
516 fontdesc->ascent = 800;
517 fontdesc->descent = -200;
518 fontdesc->cap_height = 800;
519 fontdesc->x_height = 500;
520 fontdesc->missing_width = 0;
521
522 fontdesc->encoding = NULL;
523 fontdesc->to_ttf_cmap = NULL;
524 fontdesc->cid_to_gid_len = 0;
525 fontdesc->cid_to_gid = NULL;
526
527 fontdesc->to_unicode = NULL;
528 fontdesc->cid_to_ucs_len = 0;
529 fontdesc->cid_to_ucs = NULL;
530
531 fontdesc->wmode = 0;
532
533 fontdesc->hmtx_cap = 0;
534 fontdesc->vmtx_cap = 0;
535 fontdesc->hmtx_len = 0;
536 fontdesc->vmtx_len = 0;
537 fontdesc->hmtx = NULL;
538 fontdesc->vmtx = NULL;
539
540 fontdesc->dhmtx.lo = 0x0000;
541 fontdesc->dhmtx.hi = 0xFFFF;
542 fontdesc->dhmtx.w = 1000;
543
544 fontdesc->dvmtx.lo = 0x0000;
545 fontdesc->dvmtx.hi = 0xFFFF;
546 fontdesc->dvmtx.x = 0;
547 fontdesc->dvmtx.y = 880;
548 fontdesc->dvmtx.w = -1000;
549
550 fontdesc->is_embedded = 0;
551
552 return fontdesc;
553 }
554
555 /*
556 * Simple fonts (Type1 and TrueType)
557 */
558
559 static FT_CharMap
select_type1_cmap(FT_Face face)560 select_type1_cmap(FT_Face face)
561 {
562 int i;
563 for (i = 0; i < face->num_charmaps; i++)
564 if (face->charmaps[i]->platform_id == 7)
565 return face->charmaps[i];
566 if (face->num_charmaps > 0)
567 return face->charmaps[0];
568 return NULL;
569 }
570
571 static FT_CharMap
select_truetype_cmap(FT_Face face,int symbolic)572 select_truetype_cmap(FT_Face face, int symbolic)
573 {
574 int i;
575
576 /* First look for a Microsoft symbolic cmap, if applicable */
577 if (symbolic)
578 {
579 for (i = 0; i < face->num_charmaps; i++)
580 if (face->charmaps[i]->platform_id == 3 && face->charmaps[i]->encoding_id == 0)
581 return face->charmaps[i];
582 }
583
584 /* Then look for a Microsoft Unicode cmap */
585 for (i = 0; i < face->num_charmaps; i++)
586 if (face->charmaps[i]->platform_id == 3 && face->charmaps[i]->encoding_id == 1)
587 if (FT_Get_CMap_Format(face->charmaps[i]) != -1)
588 return face->charmaps[i];
589
590 /* Finally look for an Apple MacRoman cmap */
591 for (i = 0; i < face->num_charmaps; i++)
592 if (face->charmaps[i]->platform_id == 1 && face->charmaps[i]->encoding_id == 0)
593 if (FT_Get_CMap_Format(face->charmaps[i]) != -1)
594 return face->charmaps[i];
595
596 if (face->num_charmaps > 0)
597 if (FT_Get_CMap_Format(face->charmaps[0]) != -1)
598 return face->charmaps[0];
599 return NULL;
600 }
601
602 static FT_CharMap
select_unknown_cmap(FT_Face face)603 select_unknown_cmap(FT_Face face)
604 {
605 if (face->num_charmaps > 0)
606 return face->charmaps[0];
607 return NULL;
608 }
609
610 static pdf_font_desc *
pdf_load_simple_font(fz_context * ctx,pdf_document * doc,pdf_obj * dict)611 pdf_load_simple_font(fz_context *ctx, pdf_document *doc, pdf_obj *dict)
612 {
613 const char *basefont;
614 pdf_obj *descriptor;
615 pdf_obj *encoding;
616 pdf_obj *widths;
617 unsigned short *etable = NULL;
618 pdf_font_desc *fontdesc = NULL;
619 pdf_obj *subtype;
620 FT_Face face;
621 FT_CharMap cmap;
622 int symbolic;
623 int kind;
624 int glyph;
625
626 const char *estrings[256];
627 char ebuffer[256][32];
628 int i, k, n;
629 int fterr;
630 int has_lock = 0;
631
632 fz_var(fontdesc);
633 fz_var(etable);
634 fz_var(has_lock);
635
636 /* Load font file */
637 fz_try(ctx)
638 {
639 fontdesc = pdf_new_font_desc(ctx);
640
641 basefont = pdf_to_name(ctx, pdf_dict_get(ctx, dict, PDF_NAME(BaseFont)));
642
643 descriptor = pdf_dict_get(ctx, dict, PDF_NAME(FontDescriptor));
644 if (descriptor)
645 pdf_load_font_descriptor(ctx, doc, fontdesc, descriptor, NULL, basefont, 0);
646 else
647 pdf_load_builtin_font(ctx, fontdesc, basefont, 0);
648
649 /* Some chinese documents mistakenly consider WinAnsiEncoding to be codepage 936 */
650 if (descriptor && pdf_is_string(ctx, pdf_dict_get(ctx, descriptor, PDF_NAME(FontName))) &&
651 !pdf_dict_get(ctx, dict, PDF_NAME(ToUnicode)) &&
652 pdf_name_eq(ctx, pdf_dict_get(ctx, dict, PDF_NAME(Encoding)), PDF_NAME(WinAnsiEncoding)) &&
653 pdf_dict_get_int(ctx, descriptor, PDF_NAME(Flags)) == 4)
654 {
655 char *cp936fonts[] = {
656 "\xCB\xCE\xCC\xE5", "SimSun,Regular",
657 "\xBA\xDA\xCC\xE5", "SimHei,Regular",
658 "\xBF\xAC\xCC\xE5_GB2312", "SimKai,Regular",
659 "\xB7\xC2\xCB\xCE_GB2312", "SimFang,Regular",
660 "\xC1\xA5\xCA\xE9", "SimLi,Regular",
661 NULL
662 };
663 for (i = 0; cp936fonts[i]; i += 2)
664 if (!strcmp(basefont, cp936fonts[i]))
665 break;
666 if (cp936fonts[i])
667 {
668 fz_warn(ctx, "workaround for S22PDF lying about chinese font encodings");
669 pdf_drop_font(ctx, fontdesc);
670 fontdesc = NULL;
671 fontdesc = pdf_new_font_desc(ctx);
672 pdf_load_font_descriptor(ctx, doc, fontdesc, descriptor, "Adobe-GB1", cp936fonts[i+1], 0);
673 fontdesc->encoding = pdf_load_system_cmap(ctx, "GBK-EUC-H");
674 fontdesc->to_unicode = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2");
675 fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2");
676
677 goto skip_encoding;
678 }
679 }
680
681 face = fontdesc->font->ft_face;
682 kind = ft_kind(face);
683
684 /* Encoding */
685
686 symbolic = fontdesc->flags & 4;
687
688 if (kind == TYPE1)
689 cmap = select_type1_cmap(face);
690 else if (kind == TRUETYPE)
691 cmap = select_truetype_cmap(face, symbolic);
692 else
693 cmap = select_unknown_cmap(face);
694
695 if (cmap)
696 {
697 fterr = FT_Set_Charmap(face, cmap);
698 if (fterr)
699 fz_warn(ctx, "freetype could not set cmap: %s", ft_error_string(fterr));
700 }
701 else
702 fz_warn(ctx, "freetype could not find any cmaps");
703
704 /* FIXME: etable may leak on error. */
705 etable = Memento_label(fz_malloc_array(ctx, 256, unsigned short), "cid_to_gid");
706 fontdesc->size += 256 * sizeof(unsigned short);
707 for (i = 0; i < 256; i++)
708 {
709 estrings[i] = NULL;
710 etable[i] = 0;
711 }
712
713 encoding = pdf_dict_get(ctx, dict, PDF_NAME(Encoding));
714 if (encoding)
715 {
716 if (pdf_is_name(ctx, encoding))
717 pdf_load_encoding(estrings, pdf_to_name(ctx, encoding));
718
719 if (pdf_is_dict(ctx, encoding))
720 {
721 pdf_obj *base, *diff, *item;
722
723 base = pdf_dict_get(ctx, encoding, PDF_NAME(BaseEncoding));
724 if (pdf_is_name(ctx, base))
725 pdf_load_encoding(estrings, pdf_to_name(ctx, base));
726 else if (!fontdesc->is_embedded && !symbolic)
727 pdf_load_encoding(estrings, "StandardEncoding");
728
729 diff = pdf_dict_get(ctx, encoding, PDF_NAME(Differences));
730 if (pdf_is_array(ctx, diff))
731 {
732 n = pdf_array_len(ctx, diff);
733 k = 0;
734 for (i = 0; i < n; i++)
735 {
736 item = pdf_array_get(ctx, diff, i);
737 if (pdf_is_int(ctx, item))
738 k = pdf_to_int(ctx, item);
739 if (pdf_is_name(ctx, item) && k >= 0 && k < (int)nelem(estrings))
740 estrings[k++] = pdf_to_name(ctx, item);
741 }
742 }
743 }
744 }
745 else if (!fontdesc->is_embedded && !symbolic)
746 pdf_load_encoding(estrings, "StandardEncoding");
747
748 /* start with the builtin encoding */
749 for (i = 0; i < 256; i++)
750 etable[i] = ft_char_index(face, i);
751
752 fz_lock(ctx, FZ_LOCK_FREETYPE);
753 has_lock = 1;
754
755 /* built-in and substitute fonts may be a different type than what the document expects */
756 subtype = pdf_dict_get(ctx, dict, PDF_NAME(Subtype));
757 if (pdf_name_eq(ctx, subtype, PDF_NAME(Type1)))
758 kind = TYPE1;
759 else if (pdf_name_eq(ctx, subtype, PDF_NAME(MMType1)))
760 kind = TYPE1;
761 else if (pdf_name_eq(ctx, subtype, PDF_NAME(TrueType)))
762 kind = TRUETYPE;
763 else if (pdf_name_eq(ctx, subtype, PDF_NAME(CIDFontType0)))
764 kind = TYPE1;
765 else if (pdf_name_eq(ctx, subtype, PDF_NAME(CIDFontType2)))
766 kind = TRUETYPE;
767
768 /* encode by glyph name where we can */
769 if (kind == TYPE1)
770 {
771 for (i = 0; i < 256; i++)
772 {
773 if (estrings[i])
774 {
775 glyph = ft_name_index(face, estrings[i]);
776 if (glyph > 0)
777 etable[i] = glyph;
778 }
779 }
780 }
781
782 /* encode by glyph name where we can */
783 if (kind == TRUETYPE)
784 {
785 /* Unicode cmap */
786 if (!symbolic && face->charmap && face->charmap->platform_id == 3)
787 {
788 for (i = 0; i < 256; i++)
789 {
790 if (estrings[i])
791 {
792 glyph = ft_find_glyph_by_unicode_name(face, estrings[i]);
793 if (glyph > 0)
794 etable[i] = glyph;
795 }
796 }
797 }
798
799 /* MacRoman cmap */
800 else if (!symbolic && face->charmap && face->charmap->platform_id == 1)
801 {
802 for (i = 0; i < 256; i++)
803 {
804 if (estrings[i])
805 {
806 int mrcode = lookup_mre_code(estrings[i]);
807 glyph = 0;
808 if (mrcode > 0)
809 glyph = ft_char_index(face, mrcode);
810 if (glyph == 0)
811 glyph = ft_name_index(face, estrings[i]);
812 if (glyph > 0)
813 etable[i] = glyph;
814 }
815 }
816 }
817
818 /* Symbolic cmap */
819 else if (!face->charmap || face->charmap->encoding != FT_ENCODING_MS_SYMBOL)
820 {
821 for (i = 0; i < 256; i++)
822 {
823 if (estrings[i])
824 {
825 glyph = ft_name_index(face, estrings[i]);
826 if (glyph > 0)
827 etable[i] = glyph;
828 }
829 }
830 }
831 }
832
833 /* try to reverse the glyph names from the builtin encoding */
834 for (i = 0; i < 256; i++)
835 {
836 if (etable[i] && !estrings[i])
837 {
838 if (FT_HAS_GLYPH_NAMES(face))
839 {
840 fterr = FT_Get_Glyph_Name(face, etable[i], ebuffer[i], 32);
841 if (fterr)
842 fz_warn(ctx, "freetype get glyph name (gid %d): %s", etable[i], ft_error_string(fterr));
843 if (ebuffer[i][0])
844 estrings[i] = ebuffer[i];
845 }
846 else
847 {
848 estrings[i] = (char*) fz_glyph_name_from_win_ansi[i]; /* discard const */
849 }
850 }
851 }
852
853 /* symbolic Type 1 fonts with an implicit encoding and non-standard glyph names */
854 if (kind == TYPE1 && symbolic)
855 {
856 for (i = 0; i < 256; i++)
857 if (etable[i] && estrings[i] && !fz_unicode_from_glyph_name(estrings[i]))
858 estrings[i] = (char*) fz_glyph_name_from_adobe_standard[i];
859 }
860
861 fz_unlock(ctx, FZ_LOCK_FREETYPE);
862 has_lock = 0;
863
864 fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 1);
865 fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding);
866 fontdesc->cid_to_gid_len = 256;
867 fontdesc->cid_to_gid = etable;
868
869 fz_try(ctx)
870 {
871 pdf_load_to_unicode(ctx, doc, fontdesc, estrings, NULL, pdf_dict_get(ctx, dict, PDF_NAME(ToUnicode)));
872 }
873 fz_catch(ctx)
874 {
875 fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
876 fz_warn(ctx, "cannot load ToUnicode CMap");
877 }
878
879 skip_encoding:
880
881 /* Widths */
882
883 pdf_set_default_hmtx(ctx, fontdesc, fontdesc->missing_width);
884
885 widths = pdf_dict_get(ctx, dict, PDF_NAME(Widths));
886 if (widths)
887 {
888 int first, last;
889
890 first = pdf_dict_get_int(ctx, dict, PDF_NAME(FirstChar));
891 last = pdf_dict_get_int(ctx, dict, PDF_NAME(LastChar));
892
893 if (first < 0 || last > 255 || first > last)
894 first = last = 0;
895
896 for (i = 0; i < last - first + 1; i++)
897 {
898 int wid = pdf_array_get_int(ctx, widths, i);
899 pdf_add_hmtx(ctx, fontdesc, i + first, i + first, wid);
900 }
901 }
902 else
903 {
904 for (i = 0; i < 256; i++)
905 pdf_add_hmtx(ctx, fontdesc, i, i, ft_width(ctx, fontdesc, i));
906 }
907
908 pdf_end_hmtx(ctx, fontdesc);
909 }
910 fz_catch(ctx)
911 {
912 if (has_lock)
913 fz_unlock(ctx, FZ_LOCK_FREETYPE);
914 if (fontdesc && etable != fontdesc->cid_to_gid)
915 fz_free(ctx, etable);
916 pdf_drop_font(ctx, fontdesc);
917 fz_rethrow(ctx);
918 }
919 return fontdesc;
920 }
921
922 static int
hail_mary_make_hash_key(fz_context * ctx,fz_store_hash * hash,void * key_)923 hail_mary_make_hash_key(fz_context *ctx, fz_store_hash *hash, void *key_)
924 {
925 hash->u.pi.i = 0;
926 hash->u.pi.ptr = NULL;
927 return 1;
928 }
929
930 static void *
hail_mary_keep_key(fz_context * ctx,void * key)931 hail_mary_keep_key(fz_context *ctx, void *key)
932 {
933 return key;
934 }
935
936 static void
hail_mary_drop_key(fz_context * ctx,void * key)937 hail_mary_drop_key(fz_context *ctx, void *key)
938 {
939 }
940
941 static int
hail_mary_cmp_key(fz_context * ctx,void * k0,void * k1)942 hail_mary_cmp_key(fz_context *ctx, void *k0, void *k1)
943 {
944 return k0 == k1;
945 }
946
947 static void
hail_mary_format_key(fz_context * ctx,char * s,size_t n,void * key_)948 hail_mary_format_key(fz_context *ctx, char *s, size_t n, void *key_)
949 {
950 fz_strlcpy(s, "(hail mary font)", n);
951 }
952
953 static int hail_mary_store_key; /* Dummy */
954
955 static const fz_store_type hail_mary_store_type =
956 {
957 "hail-mary",
958 hail_mary_make_hash_key,
959 hail_mary_keep_key,
960 hail_mary_drop_key,
961 hail_mary_cmp_key,
962 hail_mary_format_key,
963 NULL
964 };
965
966 pdf_font_desc *
pdf_load_hail_mary_font(fz_context * ctx,pdf_document * doc)967 pdf_load_hail_mary_font(fz_context *ctx, pdf_document *doc)
968 {
969 pdf_font_desc *fontdesc;
970 pdf_font_desc *existing;
971
972 if ((fontdesc = fz_find_item(ctx, pdf_drop_font_imp, &hail_mary_store_key, &hail_mary_store_type)) != NULL)
973 {
974 return fontdesc;
975 }
976
977 /* FIXME: Get someone with a clue about fonts to fix this */
978 fontdesc = pdf_load_simple_font(ctx, doc, NULL);
979
980 existing = fz_store_item(ctx, &hail_mary_store_key, fontdesc, fontdesc->size, &hail_mary_store_type);
981 assert(existing == NULL);
982 (void)existing; /* Silence warning in release builds */
983
984 return fontdesc;
985 }
986
987 /*
988 * CID Fonts
989 */
990
991 static pdf_font_desc *
load_cid_font(fz_context * ctx,pdf_document * doc,pdf_obj * dict,pdf_obj * encoding,pdf_obj * to_unicode)992 load_cid_font(fz_context *ctx, pdf_document *doc, pdf_obj *dict, pdf_obj *encoding, pdf_obj *to_unicode)
993 {
994 pdf_obj *widths;
995 pdf_obj *descriptor;
996 pdf_font_desc *fontdesc = NULL;
997 fz_buffer *buf = NULL;
998 pdf_cmap *cmap;
999 FT_Face face;
1000 char collection[256];
1001 const char *basefont;
1002 int i, k, fterr;
1003 pdf_obj *cidtogidmap;
1004 pdf_obj *obj;
1005 int dw;
1006
1007 fz_var(fontdesc);
1008 fz_var(buf);
1009
1010 fz_try(ctx)
1011 {
1012 /* Get font name and CID collection */
1013
1014 basefont = pdf_to_name(ctx, pdf_dict_get(ctx, dict, PDF_NAME(BaseFont)));
1015
1016 {
1017 pdf_obj *cidinfo;
1018 const char *reg, *ord;
1019
1020 cidinfo = pdf_dict_get(ctx, dict, PDF_NAME(CIDSystemInfo));
1021 if (!cidinfo)
1022 fz_throw(ctx, FZ_ERROR_SYNTAX, "cid font is missing info");
1023
1024 reg = pdf_dict_get_string(ctx, cidinfo, PDF_NAME(Registry), NULL);
1025 ord = pdf_dict_get_string(ctx, cidinfo, PDF_NAME(Ordering), NULL);
1026 fz_snprintf(collection, sizeof collection, "%s-%s", reg, ord);
1027 }
1028
1029 /* Encoding */
1030
1031 if (pdf_is_name(ctx, encoding))
1032 {
1033 cmap = pdf_load_system_cmap(ctx, pdf_to_name(ctx, encoding));
1034 }
1035 else if (pdf_is_indirect(ctx, encoding))
1036 {
1037 cmap = pdf_load_embedded_cmap(ctx, doc, encoding);
1038 }
1039 else
1040 {
1041 fz_throw(ctx, FZ_ERROR_SYNTAX, "font missing encoding");
1042 }
1043
1044 /* Load font file */
1045
1046 fontdesc = pdf_new_font_desc(ctx);
1047
1048 fontdesc->encoding = cmap;
1049 fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding);
1050
1051 pdf_set_font_wmode(ctx, fontdesc, pdf_cmap_wmode(ctx, fontdesc->encoding));
1052
1053 descriptor = pdf_dict_get(ctx, dict, PDF_NAME(FontDescriptor));
1054 if (!descriptor)
1055 fz_throw(ctx, FZ_ERROR_SYNTAX, "missing font descriptor");
1056 pdf_load_font_descriptor(ctx, doc, fontdesc, descriptor, collection, basefont, 1);
1057
1058 face = fontdesc->font->ft_face;
1059
1060 /* Apply encoding */
1061
1062 cidtogidmap = pdf_dict_get(ctx, dict, PDF_NAME(CIDToGIDMap));
1063 if (pdf_is_stream(ctx, cidtogidmap))
1064 {
1065 size_t z, len;
1066 unsigned char *data;
1067
1068 buf = pdf_load_stream(ctx, cidtogidmap);
1069
1070 len = fz_buffer_storage(ctx, buf, &data);
1071 fontdesc->cid_to_gid_len = len / 2;
1072 fontdesc->cid_to_gid = Memento_label(fz_malloc_array(ctx, fontdesc->cid_to_gid_len, unsigned short), "cid_to_gid_map");
1073 fontdesc->size += fontdesc->cid_to_gid_len * sizeof(unsigned short);
1074 for (z = 0; z < fontdesc->cid_to_gid_len; z++)
1075 fontdesc->cid_to_gid[z] = (data[z * 2] << 8) + data[z * 2 + 1];
1076 }
1077 else if (cidtogidmap && !pdf_name_eq(ctx, PDF_NAME(Identity), cidtogidmap))
1078 {
1079 fz_warn(ctx, "ignoring unknown CIDToGIDMap entry");
1080 }
1081
1082 /* if font is external, cidtogidmap should not be identity */
1083 /* so we map from cid to unicode and then map that through the (3 1) */
1084 /* unicode cmap to get a glyph id */
1085 else if (fontdesc->font->flags.ft_substitute)
1086 {
1087 fterr = FT_Select_Charmap(face, ft_encoding_unicode);
1088 if (fterr)
1089 fz_throw(ctx, FZ_ERROR_SYNTAX, "no unicode cmap when emulating CID font: %s", ft_error_string(fterr));
1090
1091 if (!strcmp(collection, "Adobe-CNS1"))
1092 fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-CNS1-UCS2");
1093 else if (!strcmp(collection, "Adobe-GB1"))
1094 fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2");
1095 else if (!strcmp(collection, "Adobe-Japan1"))
1096 fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan1-UCS2");
1097 else if (!strcmp(collection, "Adobe-Japan2"))
1098 fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan2-UCS2");
1099 else if (!strcmp(collection, "Adobe-Korea1"))
1100 fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Korea1-UCS2");
1101 }
1102
1103 pdf_load_to_unicode(ctx, doc, fontdesc, NULL, collection, to_unicode);
1104
1105 /* If we have an identity encoding, we're supposed to use the glyph ids directly.
1106 * If we only have a substitute font, that won't work.
1107 * Make a last ditch attempt by using
1108 * the ToUnicode table if it exists to map via the substitute font's cmap. */
1109 if (strstr(fontdesc->encoding->cmap_name, "Identity-") && fontdesc->font->flags.ft_substitute)
1110 {
1111 fz_warn(ctx, "non-embedded font using identity encoding: %s", basefont);
1112 if (fontdesc->to_unicode && !fontdesc->to_ttf_cmap)
1113 fontdesc->to_ttf_cmap = pdf_keep_cmap(ctx, fontdesc->to_unicode);
1114 }
1115
1116 /* Horizontal */
1117
1118 dw = 1000;
1119 obj = pdf_dict_get(ctx, dict, PDF_NAME(DW));
1120 if (obj)
1121 dw = pdf_to_int(ctx, obj);
1122 pdf_set_default_hmtx(ctx, fontdesc, dw);
1123
1124 widths = pdf_dict_get(ctx, dict, PDF_NAME(W));
1125 if (widths)
1126 {
1127 int c0, c1, w, n, m;
1128
1129 n = pdf_array_len(ctx, widths);
1130 for (i = 0; i < n; )
1131 {
1132 c0 = pdf_array_get_int(ctx, widths, i);
1133 obj = pdf_array_get(ctx, widths, i + 1);
1134 if (pdf_is_array(ctx, obj))
1135 {
1136 m = pdf_array_len(ctx, obj);
1137 for (k = 0; k < m; k++)
1138 {
1139 w = pdf_array_get_int(ctx, obj, k);
1140 pdf_add_hmtx(ctx, fontdesc, c0 + k, c0 + k, w);
1141 }
1142 i += 2;
1143 }
1144 else
1145 {
1146 c1 = pdf_to_int(ctx, obj);
1147 w = pdf_array_get_int(ctx, widths, i + 2);
1148 pdf_add_hmtx(ctx, fontdesc, c0, c1, w);
1149 i += 3;
1150 }
1151 }
1152 }
1153
1154 pdf_end_hmtx(ctx, fontdesc);
1155
1156 /* Vertical */
1157
1158 if (pdf_cmap_wmode(ctx, fontdesc->encoding) == 1)
1159 {
1160 int dw2y = 880;
1161 int dw2w = -1000;
1162
1163 obj = pdf_dict_get(ctx, dict, PDF_NAME(DW2));
1164 if (obj)
1165 {
1166 dw2y = pdf_array_get_int(ctx, obj, 0);
1167 dw2w = pdf_array_get_int(ctx, obj, 1);
1168 }
1169
1170 pdf_set_default_vmtx(ctx, fontdesc, dw2y, dw2w);
1171
1172 widths = pdf_dict_get(ctx, dict, PDF_NAME(W2));
1173 if (widths)
1174 {
1175 int c0, c1, w, x, y, n;
1176
1177 n = pdf_array_len(ctx, widths);
1178 for (i = 0; i < n; )
1179 {
1180 c0 = pdf_array_get_int(ctx, widths, i);
1181 obj = pdf_array_get(ctx, widths, i + 1);
1182 if (pdf_is_array(ctx, obj))
1183 {
1184 int m = pdf_array_len(ctx, obj);
1185 for (k = 0; k * 3 < m; k ++)
1186 {
1187 w = pdf_array_get_int(ctx, obj, k * 3 + 0);
1188 x = pdf_array_get_int(ctx, obj, k * 3 + 1);
1189 y = pdf_array_get_int(ctx, obj, k * 3 + 2);
1190 pdf_add_vmtx(ctx, fontdesc, c0 + k, c0 + k, x, y, w);
1191 }
1192 i += 2;
1193 }
1194 else
1195 {
1196 c1 = pdf_to_int(ctx, obj);
1197 w = pdf_array_get_int(ctx, widths, i + 2);
1198 x = pdf_array_get_int(ctx, widths, i + 3);
1199 y = pdf_array_get_int(ctx, widths, i + 4);
1200 pdf_add_vmtx(ctx, fontdesc, c0, c1, x, y, w);
1201 i += 5;
1202 }
1203 }
1204 }
1205
1206 pdf_end_vmtx(ctx, fontdesc);
1207 }
1208 }
1209 fz_always(ctx)
1210 fz_drop_buffer(ctx, buf);
1211 fz_catch(ctx)
1212 {
1213 pdf_drop_font(ctx, fontdesc);
1214 fz_rethrow(ctx);
1215 }
1216
1217 return fontdesc;
1218 }
1219
1220 static pdf_font_desc *
pdf_load_type0_font(fz_context * ctx,pdf_document * doc,pdf_obj * dict)1221 pdf_load_type0_font(fz_context *ctx, pdf_document *doc, pdf_obj *dict)
1222 {
1223 pdf_obj *dfonts;
1224 pdf_obj *dfont;
1225 pdf_obj *subtype;
1226 pdf_obj *encoding;
1227 pdf_obj *to_unicode;
1228
1229 dfonts = pdf_dict_get(ctx, dict, PDF_NAME(DescendantFonts));
1230 if (!dfonts)
1231 fz_throw(ctx, FZ_ERROR_SYNTAX, "cid font is missing descendant fonts");
1232
1233 dfont = pdf_array_get(ctx, dfonts, 0);
1234
1235 subtype = pdf_dict_get(ctx, dfont, PDF_NAME(Subtype));
1236 encoding = pdf_dict_get(ctx, dict, PDF_NAME(Encoding));
1237 to_unicode = pdf_dict_get(ctx, dict, PDF_NAME(ToUnicode));
1238
1239 if (pdf_is_name(ctx, subtype) && pdf_name_eq(ctx, subtype, PDF_NAME(CIDFontType0)))
1240 return load_cid_font(ctx, doc, dfont, encoding, to_unicode);
1241 if (pdf_is_name(ctx, subtype) && pdf_name_eq(ctx, subtype, PDF_NAME(CIDFontType2)))
1242 return load_cid_font(ctx, doc, dfont, encoding, to_unicode);
1243 fz_throw(ctx, FZ_ERROR_SYNTAX, "unknown cid font type");
1244 }
1245
1246 /*
1247 * FontDescriptor
1248 */
1249
1250 static void
pdf_load_font_descriptor(fz_context * ctx,pdf_document * doc,pdf_font_desc * fontdesc,pdf_obj * dict,const char * collection,const char * basefont,int iscidfont)1251 pdf_load_font_descriptor(fz_context *ctx, pdf_document *doc, pdf_font_desc *fontdesc, pdf_obj *dict,
1252 const char *collection, const char *basefont, int iscidfont)
1253 {
1254 pdf_obj *obj1, *obj2, *obj3, *obj;
1255 const char *fontname;
1256 FT_Face face;
1257
1258 /* Prefer BaseFont; don't bother with FontName */
1259 fontname = basefont;
1260
1261 fontdesc->flags = pdf_dict_get_int(ctx, dict, PDF_NAME(Flags));
1262 fontdesc->italic_angle = pdf_dict_get_real(ctx, dict, PDF_NAME(ItalicAngle));
1263 fontdesc->ascent = pdf_dict_get_real(ctx, dict, PDF_NAME(Ascent));
1264 fontdesc->descent = pdf_dict_get_real(ctx, dict, PDF_NAME(Descent));
1265 fontdesc->cap_height = pdf_dict_get_real(ctx, dict, PDF_NAME(CapHeight));
1266 fontdesc->x_height = pdf_dict_get_real(ctx, dict, PDF_NAME(XHeight));
1267 fontdesc->missing_width = pdf_dict_get_real(ctx, dict, PDF_NAME(MissingWidth));
1268
1269 obj1 = pdf_dict_get(ctx, dict, PDF_NAME(FontFile));
1270 obj2 = pdf_dict_get(ctx, dict, PDF_NAME(FontFile2));
1271 obj3 = pdf_dict_get(ctx, dict, PDF_NAME(FontFile3));
1272 obj = obj1 ? obj1 : obj2 ? obj2 : obj3;
1273
1274 if (pdf_is_indirect(ctx, obj))
1275 {
1276 fz_try(ctx)
1277 {
1278 pdf_load_embedded_font(ctx, doc, fontdesc, fontname, obj);
1279 }
1280 fz_catch(ctx)
1281 {
1282 fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
1283 fz_warn(ctx, "ignored error when loading embedded font; attempting to load system font");
1284 if (!iscidfont && fontname != pdf_clean_font_name(fontname))
1285 pdf_load_builtin_font(ctx, fontdesc, fontname, 1);
1286 else
1287 pdf_load_system_font(ctx, fontdesc, fontname, collection);
1288 }
1289 }
1290 else
1291 {
1292 if (!iscidfont && fontname != pdf_clean_font_name(fontname))
1293 pdf_load_builtin_font(ctx, fontdesc, fontname, 1);
1294 else
1295 pdf_load_system_font(ctx, fontdesc, fontname, collection);
1296 }
1297
1298 /* Check for DynaLab fonts that must use hinting */
1299 face = fontdesc->font->ft_face;
1300 if (ft_kind(face) == TRUETYPE)
1301 {
1302 /* FreeType's own 'tricky' font detection needs a bit of help */
1303 if (is_dynalab(fontdesc->font->name))
1304 face->face_flags |= FT_FACE_FLAG_TRICKY;
1305
1306 if (fontdesc->ascent == 0.0f)
1307 fontdesc->ascent = 1000.0f * face->ascender / face->units_per_EM;
1308
1309 if (fontdesc->descent == 0.0f)
1310 fontdesc->descent = 1000.0f * face->descender / face->units_per_EM;
1311 }
1312 }
1313
1314 static void
pdf_make_width_table(fz_context * ctx,pdf_font_desc * fontdesc)1315 pdf_make_width_table(fz_context *ctx, pdf_font_desc *fontdesc)
1316 {
1317 fz_font *font = fontdesc->font;
1318 int i, k, n, cid, gid;
1319
1320 n = 0;
1321 for (i = 0; i < fontdesc->hmtx_len; i++)
1322 {
1323 for (k = fontdesc->hmtx[i].lo; k <= fontdesc->hmtx[i].hi; k++)
1324 {
1325 cid = pdf_lookup_cmap(fontdesc->encoding, k);
1326 gid = pdf_font_cid_to_gid(ctx, fontdesc, cid);
1327 if (gid > n)
1328 n = gid;
1329 }
1330 }
1331
1332 font->width_count = n + 1;
1333 font->width_table = Memento_label(fz_malloc_array(ctx, font->width_count, short), "font_widths");
1334 fontdesc->size += font->width_count * sizeof(short);
1335
1336 font->width_default = fontdesc->dhmtx.w;
1337 for (i = 0; i < font->width_count; i++)
1338 font->width_table[i] = -1;
1339
1340 for (i = 0; i < fontdesc->hmtx_len; i++)
1341 {
1342 for (k = fontdesc->hmtx[i].lo; k <= fontdesc->hmtx[i].hi; k++)
1343 {
1344 cid = pdf_lookup_cmap(fontdesc->encoding, k);
1345 gid = pdf_font_cid_to_gid(ctx, fontdesc, cid);
1346 if (gid >= 0 && gid < font->width_count)
1347 font->width_table[gid] = fz_maxi(fontdesc->hmtx[i].w, font->width_table[gid]);
1348 }
1349 }
1350
1351 for (i = 0; i < font->width_count; i++)
1352 if (font->width_table[i] == -1)
1353 font->width_table[i] = font->width_default;
1354 }
1355
1356 pdf_font_desc *
pdf_load_font(fz_context * ctx,pdf_document * doc,pdf_obj * rdb,pdf_obj * dict)1357 pdf_load_font(fz_context *ctx, pdf_document *doc, pdf_obj *rdb, pdf_obj *dict)
1358 {
1359 pdf_obj *subtype;
1360 pdf_obj *dfonts;
1361 pdf_obj *charprocs;
1362 pdf_font_desc *fontdesc = NULL;
1363 int type3 = 0;
1364
1365 if (pdf_obj_marked(ctx, dict))
1366 fz_throw(ctx, FZ_ERROR_SYNTAX, "Recursive Type3 font definition.");
1367
1368 if ((fontdesc = pdf_find_item(ctx, pdf_drop_font_imp, dict)) != NULL)
1369 {
1370 return fontdesc;
1371 }
1372
1373 subtype = pdf_dict_get(ctx, dict, PDF_NAME(Subtype));
1374 dfonts = pdf_dict_get(ctx, dict, PDF_NAME(DescendantFonts));
1375 charprocs = pdf_dict_get(ctx, dict, PDF_NAME(CharProcs));
1376
1377 if (pdf_name_eq(ctx, subtype, PDF_NAME(Type0)))
1378 fontdesc = pdf_load_type0_font(ctx, doc, dict);
1379 else if (pdf_name_eq(ctx, subtype, PDF_NAME(Type1)))
1380 fontdesc = pdf_load_simple_font(ctx, doc, dict);
1381 else if (pdf_name_eq(ctx, subtype, PDF_NAME(MMType1)))
1382 fontdesc = pdf_load_simple_font(ctx, doc, dict);
1383 else if (pdf_name_eq(ctx, subtype, PDF_NAME(TrueType)))
1384 fontdesc = pdf_load_simple_font(ctx, doc, dict);
1385 else if (pdf_name_eq(ctx, subtype, PDF_NAME(Type3)))
1386 {
1387 fontdesc = pdf_load_type3_font(ctx, doc, rdb, dict);
1388 type3 = 1;
1389 }
1390 else if (charprocs)
1391 {
1392 fz_warn(ctx, "unknown font format, guessing type3.");
1393 fontdesc = pdf_load_type3_font(ctx, doc, rdb, dict);
1394 type3 = 1;
1395 }
1396 else if (dfonts)
1397 {
1398 fz_warn(ctx, "unknown font format, guessing type0.");
1399 fontdesc = pdf_load_type0_font(ctx, doc, dict);
1400 }
1401 else
1402 {
1403 fz_warn(ctx, "unknown font format, guessing type1 or truetype.");
1404 fontdesc = pdf_load_simple_font(ctx, doc, dict);
1405 }
1406
1407 pdf_mark_obj(ctx, dict);
1408 fz_try(ctx)
1409 {
1410 /* Create glyph width table for stretching substitute fonts and text extraction. */
1411 pdf_make_width_table(ctx, fontdesc);
1412
1413 /* Load CharProcs */
1414 if (type3)
1415 pdf_load_type3_glyphs(ctx, doc, fontdesc);
1416
1417 pdf_store_item(ctx, dict, fontdesc, fontdesc->size);
1418 }
1419 fz_always(ctx)
1420 pdf_unmark_obj(ctx, dict);
1421 fz_catch(ctx)
1422 {
1423 pdf_drop_font(ctx, fontdesc);
1424 fz_rethrow(ctx);
1425 }
1426
1427 return fontdesc;
1428 }
1429
1430 void
pdf_print_font(fz_context * ctx,fz_output * out,pdf_font_desc * fontdesc)1431 pdf_print_font(fz_context *ctx, fz_output *out, pdf_font_desc *fontdesc)
1432 {
1433 int i;
1434
1435 fz_write_printf(ctx, out, "fontdesc {\n");
1436
1437 if (fontdesc->font->ft_face)
1438 fz_write_printf(ctx, out, "\tfreetype font\n");
1439 if (fontdesc->font->t3procs)
1440 fz_write_printf(ctx, out, "\ttype3 font\n");
1441
1442 fz_write_printf(ctx, out, "\twmode %d\n", fontdesc->wmode);
1443 fz_write_printf(ctx, out, "\tDW %d\n", fontdesc->dhmtx.w);
1444
1445 fz_write_printf(ctx, out, "\tW {\n");
1446 for (i = 0; i < fontdesc->hmtx_len; i++)
1447 fz_write_printf(ctx, out, "\t\t<%04x> <%04x> %d\n",
1448 fontdesc->hmtx[i].lo, fontdesc->hmtx[i].hi, fontdesc->hmtx[i].w);
1449 fz_write_printf(ctx, out, "\t}\n");
1450
1451 if (fontdesc->wmode)
1452 {
1453 fz_write_printf(ctx, out, "\tDW2 [%d %d]\n", fontdesc->dvmtx.y, fontdesc->dvmtx.w);
1454 fz_write_printf(ctx, out, "\tW2 {\n");
1455 for (i = 0; i < fontdesc->vmtx_len; i++)
1456 fz_write_printf(ctx, out, "\t\t<%04x> <%04x> %d %d %d\n", fontdesc->vmtx[i].lo, fontdesc->vmtx[i].hi,
1457 fontdesc->vmtx[i].x, fontdesc->vmtx[i].y, fontdesc->vmtx[i].w);
1458 fz_write_printf(ctx, out, "\t}\n");
1459 }
1460 }
1461