1 #include "mupdf/fitz.h"
2 #include "mupdf/pdf.h"
3 
4 #include <assert.h>
5 
6 #include <ft2build.h>
7 #include FT_FREETYPE_H
8 #include FT_ADVANCES_H
9 #ifdef FT_FONT_FORMATS_H
10 #include FT_FONT_FORMATS_H
11 #else
12 #include FT_XFREE86_H
13 #endif
14 #include FT_TRUETYPE_TABLES_H
15 
16 #ifndef FT_SFNT_HEAD
17 #define FT_SFNT_HEAD ft_sfnt_head
18 #endif
19 
20 void
pdf_load_encoding(const char ** estrings,const char * encoding)21 pdf_load_encoding(const char **estrings, const char *encoding)
22 {
23 	const char * const *bstrings = NULL;
24 	int i;
25 
26 	if (!strcmp(encoding, "StandardEncoding"))
27 		bstrings = fz_glyph_name_from_adobe_standard;
28 	if (!strcmp(encoding, "MacRomanEncoding"))
29 		bstrings = fz_glyph_name_from_mac_roman;
30 	if (!strcmp(encoding, "MacExpertEncoding"))
31 		bstrings = fz_glyph_name_from_mac_expert;
32 	if (!strcmp(encoding, "WinAnsiEncoding"))
33 		bstrings = fz_glyph_name_from_win_ansi;
34 
35 	if (bstrings)
36 		for (i = 0; i < 256; i++)
37 			estrings[i] = bstrings[i];
38 }
39 
40 static void pdf_load_font_descriptor(fz_context *ctx, pdf_document *doc, pdf_font_desc *fontdesc, pdf_obj *dict,
41 	const char *collection, const char *basefont, int iscidfont);
42 
43 static const char *base_font_names[][10] =
44 {
45 	{ "Courier", "CourierNew", "CourierNewPSMT", NULL },
46 	{ "Courier-Bold", "CourierNew,Bold", "Courier,Bold",
47 		"CourierNewPS-BoldMT", "CourierNew-Bold", NULL },
48 	{ "Courier-Oblique", "CourierNew,Italic", "Courier,Italic",
49 		"CourierNewPS-ItalicMT", "CourierNew-Italic", NULL },
50 	{ "Courier-BoldOblique", "CourierNew,BoldItalic", "Courier,BoldItalic",
51 		"CourierNewPS-BoldItalicMT", "CourierNew-BoldItalic", NULL },
52 	{ "Helvetica", "ArialMT", "Arial", NULL },
53 	{ "Helvetica-Bold", "Arial-BoldMT", "Arial,Bold", "Arial-Bold",
54 		"Helvetica,Bold", NULL },
55 	{ "Helvetica-Oblique", "Arial-ItalicMT", "Arial,Italic", "Arial-Italic",
56 		"Helvetica,Italic", "Helvetica-Italic", NULL },
57 	{ "Helvetica-BoldOblique", "Arial-BoldItalicMT",
58 		"Arial,BoldItalic", "Arial-BoldItalic",
59 		"Helvetica,BoldItalic", "Helvetica-BoldItalic", NULL },
60 	{ "Times-Roman", "TimesNewRomanPSMT", "TimesNewRoman",
61 		"TimesNewRomanPS", NULL },
62 	{ "Times-Bold", "TimesNewRomanPS-BoldMT", "TimesNewRoman,Bold",
63 		"TimesNewRomanPS-Bold", "TimesNewRoman-Bold", NULL },
64 	{ "Times-Italic", "TimesNewRomanPS-ItalicMT", "TimesNewRoman,Italic",
65 		"TimesNewRomanPS-Italic", "TimesNewRoman-Italic", NULL },
66 	{ "Times-BoldItalic", "TimesNewRomanPS-BoldItalicMT",
67 		"TimesNewRoman,BoldItalic", "TimesNewRomanPS-BoldItalic",
68 		"TimesNewRoman-BoldItalic", NULL },
69 	{ "Symbol", "Symbol,Italic", "Symbol,Bold", "Symbol,BoldItalic",
70 		"SymbolMT", "SymbolMT,Italic", "SymbolMT,Bold", "SymbolMT,BoldItalic", NULL },
71 	{ "ZapfDingbats", NULL }
72 };
73 
74 const unsigned char *
pdf_lookup_substitute_font(fz_context * ctx,int mono,int serif,int bold,int italic,int * len)75 pdf_lookup_substitute_font(fz_context *ctx, int mono, int serif, int bold, int italic, int *len)
76 {
77 	if (mono) {
78 		if (bold) {
79 			if (italic) return fz_lookup_base14_font(ctx, "Courier-BoldOblique", len);
80 			else return fz_lookup_base14_font(ctx, "Courier-Bold", len);
81 		} else {
82 			if (italic) return fz_lookup_base14_font(ctx, "Courier-Oblique", len);
83 			else return fz_lookup_base14_font(ctx, "Courier", len);
84 		}
85 	} else if (serif) {
86 		if (bold) {
87 			if (italic) return fz_lookup_base14_font(ctx, "Times-BoldItalic", len);
88 			else return fz_lookup_base14_font(ctx, "Times-Bold", len);
89 		} else {
90 			if (italic) return fz_lookup_base14_font(ctx, "Times-Italic", len);
91 			else return fz_lookup_base14_font(ctx, "Times-Roman", len);
92 		}
93 	} else {
94 		if (bold) {
95 			if (italic) return fz_lookup_base14_font(ctx, "Helvetica-BoldOblique", len);
96 			else return fz_lookup_base14_font(ctx, "Helvetica-Bold", len);
97 		} else {
98 			if (italic) return fz_lookup_base14_font(ctx, "Helvetica-Oblique", len);
99 			else return fz_lookup_base14_font(ctx, "Helvetica", len);
100 		}
101 	}
102 }
103 
/*
 * Name-based heuristic: returns 1 when name contains "HuaTian" or
 * "MingLi" anywhere, or when "DF" / "DLC" appears either at the very
 * start of the name or directly after a '+'. Returns 0 otherwise.
 */
static int is_dynalab(char *name)
{
	/* These markers may appear anywhere in the name. */
	if (strstr(name, "HuaTian") != NULL || strstr(name, "MingLi") != NULL)
		return 1;

	/* These markers only count as a prefix, or right after a '+'. */
	if (strncmp(name, "DF", 2) == 0 || strstr(name, "+DF") != NULL)
		return 1;
	if (strncmp(name, "DLC", 3) == 0 || strstr(name, "+DLC") != NULL)
		return 1;

	return 0;
}
116 
/*
 * Compare two strings for equality while skipping every space character
 * (' ') in both of them.
 *
 * Returns 0 when the strings are identical once spaces are removed and
 * 1 otherwise. Unlike strcmp() the result carries no ordering
 * information.
 */
static int strcmp_ignore_space(const char *a, const char *b)
{
	for (;;)
	{
		while (*a == ' ')
			a++;
		while (*b == ' ')
			b++;

		if (*a != *b)
			return 1;

		/* The characters are equal here, so one terminator test covers both strings. */
		if (*a == 0)
			return 0;

		a++;
		b++;
	}
}
135 
pdf_clean_font_name(const char * fontname)136 const char *pdf_clean_font_name(const char *fontname)
137 {
138 	int i, k;
139 	for (i = 0; i < (int)nelem(base_font_names); i++)
140 		for (k = 0; base_font_names[i][k]; k++)
141 			if (!strcmp_ignore_space(base_font_names[i][k], fontname))
142 				return base_font_names[i][0];
143 	return fontname;
144 }
145 
/*
 * FreeType and Rendering glue
 */
149 
150 enum { UNKNOWN, TYPE1, TRUETYPE };
151 
ft_kind(FT_Face face)152 static int ft_kind(FT_Face face)
153 {
154 #ifdef FT_FONT_FORMATS_H
155 	const char *kind = FT_Get_Font_Format(face);
156 #else
157 	const char *kind = FT_Get_X11_Font_Format(face);
158 #endif
159 	if (!strcmp(kind, "TrueType")) return TRUETYPE;
160 	if (!strcmp(kind, "Type 1")) return TYPE1;
161 	if (!strcmp(kind, "CFF")) return TYPE1;
162 	if (!strcmp(kind, "CID Type 1")) return TYPE1;
163 	return UNKNOWN;
164 }
165 
/*
 * Replace a horizontal punctuation code point by its vertical
 * presentation form. Code points without an entry are returned unchanged.
 */
static int vertical_presentation_form(int code)
{
	switch (code)
	{
	case 0x0021: return 0xFE15; /* ! */
	case 0x0028: return 0xFE35; /* ( */
	case 0x0029: return 0xFE36; /* ) */
	case 0x002C: return 0xFE10; /* , */
	case 0x003A: return 0xFE13; /* : */
	case 0x003B: return 0xFE14; /* ; */
	case 0x003F: return 0xFE16; /* ? */
	case 0x005B: return 0xFE47; /* [ */
	case 0x005D: return 0xFE48; /* ] */
	case 0x005F: return 0xFE33; /* _ */
	case 0x007B: return 0xFE37; /* { */
	case 0x007D: return 0xFE38; /* } */
	case 0x2013: return 0xFE32; /* EN DASH */
	case 0x2014: return 0xFE31; /* EM DASH */
	case 0x2025: return 0xFE30; /* TWO DOT LEADER */
	case 0x2026: return 0xFE19; /* HORIZONTAL ELLIPSIS */
	case 0x3001: return 0xFE11; /* IDEOGRAPHIC COMMA */
	case 0x3002: return 0xFE12; /* IDEOGRAPHIC FULL STOP */
	case 0x3008: return 0xFE3F; /* OPENING ANGLE BRACKET */
	case 0x3009: return 0xFE40; /* CLOSING ANGLE BRACKET */
	case 0x300A: return 0xFE3D; /* LEFT DOUBLE ANGLE BRACKET */
	case 0x300B: return 0xFE3E; /* RIGHT DOUBLE ANGLE BRACKET */
	case 0x300C: return 0xFE41; /* LEFT CORNER BRACKET */
	case 0x300D: return 0xFE42; /* RIGHT CORNER BRACKET */
	case 0x300E: return 0xFE43; /* LEFT WHITE CORNER BRACKET */
	case 0x300F: return 0xFE44; /* RIGHT WHITE CORNER BRACKET */
	case 0x3010: return 0xFE3B; /* LEFT BLACK LENTICULAR BRACKET */
	case 0x3011: return 0xFE3C; /* RIGHT BLACK LENTICULAR BRACKET */
	case 0x3014: return 0xFE39; /* LEFT TORTOISE SHELL BRACKET */
	case 0x3015: return 0xFE3A; /* RIGHT TORTOISE SHELL BRACKET */
	case 0x3016: return 0xFE17; /* LEFT WHITE LENTICULAR BRACKET */
	case 0x3017: return 0xFE18; /* RIGHT WHITE LENTICULAR BRACKET */

	case 0xFF01: return 0xFE15; /* FULLWIDTH EXCLAMATION MARK */
	case 0xFF08: return 0xFE35; /* FULLWIDTH LEFT PARENTHESIS */
	case 0xFF09: return 0xFE36; /* FULLWIDTH RIGHT PARENTHESIS */
	case 0xFF0C: return 0xFE10; /* FULLWIDTH COMMA */
	case 0xFF1A: return 0xFE13; /* FULLWIDTH COLON */
	case 0xFF1B: return 0xFE14; /* FULLWIDTH SEMICOLON */
	case 0xFF1F: return 0xFE16; /* FULLWIDTH QUESTION MARK */
	case 0xFF3B: return 0xFE47; /* FULLWIDTH LEFT SQUARE BRACKET */
	case 0xFF3D: return 0xFE48; /* FULLWIDTH RIGHT SQUARE BRACKET */
	case 0xFF3F: return 0xFE33; /* FULLWIDTH LOW LINE */
	case 0xFF5B: return 0xFE37; /* FULLWIDTH LEFT CURLY BRACKET */
	case 0xFF5D: return 0xFE38; /* FULLWIDTH RIGHT CURLY BRACKET */

	case 0x30FC: return 0xFE31; /* KATAKANA-HIRAGANA PROLONGED SOUND MARK */
	case 0xFF0D: return 0xFE31; /* FULLWIDTH HYPHEN-MINUS */

	default: return code;
	}
}

/*
 * Translate a CID into a glyph index for the FreeType face of fontdesc.
 *
 * Three cases, tried in this order:
 *  - to_ttf_cmap is set: the CID is mapped through that cmap, optionally
 *    swapped for a vertical presentation form (substitute fonts in
 *    vertical writing mode only), and then resolved by ft_char_index.
 *  - cid_to_gid is set and cid is within its length: table lookup.
 *  - otherwise the CID is returned unchanged.
 */
static int ft_cid_to_gid(pdf_font_desc *fontdesc, int cid)
{
	if (!fontdesc->to_ttf_cmap)
	{
		if (fontdesc->cid_to_gid && cid >= 0 && (size_t)cid < fontdesc->cid_to_gid_len)
			return fontdesc->cid_to_gid[cid];
		return cid;
	}

	cid = pdf_lookup_cmap(fontdesc->to_ttf_cmap, cid);

	/* vertical presentation forms */
	if (fontdesc->font->flags.ft_substitute && fontdesc->wmode)
		cid = vertical_presentation_form(cid);

	return ft_char_index(fontdesc->font->ft_face, cid);
}
236 
237 int
pdf_font_cid_to_gid(fz_context * ctx,pdf_font_desc * fontdesc,int cid)238 pdf_font_cid_to_gid(fz_context *ctx, pdf_font_desc *fontdesc, int cid)
239 {
240 	if (fontdesc->font->ft_face)
241 		return ft_cid_to_gid(fontdesc, cid);
242 	return cid;
243 }
244 
/*
 * Return the advance of the glyph for cid, scaled to 1000 units per em.
 *
 * The advance is requested from FreeType unscaled and unhinted. A
 * FreeType failure other than FT_Err_Invalid_Argument is reported with
 * fz_warn; in every failure case the advance stays at its initial 0.
 * A face reporting 0 units per em is treated as having 2048.
 */
static int ft_width(fz_context *ctx, pdf_font_desc *fontdesc, int cid)
{
	int gid = ft_cid_to_gid(fontdesc, cid);
	FT_Face face = fontdesc->font->ft_face;
	FT_Fixed advance = 0;
	FT_UShort em;
	int err;

	err = FT_Get_Advance(face, gid,
		FT_LOAD_NO_SCALE | FT_LOAD_NO_HINTING | FT_LOAD_NO_BITMAP | FT_LOAD_IGNORE_TRANSFORM,
		&advance);
	if (err != 0 && err != FT_Err_Invalid_Argument)
		fz_warn(ctx, "FT_Get_Advance(%d): %s", gid, ft_error_string(err));

	em = face->units_per_EM;
	if (em == 0)
		em = 2048;

	return advance * 1000 / em;
}
264 
265 static const struct { int code; const char *name; } mre_diff_table[] =
266 {
267 	{ 173, "notequal" },
268 	{ 176, "infinity" },
269 	{ 178, "lessequal" },
270 	{ 179, "greaterequal" },
271 	{ 182, "partialdiff" },
272 	{ 183, "summation" },
273 	{ 184, "product" },
274 	{ 185, "pi" },
275 	{ 186, "integral" },
276 	{ 189, "Omega" },
277 	{ 195, "radical" },
278 	{ 197, "approxequal" },
279 	{ 198, "Delta" },
280 	{ 215, "lozenge" },
281 	{ 219, "Euro" },
282 	{ 240, "apple" },
283 };
284 
lookup_mre_code(const char * name)285 static int lookup_mre_code(const char *name)
286 {
287 	int i;
288 	for (i = 0; i < (int)nelem(mre_diff_table); ++i)
289 		if (!strcmp(name, mre_diff_table[i].name))
290 			return mre_diff_table[i].code;
291 	for (i = 0; i < 256; i++)
292 		if (fz_glyph_name_from_mac_roman[i] && !strcmp(name, fz_glyph_name_from_mac_roman[i]))
293 			return i;
294 	return -1;
295 }
296 
/*
 * Resolve a glyph name to a glyph index in face, trying three strategies
 * in order and returning the first positive result:
 *  1. strict glyph-name to unicode conversion, then the face's charmap;
 *  2. the face's own glyph-name table;
 *  3. the non-strict glyph-name to unicode conversion, then the charmap
 *     (this last result is returned even when it is 0).
 * Returns 0 when nothing matched.
 */
static int ft_find_glyph_by_unicode_name(FT_Face face, const char *name)
{
	int ucs;
	int gid;

	/* Prefer exact unicode match if available. */
	ucs = fz_unicode_from_glyph_name_strict(name);
	if (ucs > 0)
	{
		gid = ft_char_index(face, ucs);
		if (gid > 0)
			return gid;
	}

	/* Fall back to font glyph name if we can. */
	gid = ft_name_index(face, name);
	if (gid > 0)
		return gid;

	/* Fuzzy unicode match as last attempt; 0 means failure. */
	ucs = fz_unicode_from_glyph_name(name);
	return ucs > 0 ? ft_char_index(face, ucs) : 0;
}
323 
/*
 * Load font files.
 */
327 
328 static void
pdf_load_builtin_font(fz_context * ctx,pdf_font_desc * fontdesc,const char * fontname,int has_descriptor)329 pdf_load_builtin_font(fz_context *ctx, pdf_font_desc *fontdesc, const char *fontname, int has_descriptor)
330 {
331 	FT_Face face;
332 	const char *clean_name = pdf_clean_font_name(fontname);
333 	if (clean_name == fontname)
334 		clean_name = "Times-Roman";
335 
336 	fontdesc->font = fz_load_system_font(ctx, fontname, 0, 0, !has_descriptor);
337 	if (!fontdesc->font)
338 	{
339 		const unsigned char *data;
340 		int len;
341 
342 		data = fz_lookup_base14_font(ctx, clean_name, &len);
343 		if (!data)
344 			fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot find builtin font: '%s'", fontname);
345 
346 		fontdesc->font = fz_new_font_from_memory(ctx, fontname, data, len, 0, 1);
347 		fontdesc->font->flags.is_serif = !!strstr(clean_name, "Times");
348 	}
349 
350 	if (!strcmp(clean_name, "Symbol") || !strcmp(clean_name, "ZapfDingbats"))
351 		fontdesc->flags |= PDF_FD_SYMBOLIC;
352 
353 	face = fontdesc->font->ft_face;
354 	fontdesc->ascent = 1000.0f * face->ascender / face->units_per_EM;
355 	fontdesc->descent = 1000.0f * face->descender / face->units_per_EM;
356 }
357 
358 static void
pdf_load_substitute_font(fz_context * ctx,pdf_font_desc * fontdesc,const char * fontname,int mono,int serif,int bold,int italic)359 pdf_load_substitute_font(fz_context *ctx, pdf_font_desc *fontdesc, const char *fontname, int mono, int serif, int bold, int italic)
360 {
361 	fontdesc->font = fz_load_system_font(ctx, fontname, bold, italic, 0);
362 	if (!fontdesc->font)
363 	{
364 		const unsigned char *data;
365 		int len;
366 
367 		data = pdf_lookup_substitute_font(ctx, mono, serif, bold, italic, &len);
368 		if (!data)
369 			fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot find substitute font");
370 
371 		fontdesc->font = fz_new_font_from_memory(ctx, fontname, data, len, 0, 1);
372 		fontdesc->font->flags.fake_bold = bold && !fontdesc->font->flags.is_bold;
373 		fontdesc->font->flags.fake_italic = italic && !fontdesc->font->flags.is_italic;
374 
375 		fontdesc->font->flags.is_mono = mono;
376 		fontdesc->font->flags.is_serif = serif;
377 		fontdesc->font->flags.is_bold = bold;
378 		fontdesc->font->flags.is_italic = italic;
379 	}
380 
381 	fontdesc->font->flags.ft_substitute = 1;
382 	fontdesc->font->flags.ft_stretch = 1;
383 }
384 
385 static void
pdf_load_substitute_cjk_font(fz_context * ctx,pdf_font_desc * fontdesc,const char * fontname,int ros,int serif)386 pdf_load_substitute_cjk_font(fz_context *ctx, pdf_font_desc *fontdesc, const char *fontname, int ros, int serif)
387 {
388 	fontdesc->font = fz_load_system_cjk_font(ctx, fontname, ros, serif);
389 	if (!fontdesc->font)
390 	{
391 		const unsigned char *data;
392 		int size;
393 		int subfont;
394 
395 		data = fz_lookup_cjk_font(ctx, ros, &size, &subfont);
396 		if (!data)
397 			fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot find builtin CJK font");
398 
399 		/* A glyph bbox cache is too big for CJK fonts. */
400 		fontdesc->font = fz_new_font_from_memory(ctx, fontname, data, size, subfont, 0);
401 	}
402 
403 	fontdesc->font->flags.ft_substitute = 1;
404 	fontdesc->font->flags.ft_stretch = 0;
405 }
406 
407 static void
pdf_load_system_font(fz_context * ctx,pdf_font_desc * fontdesc,const char * fontname,const char * collection)408 pdf_load_system_font(fz_context *ctx, pdf_font_desc *fontdesc, const char *fontname, const char *collection)
409 {
410 	int bold = 0;
411 	int italic = 0;
412 	int serif = 0;
413 	int mono = 0;
414 
415 	if (strstr(fontname, "Bold"))
416 		bold = 1;
417 	if (strstr(fontname, "Italic"))
418 		italic = 1;
419 	if (strstr(fontname, "Oblique"))
420 		italic = 1;
421 
422 	if (fontdesc->flags & PDF_FD_FIXED_PITCH)
423 		mono = 1;
424 	if (fontdesc->flags & PDF_FD_SERIF)
425 		serif = 1;
426 	if (fontdesc->flags & PDF_FD_ITALIC)
427 		italic = 1;
428 	if (fontdesc->flags & PDF_FD_FORCE_BOLD)
429 		bold = 1;
430 
431 	if (collection)
432 	{
433 		if (!strcmp(collection, "Adobe-CNS1"))
434 			pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_CNS, serif);
435 		else if (!strcmp(collection, "Adobe-GB1"))
436 			pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_GB, serif);
437 		else if (!strcmp(collection, "Adobe-Japan1"))
438 			pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_JAPAN, serif);
439 		else if (!strcmp(collection, "Adobe-Korea1"))
440 			pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_KOREA, serif);
441 		else
442 		{
443 			if (strcmp(collection, "Adobe-Identity") != 0)
444 				fz_warn(ctx, "unknown cid collection: %s", collection);
445 			pdf_load_substitute_font(ctx, fontdesc, fontname, mono, serif, bold, italic);
446 		}
447 	}
448 	else
449 	{
450 		pdf_load_substitute_font(ctx, fontdesc, fontname, mono, serif, bold, italic);
451 	}
452 }
453 
454 static void
pdf_load_embedded_font(fz_context * ctx,pdf_document * doc,pdf_font_desc * fontdesc,const char * fontname,pdf_obj * stmref)455 pdf_load_embedded_font(fz_context *ctx, pdf_document *doc, pdf_font_desc *fontdesc, const char *fontname, pdf_obj *stmref)
456 {
457 	fz_buffer *buf;
458 
459 	buf = pdf_load_stream(ctx, stmref);
460 	fz_try(ctx)
461 		fontdesc->font = fz_new_font_from_buffer(ctx, fontname, buf, 0, 1);
462 	fz_always(ctx)
463 		fz_drop_buffer(ctx, buf);
464 	fz_catch(ctx)
465 		fz_rethrow(ctx);
466 
467 	fontdesc->size += fz_buffer_storage(ctx, buf, NULL);
468 	fontdesc->is_embedded = 1;
469 }
470 
/*
 * Create and destroy
 */
474 
475 pdf_font_desc *
pdf_keep_font(fz_context * ctx,pdf_font_desc * fontdesc)476 pdf_keep_font(fz_context *ctx, pdf_font_desc *fontdesc)
477 {
478 	return fz_keep_storable(ctx, &fontdesc->storable);
479 }
480 
481 void
pdf_drop_font(fz_context * ctx,pdf_font_desc * fontdesc)482 pdf_drop_font(fz_context *ctx, pdf_font_desc *fontdesc)
483 {
484 	fz_drop_storable(ctx, &fontdesc->storable);
485 }
486 
487 static void
pdf_drop_font_imp(fz_context * ctx,fz_storable * fontdesc_)488 pdf_drop_font_imp(fz_context *ctx, fz_storable *fontdesc_)
489 {
490 	pdf_font_desc *fontdesc = (pdf_font_desc *)fontdesc_;
491 
492 	fz_drop_font(ctx, fontdesc->font);
493 	pdf_drop_cmap(ctx, fontdesc->encoding);
494 	pdf_drop_cmap(ctx, fontdesc->to_ttf_cmap);
495 	pdf_drop_cmap(ctx, fontdesc->to_unicode);
496 	fz_free(ctx, fontdesc->cid_to_gid);
497 	fz_free(ctx, fontdesc->cid_to_ucs);
498 	fz_free(ctx, fontdesc->hmtx);
499 	fz_free(ctx, fontdesc->vmtx);
500 	fz_free(ctx, fontdesc);
501 }
502 
503 pdf_font_desc *
pdf_new_font_desc(fz_context * ctx)504 pdf_new_font_desc(fz_context *ctx)
505 {
506 	pdf_font_desc *fontdesc;
507 
508 	fontdesc = fz_malloc_struct(ctx, pdf_font_desc);
509 	FZ_INIT_STORABLE(fontdesc, 1, pdf_drop_font_imp);
510 	fontdesc->size = sizeof(pdf_font_desc);
511 
512 	fontdesc->font = NULL;
513 
514 	fontdesc->flags = 0;
515 	fontdesc->italic_angle = 0;
516 	fontdesc->ascent = 800;
517 	fontdesc->descent = -200;
518 	fontdesc->cap_height = 800;
519 	fontdesc->x_height = 500;
520 	fontdesc->missing_width = 0;
521 
522 	fontdesc->encoding = NULL;
523 	fontdesc->to_ttf_cmap = NULL;
524 	fontdesc->cid_to_gid_len = 0;
525 	fontdesc->cid_to_gid = NULL;
526 
527 	fontdesc->to_unicode = NULL;
528 	fontdesc->cid_to_ucs_len = 0;
529 	fontdesc->cid_to_ucs = NULL;
530 
531 	fontdesc->wmode = 0;
532 
533 	fontdesc->hmtx_cap = 0;
534 	fontdesc->vmtx_cap = 0;
535 	fontdesc->hmtx_len = 0;
536 	fontdesc->vmtx_len = 0;
537 	fontdesc->hmtx = NULL;
538 	fontdesc->vmtx = NULL;
539 
540 	fontdesc->dhmtx.lo = 0x0000;
541 	fontdesc->dhmtx.hi = 0xFFFF;
542 	fontdesc->dhmtx.w = 1000;
543 
544 	fontdesc->dvmtx.lo = 0x0000;
545 	fontdesc->dvmtx.hi = 0xFFFF;
546 	fontdesc->dvmtx.x = 0;
547 	fontdesc->dvmtx.y = 880;
548 	fontdesc->dvmtx.w = -1000;
549 
550 	fontdesc->is_embedded = 0;
551 
552 	return fontdesc;
553 }
554 
/*
 * Simple fonts (Type1 and TrueType)
 */
558 
559 static FT_CharMap
select_type1_cmap(FT_Face face)560 select_type1_cmap(FT_Face face)
561 {
562 	int i;
563 	for (i = 0; i < face->num_charmaps; i++)
564 		if (face->charmaps[i]->platform_id == 7)
565 			return face->charmaps[i];
566 	if (face->num_charmaps > 0)
567 		return face->charmaps[0];
568 	return NULL;
569 }
570 
571 static FT_CharMap
select_truetype_cmap(FT_Face face,int symbolic)572 select_truetype_cmap(FT_Face face, int symbolic)
573 {
574 	int i;
575 
576 	/* First look for a Microsoft symbolic cmap, if applicable */
577 	if (symbolic)
578 	{
579 		for (i = 0; i < face->num_charmaps; i++)
580 			if (face->charmaps[i]->platform_id == 3 && face->charmaps[i]->encoding_id == 0)
581 				return face->charmaps[i];
582 	}
583 
584 	/* Then look for a Microsoft Unicode cmap */
585 	for (i = 0; i < face->num_charmaps; i++)
586 		if (face->charmaps[i]->platform_id == 3 && face->charmaps[i]->encoding_id == 1)
587 			if (FT_Get_CMap_Format(face->charmaps[i]) != -1)
588 				return face->charmaps[i];
589 
590 	/* Finally look for an Apple MacRoman cmap */
591 	for (i = 0; i < face->num_charmaps; i++)
592 		if (face->charmaps[i]->platform_id == 1 && face->charmaps[i]->encoding_id == 0)
593 			if (FT_Get_CMap_Format(face->charmaps[i]) != -1)
594 				return face->charmaps[i];
595 
596 	if (face->num_charmaps > 0)
597 		if (FT_Get_CMap_Format(face->charmaps[0]) != -1)
598 			return face->charmaps[0];
599 	return NULL;
600 }
601 
602 static FT_CharMap
select_unknown_cmap(FT_Face face)603 select_unknown_cmap(FT_Face face)
604 {
605 	if (face->num_charmaps > 0)
606 		return face->charmaps[0];
607 	return NULL;
608 }
609 
610 static pdf_font_desc *
pdf_load_simple_font(fz_context * ctx,pdf_document * doc,pdf_obj * dict)611 pdf_load_simple_font(fz_context *ctx, pdf_document *doc, pdf_obj *dict)
612 {
613 	const char *basefont;
614 	pdf_obj *descriptor;
615 	pdf_obj *encoding;
616 	pdf_obj *widths;
617 	unsigned short *etable = NULL;
618 	pdf_font_desc *fontdesc = NULL;
619 	pdf_obj *subtype;
620 	FT_Face face;
621 	FT_CharMap cmap;
622 	int symbolic;
623 	int kind;
624 	int glyph;
625 
626 	const char *estrings[256];
627 	char ebuffer[256][32];
628 	int i, k, n;
629 	int fterr;
630 	int has_lock = 0;
631 
632 	fz_var(fontdesc);
633 	fz_var(etable);
634 	fz_var(has_lock);
635 
636 	/* Load font file */
637 	fz_try(ctx)
638 	{
639 		fontdesc = pdf_new_font_desc(ctx);
640 
641 		basefont = pdf_to_name(ctx, pdf_dict_get(ctx, dict, PDF_NAME(BaseFont)));
642 
643 		descriptor = pdf_dict_get(ctx, dict, PDF_NAME(FontDescriptor));
644 		if (descriptor)
645 			pdf_load_font_descriptor(ctx, doc, fontdesc, descriptor, NULL, basefont, 0);
646 		else
647 			pdf_load_builtin_font(ctx, fontdesc, basefont, 0);
648 
649 		/* Some chinese documents mistakenly consider WinAnsiEncoding to be codepage 936 */
650 		if (descriptor && pdf_is_string(ctx, pdf_dict_get(ctx, descriptor, PDF_NAME(FontName))) &&
651 			!pdf_dict_get(ctx, dict, PDF_NAME(ToUnicode)) &&
652 			pdf_name_eq(ctx, pdf_dict_get(ctx, dict, PDF_NAME(Encoding)), PDF_NAME(WinAnsiEncoding)) &&
653 			pdf_dict_get_int(ctx, descriptor, PDF_NAME(Flags)) == 4)
654 		{
655 			char *cp936fonts[] = {
656 				"\xCB\xCE\xCC\xE5", "SimSun,Regular",
657 				"\xBA\xDA\xCC\xE5", "SimHei,Regular",
658 				"\xBF\xAC\xCC\xE5_GB2312", "SimKai,Regular",
659 				"\xB7\xC2\xCB\xCE_GB2312", "SimFang,Regular",
660 				"\xC1\xA5\xCA\xE9", "SimLi,Regular",
661 				NULL
662 			};
663 			for (i = 0; cp936fonts[i]; i += 2)
664 				if (!strcmp(basefont, cp936fonts[i]))
665 					break;
666 			if (cp936fonts[i])
667 			{
668 				fz_warn(ctx, "workaround for S22PDF lying about chinese font encodings");
669 				pdf_drop_font(ctx, fontdesc);
670 				fontdesc = NULL;
671 				fontdesc = pdf_new_font_desc(ctx);
672 				pdf_load_font_descriptor(ctx, doc, fontdesc, descriptor, "Adobe-GB1", cp936fonts[i+1], 0);
673 				fontdesc->encoding = pdf_load_system_cmap(ctx, "GBK-EUC-H");
674 				fontdesc->to_unicode = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2");
675 				fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2");
676 
677 				goto skip_encoding;
678 			}
679 		}
680 
681 		face = fontdesc->font->ft_face;
682 		kind = ft_kind(face);
683 
684 		/* Encoding */
685 
686 		symbolic = fontdesc->flags & 4;
687 
688 		if (kind == TYPE1)
689 			cmap = select_type1_cmap(face);
690 		else if (kind == TRUETYPE)
691 			cmap = select_truetype_cmap(face, symbolic);
692 		else
693 			cmap = select_unknown_cmap(face);
694 
695 		if (cmap)
696 		{
697 			fterr = FT_Set_Charmap(face, cmap);
698 			if (fterr)
699 				fz_warn(ctx, "freetype could not set cmap: %s", ft_error_string(fterr));
700 		}
701 		else
702 			fz_warn(ctx, "freetype could not find any cmaps");
703 
704 		/* FIXME: etable may leak on error. */
705 		etable = Memento_label(fz_malloc_array(ctx, 256, unsigned short), "cid_to_gid");
706 		fontdesc->size += 256 * sizeof(unsigned short);
707 		for (i = 0; i < 256; i++)
708 		{
709 			estrings[i] = NULL;
710 			etable[i] = 0;
711 		}
712 
713 		encoding = pdf_dict_get(ctx, dict, PDF_NAME(Encoding));
714 		if (encoding)
715 		{
716 			if (pdf_is_name(ctx, encoding))
717 				pdf_load_encoding(estrings, pdf_to_name(ctx, encoding));
718 
719 			if (pdf_is_dict(ctx, encoding))
720 			{
721 				pdf_obj *base, *diff, *item;
722 
723 				base = pdf_dict_get(ctx, encoding, PDF_NAME(BaseEncoding));
724 				if (pdf_is_name(ctx, base))
725 					pdf_load_encoding(estrings, pdf_to_name(ctx, base));
726 				else if (!fontdesc->is_embedded && !symbolic)
727 					pdf_load_encoding(estrings, "StandardEncoding");
728 
729 				diff = pdf_dict_get(ctx, encoding, PDF_NAME(Differences));
730 				if (pdf_is_array(ctx, diff))
731 				{
732 					n = pdf_array_len(ctx, diff);
733 					k = 0;
734 					for (i = 0; i < n; i++)
735 					{
736 						item = pdf_array_get(ctx, diff, i);
737 						if (pdf_is_int(ctx, item))
738 							k = pdf_to_int(ctx, item);
739 						if (pdf_is_name(ctx, item) && k >= 0 && k < (int)nelem(estrings))
740 							estrings[k++] = pdf_to_name(ctx, item);
741 					}
742 				}
743 			}
744 		}
745 		else if (!fontdesc->is_embedded && !symbolic)
746 			pdf_load_encoding(estrings, "StandardEncoding");
747 
748 		/* start with the builtin encoding */
749 		for (i = 0; i < 256; i++)
750 			etable[i] = ft_char_index(face, i);
751 
752 		fz_lock(ctx, FZ_LOCK_FREETYPE);
753 		has_lock = 1;
754 
755 		/* built-in and substitute fonts may be a different type than what the document expects */
756 		subtype = pdf_dict_get(ctx, dict, PDF_NAME(Subtype));
757 		if (pdf_name_eq(ctx, subtype, PDF_NAME(Type1)))
758 			kind = TYPE1;
759 		else if (pdf_name_eq(ctx, subtype, PDF_NAME(MMType1)))
760 			kind = TYPE1;
761 		else if (pdf_name_eq(ctx, subtype, PDF_NAME(TrueType)))
762 			kind = TRUETYPE;
763 		else if (pdf_name_eq(ctx, subtype, PDF_NAME(CIDFontType0)))
764 			kind = TYPE1;
765 		else if (pdf_name_eq(ctx, subtype, PDF_NAME(CIDFontType2)))
766 			kind = TRUETYPE;
767 
768 		/* encode by glyph name where we can */
769 		if (kind == TYPE1)
770 		{
771 			for (i = 0; i < 256; i++)
772 			{
773 				if (estrings[i])
774 				{
775 					glyph = ft_name_index(face, estrings[i]);
776 					if (glyph > 0)
777 						etable[i] = glyph;
778 				}
779 			}
780 		}
781 
782 		/* encode by glyph name where we can */
783 		if (kind == TRUETYPE)
784 		{
785 			/* Unicode cmap */
786 			if (!symbolic && face->charmap && face->charmap->platform_id == 3)
787 			{
788 				for (i = 0; i < 256; i++)
789 				{
790 					if (estrings[i])
791 					{
792 						glyph = ft_find_glyph_by_unicode_name(face, estrings[i]);
793 						if (glyph > 0)
794 							etable[i] = glyph;
795 					}
796 				}
797 			}
798 
799 			/* MacRoman cmap */
800 			else if (!symbolic && face->charmap && face->charmap->platform_id == 1)
801 			{
802 				for (i = 0; i < 256; i++)
803 				{
804 					if (estrings[i])
805 					{
806 						int mrcode = lookup_mre_code(estrings[i]);
807 						glyph = 0;
808 						if (mrcode > 0)
809 							glyph = ft_char_index(face, mrcode);
810 						if (glyph == 0)
811 							glyph = ft_name_index(face, estrings[i]);
812 						if (glyph > 0)
813 							etable[i] = glyph;
814 					}
815 				}
816 			}
817 
818 			/* Symbolic cmap */
819 			else if (!face->charmap || face->charmap->encoding != FT_ENCODING_MS_SYMBOL)
820 			{
821 				for (i = 0; i < 256; i++)
822 				{
823 					if (estrings[i])
824 					{
825 						glyph = ft_name_index(face, estrings[i]);
826 						if (glyph > 0)
827 							etable[i] = glyph;
828 					}
829 				}
830 			}
831 		}
832 
833 		/* try to reverse the glyph names from the builtin encoding */
834 		for (i = 0; i < 256; i++)
835 		{
836 			if (etable[i] && !estrings[i])
837 			{
838 				if (FT_HAS_GLYPH_NAMES(face))
839 				{
840 					fterr = FT_Get_Glyph_Name(face, etable[i], ebuffer[i], 32);
841 					if (fterr)
842 						fz_warn(ctx, "freetype get glyph name (gid %d): %s", etable[i], ft_error_string(fterr));
843 					if (ebuffer[i][0])
844 						estrings[i] = ebuffer[i];
845 				}
846 				else
847 				{
848 					estrings[i] = (char*) fz_glyph_name_from_win_ansi[i]; /* discard const */
849 				}
850 			}
851 		}
852 
853 		/* symbolic Type 1 fonts with an implicit encoding and non-standard glyph names */
854 		if (kind == TYPE1 && symbolic)
855 		{
856 			for (i = 0; i < 256; i++)
857 				if (etable[i] && estrings[i] && !fz_unicode_from_glyph_name(estrings[i]))
858 					estrings[i] = (char*) fz_glyph_name_from_adobe_standard[i];
859 		}
860 
861 		fz_unlock(ctx, FZ_LOCK_FREETYPE);
862 		has_lock = 0;
863 
864 		fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 1);
865 		fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding);
866 		fontdesc->cid_to_gid_len = 256;
867 		fontdesc->cid_to_gid = etable;
868 
869 		fz_try(ctx)
870 		{
871 			pdf_load_to_unicode(ctx, doc, fontdesc, estrings, NULL, pdf_dict_get(ctx, dict, PDF_NAME(ToUnicode)));
872 		}
873 		fz_catch(ctx)
874 		{
875 			fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
876 			fz_warn(ctx, "cannot load ToUnicode CMap");
877 		}
878 
879 	skip_encoding:
880 
881 		/* Widths */
882 
883 		pdf_set_default_hmtx(ctx, fontdesc, fontdesc->missing_width);
884 
885 		widths = pdf_dict_get(ctx, dict, PDF_NAME(Widths));
886 		if (widths)
887 		{
888 			int first, last;
889 
890 			first = pdf_dict_get_int(ctx, dict, PDF_NAME(FirstChar));
891 			last = pdf_dict_get_int(ctx, dict, PDF_NAME(LastChar));
892 
893 			if (first < 0 || last > 255 || first > last)
894 				first = last = 0;
895 
896 			for (i = 0; i < last - first + 1; i++)
897 			{
898 				int wid = pdf_array_get_int(ctx, widths, i);
899 				pdf_add_hmtx(ctx, fontdesc, i + first, i + first, wid);
900 			}
901 		}
902 		else
903 		{
904 			for (i = 0; i < 256; i++)
905 				pdf_add_hmtx(ctx, fontdesc, i, i, ft_width(ctx, fontdesc, i));
906 		}
907 
908 		pdf_end_hmtx(ctx, fontdesc);
909 	}
910 	fz_catch(ctx)
911 	{
912 		if (has_lock)
913 			fz_unlock(ctx, FZ_LOCK_FREETYPE);
914 		if (fontdesc && etable != fontdesc->cid_to_gid)
915 			fz_free(ctx, etable);
916 		pdf_drop_font(ctx, fontdesc);
917 		fz_rethrow(ctx);
918 	}
919 	return fontdesc;
920 }
921 
922 static int
hail_mary_make_hash_key(fz_context * ctx,fz_store_hash * hash,void * key_)923 hail_mary_make_hash_key(fz_context *ctx, fz_store_hash *hash, void *key_)
924 {
925 	hash->u.pi.i = 0;
926 	hash->u.pi.ptr = NULL;
927 	return 1;
928 }
929 
930 static void *
hail_mary_keep_key(fz_context * ctx,void * key)931 hail_mary_keep_key(fz_context *ctx, void *key)
932 {
933 	return key;
934 }
935 
936 static void
hail_mary_drop_key(fz_context * ctx,void * key)937 hail_mary_drop_key(fz_context *ctx, void *key)
938 {
939 }
940 
941 static int
hail_mary_cmp_key(fz_context * ctx,void * k0,void * k1)942 hail_mary_cmp_key(fz_context *ctx, void *k0, void *k1)
943 {
944 	return k0 == k1;
945 }
946 
947 static void
hail_mary_format_key(fz_context * ctx,char * s,size_t n,void * key_)948 hail_mary_format_key(fz_context *ctx, char *s, size_t n, void *key_)
949 {
950 	fz_strlcpy(s, "(hail mary font)", n);
951 }
952 
/* Only the address of this object is used: it is passed as the store key
 * for the hail mary font by pdf_load_hail_mary_font. */
static int hail_mary_store_key; /* Dummy */

/* Store type describing the hail mary font entry: a name followed by the
 * hash/keep/drop/compare/format key callbacks defined above. */
static const fz_store_type hail_mary_store_type =
{
	"hail-mary",
	hail_mary_make_hash_key,
	hail_mary_keep_key,
	hail_mary_drop_key,
	hail_mary_cmp_key,
	hail_mary_format_key,
	NULL /* NOTE(review): final slot left unset; confirm its meaning against fz_store_type */
};
965 
966 pdf_font_desc *
pdf_load_hail_mary_font(fz_context * ctx,pdf_document * doc)967 pdf_load_hail_mary_font(fz_context *ctx, pdf_document *doc)
968 {
969 	pdf_font_desc *fontdesc;
970 	pdf_font_desc *existing;
971 
972 	if ((fontdesc = fz_find_item(ctx, pdf_drop_font_imp, &hail_mary_store_key, &hail_mary_store_type)) != NULL)
973 	{
974 		return fontdesc;
975 	}
976 
977 	/* FIXME: Get someone with a clue about fonts to fix this */
978 	fontdesc = pdf_load_simple_font(ctx, doc, NULL);
979 
980 	existing = fz_store_item(ctx, &hail_mary_store_key, fontdesc, fontdesc->size, &hail_mary_store_type);
981 	assert(existing == NULL);
982 	(void)existing; /* Silence warning in release builds */
983 
984 	return fontdesc;
985 }
986 
987 /*
988  * CID Fonts
989  */
990 
991 static pdf_font_desc *
load_cid_font(fz_context * ctx,pdf_document * doc,pdf_obj * dict,pdf_obj * encoding,pdf_obj * to_unicode)992 load_cid_font(fz_context *ctx, pdf_document *doc, pdf_obj *dict, pdf_obj *encoding, pdf_obj *to_unicode)
993 {
994 	pdf_obj *widths;
995 	pdf_obj *descriptor;
996 	pdf_font_desc *fontdesc = NULL;
997 	fz_buffer *buf = NULL;
998 	pdf_cmap *cmap;
999 	FT_Face face;
1000 	char collection[256];
1001 	const char *basefont;
1002 	int i, k, fterr;
1003 	pdf_obj *cidtogidmap;
1004 	pdf_obj *obj;
1005 	int dw;
1006 
1007 	fz_var(fontdesc);
1008 	fz_var(buf);
1009 
1010 	fz_try(ctx)
1011 	{
1012 		/* Get font name and CID collection */
1013 
1014 		basefont = pdf_to_name(ctx, pdf_dict_get(ctx, dict, PDF_NAME(BaseFont)));
1015 
1016 		{
1017 			pdf_obj *cidinfo;
1018 			const char *reg, *ord;
1019 
1020 			cidinfo = pdf_dict_get(ctx, dict, PDF_NAME(CIDSystemInfo));
1021 			if (!cidinfo)
1022 				fz_throw(ctx, FZ_ERROR_SYNTAX, "cid font is missing info");
1023 
1024 			reg = pdf_dict_get_string(ctx, cidinfo, PDF_NAME(Registry), NULL);
1025 			ord = pdf_dict_get_string(ctx, cidinfo, PDF_NAME(Ordering), NULL);
1026 			fz_snprintf(collection, sizeof collection, "%s-%s", reg, ord);
1027 		}
1028 
1029 		/* Encoding */
1030 
1031 		if (pdf_is_name(ctx, encoding))
1032 		{
1033 			cmap = pdf_load_system_cmap(ctx, pdf_to_name(ctx, encoding));
1034 		}
1035 		else if (pdf_is_indirect(ctx, encoding))
1036 		{
1037 			cmap = pdf_load_embedded_cmap(ctx, doc, encoding);
1038 		}
1039 		else
1040 		{
1041 			fz_throw(ctx, FZ_ERROR_SYNTAX, "font missing encoding");
1042 		}
1043 
1044 		/* Load font file */
1045 
1046 		fontdesc = pdf_new_font_desc(ctx);
1047 
1048 		fontdesc->encoding = cmap;
1049 		fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding);
1050 
1051 		pdf_set_font_wmode(ctx, fontdesc, pdf_cmap_wmode(ctx, fontdesc->encoding));
1052 
1053 		descriptor = pdf_dict_get(ctx, dict, PDF_NAME(FontDescriptor));
1054 		if (!descriptor)
1055 			fz_throw(ctx, FZ_ERROR_SYNTAX, "missing font descriptor");
1056 		pdf_load_font_descriptor(ctx, doc, fontdesc, descriptor, collection, basefont, 1);
1057 
1058 		face = fontdesc->font->ft_face;
1059 
1060 		/* Apply encoding */
1061 
1062 		cidtogidmap = pdf_dict_get(ctx, dict, PDF_NAME(CIDToGIDMap));
1063 		if (pdf_is_stream(ctx, cidtogidmap))
1064 		{
1065 			size_t z, len;
1066 			unsigned char *data;
1067 
1068 			buf = pdf_load_stream(ctx, cidtogidmap);
1069 
1070 			len = fz_buffer_storage(ctx, buf, &data);
1071 			fontdesc->cid_to_gid_len = len / 2;
1072 			fontdesc->cid_to_gid = Memento_label(fz_malloc_array(ctx, fontdesc->cid_to_gid_len, unsigned short), "cid_to_gid_map");
1073 			fontdesc->size += fontdesc->cid_to_gid_len * sizeof(unsigned short);
1074 			for (z = 0; z < fontdesc->cid_to_gid_len; z++)
1075 				fontdesc->cid_to_gid[z] = (data[z * 2] << 8) + data[z * 2 + 1];
1076 		}
1077 		else if (cidtogidmap && !pdf_name_eq(ctx, PDF_NAME(Identity), cidtogidmap))
1078 		{
1079 			fz_warn(ctx, "ignoring unknown CIDToGIDMap entry");
1080 		}
1081 
1082 		/* if font is external, cidtogidmap should not be identity */
1083 		/* so we map from cid to unicode and then map that through the (3 1) */
1084 		/* unicode cmap to get a glyph id */
1085 		else if (fontdesc->font->flags.ft_substitute)
1086 		{
1087 			fterr = FT_Select_Charmap(face, ft_encoding_unicode);
1088 			if (fterr)
1089 				fz_throw(ctx, FZ_ERROR_SYNTAX, "no unicode cmap when emulating CID font: %s", ft_error_string(fterr));
1090 
1091 			if (!strcmp(collection, "Adobe-CNS1"))
1092 				fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-CNS1-UCS2");
1093 			else if (!strcmp(collection, "Adobe-GB1"))
1094 				fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2");
1095 			else if (!strcmp(collection, "Adobe-Japan1"))
1096 				fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan1-UCS2");
1097 			else if (!strcmp(collection, "Adobe-Japan2"))
1098 				fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan2-UCS2");
1099 			else if (!strcmp(collection, "Adobe-Korea1"))
1100 				fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Korea1-UCS2");
1101 		}
1102 
1103 		pdf_load_to_unicode(ctx, doc, fontdesc, NULL, collection, to_unicode);
1104 
1105 		/* If we have an identity encoding, we're supposed to use the glyph ids directly.
1106 		 * If we only have a substitute font, that won't work.
1107 		 * Make a last ditch attempt by using
1108 		 * the ToUnicode table if it exists to map via the substitute font's cmap. */
1109 		if (strstr(fontdesc->encoding->cmap_name, "Identity-") && fontdesc->font->flags.ft_substitute)
1110 		{
1111 			fz_warn(ctx, "non-embedded font using identity encoding: %s", basefont);
1112 			if (fontdesc->to_unicode && !fontdesc->to_ttf_cmap)
1113 				fontdesc->to_ttf_cmap = pdf_keep_cmap(ctx, fontdesc->to_unicode);
1114 		}
1115 
1116 		/* Horizontal */
1117 
1118 		dw = 1000;
1119 		obj = pdf_dict_get(ctx, dict, PDF_NAME(DW));
1120 		if (obj)
1121 			dw = pdf_to_int(ctx, obj);
1122 		pdf_set_default_hmtx(ctx, fontdesc, dw);
1123 
1124 		widths = pdf_dict_get(ctx, dict, PDF_NAME(W));
1125 		if (widths)
1126 		{
1127 			int c0, c1, w, n, m;
1128 
1129 			n = pdf_array_len(ctx, widths);
1130 			for (i = 0; i < n; )
1131 			{
1132 				c0 = pdf_array_get_int(ctx, widths, i);
1133 				obj = pdf_array_get(ctx, widths, i + 1);
1134 				if (pdf_is_array(ctx, obj))
1135 				{
1136 					m = pdf_array_len(ctx, obj);
1137 					for (k = 0; k < m; k++)
1138 					{
1139 						w = pdf_array_get_int(ctx, obj, k);
1140 						pdf_add_hmtx(ctx, fontdesc, c0 + k, c0 + k, w);
1141 					}
1142 					i += 2;
1143 				}
1144 				else
1145 				{
1146 					c1 = pdf_to_int(ctx, obj);
1147 					w = pdf_array_get_int(ctx, widths, i + 2);
1148 					pdf_add_hmtx(ctx, fontdesc, c0, c1, w);
1149 					i += 3;
1150 				}
1151 			}
1152 		}
1153 
1154 		pdf_end_hmtx(ctx, fontdesc);
1155 
1156 		/* Vertical */
1157 
1158 		if (pdf_cmap_wmode(ctx, fontdesc->encoding) == 1)
1159 		{
1160 			int dw2y = 880;
1161 			int dw2w = -1000;
1162 
1163 			obj = pdf_dict_get(ctx, dict, PDF_NAME(DW2));
1164 			if (obj)
1165 			{
1166 				dw2y = pdf_array_get_int(ctx, obj, 0);
1167 				dw2w = pdf_array_get_int(ctx, obj, 1);
1168 			}
1169 
1170 			pdf_set_default_vmtx(ctx, fontdesc, dw2y, dw2w);
1171 
1172 			widths = pdf_dict_get(ctx, dict, PDF_NAME(W2));
1173 			if (widths)
1174 			{
1175 				int c0, c1, w, x, y, n;
1176 
1177 				n = pdf_array_len(ctx, widths);
1178 				for (i = 0; i < n; )
1179 				{
1180 					c0 = pdf_array_get_int(ctx, widths, i);
1181 					obj = pdf_array_get(ctx, widths, i + 1);
1182 					if (pdf_is_array(ctx, obj))
1183 					{
1184 						int m = pdf_array_len(ctx, obj);
1185 						for (k = 0; k * 3 < m; k ++)
1186 						{
1187 							w = pdf_array_get_int(ctx, obj, k * 3 + 0);
1188 							x = pdf_array_get_int(ctx, obj, k * 3 + 1);
1189 							y = pdf_array_get_int(ctx, obj, k * 3 + 2);
1190 							pdf_add_vmtx(ctx, fontdesc, c0 + k, c0 + k, x, y, w);
1191 						}
1192 						i += 2;
1193 					}
1194 					else
1195 					{
1196 						c1 = pdf_to_int(ctx, obj);
1197 						w = pdf_array_get_int(ctx, widths, i + 2);
1198 						x = pdf_array_get_int(ctx, widths, i + 3);
1199 						y = pdf_array_get_int(ctx, widths, i + 4);
1200 						pdf_add_vmtx(ctx, fontdesc, c0, c1, x, y, w);
1201 						i += 5;
1202 					}
1203 				}
1204 			}
1205 
1206 			pdf_end_vmtx(ctx, fontdesc);
1207 		}
1208 	}
1209 	fz_always(ctx)
1210 		fz_drop_buffer(ctx, buf);
1211 	fz_catch(ctx)
1212 	{
1213 		pdf_drop_font(ctx, fontdesc);
1214 		fz_rethrow(ctx);
1215 	}
1216 
1217 	return fontdesc;
1218 }
1219 
1220 static pdf_font_desc *
pdf_load_type0_font(fz_context * ctx,pdf_document * doc,pdf_obj * dict)1221 pdf_load_type0_font(fz_context *ctx, pdf_document *doc, pdf_obj *dict)
1222 {
1223 	pdf_obj *dfonts;
1224 	pdf_obj *dfont;
1225 	pdf_obj *subtype;
1226 	pdf_obj *encoding;
1227 	pdf_obj *to_unicode;
1228 
1229 	dfonts = pdf_dict_get(ctx, dict, PDF_NAME(DescendantFonts));
1230 	if (!dfonts)
1231 		fz_throw(ctx, FZ_ERROR_SYNTAX, "cid font is missing descendant fonts");
1232 
1233 	dfont = pdf_array_get(ctx, dfonts, 0);
1234 
1235 	subtype = pdf_dict_get(ctx, dfont, PDF_NAME(Subtype));
1236 	encoding = pdf_dict_get(ctx, dict, PDF_NAME(Encoding));
1237 	to_unicode = pdf_dict_get(ctx, dict, PDF_NAME(ToUnicode));
1238 
1239 	if (pdf_is_name(ctx, subtype) && pdf_name_eq(ctx, subtype, PDF_NAME(CIDFontType0)))
1240 		return load_cid_font(ctx, doc, dfont, encoding, to_unicode);
1241 	if (pdf_is_name(ctx, subtype) && pdf_name_eq(ctx, subtype, PDF_NAME(CIDFontType2)))
1242 		return load_cid_font(ctx, doc, dfont, encoding, to_unicode);
1243 	fz_throw(ctx, FZ_ERROR_SYNTAX, "unknown cid font type");
1244 }
1245 
1246 /*
1247  * FontDescriptor
1248  */
1249 
1250 static void
pdf_load_font_descriptor(fz_context * ctx,pdf_document * doc,pdf_font_desc * fontdesc,pdf_obj * dict,const char * collection,const char * basefont,int iscidfont)1251 pdf_load_font_descriptor(fz_context *ctx, pdf_document *doc, pdf_font_desc *fontdesc, pdf_obj *dict,
1252 	const char *collection, const char *basefont, int iscidfont)
1253 {
1254 	pdf_obj *obj1, *obj2, *obj3, *obj;
1255 	const char *fontname;
1256 	FT_Face face;
1257 
1258 	/* Prefer BaseFont; don't bother with FontName */
1259 	fontname = basefont;
1260 
1261 	fontdesc->flags = pdf_dict_get_int(ctx, dict, PDF_NAME(Flags));
1262 	fontdesc->italic_angle = pdf_dict_get_real(ctx, dict, PDF_NAME(ItalicAngle));
1263 	fontdesc->ascent = pdf_dict_get_real(ctx, dict, PDF_NAME(Ascent));
1264 	fontdesc->descent = pdf_dict_get_real(ctx, dict, PDF_NAME(Descent));
1265 	fontdesc->cap_height = pdf_dict_get_real(ctx, dict, PDF_NAME(CapHeight));
1266 	fontdesc->x_height = pdf_dict_get_real(ctx, dict, PDF_NAME(XHeight));
1267 	fontdesc->missing_width = pdf_dict_get_real(ctx, dict, PDF_NAME(MissingWidth));
1268 
1269 	obj1 = pdf_dict_get(ctx, dict, PDF_NAME(FontFile));
1270 	obj2 = pdf_dict_get(ctx, dict, PDF_NAME(FontFile2));
1271 	obj3 = pdf_dict_get(ctx, dict, PDF_NAME(FontFile3));
1272 	obj = obj1 ? obj1 : obj2 ? obj2 : obj3;
1273 
1274 	if (pdf_is_indirect(ctx, obj))
1275 	{
1276 		fz_try(ctx)
1277 		{
1278 			pdf_load_embedded_font(ctx, doc, fontdesc, fontname, obj);
1279 		}
1280 		fz_catch(ctx)
1281 		{
1282 			fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
1283 			fz_warn(ctx, "ignored error when loading embedded font; attempting to load system font");
1284 			if (!iscidfont && fontname != pdf_clean_font_name(fontname))
1285 				pdf_load_builtin_font(ctx, fontdesc, fontname, 1);
1286 			else
1287 				pdf_load_system_font(ctx, fontdesc, fontname, collection);
1288 		}
1289 	}
1290 	else
1291 	{
1292 		if (!iscidfont && fontname != pdf_clean_font_name(fontname))
1293 			pdf_load_builtin_font(ctx, fontdesc, fontname, 1);
1294 		else
1295 			pdf_load_system_font(ctx, fontdesc, fontname, collection);
1296 	}
1297 
1298 	/* Check for DynaLab fonts that must use hinting */
1299 	face = fontdesc->font->ft_face;
1300 	if (ft_kind(face) == TRUETYPE)
1301 	{
1302 		/* FreeType's own 'tricky' font detection needs a bit of help */
1303 		if (is_dynalab(fontdesc->font->name))
1304 			face->face_flags |= FT_FACE_FLAG_TRICKY;
1305 
1306 		if (fontdesc->ascent == 0.0f)
1307 			fontdesc->ascent = 1000.0f * face->ascender / face->units_per_EM;
1308 
1309 		if (fontdesc->descent == 0.0f)
1310 			fontdesc->descent = 1000.0f * face->descender / face->units_per_EM;
1311 	}
1312 }
1313 
1314 static void
pdf_make_width_table(fz_context * ctx,pdf_font_desc * fontdesc)1315 pdf_make_width_table(fz_context *ctx, pdf_font_desc *fontdesc)
1316 {
1317 	fz_font *font = fontdesc->font;
1318 	int i, k, n, cid, gid;
1319 
1320 	n = 0;
1321 	for (i = 0; i < fontdesc->hmtx_len; i++)
1322 	{
1323 		for (k = fontdesc->hmtx[i].lo; k <= fontdesc->hmtx[i].hi; k++)
1324 		{
1325 			cid = pdf_lookup_cmap(fontdesc->encoding, k);
1326 			gid = pdf_font_cid_to_gid(ctx, fontdesc, cid);
1327 			if (gid > n)
1328 				n = gid;
1329 		}
1330 	}
1331 
1332 	font->width_count = n + 1;
1333 	font->width_table = Memento_label(fz_malloc_array(ctx, font->width_count, short), "font_widths");
1334 	fontdesc->size += font->width_count * sizeof(short);
1335 
1336 	font->width_default = fontdesc->dhmtx.w;
1337 	for (i = 0; i < font->width_count; i++)
1338 		font->width_table[i] = -1;
1339 
1340 	for (i = 0; i < fontdesc->hmtx_len; i++)
1341 	{
1342 		for (k = fontdesc->hmtx[i].lo; k <= fontdesc->hmtx[i].hi; k++)
1343 		{
1344 			cid = pdf_lookup_cmap(fontdesc->encoding, k);
1345 			gid = pdf_font_cid_to_gid(ctx, fontdesc, cid);
1346 			if (gid >= 0 && gid < font->width_count)
1347 				font->width_table[gid] = fz_maxi(fontdesc->hmtx[i].w, font->width_table[gid]);
1348 		}
1349 	}
1350 
1351 	for (i = 0; i < font->width_count; i++)
1352 		if (font->width_table[i] == -1)
1353 			font->width_table[i] = font->width_default;
1354 }
1355 
1356 pdf_font_desc *
pdf_load_font(fz_context * ctx,pdf_document * doc,pdf_obj * rdb,pdf_obj * dict)1357 pdf_load_font(fz_context *ctx, pdf_document *doc, pdf_obj *rdb, pdf_obj *dict)
1358 {
1359 	pdf_obj *subtype;
1360 	pdf_obj *dfonts;
1361 	pdf_obj *charprocs;
1362 	pdf_font_desc *fontdesc = NULL;
1363 	int type3 = 0;
1364 
1365 	if (pdf_obj_marked(ctx, dict))
1366 		fz_throw(ctx, FZ_ERROR_SYNTAX, "Recursive Type3 font definition.");
1367 
1368 	if ((fontdesc = pdf_find_item(ctx, pdf_drop_font_imp, dict)) != NULL)
1369 	{
1370 		return fontdesc;
1371 	}
1372 
1373 	subtype = pdf_dict_get(ctx, dict, PDF_NAME(Subtype));
1374 	dfonts = pdf_dict_get(ctx, dict, PDF_NAME(DescendantFonts));
1375 	charprocs = pdf_dict_get(ctx, dict, PDF_NAME(CharProcs));
1376 
1377 	if (pdf_name_eq(ctx, subtype, PDF_NAME(Type0)))
1378 		fontdesc = pdf_load_type0_font(ctx, doc, dict);
1379 	else if (pdf_name_eq(ctx, subtype, PDF_NAME(Type1)))
1380 		fontdesc = pdf_load_simple_font(ctx, doc, dict);
1381 	else if (pdf_name_eq(ctx, subtype, PDF_NAME(MMType1)))
1382 		fontdesc = pdf_load_simple_font(ctx, doc, dict);
1383 	else if (pdf_name_eq(ctx, subtype, PDF_NAME(TrueType)))
1384 		fontdesc = pdf_load_simple_font(ctx, doc, dict);
1385 	else if (pdf_name_eq(ctx, subtype, PDF_NAME(Type3)))
1386 	{
1387 		fontdesc = pdf_load_type3_font(ctx, doc, rdb, dict);
1388 		type3 = 1;
1389 	}
1390 	else if (charprocs)
1391 	{
1392 		fz_warn(ctx, "unknown font format, guessing type3.");
1393 		fontdesc = pdf_load_type3_font(ctx, doc, rdb, dict);
1394 		type3 = 1;
1395 	}
1396 	else if (dfonts)
1397 	{
1398 		fz_warn(ctx, "unknown font format, guessing type0.");
1399 		fontdesc = pdf_load_type0_font(ctx, doc, dict);
1400 	}
1401 	else
1402 	{
1403 		fz_warn(ctx, "unknown font format, guessing type1 or truetype.");
1404 		fontdesc = pdf_load_simple_font(ctx, doc, dict);
1405 	}
1406 
1407 	pdf_mark_obj(ctx, dict);
1408 	fz_try(ctx)
1409 	{
1410 		/* Create glyph width table for stretching substitute fonts and text extraction. */
1411 		pdf_make_width_table(ctx, fontdesc);
1412 
1413 		/* Load CharProcs */
1414 		if (type3)
1415 			pdf_load_type3_glyphs(ctx, doc, fontdesc);
1416 
1417 		pdf_store_item(ctx, dict, fontdesc, fontdesc->size);
1418 	}
1419 	fz_always(ctx)
1420 		pdf_unmark_obj(ctx, dict);
1421 	fz_catch(ctx)
1422 	{
1423 		pdf_drop_font(ctx, fontdesc);
1424 		fz_rethrow(ctx);
1425 	}
1426 
1427 	return fontdesc;
1428 }
1429 
1430 void
pdf_print_font(fz_context * ctx,fz_output * out,pdf_font_desc * fontdesc)1431 pdf_print_font(fz_context *ctx, fz_output *out, pdf_font_desc *fontdesc)
1432 {
1433 	int i;
1434 
1435 	fz_write_printf(ctx, out, "fontdesc {\n");
1436 
1437 	if (fontdesc->font->ft_face)
1438 		fz_write_printf(ctx, out, "\tfreetype font\n");
1439 	if (fontdesc->font->t3procs)
1440 		fz_write_printf(ctx, out, "\ttype3 font\n");
1441 
1442 	fz_write_printf(ctx, out, "\twmode %d\n", fontdesc->wmode);
1443 	fz_write_printf(ctx, out, "\tDW %d\n", fontdesc->dhmtx.w);
1444 
1445 	fz_write_printf(ctx, out, "\tW {\n");
1446 	for (i = 0; i < fontdesc->hmtx_len; i++)
1447 		fz_write_printf(ctx, out, "\t\t<%04x> <%04x> %d\n",
1448 			fontdesc->hmtx[i].lo, fontdesc->hmtx[i].hi, fontdesc->hmtx[i].w);
1449 	fz_write_printf(ctx, out, "\t}\n");
1450 
1451 	if (fontdesc->wmode)
1452 	{
1453 		fz_write_printf(ctx, out, "\tDW2 [%d %d]\n", fontdesc->dvmtx.y, fontdesc->dvmtx.w);
1454 		fz_write_printf(ctx, out, "\tW2 {\n");
1455 		for (i = 0; i < fontdesc->vmtx_len; i++)
1456 			fz_write_printf(ctx, out, "\t\t<%04x> <%04x> %d %d %d\n", fontdesc->vmtx[i].lo, fontdesc->vmtx[i].hi,
1457 				fontdesc->vmtx[i].x, fontdesc->vmtx[i].y, fontdesc->vmtx[i].w);
1458 		fz_write_printf(ctx, out, "\t}\n");
1459 	}
1460 }
1461