1 #include <iconv.h>
2 #include <errno.h>
3 #include "find-font.h"
4 
/*
 * One row of the Macintosh-platform lookup table: associates an
 * (encoding ID, language ID) pair with the charset name used to decode
 * strings tagged with that pair.
 *
 * The ID fields are uint16_t so they have the same type as the arguments
 * get_mac_code() compares them against.
 */
struct mac_font_mapping {
	uint16_t encoding_id;  /* TT_MAC_ID_* value */
	uint16_t language_id;  /* TT_MAC_LANGID_* value, or TT_MAC_LANGID_ANY */
	const char *code_page; /* charset name handed to iconv_open() */
};
10 
11 #define TT_MAC_LANGID_ANY 0xFFFF
12 
13 static const struct mac_font_mapping mac_codes[] = {
14 	{TT_MAC_ID_ROMAN, TT_MAC_LANGID_ENGLISH, "macintosh"},
15 	{TT_MAC_ID_ROMAN, TT_MAC_LANGID_ICELANDIC, "x-mac-icelandic"},
16 	{TT_MAC_ID_ROMAN, TT_MAC_LANGID_TURKISH, "x-mac-ce"},
17 	{TT_MAC_ID_ROMAN, TT_MAC_LANGID_POLISH, "x-mac-ce"},
18 	{TT_MAC_ID_ROMAN, TT_MAC_LANGID_ROMANIAN, "x-mac-romanian"},
19 	{TT_MAC_ID_ROMAN, TT_MAC_LANGID_CZECH, "x-mac-ce"},
20 	{TT_MAC_ID_ROMAN, TT_MAC_LANGID_SLOVAK, "x-mac-ce"},
21 	{TT_MAC_ID_ROMAN, TT_MAC_LANGID_ANY, "macintosh"},
22 	{TT_MAC_ID_JAPANESE, TT_MAC_LANGID_JAPANESE, "Shift_JIS"},
23 	{TT_MAC_ID_JAPANESE, TT_MAC_LANGID_ANY, "Shift_JIS"},
24 	{TT_MAC_ID_KOREAN, TT_MAC_LANGID_KOREAN, "EUC-KR"},
25 	{TT_MAC_ID_KOREAN, TT_MAC_LANGID_ANY, "EUC-KR"},
26 	{TT_MAC_ID_ARABIC, TT_MAC_LANGID_ARABIC, "x-mac-arabic"},
27 	{TT_MAC_ID_ARABIC, TT_MAC_LANGID_URDU, "x-mac-farsi"},
28 	{TT_MAC_ID_ARABIC, TT_MAC_LANGID_FARSI, "x-mac-farsi"},
29 	{TT_MAC_ID_ARABIC, TT_MAC_LANGID_ANY, "x-mac-arabic"},
30 	{TT_MAC_ID_HEBREW, TT_MAC_LANGID_HEBREW, "x-mac-hebrew"},
31 	{TT_MAC_ID_HEBREW, TT_MAC_LANGID_ANY, "x-mac-hebrew"},
32 	{TT_MAC_ID_GREEK, TT_MAC_LANGID_ANY, "x-mac-greek"},
33 	{TT_MAC_ID_RUSSIAN, TT_MAC_LANGID_ANY, "x-mac-cyrillic"},
34 	{TT_MAC_ID_DEVANAGARI, TT_MAC_LANGID_ANY, "x-mac-devanagari"},
35 	{TT_MAC_ID_GURMUKHI, TT_MAC_LANGID_ANY, "x-mac-gurmukhi"},
36 	{TT_MAC_ID_GUJARATI, TT_MAC_LANGID_ANY, "x-mac-gujarati"},
37 	{TT_MAC_ID_TRADITIONAL_CHINESE, TT_MAC_LANGID_CHINESE_SIMPLIFIED,
38 	 "Big5"},
39 	{TT_MAC_ID_TRADITIONAL_CHINESE, TT_MAC_LANGID_ANY, "Big5"},
40 	{TT_MAC_ID_SIMPLIFIED_CHINESE, TT_MAC_LANGID_CHINESE_SIMPLIFIED,
41 	 "GB2312"},
42 	{TT_MAC_ID_SIMPLIFIED_CHINESE, TT_MAC_LANGID_ANY, "GB2312"}};
43 
/*
 * Charset names for the ISO platform, indexed by the name record's
 * encoding_id (see get_code_page_for_font()). A NULL slot means no charset
 * is mapped for that ID, so the lookup yields NULL.
 */
const char *iso_codes[] = {
	[0] = "us-ascii",
	[1] = NULL,
	[2] = "iso-8859-1",
};
45 
/*
 * Charset names for the Microsoft platform, indexed by the name record's
 * encoding_id (see get_code_page_for_font()). A NULL slot means no charset
 * is mapped for that ID, so the lookup yields NULL.
 */
const char *ms_codes[] = {
	[0] = "UTF-16BE",
	[1] = "UTF-16BE",
	[2] = "Shift_JIS",
	[3] = NULL,
	[4] = "Big5",
	[5] = NULL,
	[6] = NULL,
	[7] = NULL,
	[8] = NULL,
	[9] = NULL,
	[10] = "UTF-16BE",
};
49 
50 static const size_t mac_code_count = sizeof(mac_codes) / sizeof(mac_codes[0]);
51 static const size_t iso_code_count = sizeof(iso_codes) / sizeof(iso_codes[0]);
52 static const size_t ms_code_count = sizeof(ms_codes) / sizeof(ms_codes[0]);
53 
get_mac_code(uint16_t encoding_id,uint16_t language_id)54 static const char *get_mac_code(uint16_t encoding_id, uint16_t language_id)
55 {
56 	for (size_t i = 0; i < mac_code_count; i++) {
57 		const struct mac_font_mapping *mac_code = &mac_codes[i];
58 
59 		if (mac_code->encoding_id == encoding_id &&
60 		    mac_code->language_id == language_id)
61 			return mac_code->code_page;
62 	}
63 
64 	return NULL;
65 }
66 
get_code_page_for_font(uint16_t platform_id,uint16_t encoding_id,uint16_t language_id)67 static const char *get_code_page_for_font(uint16_t platform_id,
68 					  uint16_t encoding_id,
69 					  uint16_t language_id)
70 {
71 	const char *ret;
72 
73 	switch (platform_id) {
74 	case TT_PLATFORM_APPLE_UNICODE:
75 		return "UTF-16BE";
76 	case TT_PLATFORM_MACINTOSH:
77 		ret = get_mac_code(encoding_id, language_id);
78 		if (!ret)
79 			ret = get_mac_code(encoding_id, TT_MAC_LANGID_ANY);
80 		return ret;
81 	case TT_PLATFORM_ISO:
82 		if (encoding_id < iso_code_count)
83 			return iso_codes[encoding_id];
84 		break;
85 	case TT_PLATFORM_MICROSOFT:
86 		if (encoding_id < ms_code_count)
87 			return ms_codes[encoding_id];
88 		break;
89 	}
90 
91 	return NULL;
92 }
93 
/*
 * Converts the string of an SFNT name record to NUL-terminated UTF-8.
 *
 * The source charset is selected from the record's platform, encoding and
 * language IDs via get_code_page_for_font(); the bytes are then converted
 * with iconv into a fixed 256-byte scratch buffer.
 *
 * Returns a copy made with bstrdup() that the caller owns (presumably to be
 * released with the matching bfree() -- confirm against the util headers),
 * or NULL when:
 *  - no charset is known for the record's IDs,
 *  - iconv_open() does not support the charset, or
 *  - the conversion fails, which includes a result that does not fit in the
 *    255 usable bytes of the scratch buffer.
 */
char *sfnt_name_to_utf8(FT_SfntName *sfnt_name)
{
	const char *charset = get_code_page_for_font(sfnt_name->platform_id,
						     sfnt_name->encoding_id,
						     sfnt_name->language_id);
	char utf8[256];
	char *conv_in, *conv_out;
	size_t in_len, out_len;

	if (!charset) {
		blog(LOG_DEBUG,
		     "invalid character set found, "
		     "platform_id: %d, encoding_id: %d, "
		     "language_id: %d",
		     sfnt_name->platform_id, sfnt_name->encoding_id,
		     sfnt_name->language_id);
		return NULL;
	}

	iconv_t ic = iconv_open("UTF-8", charset);
	if (ic == (iconv_t)-1) {
		blog(LOG_DEBUG,
		     "couldn't intialize font code page "
		     "conversion:  '%s' to 'utf-8': errno = %d",
		     charset, (int)errno);
		return NULL;
	}

	conv_in = (char *)sfnt_name->string;
	conv_out = utf8;
	in_len = sfnt_name->string_len;
	/*
	 * Keep the last byte of utf8 out of iconv's reach: conv_out is left
	 * pointing one past the converted data, and the terminator is stored
	 * there below. Offering the full buffer would let that store land one
	 * byte past the end of the array.
	 */
	out_len = sizeof(utf8) - 1;

	size_t n = iconv(ic, &conv_in, &in_len, &conv_out, &out_len);
	if (n == (size_t)-1) {
		blog(LOG_WARNING, "couldn't convert font name text: errno = %d",
		     (int)errno);
		iconv_close(ic);
		return NULL;
	}

	iconv_close(ic);
	*conv_out = 0;
	return bstrdup(utf8);
}
139