1 /* font-manager-orthography.c
2 *
3 * Copyright (C) 2009 - 2021 Jerry Casiano
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program.
17 *
18 * If not, see <http://www.gnu.org/licenses/gpl-3.0.txt>.
19 */
20
21 #include "font-manager-orthography.h"
22
23 /**
24 * SECTION: font-manager-orthography
25 * @short_description: Font language support
26 * @title: Orthography
27 * @include: font-manager-orthography.h
28 *
29 * A #FontManagerOrthography holds information about the extent to which a
30 * font supports a particular language.
31 *
 * In addition to the English name, it includes the untranslated (native) name of the
 * orthography along with a pangram or sample string for the language, if available.
34 */
35
/* Number of entries in each orthography table. */
#define N_ARABIC G_N_ELEMENTS(ArabicOrthographies)
#define N_CHINESE G_N_ELEMENTS(ChineseOrthographies)
#define N_GREEK G_N_ELEMENTS(GreekOrthographies)
#define N_JAPANESE G_N_ELEMENTS(JapaneseOrthographies)
#define N_KOREAN G_N_ELEMENTS(KoreanOrthographies)
#define N_LATIN G_N_ELEMENTS(LatinOrthographies)
#define N_MISC G_N_ELEMENTS(UncategorizedOrthographies)

/*
 * Accessors for a JsonNode passed around as an untyped pointer.
 *
 * Every expansion and every argument is fully parenthesised so the macros
 * behave as single operands when embedded in a larger expression.
 *
 * GET_OBJECT:   the JsonObject held by node n
 * HAS_COVERAGE: TRUE if n holds an object that has a "coverage" member
 * GET_COVERAGE: the "coverage" member of n as a double, 0.0 if unavailable
 * LEN_CHARSET:  number of elements in the "filter" array member of n
 */
#define GET_OBJECT(n) (json_node_get_object((JsonNode *) (n)))
#define HAS_COVERAGE(n) (JSON_NODE_HOLDS_OBJECT((JsonNode *) (n)) && json_object_has_member(GET_OBJECT(n), "coverage"))
#define GET_COVERAGE(n) (HAS_COVERAGE(n) ? json_object_get_double_member(GET_OBJECT(n), "coverage") : 0.0)
#define LEN_CHARSET(n) (json_array_get_length(json_object_get_array_member(GET_OBJECT(n), "filter")))
48
49 static GList *
charset_to_list(const FcCharSet * charset)50 charset_to_list (const FcCharSet *charset)
51 {
52 GList *result = NULL;
53 FcChar32 ucs4, pos;
54 FcChar32 map[FC_CHARSET_MAP_SIZE];
55
56 for (ucs4 = FcCharSetFirstPage (charset, map, &pos);
57 ucs4 != FC_CHARSET_DONE;
58 ucs4 = FcCharSetNextPage (charset, map, &pos)) {
59
60 for (int i = 0; i < FC_CHARSET_MAP_SIZE; i++) {
61 int b = 0;
62 FcChar32 bits = map[i];
63 FcChar32 base = ucs4 + i * 32;
64 while (bits) {
65 if (bits & 1) {
66 gunichar ch = (base + b);
67 if (unicode_unichar_isgraph(ch))
68 result = g_list_prepend(result, GINT_TO_POINTER(ch));
69 }
70 bits >>= 1;
71 b++;
72 }
73 }
74
75 }
76
77 return g_list_reverse(result);
78 }
79
80 static JsonArray *
charset_to_json_array(const FcCharSet * charset)81 charset_to_json_array (const FcCharSet *charset)
82 {
83 JsonArray *result = json_array_new();
84 FcChar32 ucs4, pos;
85 FcChar32 map[FC_CHARSET_MAP_SIZE];
86
87 for (ucs4 = FcCharSetFirstPage (charset, map, &pos);
88 ucs4 != FC_CHARSET_DONE;
89 ucs4 = FcCharSetNextPage (charset, map, &pos)) {
90
91 for (int i = 0; i < FC_CHARSET_MAP_SIZE; i++) {
92 int b = 0;
93 FcChar32 bits = map[i];
94 FcChar32 base = ucs4 + i * 32;
95 while (bits) {
96 if (bits & 1) {
97 gunichar ch = (base + b);
98 if (unicode_unichar_isgraph(ch))
99 json_array_add_int_element(result, ch);
100 }
101 bits >>= 1;
102 b++;
103 }
104 }
105
106 }
107
108 return result;
109 }
110
111 static FcCharSet *
get_fccharset_from_filepath(const gchar * filepath,int index)112 get_fccharset_from_filepath (const gchar *filepath, int index)
113 {
114 FT_Face face;
115 FT_Library library;
116 FT_Error error;
117
118 gsize filesize = 0;
119 g_autofree gchar *font = NULL;
120
121 FcCharSet *result = NULL;
122
123 if (G_UNLIKELY(!g_file_get_contents(filepath, &font, &filesize, NULL))) {
124 return result;
125 }
126
127 error = FT_Init_FreeType(&library);
128 if (G_UNLIKELY(error)) {
129 return result;
130 }
131
132 error = FT_New_Memory_Face(library, (const FT_Byte *) font,
133 (FT_Long) filesize, index, &face);
134
135 if (G_UNLIKELY(error)) {
136 return result;
137 }
138
139 FcBlanks *blanks = FcBlanksCreate();
140 result = FcFreeTypeCharSet(face, blanks);
141 FT_Done_Face(face);
142 FT_Done_FreeType(library);
143 FcBlanksDestroy(blanks);
144 return result;
145 }
146
147 static FcCharSet *
get_fccharset_from_font_object(JsonObject * font)148 get_fccharset_from_font_object (JsonObject *font)
149 {
150 int result = -1, index = json_object_get_int_member(font, "findex");
151 const gchar *filepath = json_object_get_string_member(font, "filepath");
152 FcPattern *pattern = FcPatternBuild(NULL,
153 FC_FILE, FcTypeString, filepath,
154 FC_INDEX, FcTypeInteger, index,
155 NULL);
156 FcObjectSet *objectset = FcObjectSetBuild(FC_CHARSET, NULL);
157 FcFontSet *fontset = FcFontList(NULL, pattern, objectset);
158 FcCharSet *charset = NULL;
159 if (fontset->nfont > 0)
160 result = FcPatternGetCharSet(fontset->fonts[0], FC_CHARSET, 0, &charset);
161 FcObjectSetDestroy(objectset);
162 FcPatternDestroy(pattern);
163 FcFontSetDestroy(fontset);
164 return result == FcResultMatch ? charset : get_fccharset_from_filepath(filepath, index);
165 }
166
167 static gint
sort_by_charset_size(gconstpointer a,gconstpointer b)168 sort_by_charset_size (gconstpointer a, gconstpointer b)
169 {
170 /* Using variables to avoid unused value warning */
171 gint len_a = LEN_CHARSET(a);
172 gint len_b = LEN_CHARSET(b);
173 return len_a - len_b;
174 }
175
176 static gint
sort_by_coverage(gconstpointer a,gconstpointer b)177 sort_by_coverage (gconstpointer a, gconstpointer b)
178 {
179 gint order = (int) GET_COVERAGE(a) - GET_COVERAGE(b);
180 return order != 0 ? order : sort_by_charset_size(a, b);
181 }
182
183 static gchar *
get_sample_from_charlist(GList * charset)184 get_sample_from_charlist (GList *charset)
185 {
186 GString *res = g_string_new(NULL);
187 guint length = g_list_length(charset);
188 if (length > 0)
189 for (int i = 0; i < 24; i++) {
190 int rand = g_random_int_range(0, length);
191 gunichar ch = GPOINTER_TO_INT(g_list_nth_data(charset, rand));
192 g_string_append_unichar(res, ch);
193 }
194 return g_string_free(res, FALSE);
195 }
196
197 static gchar *
get_sample_from_charset(FcCharSet * charset)198 get_sample_from_charset (FcCharSet *charset)
199 {
200 GList *charlist = charset_to_list(charset);
201 gchar *res = get_sample_from_charlist(charlist);
202 g_list_free(charlist);
203 return res;
204 }
205
206 static JsonObject *
get_default_orthography(JsonObject * orthography)207 get_default_orthography (JsonObject *orthography)
208 {
209 GList *orthographies = json_object_get_values(orthography);
210 JsonObject *res = NULL;
211 if (g_list_length(orthographies) > 0) {
212 orthographies = g_list_sort(orthographies, sort_by_coverage);
213 res = json_node_get_object(g_list_nth_data(orthographies, 0));
214 }
215 g_list_free(orthographies);
216 return res;
217 }
218
219 static double
get_coverage_from_charset(JsonObject * results,FcCharSet * charset,const FontManagerOrthographyData * data)220 get_coverage_from_charset (JsonObject *results,
221 FcCharSet *charset,
222 const FontManagerOrthographyData *data)
223 {
224 int hits = 0, tries = 0;
225 JsonArray *filter = NULL;
226
227 /* If it doesn't contain key there's no point in going further */
228 if (!FcCharSetHasChar(charset, data->key))
229 return 0;
230
231 if (results)
232 filter = json_array_new();
233
234 for (int i = 0; data->values[i] != FONT_MANAGER_END_OF_DATA; i++) {
235
236 if (data->values[i] == FONT_MANAGER_START_RANGE_PAIR) {
237
238 gunichar start = data->values[++i];
239 gunichar end = data->values[++i];
240
241 for (gunichar ch = start; ch <= end; ch++) {
242 tries++;
243 if (FcCharSetHasChar(charset, ch))
244 hits++;
245 if (results)
246 json_array_add_int_element(filter, (int) ch);
247 }
248
249 } else {
250
251 tries++;
252 if (FcCharSetHasChar(charset, data->values[i]))
253 hits++;
254 if (results)
255 json_array_add_int_element(filter, (int) data->values[i]);
256
257 }
258
259 }
260
261 if (results)
262 json_object_set_array_member(results, "filter", filter);
263
264 return ((double) 100 * hits/tries );
265 }
266
267 static gboolean
check_orthography(JsonObject * results,FcCharSet * charset,const FontManagerOrthographyData * data)268 check_orthography (JsonObject *results,
269 FcCharSet *charset,
270 const FontManagerOrthographyData *data)
271 {
272 g_autoptr(JsonObject) res = NULL;
273 if (results)
274 res = json_object_new();
275 double coverage = get_coverage_from_charset(res, charset, data);
276 if (coverage == 0)
277 return FALSE;
278 if (!results)
279 return TRUE;
280 json_object_set_string_member(res, "name", data->name);
281 json_object_set_string_member(res, "native", data->native);
282 json_object_set_string_member(res, "sample", data->sample);
283 json_object_set_double_member(res, "coverage", coverage);
284 json_object_set_object_member(results, data->name, json_object_ref(res));
285 return TRUE;
286 }
287
288 static gboolean
charlist_contains_sample_string(GList * charlist,const char * sample)289 charlist_contains_sample_string (GList *charlist, const char *sample)
290 {
291 const char *p = sample;
292 while (*p) {
293 gunichar ch = g_utf8_get_char(p);
294 if (!g_list_find(charlist, GINT_TO_POINTER(ch)))
295 return FALSE;
296 p = g_utf8_next_char(p);
297 }
298 return TRUE;
299 }
300
301 static gboolean
charset_contains_sample_string(const FcCharSet * charset,const char * sample)302 charset_contains_sample_string (const FcCharSet *charset, const char *sample)
303 {
304 const char *p = sample;
305 while (*p) {
306 gunichar ch = g_utf8_get_char(p);
307 if (!FcCharSetHasChar(charset, ch))
308 return FALSE;
309 p = g_utf8_next_char(p);
310 }
311 return TRUE;
312 }
313
314 static void
check_orthographies(JsonObject * results,FcCharSet * charset,const FontManagerOrthographyData orth[],int len)315 check_orthographies (JsonObject *results,
316 FcCharSet *charset,
317 const FontManagerOrthographyData orth[],
318 int len)
319 {
320 for (int i = 0; i < len; i++)
321 check_orthography(results, charset, &orth[i]);
322 return;
323 }
324
325 static gchar *
get_default_sample_string_for_orthography(JsonObject * orthography)326 get_default_sample_string_for_orthography (JsonObject *orthography)
327 {
328 if (json_object_has_member(orthography, "Basic Latin")) {
329 JsonObject *latin = json_object_get_object_member(orthography, "Basic Latin");
330 if (json_object_get_double_member(latin, "coverage") > 90) {
331 PangoLanguage *xx = pango_language_from_string("xx");
332 return g_strdup(pango_language_get_sample_string(xx));
333 }
334 }
335
336 if (json_object_get_size(orthography) > 0) {
337 JsonObject *def = get_default_orthography(orthography);
338 if (def && json_object_get_double_member(def, "coverage") > 90) {
339 const gchar *sample = NULL;
340 if (json_object_has_member(orthography, "sample"))
341 sample = json_object_get_string_member(orthography, "sample");
342 if (sample != NULL && g_strcmp0(sample, "") != 0)
343 return g_strdup(sample);
344 }
345 }
346
347 return NULL;
348 }
349
350 static gchar *
font_manager_get_sample_string(JsonObject * orthography,FcCharSet * charset)351 font_manager_get_sample_string (JsonObject *orthography, FcCharSet *charset)
352 {
353 const char *local_sample = pango_language_get_sample_string(NULL);
354 if (charset_contains_sample_string(charset, local_sample))
355 return NULL;
356 gchar *sample = get_default_sample_string_for_orthography(orthography);
357 return sample ? sample : get_sample_from_charset(charset);
358 }
359
360 /**
361 * font_manager_get_orthography_results:
362 * @font: (nullable) (transfer none): #JsonObject
363 *
364 * The #JsonObject returned will have the following structure:
365 *
366 *|[
367 * {
368 * "Basic Latin": {
369 * "filter": [65, 66, ... 122],
370 * "name": "Basic Latin",
371 * "native": "Basic Latin",
372 * "sample": "AaBbCcGgQqRrSsZz",
373 * "coverage": 100.0
374 * },
375 * ...,
376 * "sample" : null
377 * }
378 *]|
379 *
380 * The returned object contains a member for each orthography detected in @font.
381 *
382 * sample will be set to %NULL if the font supports rendering the sample string returned
383 * by #font_manager_get_localized_pangram, otherwise sample will be set to the
384 * sample string from the member with the highest coverage, if that should fail then
385 * sample will be set to a string randomly generated from the characters available in @font.
386 *
387 * Returns: (nullable) (transfer full): #JsonObject containing orthography results
388 */
389 JsonObject *
font_manager_get_orthography_results(JsonObject * font)390 font_manager_get_orthography_results (JsonObject *font)
391 {
392 FcCharSet *charset = NULL;
393 JsonObject *results = json_object_new();
394
395 if (font)
396 charset = get_fccharset_from_font_object(font);
397
398 if (charset) {
399 if (check_orthography(NULL, charset, LatinOrthographies))
400 check_orthographies(results, charset, LatinOrthographies, N_LATIN);
401
402 if (check_orthography(NULL, charset, GreekOrthographies))
403 check_orthographies(results, charset, GreekOrthographies, N_GREEK);
404
405 if (check_orthography(NULL, charset, ArabicOrthographies))
406 check_orthographies(results, charset, ArabicOrthographies, N_ARABIC);
407
408 check_orthographies(results, charset, ChineseOrthographies, N_CHINESE);
409 check_orthographies(results, charset, JapaneseOrthographies, N_JAPANESE);
410 check_orthographies(results, charset, KoreanOrthographies, N_KOREAN);
411 check_orthographies(results, charset, UncategorizedOrthographies, N_MISC);
412 }
413
414 if (charset && FcCharSetCount(charset) > 0) {
415
416 if (json_object_get_size(results) == 0) {
417 JsonObject *uncategorized = json_object_new();
418 JsonArray *char_array = charset_to_json_array(charset);
419 json_object_set_string_member(uncategorized, "name", "Uncategorized");
420 json_object_set_double_member(uncategorized, "coverage", 100);
421 json_object_set_array_member(uncategorized, "filter", char_array);
422 json_object_set_object_member(results, "Uncategorized", uncategorized);
423 }
424
425 g_autofree gchar *sample = font_manager_get_sample_string(results, charset);
426 json_object_set_string_member(results, "sample", sample);
427
428 } else {
429
430 json_object_set_string_member(results, "sample", NULL);
431
432 }
433
434 return results;
435 }
436
437 /**
438 * font_manager_get_sample_string_for_orthography:
439 * @orthography: #JsonObject containing orthography results
440 * @charset: (nullable) (transfer none) (element-type uint): #GList of unichar or %NULL
441 *
442 * @orthography should be one of the members of the object returned
443 * by #font_manager_get_orthography_results()
444 *
445 * Returns: (nullable) (transfer full): a sample string for the given orthography/charset
446 * or %NULL if the systems default language is supported
447 */
448 gchar *
font_manager_get_sample_string_for_orthography(JsonObject * orthography,GList * charset)449 font_manager_get_sample_string_for_orthography (JsonObject *orthography, GList *charset)
450 {
451 const char *local_sample = pango_language_get_sample_string(NULL);
452 if (charlist_contains_sample_string(charset, local_sample))
453 return NULL;
454 gchar *sample = get_default_sample_string_for_orthography(orthography);
455 return sample ? sample : get_sample_from_charlist(charset);
456 }
457
/* Property description table used for this type; OrthographyProperties is
 * declared outside of this section (presumably in the header - confirm) */
#define PROPERTIES OrthographyProperties
#define N_PROPERTIES G_N_ELEMENTS(PROPERTIES)
/* Filled in by font_manager_json_proxy_generate_properties() in class_init */
static GParamSpec *obj_properties[N_PROPERTIES] = {0};

/* NOTE(review): the only member is a GObjectClass named parent_class, while
 * the type is registered below with FONT_MANAGER_TYPE_JSON_PROXY as its parent
 * type. Presumably a parent instance struct was intended here - confirm
 * against the type declaration in the header. */
struct _FontManagerOrthography
{
    GObjectClass parent_class;
};

/* Registers FontManagerOrthography as a subtype of FONT_MANAGER_TYPE_JSON_PROXY */
G_DEFINE_TYPE(FontManagerOrthography, font_manager_orthography, FONT_MANAGER_TYPE_JSON_PROXY)
468
469 static void
470 font_manager_orthography_class_init (FontManagerOrthographyClass *klass)
471 {
472 GObjectClass *object_class = G_OBJECT_CLASS(klass);
473 GObjectClass *parent_class = G_OBJECT_CLASS(font_manager_orthography_parent_class);
474 object_class->get_property = parent_class->get_property;
475 object_class->set_property = parent_class->set_property;
476 font_manager_json_proxy_generate_properties(obj_properties, PROPERTIES, N_PROPERTIES);
477 g_object_class_install_properties(object_class, N_PROPERTIES, obj_properties);
478 return;
479 }
480
481 static void
font_manager_orthography_init(G_GNUC_UNUSED FontManagerOrthography * self)482 font_manager_orthography_init (G_GNUC_UNUSED FontManagerOrthography *self)
483 {
484 g_return_if_fail(self != NULL);
485 }
486
487 /**
488 * font_manager_orthography_get_filter:
489 * @self: #FontManagerOrthography
490 *
491 * Returns: (element-type uint) (transfer container) (nullable): #GList containing codepoints.
492 * Free the returned #GList using #g_list_free().
493 */
494 GList *
font_manager_orthography_get_filter(FontManagerOrthography * self)495 font_manager_orthography_get_filter (FontManagerOrthography *self)
496 {
497 g_return_val_if_fail(self != NULL, NULL);
498 GList *charlist = NULL;
499 g_autoptr(JsonObject) source = NULL;
500 g_object_get(self, FONT_MANAGER_JSON_PROXY_SOURCE, &source, NULL);
501 g_return_val_if_fail(source != NULL, charlist);
502 if (json_object_has_member(source, "filter")) {
503 JsonArray *arr = json_object_get_array_member(source, "filter");
504 guint arr_length = json_array_get_length(arr);
505 for (guint index = 0; index < arr_length; index++) {
506 gunichar uc = (gunichar) json_array_get_int_element(arr, index);
507 charlist = g_list_prepend(charlist, GINT_TO_POINTER(uc));
508 }
509 charlist = g_list_reverse(charlist);
510 }
511 return charlist;
512 }
513
514 /**
515 * font_manager_orthography_new:
516 * @orthography: #JsonObject containing orthography results
517 *
518 * @orthography should be one of the members of the object returned
519 * by #font_manager_get_orthography_results()
520 *
521 * Returns: (transfer full): A newly created #FontManagerOrthography.
522 * Free the returned object using #g_object_unref().
523 */
524 FontManagerOrthography *
font_manager_orthography_new(JsonObject * orthography)525 font_manager_orthography_new (JsonObject *orthography)
526 {
527 return g_object_new(FONT_MANAGER_TYPE_ORTHOGRAPHY, FONT_MANAGER_JSON_PROXY_SOURCE, orthography, NULL);
528 }
529