1 /* font-manager-orthography.c
2  *
3  * Copyright (C) 2009 - 2021 Jerry Casiano
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program.
17  *
18  * If not, see <http://www.gnu.org/licenses/gpl-3.0.txt>.
19 */
20 
21 #include "font-manager-orthography.h"
22 
/**
 * SECTION: font-manager-orthography
 * @short_description: Font language support
 * @title: Orthography
 * @include: font-manager-orthography.h
 *
 * A #FontManagerOrthography holds information about the extent to which a
 * font supports a particular language.
 *
 * In addition to the English name, it includes the native (untranslated) name of
 * the orthography along with a pangram or sample string for the language, if available.
 */
35 
/* Number of entries in each orthography table */
#define N_ARABIC G_N_ELEMENTS(ArabicOrthographies)
#define N_CHINESE G_N_ELEMENTS(ChineseOrthographies)
#define N_GREEK G_N_ELEMENTS(GreekOrthographies)
#define N_JAPANESE G_N_ELEMENTS(JapaneseOrthographies)
#define N_KOREAN G_N_ELEMENTS(KoreanOrthographies)
#define N_LATIN G_N_ELEMENTS(LatinOrthographies)
#define N_MISC G_N_ELEMENTS(UncategorizedOrthographies)

/*
 * Helpers operating on a JsonNode passed as an untyped pointer.
 *
 * Every argument and every expansion is parenthesized so the macros can be
 * used as operands of a larger expression. Without the outer parentheses
 * GET_COVERAGE(a) - GET_COVERAGE(b) parses as one nested conditional
 * expression instead of a subtraction.
 *
 * GET_COVERAGE yields 0.0 for nodes that are not objects or that have no
 * "coverage" member. LEN_CHARSET performs no such check itself.
 */
#define GET_OBJECT(n) (json_node_get_object((JsonNode *) (n)))
#define HAS_COVERAGE(n) \
    (JSON_NODE_HOLDS_OBJECT((JsonNode *) (n)) && json_object_has_member(GET_OBJECT(n), "coverage"))
#define GET_COVERAGE(n) \
    (HAS_COVERAGE(n) ? json_object_get_double_member(GET_OBJECT(n), "coverage") : 0.0)
#define LEN_CHARSET(n) \
    (json_array_get_length(json_object_get_array_member(GET_OBJECT(n), "filter")))
48 
49 static GList *
charset_to_list(const FcCharSet * charset)50 charset_to_list (const FcCharSet *charset)
51 {
52     GList *result = NULL;
53     FcChar32  ucs4, pos;
54     FcChar32  map[FC_CHARSET_MAP_SIZE];
55 
56     for (ucs4 = FcCharSetFirstPage (charset, map, &pos);
57          ucs4 != FC_CHARSET_DONE;
58          ucs4 = FcCharSetNextPage (charset, map, &pos)) {
59 
60         for (int i = 0; i < FC_CHARSET_MAP_SIZE; i++) {
61             int b = 0;
62             FcChar32 bits = map[i];
63             FcChar32 base = ucs4 + i * 32;
64             while (bits) {
65                 if (bits & 1) {
66                     gunichar ch = (base + b);
67                     if (unicode_unichar_isgraph(ch))
68                         result = g_list_prepend(result, GINT_TO_POINTER(ch));
69                 }
70                 bits >>= 1;
71                 b++;
72             }
73         }
74 
75     }
76 
77     return g_list_reverse(result);
78 }
79 
80 static JsonArray *
charset_to_json_array(const FcCharSet * charset)81 charset_to_json_array (const FcCharSet *charset)
82 {
83     JsonArray *result = json_array_new();
84     FcChar32  ucs4, pos;
85     FcChar32  map[FC_CHARSET_MAP_SIZE];
86 
87     for (ucs4 = FcCharSetFirstPage (charset, map, &pos);
88          ucs4 != FC_CHARSET_DONE;
89          ucs4 = FcCharSetNextPage (charset, map, &pos)) {
90 
91         for (int i = 0; i < FC_CHARSET_MAP_SIZE; i++) {
92             int b = 0;
93             FcChar32 bits = map[i];
94             FcChar32 base = ucs4 + i * 32;
95             while (bits) {
96                 if (bits & 1) {
97                     gunichar ch = (base + b);
98                     if (unicode_unichar_isgraph(ch))
99                         json_array_add_int_element(result, ch);
100                 }
101                 bits >>= 1;
102                 b++;
103             }
104         }
105 
106     }
107 
108     return result;
109 }
110 
111 static FcCharSet *
get_fccharset_from_filepath(const gchar * filepath,int index)112 get_fccharset_from_filepath (const gchar *filepath, int index)
113 {
114     FT_Face         face;
115     FT_Library      library;
116     FT_Error         error;
117 
118     gsize           filesize = 0;
119     g_autofree gchar *font = NULL;
120 
121     FcCharSet *result = NULL;
122 
123     if (G_UNLIKELY(!g_file_get_contents(filepath, &font, &filesize, NULL))) {
124         return result;
125     }
126 
127     error = FT_Init_FreeType(&library);
128     if (G_UNLIKELY(error)) {
129         return result;
130     }
131 
132     error = FT_New_Memory_Face(library, (const FT_Byte *) font,
133                                (FT_Long) filesize, index, &face);
134 
135     if (G_UNLIKELY(error)) {
136         return result;
137     }
138 
139     FcBlanks *blanks = FcBlanksCreate();
140     result = FcFreeTypeCharSet(face, blanks);
141     FT_Done_Face(face);
142     FT_Done_FreeType(library);
143     FcBlanksDestroy(blanks);
144     return result;
145 }
146 
147 static FcCharSet *
get_fccharset_from_font_object(JsonObject * font)148 get_fccharset_from_font_object (JsonObject *font)
149 {
150     int result = -1, index = json_object_get_int_member(font, "findex");
151     const gchar *filepath = json_object_get_string_member(font, "filepath");
152     FcPattern *pattern = FcPatternBuild(NULL,
153                                          FC_FILE, FcTypeString, filepath,
154                                          FC_INDEX, FcTypeInteger, index,
155                                          NULL);
156     FcObjectSet *objectset = FcObjectSetBuild(FC_CHARSET, NULL);
157     FcFontSet *fontset = FcFontList(NULL, pattern, objectset);
158     FcCharSet *charset = NULL;
159     if (fontset->nfont > 0)
160         result = FcPatternGetCharSet(fontset->fonts[0], FC_CHARSET, 0, &charset);
161     FcObjectSetDestroy(objectset);
162     FcPatternDestroy(pattern);
163     FcFontSetDestroy(fontset);
164     return result == FcResultMatch ? charset : get_fccharset_from_filepath(filepath, index);
165 }
166 
167 static gint
sort_by_charset_size(gconstpointer a,gconstpointer b)168 sort_by_charset_size (gconstpointer a, gconstpointer b)
169 {
170     /* Using variables to avoid unused value warning */
171     gint len_a = LEN_CHARSET(a);
172     gint len_b = LEN_CHARSET(b);
173     return len_a - len_b;
174 }
175 
176 static gint
sort_by_coverage(gconstpointer a,gconstpointer b)177 sort_by_coverage (gconstpointer a, gconstpointer b)
178 {
179     gint order = (int) GET_COVERAGE(a) - GET_COVERAGE(b);
180     return order != 0 ? order : sort_by_charset_size(a, b);
181 }
182 
183 static gchar *
get_sample_from_charlist(GList * charset)184 get_sample_from_charlist (GList *charset)
185 {
186     GString *res = g_string_new(NULL);
187     guint length = g_list_length(charset);
188     if (length > 0)
189         for (int i = 0; i < 24; i++) {
190             int rand = g_random_int_range(0, length);
191             gunichar ch = GPOINTER_TO_INT(g_list_nth_data(charset, rand));
192             g_string_append_unichar(res, ch);
193         }
194     return g_string_free(res, FALSE);
195 }
196 
197 static gchar *
get_sample_from_charset(FcCharSet * charset)198 get_sample_from_charset (FcCharSet *charset)
199 {
200     GList *charlist = charset_to_list(charset);
201     gchar *res = get_sample_from_charlist(charlist);
202     g_list_free(charlist);
203     return res;
204 }
205 
206 static JsonObject *
get_default_orthography(JsonObject * orthography)207 get_default_orthography (JsonObject *orthography)
208 {
209     GList *orthographies = json_object_get_values(orthography);
210     JsonObject *res = NULL;
211     if (g_list_length(orthographies) > 0) {
212         orthographies = g_list_sort(orthographies, sort_by_coverage);
213         res = json_node_get_object(g_list_nth_data(orthographies, 0));
214     }
215     g_list_free(orthographies);
216     return res;
217 }
218 
219 static double
get_coverage_from_charset(JsonObject * results,FcCharSet * charset,const FontManagerOrthographyData * data)220 get_coverage_from_charset (JsonObject *results,
221                            FcCharSet *charset,
222                            const FontManagerOrthographyData *data)
223 {
224     int hits = 0, tries = 0;
225     JsonArray *filter = NULL;
226 
227     /* If it doesn't contain key there's no point in going further */
228     if (!FcCharSetHasChar(charset, data->key))
229         return 0;
230 
231     if (results)
232         filter = json_array_new();
233 
234     for (int i = 0; data->values[i] != FONT_MANAGER_END_OF_DATA; i++) {
235 
236         if (data->values[i] == FONT_MANAGER_START_RANGE_PAIR) {
237 
238             gunichar start = data->values[++i];
239             gunichar end = data->values[++i];
240 
241             for (gunichar ch = start; ch <= end; ch++) {
242                 tries++;
243                 if (FcCharSetHasChar(charset, ch))
244                     hits++;
245                 if (results)
246                     json_array_add_int_element(filter, (int) ch);
247             }
248 
249         } else {
250 
251             tries++;
252             if (FcCharSetHasChar(charset, data->values[i]))
253                 hits++;
254             if (results)
255                 json_array_add_int_element(filter, (int) data->values[i]);
256 
257         }
258 
259     }
260 
261     if (results)
262         json_object_set_array_member(results, "filter", filter);
263 
264     return ((double) 100 * hits/tries );
265 }
266 
267 static gboolean
check_orthography(JsonObject * results,FcCharSet * charset,const FontManagerOrthographyData * data)268 check_orthography (JsonObject *results,
269                    FcCharSet *charset,
270                    const FontManagerOrthographyData *data)
271 {
272     g_autoptr(JsonObject) res = NULL;
273     if (results)
274         res = json_object_new();
275     double coverage = get_coverage_from_charset(res, charset, data);
276     if (coverage == 0)
277         return FALSE;
278     if (!results)
279         return TRUE;
280     json_object_set_string_member(res, "name", data->name);
281     json_object_set_string_member(res, "native", data->native);
282     json_object_set_string_member(res, "sample", data->sample);
283     json_object_set_double_member(res, "coverage", coverage);
284     json_object_set_object_member(results, data->name, json_object_ref(res));
285     return TRUE;
286 }
287 
288 static gboolean
charlist_contains_sample_string(GList * charlist,const char * sample)289 charlist_contains_sample_string (GList *charlist, const char *sample)
290 {
291     const char *p = sample;
292     while (*p) {
293         gunichar ch = g_utf8_get_char(p);
294         if (!g_list_find(charlist, GINT_TO_POINTER(ch)))
295             return FALSE;
296         p = g_utf8_next_char(p);
297     }
298     return TRUE;
299 }
300 
301 static gboolean
charset_contains_sample_string(const FcCharSet * charset,const char * sample)302 charset_contains_sample_string (const FcCharSet *charset, const char *sample)
303 {
304     const char *p = sample;
305     while (*p) {
306         gunichar ch = g_utf8_get_char(p);
307         if (!FcCharSetHasChar(charset, ch))
308             return FALSE;
309         p = g_utf8_next_char(p);
310     }
311     return TRUE;
312 }
313 
314 static void
check_orthographies(JsonObject * results,FcCharSet * charset,const FontManagerOrthographyData orth[],int len)315 check_orthographies (JsonObject *results,
316                      FcCharSet *charset,
317                      const FontManagerOrthographyData orth[],
318                      int len)
319 {
320     for (int i = 0; i < len; i++)
321         check_orthography(results, charset, &orth[i]);
322     return;
323 }
324 
325 static gchar *
get_default_sample_string_for_orthography(JsonObject * orthography)326 get_default_sample_string_for_orthography (JsonObject *orthography)
327 {
328     if (json_object_has_member(orthography, "Basic Latin")) {
329         JsonObject *latin = json_object_get_object_member(orthography, "Basic Latin");
330         if (json_object_get_double_member(latin, "coverage") > 90) {
331             PangoLanguage *xx = pango_language_from_string("xx");
332             return g_strdup(pango_language_get_sample_string(xx));
333         }
334     }
335 
336     if (json_object_get_size(orthography) > 0) {
337         JsonObject *def = get_default_orthography(orthography);
338         if (def && json_object_get_double_member(def, "coverage") > 90) {
339             const gchar *sample = NULL;
340             if (json_object_has_member(orthography, "sample"))
341                 sample = json_object_get_string_member(orthography, "sample");
342             if (sample != NULL && g_strcmp0(sample, "") != 0)
343                 return g_strdup(sample);
344         }
345     }
346 
347     return NULL;
348 }
349 
350 static gchar *
font_manager_get_sample_string(JsonObject * orthography,FcCharSet * charset)351 font_manager_get_sample_string (JsonObject *orthography, FcCharSet *charset)
352 {
353     const char *local_sample = pango_language_get_sample_string(NULL);
354     if (charset_contains_sample_string(charset, local_sample))
355         return NULL;
356     gchar *sample = get_default_sample_string_for_orthography(orthography);
357     return sample ? sample : get_sample_from_charset(charset);
358 }
359 
360 /**
361  * font_manager_get_orthography_results:
362  * @font: (nullable) (transfer none): #JsonObject
363  *
364  * The #JsonObject returned will have the following structure:
365  *
366  *|[
367  * {
368  *   "Basic Latin": {
369  *     "filter": [65, 66, ... 122],
370  *     "name": "Basic Latin",
371  *     "native": "Basic Latin",
372  *     "sample": "AaBbCcGgQqRrSsZz",
373  *     "coverage": 100.0
374  *   },
375  *   ...,
376  *   "sample" : null
377  * }
378  *]|
379  *
380  * The returned object contains a member for each orthography detected in @font.
381  *
382  * sample will be set to %NULL if the font supports rendering the sample string returned
383  * by #font_manager_get_localized_pangram, otherwise sample will be set to the
384  * sample string from the member with the highest coverage, if that should fail then
385  * sample will be set to a string randomly generated from the characters available in @font.
386  *
387  * Returns: (nullable) (transfer full): #JsonObject containing orthography results
388  */
389 JsonObject *
font_manager_get_orthography_results(JsonObject * font)390 font_manager_get_orthography_results (JsonObject *font)
391 {
392     FcCharSet *charset = NULL;
393     JsonObject *results = json_object_new();
394 
395     if (font)
396         charset = get_fccharset_from_font_object(font);
397 
398     if (charset) {
399         if (check_orthography(NULL, charset, LatinOrthographies))
400             check_orthographies(results, charset, LatinOrthographies, N_LATIN);
401 
402         if (check_orthography(NULL, charset, GreekOrthographies))
403             check_orthographies(results, charset, GreekOrthographies, N_GREEK);
404 
405         if (check_orthography(NULL, charset, ArabicOrthographies))
406             check_orthographies(results, charset, ArabicOrthographies, N_ARABIC);
407 
408         check_orthographies(results, charset, ChineseOrthographies, N_CHINESE);
409         check_orthographies(results, charset, JapaneseOrthographies, N_JAPANESE);
410         check_orthographies(results, charset, KoreanOrthographies, N_KOREAN);
411         check_orthographies(results, charset, UncategorizedOrthographies, N_MISC);
412     }
413 
414     if (charset && FcCharSetCount(charset) > 0) {
415 
416         if (json_object_get_size(results) == 0) {
417             JsonObject *uncategorized = json_object_new();
418             JsonArray *char_array = charset_to_json_array(charset);
419             json_object_set_string_member(uncategorized, "name", "Uncategorized");
420             json_object_set_double_member(uncategorized, "coverage", 100);
421             json_object_set_array_member(uncategorized, "filter", char_array);
422             json_object_set_object_member(results, "Uncategorized", uncategorized);
423         }
424 
425         g_autofree gchar *sample = font_manager_get_sample_string(results, charset);
426         json_object_set_string_member(results, "sample", sample);
427 
428     } else {
429 
430         json_object_set_string_member(results, "sample", NULL);
431 
432     }
433 
434     return results;
435 }
436 
437 /**
438  * font_manager_get_sample_string_for_orthography:
439  * @orthography: #JsonObject containing orthography results
440  * @charset: (nullable) (transfer none) (element-type uint): #GList of unichar or %NULL
441  *
442  * @orthography should be one of the members of the object returned
443  * by #font_manager_get_orthography_results()
444  *
445  * Returns: (nullable) (transfer full): a sample string for the given orthography/charset
446  *                                      or %NULL if the systems default language is supported
447  */
448 gchar *
font_manager_get_sample_string_for_orthography(JsonObject * orthography,GList * charset)449 font_manager_get_sample_string_for_orthography (JsonObject *orthography, GList *charset)
450 {
451     const char *local_sample = pango_language_get_sample_string(NULL);
452     if (charlist_contains_sample_string(charset, local_sample))
453         return NULL;
454     gchar *sample = get_default_sample_string_for_orthography(orthography);
455     return sample ? sample : get_sample_from_charlist(charset);
456 }
457 
458 #define PROPERTIES OrthographyProperties
459 #define N_PROPERTIES G_N_ELEMENTS(PROPERTIES)
460 static GParamSpec *obj_properties[N_PROPERTIES] = {0};
461 
462 struct _FontManagerOrthography
463 {
464     GObjectClass parent_class;
465 };
466 
G_DEFINE_TYPE(FontManagerOrthography,font_manager_orthography,FONT_MANAGER_TYPE_JSON_PROXY)467 G_DEFINE_TYPE(FontManagerOrthography, font_manager_orthography, FONT_MANAGER_TYPE_JSON_PROXY)
468 
469 static void
470 font_manager_orthography_class_init (FontManagerOrthographyClass *klass)
471 {
472     GObjectClass *object_class = G_OBJECT_CLASS(klass);
473     GObjectClass *parent_class = G_OBJECT_CLASS(font_manager_orthography_parent_class);
474     object_class->get_property = parent_class->get_property;
475     object_class->set_property = parent_class->set_property;
476     font_manager_json_proxy_generate_properties(obj_properties, PROPERTIES, N_PROPERTIES);
477     g_object_class_install_properties(object_class, N_PROPERTIES, obj_properties);
478     return;
479 }
480 
481 static void
font_manager_orthography_init(G_GNUC_UNUSED FontManagerOrthography * self)482 font_manager_orthography_init (G_GNUC_UNUSED FontManagerOrthography *self)
483 {
484     g_return_if_fail(self != NULL);
485 }
486 
487 /**
488  * font_manager_orthography_get_filter:
489  * @self: #FontManagerOrthography
490  *
491  * Returns: (element-type uint) (transfer container) (nullable): #GList containing codepoints.
492  * Free the returned #GList using #g_list_free().
493  */
494 GList *
font_manager_orthography_get_filter(FontManagerOrthography * self)495 font_manager_orthography_get_filter (FontManagerOrthography *self)
496 {
497     g_return_val_if_fail(self != NULL, NULL);
498     GList *charlist = NULL;
499     g_autoptr(JsonObject) source = NULL;
500     g_object_get(self, FONT_MANAGER_JSON_PROXY_SOURCE, &source, NULL);
501     g_return_val_if_fail(source != NULL, charlist);
502     if (json_object_has_member(source, "filter")) {
503         JsonArray *arr = json_object_get_array_member(source, "filter");
504         guint arr_length = json_array_get_length(arr);
505         for (guint index = 0; index < arr_length; index++) {
506             gunichar uc = (gunichar) json_array_get_int_element(arr, index);
507             charlist = g_list_prepend(charlist, GINT_TO_POINTER(uc));
508         }
509         charlist = g_list_reverse(charlist);
510     }
511     return charlist;
512 }
513 
514 /**
515  * font_manager_orthography_new:
516  * @orthography:    #JsonObject containing orthography results
517  *
518  * @orthography should be one of the members of the object returned
519  * by #font_manager_get_orthography_results()
520  *
521  * Returns: (transfer full): A newly created #FontManagerOrthography.
522  * Free the returned object using #g_object_unref().
523  */
524 FontManagerOrthography *
font_manager_orthography_new(JsonObject * orthography)525 font_manager_orthography_new (JsonObject *orthography)
526 {
527     return g_object_new(FONT_MANAGER_TYPE_ORTHOGRAPHY, FONT_MANAGER_JSON_PROXY_SOURCE, orthography, NULL);
528 }
529