1 /* GIMP - The GNU Image Manipulation Program
2 * Copyright (C) 1995 Spencer Kimball and Peter Mattis
3 *
4 * gimplanguagestore-parser.c
5 * Copyright (C) 2008, 2009 Sven Neumann <sven@gimp.org>
6 * Copyright (C) 2013 Jehan <jehan at girinstud.io>
7 *
8 * This program is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 3 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program. If not, see <https://www.gnu.org/licenses/>.
20 */
21
22 #include "config.h"
23
24 #include <locale.h>
25 #include <string.h>
26
27 #include <gtk/gtk.h>
28
29 #include "libgimpbase/gimpbase.h"
30
31 #include "widgets-types.h"
32
33 #include "config/gimpxmlparser.h"
34
35 #include "gimplanguagestore.h"
36 #include "gimplanguagestore-parser.h"
37
38 #include "gimp-intl.h"
39
40
/* Position of the iso_639.xml markup parser in the element tree. */
typedef enum
{
  /* Must stay 0: the parser context is zero-initialized before parsing. */
  ISO_CODES_START = 0,   /* outside of <iso_639_entries>           */
  ISO_CODES_IN_ENTRIES,  /* inside <iso_639_entries>               */
  ISO_CODES_IN_ENTRY,    /* inside an <iso_639_entry>              */
  ISO_CODES_IN_UNKNOWN   /* inside an element that is not handled  */
} IsoCodesParserState;
48
49 typedef struct
50 {
51 IsoCodesParserState state;
52 IsoCodesParserState last_known_state;
53 gint unknown_depth;
54 GHashTable *base_lang_list;
55 } IsoCodesParser;
56
57
58 static gboolean parse_iso_codes (GHashTable *base_lang_list,
59 GError **error);
60
61 #ifdef HAVE_ISO_CODES
62 static void iso_codes_parser_init (void);
63 static void iso_codes_parser_start_element (GMarkupParseContext *context,
64 const gchar *element_name,
65 const gchar **attribute_names,
66 const gchar **attribute_values,
67 gpointer user_data,
68 GError **error);
69 static void iso_codes_parser_end_element (GMarkupParseContext *context,
70 const gchar *element_name,
71 gpointer user_data,
72 GError **error);
73
74 static void iso_codes_parser_start_unknown (IsoCodesParser *parser);
75 static void iso_codes_parser_end_unknown (IsoCodesParser *parser);
76 #endif /* HAVE_ISO_CODES */
77
78 /*
79 * Language lists that we want to generate only once at program startup:
80 * @l10n_lang_list: all available localizations self-localized;
81 * @all_lang_list: all known languages, in the user-selected language.
82 */
83 static GHashTable *l10n_lang_list = NULL;
84 static GHashTable *all_lang_list = NULL;
85
86 /********************\
87 * Public Functions *
88 \********************/
89
90 /*
91 * Initialize and run the language listing parser. This call must be
92 * made only once, at program initialization, but after language_init().
93 */
94 void
gimp_language_store_parser_init(void)95 gimp_language_store_parser_init (void)
96 {
97 GHashTable *base_lang_list;
98 gchar *current_env;
99 GDir *locales_dir;
100 GError *error = NULL;
101 GHashTableIter lang_iter;
102 gpointer key;
103
104 if (l10n_lang_list != NULL)
105 {
106 g_warning ("gimp_language_store_parser_init() must be run only once.");
107 return;
108 }
109
110 current_env = g_strdup (g_getenv ("LANGUAGE"));
111
112 l10n_lang_list = g_hash_table_new_full (g_str_hash, g_str_equal,
113 (GDestroyNotify) g_free,
114 (GDestroyNotify) g_free);
115 all_lang_list = g_hash_table_new_full (g_str_hash, g_str_equal,
116 (GDestroyNotify) g_free,
117 (GDestroyNotify) g_free);
118 base_lang_list = g_hash_table_new_full (g_str_hash, g_str_equal,
119 (GDestroyNotify) g_free,
120 (GDestroyNotify) g_free);
121
122 /* Check all locales we have translations for. */
123 locales_dir = g_dir_open (gimp_locale_directory (), 0, NULL);
124 if (locales_dir)
125 {
126 const gchar *locale;
127
128 while ((locale = g_dir_read_name (locales_dir)) != NULL)
129 {
130 gchar *filename = g_build_filename (gimp_locale_directory (),
131 locale,
132 "LC_MESSAGES",
133 GETTEXT_PACKAGE ".mo",
134 NULL);
135 if (g_file_test (filename, G_FILE_TEST_EXISTS))
136 {
137 gchar *delimiter = NULL;
138 gchar *base_code = NULL;
139
140 delimiter = strchr (locale, '_');
141
142 if (delimiter)
143 base_code = g_strndup (locale, delimiter - locale);
144 else
145 base_code = g_strdup (locale);
146
147 delimiter = strchr (base_code, '@');
148
149 if (delimiter)
150 {
151 gchar *temp = base_code;
152 base_code = g_strndup (base_code, delimiter - base_code);
153 g_free (temp);
154 }
155
156 /* Save the full language code. */
157 g_hash_table_insert (l10n_lang_list, g_strdup (locale), NULL);
158 /* Save the base language code. */
159 g_hash_table_insert (base_lang_list, base_code, NULL);
160 }
161
162 g_free (filename);
163 }
164
165 g_dir_close (locales_dir);
166 }
167
168 /* Parse ISO-639 file to get full list of language and their names. */
169 parse_iso_codes (base_lang_list, &error);
170
171 /* Generate the localized language names. */
172 g_hash_table_iter_init (&lang_iter, l10n_lang_list);
173 while (g_hash_table_iter_next (&lang_iter, &key, NULL))
174 {
175 gchar *code = (gchar*) key;
176 gchar *localized_name = NULL;
177 gchar *english_name = NULL;
178 gchar *delimiter = NULL;
179 gchar *base_code = NULL;
180
181 delimiter = strchr (code, '_');
182
183 if (delimiter)
184 base_code = g_strndup (code, delimiter - code);
185 else
186 base_code = g_strdup (code);
187
188 delimiter = strchr (base_code, '@');
189
190 if (delimiter)
191 {
192 gchar *temp = base_code;
193 base_code = g_strndup (base_code, delimiter - base_code);
194 g_free (temp);
195 }
196
197 english_name = (gchar*) (g_hash_table_lookup (base_lang_list, base_code));
198
199 if (english_name)
200 {
201 gchar *semicolon;
202
203 /* If possible, we want to localize a language in itself.
204 * If it fails, gettext fallbacks to C (en_US) itself.
205 */
206 g_setenv ("LANGUAGE", code, TRUE);
207 setlocale (LC_ALL, "");
208
209 localized_name = g_strdup (dgettext ("iso_639", english_name));
210
211 /* If original and localized names are the same for other than English,
212 * maybe localization failed. Try now in the main dialect. */
213 if (g_strcmp0 (english_name, localized_name) == 0 &&
214 g_strcmp0 (base_code, "en") != 0 &&
215 g_strcmp0 (code, base_code) != 0)
216 {
217 g_free (localized_name);
218
219 g_setenv ("LANGUAGE", base_code, TRUE);
220 setlocale (LC_ALL, "");
221
222 localized_name = g_strdup (dgettext ("iso_639", english_name));
223 }
224
225 /* there might be several language names; use the first one */
226 semicolon = strchr (localized_name, ';');
227
228 if (semicolon)
229 {
230 gchar *temp = localized_name;
231 localized_name = g_strndup (localized_name, semicolon - localized_name);
232 g_free (temp);
233 }
234 }
235
236 g_hash_table_replace (l10n_lang_list, g_strdup(code),
237 g_strdup_printf ("%s [%s]",
238 localized_name ?
239 localized_name : "???",
240 code));
241 g_free (localized_name);
242 g_free (base_code);
243 }
244
245 /* Add special entries for system locale.
246 * We want the system locale to be localized in itself. */
247 g_setenv ("LANGUAGE", setlocale (LC_ALL, NULL), TRUE);
248 setlocale (LC_ALL, "");
249
250 /* g_str_hash() does not accept NULL. I give an empty code instead.
251 * Other solution would to create a custom hash. */
252 g_hash_table_insert (l10n_lang_list, g_strdup(""),
253 g_strdup (_("System Language")));
254
255 /* Go back to original localization. */
256 if (current_env)
257 {
258 g_setenv ("LANGUAGE", current_env, TRUE);
259 g_free (current_env);
260 }
261 else
262 g_unsetenv ("LANGUAGE");
263 setlocale (LC_ALL, "");
264
265 /* Add special entry for C (en_US). */
266 g_hash_table_insert (l10n_lang_list, g_strdup ("en_US"),
267 g_strdup ("English [en_US]"));
268
269 g_hash_table_destroy (base_lang_list);
270 }
271
272 void
gimp_language_store_parser_clean(void)273 gimp_language_store_parser_clean (void)
274 {
275 g_hash_table_destroy (l10n_lang_list);
276 g_hash_table_destroy (all_lang_list);
277 }
278
279 /*
280 * Returns a Hash table of languages.
281 * Keys and values are respectively language codes and names from the
282 * ISO-639 standard code.
283 *
284 * If @localization_only is TRUE, it returns only the list of available
285 * GIMP localizations, and language names are translated in their own
286 * locale.
287 * If @localization_only is FALSE, the full list of ISO-639 languages
288 * is returned, and language names are in the user-set locale.
289 *
290 * Do not free the list or elements of the list.
291 */
292 GHashTable *
gimp_language_store_parser_get_languages(gboolean localization_only)293 gimp_language_store_parser_get_languages (gboolean localization_only)
294 {
295 if (localization_only)
296 return l10n_lang_list;
297 else
298 return all_lang_list;
299 }
300
301 /*****************************\
302 * Private Parsing Functions *
303 \*****************************/
304
305 /*
306 * Parse the ISO-639 code list if available on this system, and fill
307 * @base_lang_list with English names of all needed base codes.
308 *
309 * It will also fill the static @all_lang_list.
310 */
311 static gboolean
parse_iso_codes(GHashTable * base_lang_list,GError ** error)312 parse_iso_codes (GHashTable *base_lang_list,
313 GError **error)
314 {
315 gboolean success = TRUE;
316
317 #ifdef HAVE_ISO_CODES
318 static const GMarkupParser markup_parser =
319 {
320 iso_codes_parser_start_element,
321 iso_codes_parser_end_element,
322 NULL, /* characters */
323 NULL, /* passthrough */
324 NULL /* error */
325 };
326
327 GimpXmlParser *xml_parser;
328 GFile *file;
329 IsoCodesParser parser = { 0, };
330
331 g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
332
333 iso_codes_parser_init ();
334
335 parser.base_lang_list = g_hash_table_ref (base_lang_list);
336
337 xml_parser = gimp_xml_parser_new (&markup_parser, &parser);
338
339 #if ENABLE_RELOCATABLE_RESOURCES
340 file = gimp_installation_directory_file ("share", "xml", "iso-codes",
341 "iso_639.xml", NULL);
342 #else
343 file = g_file_new_for_path (ISO_CODES_LOCATION G_DIR_SEPARATOR_S
344 "iso_639.xml");
345 #endif
346
347 success = gimp_xml_parser_parse_gfile (xml_parser, file, error);
348 if (error && *error)
349 {
350 g_warning ("%s: error parsing '%s': %s\n",
351 G_STRFUNC, g_file_get_path (file),
352 (*error)->message);
353 g_clear_error (error);
354 }
355
356 g_object_unref (file);
357
358 gimp_xml_parser_free (xml_parser);
359 g_hash_table_unref (parser.base_lang_list);
360
361 #endif /* HAVE_ISO_CODES */
362
363 return success;
364 }
365
366 #ifdef HAVE_ISO_CODES
367 static void
iso_codes_parser_init(void)368 iso_codes_parser_init (void)
369 {
370 static gboolean initialized = FALSE;
371
372 if (initialized)
373 return;
374
375 #ifdef G_OS_WIN32
376 /* on Win32, assume iso-codes is installed in the same location as GIMP */
377 bindtextdomain ("iso_639", gimp_locale_directory ());
378 #else
379 bindtextdomain ("iso_639", ISO_CODES_LOCALEDIR);
380 #endif
381
382 bind_textdomain_codeset ("iso_639", "UTF-8");
383
384 initialized = TRUE;
385 }
386
387 static void
iso_codes_parser_entry(IsoCodesParser * parser,const gchar ** names,const gchar ** values)388 iso_codes_parser_entry (IsoCodesParser *parser,
389 const gchar **names,
390 const gchar **values)
391 {
392 const gchar *lang = NULL;
393 const gchar *code = NULL;
394
395 while (*names && *values)
396 {
397 if (strcmp (*names, "name") == 0)
398 lang = *values;
399 else if (strcmp (*names, "iso_639_2B_code") == 0 && code == NULL)
400 /* 2-letter ISO 639-1 codes have priority.
401 * But some languages have no 2-letter code. Ex: Asturian (ast).
402 */
403 code = *values;
404 else if (strcmp (*names, "iso_639_2T_code") == 0 && code == NULL)
405 code = *values;
406 else if (strcmp (*names, "iso_639_1_code") == 0)
407 code = *values;
408
409 names++;
410 values++;
411 }
412
413 if (lang && *lang && code && *code)
414 {
415 gchar *semicolon;
416 gchar *localized_name = g_strdup (dgettext ("iso_639", lang));
417
418 /* If the language is in our base table, we save its standard English name. */
419 if (g_hash_table_contains (parser->base_lang_list, code))
420 g_hash_table_replace (parser->base_lang_list, g_strdup (code), g_strdup (lang));
421
422 /* there might be several language names; use the first one */
423 semicolon = strchr (localized_name, ';');
424
425 if (semicolon)
426 {
427 gchar *temp = localized_name;
428 localized_name = g_strndup (localized_name, semicolon - localized_name);
429 g_free (temp);
430 }
431 /* In any case, we save the name in user-set language for all lang. */
432 g_hash_table_insert (all_lang_list, g_strdup (code), localized_name);
433 }
434 }
435
436 static void
iso_codes_parser_start_element(GMarkupParseContext * context,const gchar * element_name,const gchar ** attribute_names,const gchar ** attribute_values,gpointer user_data,GError ** error)437 iso_codes_parser_start_element (GMarkupParseContext *context,
438 const gchar *element_name,
439 const gchar **attribute_names,
440 const gchar **attribute_values,
441 gpointer user_data,
442 GError **error)
443 {
444 IsoCodesParser *parser = user_data;
445
446 switch (parser->state)
447 {
448 case ISO_CODES_START:
449 if (strcmp (element_name, "iso_639_entries") == 0)
450 {
451 parser->state = ISO_CODES_IN_ENTRIES;
452 break;
453 }
454
455 case ISO_CODES_IN_ENTRIES:
456 if (strcmp (element_name, "iso_639_entry") == 0)
457 {
458 parser->state = ISO_CODES_IN_ENTRY;
459 iso_codes_parser_entry (parser, attribute_names, attribute_values);
460 break;
461 }
462
463 case ISO_CODES_IN_ENTRY:
464 case ISO_CODES_IN_UNKNOWN:
465 iso_codes_parser_start_unknown (parser);
466 break;
467 }
468 }
469
470 static void
iso_codes_parser_end_element(GMarkupParseContext * context,const gchar * element_name,gpointer user_data,GError ** error)471 iso_codes_parser_end_element (GMarkupParseContext *context,
472 const gchar *element_name,
473 gpointer user_data,
474 GError **error)
475 {
476 IsoCodesParser *parser = user_data;
477
478 switch (parser->state)
479 {
480 case ISO_CODES_START:
481 g_warning ("%s: shouldn't get here", G_STRLOC);
482 break;
483
484 case ISO_CODES_IN_ENTRIES:
485 parser->state = ISO_CODES_START;
486 break;
487
488 case ISO_CODES_IN_ENTRY:
489 parser->state = ISO_CODES_IN_ENTRIES;
490 break;
491
492 case ISO_CODES_IN_UNKNOWN:
493 iso_codes_parser_end_unknown (parser);
494 break;
495 }
496 }
497
498 static void
iso_codes_parser_start_unknown(IsoCodesParser * parser)499 iso_codes_parser_start_unknown (IsoCodesParser *parser)
500 {
501 if (parser->unknown_depth == 0)
502 parser->last_known_state = parser->state;
503
504 parser->state = ISO_CODES_IN_UNKNOWN;
505 parser->unknown_depth++;
506 }
507
508 static void
iso_codes_parser_end_unknown(IsoCodesParser * parser)509 iso_codes_parser_end_unknown (IsoCodesParser *parser)
510 {
511 gimp_assert (parser->unknown_depth > 0 &&
512 parser->state == ISO_CODES_IN_UNKNOWN);
513
514 parser->unknown_depth--;
515
516 if (parser->unknown_depth == 0)
517 parser->state = parser->last_known_state;
518 }
519 #endif /* HAVE_ISO_CODES */
520