1 /* GIMP - The GNU Image Manipulation Program
2  * Copyright (C) 1995 Spencer Kimball and Peter Mattis
3  *
4  * gimplanguagestore-parser.c
5  * Copyright (C) 2008, 2009  Sven Neumann <sven@gimp.org>
6  * Copyright (C) 2013  Jehan <jehan at girinstud.io>
7  *
8  * This program is free software: you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 3 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program.  If not, see <https://www.gnu.org/licenses/>.
20  */
21 
22 #include "config.h"
23 
24 #include <locale.h>
25 #include <string.h>
26 
27 #include <gtk/gtk.h>
28 
29 #include "libgimpbase/gimpbase.h"
30 
31 #include "widgets-types.h"
32 
33 #include "config/gimpxmlparser.h"
34 
35 #include "gimplanguagestore.h"
36 #include "gimplanguagestore-parser.h"
37 
38 #include "gimp-intl.h"
39 
40 
/*
 * States of the iso_639.xml parser. ISO_CODES_START must stay the
 * first (zero) value: the parser context is zero-initialized and
 * relies on starting in this state.
 */
typedef enum
{
  ISO_CODES_START,       /*  initial state, before <iso_639_entries>  */
  ISO_CODES_IN_ENTRIES,  /*  entered on <iso_639_entries>             */
  ISO_CODES_IN_ENTRY,    /*  entered on <iso_639_entry>               */
  ISO_CODES_IN_UNKNOWN   /*  inside an element that is not handled    */
} IsoCodesParserState;
48 
/*  Parser context, passed to the markup callbacks as user data.  */
typedef struct
{
  /*  current state of the parser  */
  IsoCodesParserState  state;
  /*  state saved when the first unknown element is entered, restored
   *  once the outermost unknown element is closed
   */
  IsoCodesParserState  last_known_state;
  /*  nesting depth of the unknown elements currently open  */
  gint                 unknown_depth;
  /*  base language codes; the value of a code is replaced by its
   *  English name when a matching entry is parsed
   */
  GHashTable          *base_lang_list;
} IsoCodesParser;
56 
57 
/*  local function prototypes
 *
 *  iso_codes_parser_entry() is defined before its only caller and
 *  therefore has no prototype here.
 */

static gboolean parse_iso_codes                 (GHashTable  *base_lang_list,
                                                 GError     **error);

/*  the XML parsing callbacks only exist when iso-codes is available  */
#ifdef HAVE_ISO_CODES
static void     iso_codes_parser_init           (void);
static void     iso_codes_parser_start_element  (GMarkupParseContext  *context,
                                                 const gchar          *element_name,
                                                 const gchar         **attribute_names,
                                                 const gchar         **attribute_values,
                                                 gpointer              user_data,
                                                 GError              **error);
static void     iso_codes_parser_end_element    (GMarkupParseContext  *context,
                                                 const gchar          *element_name,
                                                 gpointer              user_data,
                                                 GError              **error);

static void     iso_codes_parser_start_unknown  (IsoCodesParser       *parser);
static void     iso_codes_parser_end_unknown    (IsoCodesParser       *parser);
#endif /* HAVE_ISO_CODES */
77 
/*
 * Language lists that we want to generate only once at program startup.
 * Both tables map a language code to a displayable name and own their
 * keys and values (freed with g_free()):
 * @l10n_lang_list: all available localizations, each one named in its
 *                  own language, formatted as "name [code]";
 * @all_lang_list:  all known ISO-639 languages, named in the
 *                  user-selected language.
 *
 * They are created by gimp_language_store_parser_init() and destroyed
 * by gimp_language_store_parser_clean().
 */
static GHashTable *l10n_lang_list = NULL;
static GHashTable *all_lang_list = NULL;
85 
86 /********************\
87  * Public Functions *
88 \********************/
89 
90 /*
91  * Initialize and run the language listing parser. This call must be
92  * made only once, at program initialization, but after language_init().
93  */
94 void
gimp_language_store_parser_init(void)95 gimp_language_store_parser_init (void)
96 {
97   GHashTable     *base_lang_list;
98   gchar          *current_env;
99   GDir           *locales_dir;
100   GError         *error = NULL;
101   GHashTableIter  lang_iter;
102   gpointer        key;
103 
104   if (l10n_lang_list != NULL)
105     {
106       g_warning ("gimp_language_store_parser_init() must be run only once.");
107       return;
108     }
109 
110   current_env = g_strdup (g_getenv ("LANGUAGE"));
111 
112   l10n_lang_list = g_hash_table_new_full (g_str_hash, g_str_equal,
113                                           (GDestroyNotify) g_free,
114                                           (GDestroyNotify) g_free);
115   all_lang_list = g_hash_table_new_full (g_str_hash, g_str_equal,
116                                          (GDestroyNotify) g_free,
117                                          (GDestroyNotify) g_free);
118   base_lang_list = g_hash_table_new_full (g_str_hash, g_str_equal,
119                                           (GDestroyNotify) g_free,
120                                           (GDestroyNotify) g_free);
121 
122   /* Check all locales we have translations for. */
123   locales_dir = g_dir_open (gimp_locale_directory (), 0, NULL);
124   if (locales_dir)
125     {
126       const gchar *locale;
127 
128       while ((locale = g_dir_read_name (locales_dir)) != NULL)
129         {
130           gchar *filename = g_build_filename (gimp_locale_directory (),
131                                               locale,
132                                               "LC_MESSAGES",
133                                               GETTEXT_PACKAGE ".mo",
134                                               NULL);
135           if (g_file_test (filename, G_FILE_TEST_EXISTS))
136             {
137               gchar *delimiter = NULL;
138               gchar *base_code = NULL;
139 
140               delimiter = strchr (locale, '_');
141 
142               if (delimiter)
143                 base_code = g_strndup (locale, delimiter - locale);
144               else
145                 base_code = g_strdup (locale);
146 
147               delimiter = strchr (base_code, '@');
148 
149               if (delimiter)
150                 {
151                   gchar *temp = base_code;
152                   base_code = g_strndup (base_code, delimiter - base_code);
153                   g_free (temp);
154                 }
155 
156               /* Save the full language code. */
157               g_hash_table_insert (l10n_lang_list, g_strdup (locale), NULL);
158               /* Save the base language code. */
159               g_hash_table_insert (base_lang_list, base_code, NULL);
160             }
161 
162           g_free (filename);
163         }
164 
165       g_dir_close (locales_dir);
166     }
167 
168   /* Parse ISO-639 file to get full list of language and their names. */
169   parse_iso_codes (base_lang_list, &error);
170 
171   /* Generate the localized language names. */
172   g_hash_table_iter_init (&lang_iter, l10n_lang_list);
173   while (g_hash_table_iter_next (&lang_iter, &key, NULL))
174     {
175       gchar *code           = (gchar*) key;
176       gchar *localized_name = NULL;
177       gchar *english_name   = NULL;
178       gchar *delimiter      = NULL;
179       gchar *base_code      = NULL;
180 
181       delimiter = strchr (code, '_');
182 
183       if (delimiter)
184         base_code = g_strndup (code, delimiter - code);
185       else
186         base_code = g_strdup (code);
187 
188       delimiter = strchr (base_code, '@');
189 
190       if (delimiter)
191         {
192           gchar *temp = base_code;
193           base_code = g_strndup (base_code, delimiter - base_code);
194           g_free (temp);
195         }
196 
197       english_name = (gchar*) (g_hash_table_lookup (base_lang_list, base_code));
198 
199       if (english_name)
200         {
201           gchar *semicolon;
202 
203           /* If possible, we want to localize a language in itself.
204            * If it fails, gettext fallbacks to C (en_US) itself.
205            */
206           g_setenv ("LANGUAGE", code, TRUE);
207           setlocale (LC_ALL, "");
208 
209           localized_name = g_strdup (dgettext ("iso_639", english_name));
210 
211           /* If original and localized names are the same for other than English,
212            * maybe localization failed. Try now in the main dialect. */
213           if (g_strcmp0 (english_name, localized_name) == 0 &&
214               g_strcmp0 (base_code, "en") != 0 &&
215               g_strcmp0 (code, base_code) != 0)
216             {
217               g_free (localized_name);
218 
219               g_setenv ("LANGUAGE", base_code, TRUE);
220               setlocale (LC_ALL, "");
221 
222               localized_name = g_strdup (dgettext ("iso_639", english_name));
223             }
224 
225           /*  there might be several language names; use the first one  */
226           semicolon = strchr (localized_name, ';');
227 
228           if (semicolon)
229             {
230               gchar *temp = localized_name;
231               localized_name = g_strndup (localized_name, semicolon - localized_name);
232               g_free (temp);
233             }
234         }
235 
236       g_hash_table_replace (l10n_lang_list, g_strdup(code),
237                             g_strdup_printf ("%s [%s]",
238                                              localized_name ?
239                                              localized_name : "???",
240                                              code));
241       g_free (localized_name);
242       g_free (base_code);
243     }
244 
245   /*  Add special entries for system locale.
246    *  We want the system locale to be localized in itself. */
247   g_setenv ("LANGUAGE", setlocale (LC_ALL, NULL), TRUE);
248   setlocale (LC_ALL, "");
249 
250   /* g_str_hash() does not accept NULL. I give an empty code instead.
251    * Other solution would to create a custom hash. */
252   g_hash_table_insert (l10n_lang_list, g_strdup(""),
253                        g_strdup (_("System Language")));
254 
255   /* Go back to original localization. */
256   if (current_env)
257     {
258       g_setenv ("LANGUAGE", current_env, TRUE);
259       g_free (current_env);
260     }
261   else
262     g_unsetenv ("LANGUAGE");
263   setlocale (LC_ALL, "");
264 
265   /* Add special entry for C (en_US). */
266   g_hash_table_insert (l10n_lang_list, g_strdup ("en_US"),
267                        g_strdup ("English [en_US]"));
268 
269   g_hash_table_destroy (base_lang_list);
270 }
271 
272 void
gimp_language_store_parser_clean(void)273 gimp_language_store_parser_clean (void)
274 {
275   g_hash_table_destroy (l10n_lang_list);
276   g_hash_table_destroy (all_lang_list);
277 }
278 
279 /*
280  * Returns a Hash table of languages.
281  * Keys and values are respectively language codes and names from the
282  * ISO-639 standard code.
283  *
284  * If @localization_only is TRUE, it returns only the list of available
285  * GIMP localizations, and language names are translated in their own
286  * locale.
287  * If @localization_only is FALSE, the full list of ISO-639 languages
288  * is returned, and language names are in the user-set locale.
289  *
290  * Do not free the list or elements of the list.
291  */
292 GHashTable *
gimp_language_store_parser_get_languages(gboolean localization_only)293 gimp_language_store_parser_get_languages (gboolean localization_only)
294 {
295   if (localization_only)
296     return l10n_lang_list;
297   else
298     return all_lang_list;
299 }
300 
301 /*****************************\
302  * Private Parsing Functions *
303 \*****************************/
304 
305 /*
306  * Parse the ISO-639 code list if available on this system, and fill
307  * @base_lang_list with English names of all needed base codes.
308  *
309  * It will also fill the static @all_lang_list.
310  */
311 static gboolean
parse_iso_codes(GHashTable * base_lang_list,GError ** error)312 parse_iso_codes (GHashTable  *base_lang_list,
313                  GError     **error)
314 {
315   gboolean success = TRUE;
316 
317 #ifdef HAVE_ISO_CODES
318   static const GMarkupParser markup_parser =
319     {
320       iso_codes_parser_start_element,
321       iso_codes_parser_end_element,
322       NULL,  /*  characters   */
323       NULL,  /*  passthrough  */
324       NULL   /*  error        */
325     };
326 
327   GimpXmlParser  *xml_parser;
328   GFile          *file;
329   IsoCodesParser  parser = { 0, };
330 
331   g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
332 
333   iso_codes_parser_init ();
334 
335   parser.base_lang_list = g_hash_table_ref (base_lang_list);
336 
337   xml_parser = gimp_xml_parser_new (&markup_parser, &parser);
338 
339 #if ENABLE_RELOCATABLE_RESOURCES
340   file = gimp_installation_directory_file ("share", "xml", "iso-codes",
341                                            "iso_639.xml", NULL);
342 #else
343   file = g_file_new_for_path (ISO_CODES_LOCATION G_DIR_SEPARATOR_S
344                               "iso_639.xml");
345 #endif
346 
347   success = gimp_xml_parser_parse_gfile (xml_parser, file, error);
348   if (error && *error)
349     {
350       g_warning ("%s: error parsing '%s': %s\n",
351                  G_STRFUNC, g_file_get_path (file),
352                  (*error)->message);
353       g_clear_error (error);
354     }
355 
356   g_object_unref (file);
357 
358   gimp_xml_parser_free (xml_parser);
359   g_hash_table_unref (parser.base_lang_list);
360 
361 #endif /* HAVE_ISO_CODES */
362 
363   return success;
364 }
365 
366 #ifdef HAVE_ISO_CODES
367 static void
iso_codes_parser_init(void)368 iso_codes_parser_init (void)
369 {
370   static gboolean initialized = FALSE;
371 
372   if (initialized)
373     return;
374 
375 #ifdef G_OS_WIN32
376   /*  on Win32, assume iso-codes is installed in the same location as GIMP  */
377   bindtextdomain ("iso_639", gimp_locale_directory ());
378 #else
379   bindtextdomain ("iso_639", ISO_CODES_LOCALEDIR);
380 #endif
381 
382   bind_textdomain_codeset ("iso_639", "UTF-8");
383 
384   initialized = TRUE;
385 }
386 
387 static void
iso_codes_parser_entry(IsoCodesParser * parser,const gchar ** names,const gchar ** values)388 iso_codes_parser_entry (IsoCodesParser  *parser,
389                         const gchar    **names,
390                         const gchar    **values)
391 {
392   const gchar *lang = NULL;
393   const gchar *code = NULL;
394 
395   while (*names && *values)
396     {
397       if (strcmp (*names, "name") == 0)
398         lang = *values;
399       else if (strcmp (*names, "iso_639_2B_code") == 0 && code == NULL)
400         /* 2-letter ISO 639-1 codes have priority.
401          * But some languages have no 2-letter code. Ex: Asturian (ast).
402          */
403         code = *values;
404       else if (strcmp (*names, "iso_639_2T_code") == 0 && code == NULL)
405         code = *values;
406       else if (strcmp (*names, "iso_639_1_code") == 0)
407         code = *values;
408 
409       names++;
410       values++;
411     }
412 
413   if (lang && *lang && code && *code)
414     {
415       gchar *semicolon;
416       gchar *localized_name = g_strdup (dgettext ("iso_639", lang));
417 
418       /* If the language is in our base table, we save its standard English name. */
419       if (g_hash_table_contains (parser->base_lang_list, code))
420         g_hash_table_replace (parser->base_lang_list, g_strdup (code), g_strdup (lang));
421 
422       /*  there might be several language names; use the first one  */
423       semicolon = strchr (localized_name, ';');
424 
425       if (semicolon)
426         {
427           gchar *temp = localized_name;
428           localized_name = g_strndup (localized_name, semicolon - localized_name);
429           g_free (temp);
430         }
431       /* In any case, we save the name in user-set language for all lang. */
432       g_hash_table_insert (all_lang_list, g_strdup (code), localized_name);
433     }
434 }
435 
436 static void
iso_codes_parser_start_element(GMarkupParseContext * context,const gchar * element_name,const gchar ** attribute_names,const gchar ** attribute_values,gpointer user_data,GError ** error)437 iso_codes_parser_start_element (GMarkupParseContext  *context,
438                                 const gchar          *element_name,
439                                 const gchar         **attribute_names,
440                                 const gchar         **attribute_values,
441                                 gpointer              user_data,
442                                 GError              **error)
443 {
444   IsoCodesParser *parser = user_data;
445 
446   switch (parser->state)
447     {
448     case ISO_CODES_START:
449       if (strcmp (element_name, "iso_639_entries") == 0)
450         {
451           parser->state = ISO_CODES_IN_ENTRIES;
452           break;
453         }
454 
455     case ISO_CODES_IN_ENTRIES:
456       if (strcmp (element_name, "iso_639_entry") == 0)
457         {
458           parser->state = ISO_CODES_IN_ENTRY;
459           iso_codes_parser_entry (parser, attribute_names, attribute_values);
460           break;
461         }
462 
463     case ISO_CODES_IN_ENTRY:
464     case ISO_CODES_IN_UNKNOWN:
465       iso_codes_parser_start_unknown (parser);
466       break;
467     }
468 }
469 
470 static void
iso_codes_parser_end_element(GMarkupParseContext * context,const gchar * element_name,gpointer user_data,GError ** error)471 iso_codes_parser_end_element (GMarkupParseContext *context,
472                               const gchar         *element_name,
473                               gpointer             user_data,
474                               GError             **error)
475 {
476   IsoCodesParser *parser = user_data;
477 
478   switch (parser->state)
479     {
480     case ISO_CODES_START:
481       g_warning ("%s: shouldn't get here", G_STRLOC);
482       break;
483 
484     case ISO_CODES_IN_ENTRIES:
485       parser->state = ISO_CODES_START;
486       break;
487 
488     case ISO_CODES_IN_ENTRY:
489       parser->state = ISO_CODES_IN_ENTRIES;
490       break;
491 
492     case ISO_CODES_IN_UNKNOWN:
493       iso_codes_parser_end_unknown (parser);
494       break;
495     }
496 }
497 
498 static void
iso_codes_parser_start_unknown(IsoCodesParser * parser)499 iso_codes_parser_start_unknown (IsoCodesParser *parser)
500 {
501   if (parser->unknown_depth == 0)
502     parser->last_known_state = parser->state;
503 
504   parser->state = ISO_CODES_IN_UNKNOWN;
505   parser->unknown_depth++;
506 }
507 
508 static void
iso_codes_parser_end_unknown(IsoCodesParser * parser)509 iso_codes_parser_end_unknown (IsoCodesParser *parser)
510 {
511   gimp_assert (parser->unknown_depth > 0 &&
512                parser->state == ISO_CODES_IN_UNKNOWN);
513 
514   parser->unknown_depth--;
515 
516   if (parser->unknown_depth == 0)
517     parser->state = parser->last_known_state;
518 }
519 #endif /* HAVE_ISO_CODES */
520