1 /* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /*
3  * pluma-spell-checker-language.c
4  * This file is part of pluma
5  *
6  * Copyright (C) 2006 Paolo Maggi
7  * Copyright (C) 2012-2021 MATE Developers
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin St, Fifth Floor,
22  * Boston, MA 02110-1301, USA.
23  */
24 
25 /*
26  * Modified by the pluma Team, 2006. See the AUTHORS file for a
27  * list of people on the pluma Team.
28  * See the ChangeLog files for a list of changes.
29  */
30 
/* Part of the code taken from Epiphany.
 *
 * Copyright (C) 2003, 2004 Christian Persch
 */
35 
36 #ifdef HAVE_CONFIG_H
37 #include <config.h>
38 #endif
39 
40 #include <string.h>
41 
42 #include <enchant.h>
43 
44 #include <glib/gi18n.h>
45 #include <libxml/xmlreader.h>
46 
47 #include "pluma-spell-checker-language.h"
48 
49 #include <pluma/pluma-debug.h>
50 
51 #define ISO_639_DOMAIN	"iso_639"
52 #define ISO_3166_DOMAIN	"iso_3166"
53 
54 #define ISOCODESLOCALEDIR	ISO_CODES_PREFIX "/share/locale"
55 
/* One spell-checking language: a dictionary tag plus its display name.
 * Instances are filled in by build_langs_list(), which duplicates both
 * strings into the structure.
 */
struct _PlumaSpellCheckerLanguage
{
	gchar *abrev;	/* language tag, copied from the dictionary tree key */
	gchar *name;	/* display name, copied from the dictionary tree value */
};
61 
/* Set to TRUE as soon as pluma_spell_checker_get_available_languages()
 * starts building the list, so the build is attempted only once.
 */
static gboolean available_languages_initialized = FALSE;
/* List of PlumaSpellCheckerLanguage, inserted sorted by display name. */
static GSList *available_languages = NULL;

/* Code -> name lookup tables. They are created and destroyed inside
 * pluma_spell_checker_get_available_languages() and are NULL otherwise.
 */
static GHashTable *iso_639_table = NULL;
static GHashTable *iso_3166_table = NULL;
67 
68 static void
bind_iso_domains(void)69 bind_iso_domains (void)
70 {
71 	static gboolean bound = FALSE;
72 
73 	if (bound == FALSE)
74 	{
75 	        bindtextdomain (ISO_639_DOMAIN, ISOCODESLOCALEDIR);
76 	        bind_textdomain_codeset (ISO_639_DOMAIN, "UTF-8");
77 
78 	        bindtextdomain(ISO_3166_DOMAIN, ISOCODESLOCALEDIR);
79 	        bind_textdomain_codeset (ISO_3166_DOMAIN, "UTF-8");
80 
81 		bound = TRUE;
82 	}
83 }
84 
85 static void
read_iso_639_entry(xmlTextReaderPtr reader,GHashTable * table)86 read_iso_639_entry (xmlTextReaderPtr reader,
87 		    GHashTable *table)
88 {
89 	xmlChar *code, *name;
90 
91 	code = xmlTextReaderGetAttribute (reader, (const xmlChar *) "iso_639_1_code");
92 	name = xmlTextReaderGetAttribute (reader, (const xmlChar *) "name");
93 
94 	/* Get iso-639-2 code */
95 	if (code == NULL || code[0] == '\0')
96 	{
97 		xmlFree (code);
98 		/* FIXME: use the 2T or 2B code? */
99 		code = xmlTextReaderGetAttribute (reader, (const xmlChar *) "iso_639_2T_code");
100 	}
101 
102 	if (code != NULL && code[0] != '\0' && name != NULL && name[0] != '\0')
103 	{
104 		g_hash_table_insert (table, code, name);
105 	}
106 	else
107 	{
108 		xmlFree (code);
109 		xmlFree (name);
110 	}
111 }
112 
113 static void
read_iso_3166_entry(xmlTextReaderPtr reader,GHashTable * table)114 read_iso_3166_entry (xmlTextReaderPtr reader,
115 		     GHashTable *table)
116 {
117 	xmlChar *code, *name;
118 
119 	code = xmlTextReaderGetAttribute (reader, (const xmlChar *) "alpha_2_code");
120 	name = xmlTextReaderGetAttribute (reader, (const xmlChar *) "name");
121 
122 	if (code != NULL && code[0] != '\0' && name != NULL && name[0] != '\0')
123 	{
124 		char *lcode;
125 
126 		lcode = g_ascii_strdown ((char *) code, -1);
127 		xmlFree (code);
128 
129 		/* g_print ("%s -> %s\n", lcode, name); */
130 
131 		g_hash_table_insert (table, lcode, name);
132 	}
133 	else
134 	{
135 		xmlFree (code);
136 		xmlFree (name);
137 	}
138 }
139 
/* Progress of the XML reader loop in load_iso_entries(). */
typedef enum
{
	STATE_START,	/* the opening iso_<N>_entries element has not been seen yet */
	STATE_STOP,	/* the closing iso_<N>_entries tag has been read */
	STATE_ENTRIES,	/* inside iso_<N>_entries; iso_<N>_entry elements are processed */
} ParserState;
146 
147 static void
load_iso_entries(int iso,GFunc read_entry_func,gpointer user_data)148 load_iso_entries (int iso,
149 		  GFunc read_entry_func,
150 		  gpointer user_data)
151 {
152 	xmlTextReaderPtr reader;
153 	ParserState state = STATE_START;
154 	xmlChar iso_entries[32], iso_entry[32];
155 	char *filename;
156 	int ret = -1;
157 
158 	pluma_debug_message (DEBUG_PLUGINS, "Loading ISO-%d codes", iso);
159 
160 	filename = g_strdup_printf (ISO_CODES_PREFIX "/share/xml/iso-codes/iso_%d.xml", iso);
161 	reader = xmlNewTextReaderFilename (filename);
162 	if (reader == NULL) goto out;
163 
164 	xmlStrPrintf (iso_entries, sizeof (iso_entries), (const char *)"iso_%d_entries", iso);
165 	xmlStrPrintf (iso_entry, sizeof (iso_entry), (const char *)"iso_%d_entry", iso);
166 
167 	ret = xmlTextReaderRead (reader);
168 
169 	while (ret == 1)
170 	{
171 		const xmlChar *tag;
172 		xmlReaderTypes type;
173 
174 		tag = xmlTextReaderConstName (reader);
175 		type = xmlTextReaderNodeType (reader);
176 
177 		if (state == STATE_ENTRIES &&
178 		    type == XML_READER_TYPE_ELEMENT &&
179 		    xmlStrEqual (tag, iso_entry))
180 		{
181 			read_entry_func (reader, user_data);
182 		}
183 		else if (state == STATE_START &&
184 			 type == XML_READER_TYPE_ELEMENT &&
185 			 xmlStrEqual (tag, iso_entries))
186 		{
187 			state = STATE_ENTRIES;
188 		}
189 		else if (state == STATE_ENTRIES &&
190 			 type == XML_READER_TYPE_END_ELEMENT &&
191 			 xmlStrEqual (tag, iso_entries))
192 		{
193 			state = STATE_STOP;
194 		}
195 		else if (type == XML_READER_TYPE_SIGNIFICANT_WHITESPACE ||
196 			 type == XML_READER_TYPE_WHITESPACE ||
197 			 type == XML_READER_TYPE_TEXT ||
198 			 type == XML_READER_TYPE_COMMENT)
199 		{
200 			/* eat it */
201 		}
202 		else
203 		{
204 			/* ignore it */
205 		}
206 
207 		ret = xmlTextReaderRead (reader);
208 	}
209 
210 	xmlFreeTextReader (reader);
211 
212 out:
213 	if (ret < 0 || state != STATE_STOP)
214 	{
215 		g_warning ("Failed to load ISO-%d codes from %s!\n",
216 			   iso, filename);
217 	}
218 
219 	g_free (filename);
220 }
221 
222 static GHashTable *
create_iso_639_table(void)223 create_iso_639_table (void)
224 {
225 	GHashTable *table;
226 
227 	bind_iso_domains ();
228 	table = g_hash_table_new_full (g_str_hash, g_str_equal,
229 				       (GDestroyNotify) xmlFree,
230 				       (GDestroyNotify) xmlFree);
231 
232 	load_iso_entries (639, (GFunc) read_iso_639_entry, table);
233 
234 	return table;
235 }
236 
237 static GHashTable *
create_iso_3166_table(void)238 create_iso_3166_table (void)
239 {
240 	GHashTable *table;
241 
242 	bind_iso_domains ();
243 	table = g_hash_table_new_full (g_str_hash, g_str_equal,
244 				       (GDestroyNotify) g_free,
245 				       (GDestroyNotify) xmlFree);
246 
247 	load_iso_entries (3166, (GFunc) read_iso_3166_entry, table);
248 
249 	return table;
250 }
251 
252 static char *
create_name_for_language(const char * code)253 create_name_for_language (const char *code)
254 {
255 	char **str;
256 	char *name = NULL;
257 	const char *langname, *localename;
258 	int len;
259 
260 	g_return_val_if_fail (iso_639_table != NULL, NULL);
261 	g_return_val_if_fail (iso_3166_table != NULL, NULL);
262 
263 	str = g_strsplit (code, "_", -1);
264 	len = g_strv_length (str);
265 	g_return_val_if_fail (len != 0, NULL);
266 
267 	langname = (const char *) g_hash_table_lookup (iso_639_table, str[0]);
268 
269 	if (len == 1 && langname != NULL)
270 	{
271 		name = g_strdup (dgettext (ISO_639_DOMAIN, langname));
272 	}
273 	else if (len == 2 && langname != NULL)
274 	{
275 		gchar *locale_code = g_ascii_strdown (str[1], -1);
276 
277 		localename = (const char *) g_hash_table_lookup (iso_3166_table, locale_code);
278 		g_free (locale_code);
279 
280 		if (localename != NULL)
281 		{
282 			/* Translators: the first %s is the language name, and
283 			 * the second %s is the locale name. Example:
284 			 * "French (France)"
285 			 */
286 			name = g_strdup_printf (C_("language", "%s (%s)"),
287 						dgettext (ISO_639_DOMAIN, langname),
288 						dgettext (ISO_3166_DOMAIN, localename));
289 		}
290 		else
291 		{
292 			name = g_strdup_printf (C_("language", "%s (%s)"),
293 						dgettext (ISO_639_DOMAIN, langname), str[1]);
294 		}
295 	}
296 	else
297 	{
298 		/* Translators: this refers to an unknown language code
299 		 * (one which isn't in our built-in list).
300 		 */
301 		name = g_strdup_printf (C_("language", "Unknown (%s)"), code);
302 	}
303 
304 	g_strfreev (str);
305 
306 	return name;
307 }
308 
309 static void
enumerate_dicts(const char * const lang_tag,const char * const provider_name,const char * const provider_desc,const char * const provider_file,void * user_data)310 enumerate_dicts (const char * const lang_tag,
311 		 const char * const provider_name,
312 		 const char * const provider_desc,
313 		 const char * const provider_file,
314 		 void * user_data)
315 {
316 	gchar *lang_name;
317 
318 	GTree *dicts = (GTree *)user_data;
319 
320 	lang_name = create_name_for_language (lang_tag);
321 	g_return_if_fail (lang_name != NULL);
322 
323 	/* g_print ("%s - %s\n", lang_tag, lang_name); */
324 
325 	g_tree_replace (dicts, g_strdup (lang_tag), lang_name);
326 }
327 
328 static gint
key_cmp(gconstpointer a,gconstpointer b,gpointer user_data)329 key_cmp (gconstpointer a, gconstpointer b, gpointer user_data)
330 {
331 	return strcmp (a, b);
332 }
333 
334 static gint
lang_cmp(const PlumaSpellCheckerLanguage * a,const PlumaSpellCheckerLanguage * b)335 lang_cmp (const PlumaSpellCheckerLanguage *a,
336           const PlumaSpellCheckerLanguage *b)
337 {
338 	return g_utf8_collate (a->name, b->name);
339 }
340 
341 static gboolean
build_langs_list(const gchar * key,const gchar * value,gpointer data)342 build_langs_list (const gchar *key,
343 		  const gchar *value,
344 		  gpointer     data)
345 {
346 	PlumaSpellCheckerLanguage *lang = g_new (PlumaSpellCheckerLanguage, 1);
347 
348 	lang->abrev = g_strdup (key);
349 	lang->name = g_strdup (value);
350 
351 	available_languages = g_slist_insert_sorted (available_languages,
352 						     lang,
353 						     (GCompareFunc)lang_cmp);
354 
355 	return FALSE;
356 }
357 
358 const GSList *
pluma_spell_checker_get_available_languages(void)359 pluma_spell_checker_get_available_languages (void)
360 {
361 	EnchantBroker *broker;
362 	GTree *dicts;
363 
364 	if (available_languages_initialized)
365 		return available_languages;
366 
367 	g_return_val_if_fail (available_languages == NULL, NULL);
368 
369 	available_languages_initialized = TRUE;
370 
371 	broker = enchant_broker_init ();
372 	g_return_val_if_fail (broker != NULL, NULL);
373 
374 	/* Use a GTree to efficiently remove duplicates while building the list */
375 	dicts = g_tree_new_full (key_cmp,
376 				 NULL,
377 				 (GDestroyNotify)g_free,
378 				 (GDestroyNotify)g_free);
379 
380 	iso_639_table = create_iso_639_table ();
381 	iso_3166_table = create_iso_3166_table ();
382 
383 	enchant_broker_list_dicts (broker, enumerate_dicts, dicts);
384 
385 	enchant_broker_free (broker);
386 
387 	g_hash_table_destroy (iso_639_table);
388 	g_hash_table_destroy (iso_3166_table);
389 
390 	iso_639_table = NULL;
391 	iso_3166_table = NULL;
392 
393 	g_tree_foreach (dicts, (GTraverseFunc)build_langs_list, NULL);
394 
395 	g_tree_destroy (dicts);
396 
397 	return available_languages;
398 }
399 
400 const gchar *
pluma_spell_checker_language_to_string(const PlumaSpellCheckerLanguage * lang)401 pluma_spell_checker_language_to_string (const PlumaSpellCheckerLanguage *lang)
402 {
403 	if (lang == NULL)
404 		/* Translators: this refers the Default language used by the
405 		 * spell checker
406 		 */
407 		return C_("language", "Default");
408 
409 	return lang->name;
410 }
411 
412 const gchar *
pluma_spell_checker_language_to_key(const PlumaSpellCheckerLanguage * lang)413 pluma_spell_checker_language_to_key (const PlumaSpellCheckerLanguage *lang)
414 {
415 	g_return_val_if_fail (lang != NULL, NULL);
416 
417 	return lang->abrev;
418 }
419 
420 const PlumaSpellCheckerLanguage *
pluma_spell_checker_language_from_key(const gchar * key)421 pluma_spell_checker_language_from_key (const gchar *key)
422 {
423 	const GSList *langs;
424 
425 	g_return_val_if_fail (key != NULL, NULL);
426 
427 	langs = pluma_spell_checker_get_available_languages ();
428 
429 	while (langs != NULL)
430 	{
431 		const PlumaSpellCheckerLanguage *l = (const PlumaSpellCheckerLanguage *)langs->data;
432 
433 		if (g_ascii_strcasecmp (key, l->abrev) == 0)
434 			return l;
435 
436 		langs = g_slist_next (langs);
437 	}
438 
439 	return NULL;
440 }
441