1 /*
2  * pluma-encodings.c
3  * This file is part of pluma
4  *
5  * Copyright (C) 2002-2005 Paolo Maggi
6  * Copyright (C) 2012-2021 MATE Developers
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin St, Fifth Floor,
21  * Boston, MA 02110-1301, USA.
22  */
23 
24 /*
25  * Modified by the pluma Team, 2002-2005. See the AUTHORS file for a
26  * list of people on the pluma Team.
27  * See the ChangeLog files for a list of changes.
28  *
29  * $Id$
30  */
31 
32 #ifdef HAVE_CONFIG_H
33 #include <config.h>
34 #endif
35 
36 #include <string.h>
37 
38 #include <glib/gi18n.h>
39 
40 #include "pluma-encodings.h"
41 
42 
43 struct _PlumaEncoding
44 {
45 	gint   index;
46 	const gchar *charset;
47 	const gchar *name;
48 };
49 
50 /*
51  * The original versions of the following tables are taken from profterm
52  *
53  * Copyright (C) 2002 Red Hat, Inc.
54  */
55 
/*
 * Index of every encoding. The enumerators before PLUMA_ENCODING_LAST are
 * used directly as positions in encodings[], so both lists must be kept
 * in the same order. The enumerators after PLUMA_ENCODING_LAST have no
 * slot in that table.
 */
typedef enum
{
  /* ISO 8859 family */
  PLUMA_ENCODING_ISO_8859_1,
  PLUMA_ENCODING_ISO_8859_2,
  PLUMA_ENCODING_ISO_8859_3,
  PLUMA_ENCODING_ISO_8859_4,
  PLUMA_ENCODING_ISO_8859_5,
  PLUMA_ENCODING_ISO_8859_6,
  PLUMA_ENCODING_ISO_8859_7,
  PLUMA_ENCODING_ISO_8859_8,
  PLUMA_ENCODING_ISO_8859_9,
  PLUMA_ENCODING_ISO_8859_10,
  PLUMA_ENCODING_ISO_8859_13,
  PLUMA_ENCODING_ISO_8859_14,
  PLUMA_ENCODING_ISO_8859_15,
  PLUMA_ENCODING_ISO_8859_16,

  /* Unicode variants other than UTF-8 */
  PLUMA_ENCODING_UTF_7,
  PLUMA_ENCODING_UTF_16,
  PLUMA_ENCODING_UTF_16_BE,
  PLUMA_ENCODING_UTF_16_LE,
  PLUMA_ENCODING_UTF_32,
  PLUMA_ENCODING_UCS_2,
  PLUMA_ENCODING_UCS_4,

  PLUMA_ENCODING_ARMSCII_8,
  PLUMA_ENCODING_BIG5,
  PLUMA_ENCODING_BIG5_HKSCS,
  PLUMA_ENCODING_CP_866,

  PLUMA_ENCODING_EUC_JP,
  PLUMA_ENCODING_EUC_JP_MS,
  PLUMA_ENCODING_CP932,
  PLUMA_ENCODING_EUC_KR,
  PLUMA_ENCODING_EUC_TW,

  PLUMA_ENCODING_GB18030,
  PLUMA_ENCODING_GB2312,
  PLUMA_ENCODING_GBK,
  PLUMA_ENCODING_GEOSTD8,

  /* IBM code pages */
  PLUMA_ENCODING_IBM_850,
  PLUMA_ENCODING_IBM_852,
  PLUMA_ENCODING_IBM_855,
  PLUMA_ENCODING_IBM_857,
  PLUMA_ENCODING_IBM_862,
  PLUMA_ENCODING_IBM_864,

  PLUMA_ENCODING_ISO_2022_JP,
  PLUMA_ENCODING_ISO_2022_KR,
  PLUMA_ENCODING_ISO_IR_111,
  PLUMA_ENCODING_JOHAB,
  PLUMA_ENCODING_KOI8_R,   /* charset spelled "KOI8R" in the table */
  PLUMA_ENCODING_KOI8__R,  /* charset spelled "KOI8-R" in the table */
  PLUMA_ENCODING_KOI8_U,

  PLUMA_ENCODING_SHIFT_JIS,
  PLUMA_ENCODING_TCVN,
  PLUMA_ENCODING_TIS_620,
  PLUMA_ENCODING_UHC,
  PLUMA_ENCODING_VISCII,

  /* Windows code pages */
  PLUMA_ENCODING_WINDOWS_1250,
  PLUMA_ENCODING_WINDOWS_1251,
  PLUMA_ENCODING_WINDOWS_1252,
  PLUMA_ENCODING_WINDOWS_1253,
  PLUMA_ENCODING_WINDOWS_1254,
  PLUMA_ENCODING_WINDOWS_1255,
  PLUMA_ENCODING_WINDOWS_1256,
  PLUMA_ENCODING_WINDOWS_1257,
  PLUMA_ENCODING_WINDOWS_1258,

  /* Number of entries in encodings[]; used as the upper bound for lookups */
  PLUMA_ENCODING_LAST,

  /* Held in their own static objects rather than in encodings[] */
  PLUMA_ENCODING_UTF_8,
  PLUMA_ENCODING_UNKNOWN

} PlumaEncodingIndex;
135 
136 static const PlumaEncoding utf8_encoding =  {
137 	PLUMA_ENCODING_UTF_8,
138 	"UTF-8",
139 	N_("Unicode")
140 };
141 
142 /* initialized in pluma_encoding_lazy_init() */
143 static PlumaEncoding unknown_encoding = {
144 	PLUMA_ENCODING_UNKNOWN,
145 	NULL,
146 	NULL
147 };
148 
149 static const PlumaEncoding encodings [] = {
150 
151   { PLUMA_ENCODING_ISO_8859_1,
152     "ISO-8859-1", N_("Western") },
153   { PLUMA_ENCODING_ISO_8859_2,
154    "ISO-8859-2", N_("Central European") },
155   { PLUMA_ENCODING_ISO_8859_3,
156     "ISO-8859-3", N_("South European") },
157   { PLUMA_ENCODING_ISO_8859_4,
158     "ISO-8859-4", N_("Baltic") },
159   { PLUMA_ENCODING_ISO_8859_5,
160     "ISO-8859-5", N_("Cyrillic") },
161   { PLUMA_ENCODING_ISO_8859_6,
162     "ISO-8859-6", N_("Arabic") },
163   { PLUMA_ENCODING_ISO_8859_7,
164     "ISO-8859-7", N_("Greek") },
165   { PLUMA_ENCODING_ISO_8859_8,
166     "ISO-8859-8", N_("Hebrew Visual") },
167   { PLUMA_ENCODING_ISO_8859_9,
168     "ISO-8859-9", N_("Turkish") },
169   { PLUMA_ENCODING_ISO_8859_10,
170     "ISO-8859-10", N_("Nordic") },
171   { PLUMA_ENCODING_ISO_8859_13,
172     "ISO-8859-13", N_("Baltic") },
173   { PLUMA_ENCODING_ISO_8859_14,
174     "ISO-8859-14", N_("Celtic") },
175   { PLUMA_ENCODING_ISO_8859_15,
176     "ISO-8859-15", N_("Western") },
177   { PLUMA_ENCODING_ISO_8859_16,
178     "ISO-8859-16", N_("Romanian") },
179 
180   { PLUMA_ENCODING_UTF_7,
181     "UTF-7", N_("Unicode") },
182   { PLUMA_ENCODING_UTF_16,
183     "UTF-16", N_("Unicode") },
184   { PLUMA_ENCODING_UTF_16_BE,
185     "UTF-16BE", N_("Unicode") },
186   { PLUMA_ENCODING_UTF_16_LE,
187     "UTF-16LE", N_("Unicode") },
188   { PLUMA_ENCODING_UTF_32,
189     "UTF-32", N_("Unicode") },
190   { PLUMA_ENCODING_UCS_2,
191     "UCS-2", N_("Unicode") },
192   { PLUMA_ENCODING_UCS_4,
193     "UCS-4", N_("Unicode") },
194 
195   { PLUMA_ENCODING_ARMSCII_8,
196     "ARMSCII-8", N_("Armenian") },
197   { PLUMA_ENCODING_BIG5,
198     "BIG5", N_("Chinese Traditional") },
199   { PLUMA_ENCODING_BIG5_HKSCS,
200     "BIG5-HKSCS", N_("Chinese Traditional") },
201   { PLUMA_ENCODING_CP_866,
202     "CP866", N_("Cyrillic/Russian") },
203 
204   { PLUMA_ENCODING_EUC_JP,
205     "EUC-JP", N_("Japanese") },
206   { PLUMA_ENCODING_EUC_JP_MS,
207     "EUC-JP-MS", N_("Japanese") },
208   { PLUMA_ENCODING_CP932,
209     "CP932", N_("Japanese") },
210 
211   { PLUMA_ENCODING_EUC_KR,
212     "EUC-KR", N_("Korean") },
213   { PLUMA_ENCODING_EUC_TW,
214     "EUC-TW", N_("Chinese Traditional") },
215 
216   { PLUMA_ENCODING_GB18030,
217     "GB18030", N_("Chinese Simplified") },
218   { PLUMA_ENCODING_GB2312,
219     "GB2312", N_("Chinese Simplified") },
220   { PLUMA_ENCODING_GBK,
221     "GBK", N_("Chinese Simplified") },
222   { PLUMA_ENCODING_GEOSTD8,
223     "GEORGIAN-ACADEMY", N_("Georgian") }, /* FIXME GEOSTD8 ? */
224 
225   { PLUMA_ENCODING_IBM_850,
226     "IBM850", N_("Western") },
227   { PLUMA_ENCODING_IBM_852,
228     "IBM852", N_("Central European") },
229   { PLUMA_ENCODING_IBM_855,
230     "IBM855", N_("Cyrillic") },
231   { PLUMA_ENCODING_IBM_857,
232     "IBM857", N_("Turkish") },
233   { PLUMA_ENCODING_IBM_862,
234     "IBM862", N_("Hebrew") },
235   { PLUMA_ENCODING_IBM_864,
236     "IBM864", N_("Arabic") },
237 
238   { PLUMA_ENCODING_ISO_2022_JP,
239     "ISO-2022-JP", N_("Japanese") },
240   { PLUMA_ENCODING_ISO_2022_KR,
241     "ISO-2022-KR", N_("Korean") },
242   { PLUMA_ENCODING_ISO_IR_111,
243     "ISO-IR-111", N_("Cyrillic") },
244   { PLUMA_ENCODING_JOHAB,
245     "JOHAB", N_("Korean") },
246   { PLUMA_ENCODING_KOI8_R,
247     "KOI8R", N_("Cyrillic") },
248   { PLUMA_ENCODING_KOI8__R,
249     "KOI8-R", N_("Cyrillic") },
250   { PLUMA_ENCODING_KOI8_U,
251     "KOI8U", N_("Cyrillic/Ukrainian") },
252 
253   { PLUMA_ENCODING_SHIFT_JIS,
254     "SHIFT_JIS", N_("Japanese") },
255   { PLUMA_ENCODING_TCVN,
256     "TCVN", N_("Vietnamese") },
257   { PLUMA_ENCODING_TIS_620,
258     "TIS-620", N_("Thai") },
259   { PLUMA_ENCODING_UHC,
260     "UHC", N_("Korean") },
261   { PLUMA_ENCODING_VISCII,
262     "VISCII", N_("Vietnamese") },
263 
264   { PLUMA_ENCODING_WINDOWS_1250,
265     "WINDOWS-1250", N_("Central European") },
266   { PLUMA_ENCODING_WINDOWS_1251,
267     "WINDOWS-1251", N_("Cyrillic") },
268   { PLUMA_ENCODING_WINDOWS_1252,
269     "WINDOWS-1252", N_("Western") },
270   { PLUMA_ENCODING_WINDOWS_1253,
271     "WINDOWS-1253", N_("Greek") },
272   { PLUMA_ENCODING_WINDOWS_1254,
273     "WINDOWS-1254", N_("Turkish") },
274   { PLUMA_ENCODING_WINDOWS_1255,
275     "WINDOWS-1255", N_("Hebrew") },
276   { PLUMA_ENCODING_WINDOWS_1256,
277     "WINDOWS-1256", N_("Arabic") },
278   { PLUMA_ENCODING_WINDOWS_1257,
279     "WINDOWS-1257", N_("Baltic") },
280   { PLUMA_ENCODING_WINDOWS_1258,
281     "WINDOWS-1258", N_("Vietnamese") }
282 };
283 
284 static void
pluma_encoding_lazy_init(void)285 pluma_encoding_lazy_init (void)
286 {
287 	static gboolean initialized = FALSE;
288 	const gchar *locale_charset;
289 
290 	if (initialized)
291 		return;
292 
293 	if (g_get_charset (&locale_charset) == FALSE)
294 	{
295 		unknown_encoding.charset = g_strdup (locale_charset);
296 	}
297 
298 	initialized = TRUE;
299 }
300 
301 const PlumaEncoding *
pluma_encoding_get_from_charset(const gchar * charset)302 pluma_encoding_get_from_charset (const gchar *charset)
303 {
304 	gint i;
305 
306 	g_return_val_if_fail (charset != NULL, NULL);
307 
308 	pluma_encoding_lazy_init ();
309 
310 	if (charset == NULL)
311 		return NULL;
312 
313 	if (g_ascii_strcasecmp (charset, "UTF-8") == 0)
314 		return pluma_encoding_get_utf8 ();
315 
316 	i = 0;
317 	while (i < PLUMA_ENCODING_LAST)
318 	{
319 		if (g_ascii_strcasecmp (charset, encodings[i].charset) == 0)
320 			return &encodings[i];
321 
322 		++i;
323 	}
324 
325 	if (unknown_encoding.charset != NULL)
326 	{
327 		if (g_ascii_strcasecmp (charset, unknown_encoding.charset) == 0)
328 			return &unknown_encoding;
329 	}
330 
331 	return NULL;
332 }
333 
334 const PlumaEncoding *
pluma_encoding_get_from_index(gint idx)335 pluma_encoding_get_from_index (gint idx)
336 {
337 	g_return_val_if_fail (idx >= 0, NULL);
338 
339 	if (idx >= PLUMA_ENCODING_LAST)
340 		return NULL;
341 
342 	pluma_encoding_lazy_init ();
343 
344 	return &encodings[idx];
345 }
346 
347 const PlumaEncoding *
pluma_encoding_get_utf8(void)348 pluma_encoding_get_utf8 (void)
349 {
350 	pluma_encoding_lazy_init ();
351 
352 	return &utf8_encoding;
353 }
354 
355 const PlumaEncoding *
pluma_encoding_get_current(void)356 pluma_encoding_get_current (void)
357 {
358 	static gboolean initialized = FALSE;
359 	static const PlumaEncoding *locale_encoding = NULL;
360 
361 	const gchar *locale_charset;
362 
363 	pluma_encoding_lazy_init ();
364 
365 	if (initialized != FALSE)
366 		return locale_encoding;
367 
368 	if (g_get_charset (&locale_charset) == FALSE)
369 	{
370 		g_return_val_if_fail (locale_charset != NULL, &utf8_encoding);
371 
372 		locale_encoding = pluma_encoding_get_from_charset (locale_charset);
373 	}
374 	else
375 	{
376 		locale_encoding = &utf8_encoding;
377 	}
378 
379 	if (locale_encoding == NULL)
380 	{
381 		locale_encoding = &unknown_encoding;
382 	}
383 
384 	g_return_val_if_fail (locale_encoding != NULL, NULL);
385 
386 	initialized = TRUE;
387 
388 	return locale_encoding;
389 }
390 
391 gchar *
pluma_encoding_to_string(const PlumaEncoding * enc)392 pluma_encoding_to_string (const PlumaEncoding* enc)
393 {
394 	g_return_val_if_fail (enc != NULL, NULL);
395 
396 	pluma_encoding_lazy_init ();
397 
398 	g_return_val_if_fail (enc->charset != NULL, NULL);
399 
400 	if (enc->name != NULL)
401 	{
402 	    	return g_strdup_printf ("%s (%s)", _(enc->name), enc->charset);
403 	}
404 	else
405 	{
406 		if (g_ascii_strcasecmp (enc->charset, "ANSI_X3.4-1968") == 0)
407 			return g_strdup_printf ("US-ASCII (%s)", enc->charset);
408 		else
409 			return g_strdup (enc->charset);
410 	}
411 }
412 
413 const gchar *
pluma_encoding_get_charset(const PlumaEncoding * enc)414 pluma_encoding_get_charset (const PlumaEncoding* enc)
415 {
416 	g_return_val_if_fail (enc != NULL, NULL);
417 
418 	pluma_encoding_lazy_init ();
419 
420 	g_return_val_if_fail (enc->charset != NULL, NULL);
421 
422 	return enc->charset;
423 }
424 
425 const gchar *
pluma_encoding_get_name(const PlumaEncoding * enc)426 pluma_encoding_get_name (const PlumaEncoding* enc)
427 {
428 	g_return_val_if_fail (enc != NULL, NULL);
429 
430 	pluma_encoding_lazy_init ();
431 
432 	return (enc->name == NULL) ? _("Unknown") : _(enc->name);
433 }
434 
/* These are to make language bindings happy. Since encodings are
 * const, copy() just returns the same pointer and free() does
 * nothing. */
438 
439 PlumaEncoding *
pluma_encoding_copy(const PlumaEncoding * enc)440 pluma_encoding_copy (const PlumaEncoding *enc)
441 {
442 	g_return_val_if_fail (enc != NULL, NULL);
443 
444 	return (PlumaEncoding *) enc;
445 }
446 
447 void
pluma_encoding_free(PlumaEncoding * enc)448 pluma_encoding_free (PlumaEncoding *enc)
449 {
450 	g_return_if_fail (enc != NULL);
451 }
452 
453 /**
454  * pluma_encoding_get_type:
455  *
456  * Retrieves the GType object which is associated with the
457  * #PlumaEncoding class.
458  *
459  * Return value: the GType associated with #PlumaEncoding.
460  **/
461 GType
pluma_encoding_get_type(void)462 pluma_encoding_get_type (void)
463 {
464 	static GType our_type = 0;
465 
466 	if (!our_type)
467 		our_type = g_boxed_type_register_static (
468 			"PlumaEncoding",
469 			(GBoxedCopyFunc) pluma_encoding_copy,
470 			(GBoxedFreeFunc) pluma_encoding_free);
471 
472 	return our_type;
473 }
474 
475 static gboolean
data_exists(GSList * list,const gpointer data)476 data_exists (GSList *list, const gpointer  data)
477 {
478 	while (list != NULL)
479 	{
480 		if (list->data == data)
481 			return TRUE;
482 
483 		list = g_slist_next (list);
484 	}
485 
486 	return FALSE;
487 }
488 
489 GSList *
_pluma_encoding_strv_to_list(const gchar * const * enc_str)490 _pluma_encoding_strv_to_list (const gchar * const *enc_str)
491 {
492 	GSList *res = NULL;
493 	gchar **p;
494 	const PlumaEncoding *enc;
495 
496 	for (p = (gchar **)enc_str; p != NULL && *p != NULL; p++)
497 	{
498 		const gchar *charset = *p;
499 
500 		if (strcmp (charset, "CURRENT") == 0)
501 			g_get_charset (&charset);
502 
503 		g_return_val_if_fail (charset != NULL, NULL);
504 		enc = pluma_encoding_get_from_charset (charset);
505 
506 		if (enc != NULL)
507 		{
508 			if (!data_exists (res, (gpointer)enc))
509 				res = g_slist_prepend (res, (gpointer)enc);
510 
511 		}
512 	}
513 
514 	return g_slist_reverse (res);
515 }
516 
517 gchar **
_pluma_encoding_list_to_strv(const GSList * enc_list)518 _pluma_encoding_list_to_strv (const GSList *enc_list)
519 {
520 	GSList *l;
521 	GPtrArray *array;
522 
523 	array = g_ptr_array_sized_new (g_slist_length ((GSList *)enc_list) + 1);
524 
525 	for (l = (GSList *)enc_list; l != NULL; l = g_slist_next (l))
526 	{
527 		const PlumaEncoding *enc;
528 		const gchar *charset;
529 
530 		enc = (const PlumaEncoding *)l->data;
531 
532 		charset = pluma_encoding_get_charset (enc);
533 		g_return_val_if_fail (charset != NULL, NULL);
534 
535 		g_ptr_array_add (array, g_strdup (charset));
536 	}
537 
538 	g_ptr_array_add (array, NULL);
539 
540 	return (gchar **)g_ptr_array_free (array, FALSE);
541 }
542 
543