1 /*
2 * pluma-encodings.c
3 * This file is part of pluma
4 *
5 * Copyright (C) 2002-2005 Paolo Maggi
6 * Copyright (C) 2012-2021 MATE Developers
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor,
21 * Boston, MA 02110-1301, USA.
22 */
23
24 /*
25 * Modified by the pluma Team, 2002-2005. See the AUTHORS file for a
26 * list of people on the pluma Team.
27 * See the ChangeLog files for a list of changes.
28 *
29 * $Id$
30 */
31
32 #ifdef HAVE_CONFIG_H
33 #include <config.h>
34 #endif
35
36 #include <string.h>
37
38 #include <glib/gi18n.h>
39
40 #include "pluma-encodings.h"
41
42
/*
 * Describes a single character encoding.
 *
 * The fields are filled in positionally by the static initializers in this
 * file, so their order must not change.
 */
struct _PlumaEncoding
{
	/* PlumaEncodingIndex value identifying this encoding. */
	gint index;
	/* Charset name, e.g. "ISO-8859-1"; NULL for the unknown encoding
	 * until pluma_encoding_lazy_init() stores the locale charset. */
	const gchar *charset;
	/* Untranslated descriptive name (marked with N_()), translated with
	 * _() when it is used; NULL when no descriptive name exists. */
	const gchar *name;
};
49
50 /*
51 * The original versions of the following tables are taken from profterm
52 *
53 * Copyright (C) 2002 Red Hat, Inc.
54 */
55
/*
 * Identifiers for the encodings handled in this file.
 *
 * Every enumerator that precedes PLUMA_ENCODING_LAST has a matching row, in
 * the same position, in the encodings[] table: the numeric value is used
 * directly as the table index (see pluma_encoding_get_from_index()).
 * PLUMA_ENCODING_LAST is therefore also the number of rows to scan.
 *
 * The enumerators after PLUMA_ENCODING_LAST are not table indices; they tag
 * the stand-alone utf8_encoding and unknown_encoding objects.
 */
typedef enum
{

	PLUMA_ENCODING_ISO_8859_1,
	PLUMA_ENCODING_ISO_8859_2,
	PLUMA_ENCODING_ISO_8859_3,
	PLUMA_ENCODING_ISO_8859_4,
	PLUMA_ENCODING_ISO_8859_5,
	PLUMA_ENCODING_ISO_8859_6,
	PLUMA_ENCODING_ISO_8859_7,
	PLUMA_ENCODING_ISO_8859_8,
	PLUMA_ENCODING_ISO_8859_9,
	PLUMA_ENCODING_ISO_8859_10,
	PLUMA_ENCODING_ISO_8859_13,
	PLUMA_ENCODING_ISO_8859_14,
	PLUMA_ENCODING_ISO_8859_15,
	PLUMA_ENCODING_ISO_8859_16,

	PLUMA_ENCODING_UTF_7,
	PLUMA_ENCODING_UTF_16,
	PLUMA_ENCODING_UTF_16_BE,
	PLUMA_ENCODING_UTF_16_LE,
	PLUMA_ENCODING_UTF_32,
	PLUMA_ENCODING_UCS_2,
	PLUMA_ENCODING_UCS_4,

	PLUMA_ENCODING_ARMSCII_8,
	PLUMA_ENCODING_BIG5,
	PLUMA_ENCODING_BIG5_HKSCS,
	PLUMA_ENCODING_CP_866,

	PLUMA_ENCODING_EUC_JP,
	PLUMA_ENCODING_EUC_JP_MS,
	PLUMA_ENCODING_CP932,
	PLUMA_ENCODING_EUC_KR,
	PLUMA_ENCODING_EUC_TW,

	PLUMA_ENCODING_GB18030,
	PLUMA_ENCODING_GB2312,
	PLUMA_ENCODING_GBK,
	PLUMA_ENCODING_GEOSTD8,

	PLUMA_ENCODING_IBM_850,
	PLUMA_ENCODING_IBM_852,
	PLUMA_ENCODING_IBM_855,
	PLUMA_ENCODING_IBM_857,
	PLUMA_ENCODING_IBM_862,
	PLUMA_ENCODING_IBM_864,

	PLUMA_ENCODING_ISO_2022_JP,
	PLUMA_ENCODING_ISO_2022_KR,
	PLUMA_ENCODING_ISO_IR_111,
	PLUMA_ENCODING_JOHAB,
	/* The table maps KOI8_R to the charset "KOI8R" and KOI8__R to "KOI8-R". */
	PLUMA_ENCODING_KOI8_R,
	PLUMA_ENCODING_KOI8__R,
	PLUMA_ENCODING_KOI8_U,

	PLUMA_ENCODING_SHIFT_JIS,
	PLUMA_ENCODING_TCVN,
	PLUMA_ENCODING_TIS_620,
	PLUMA_ENCODING_UHC,
	PLUMA_ENCODING_VISCII,

	PLUMA_ENCODING_WINDOWS_1250,
	PLUMA_ENCODING_WINDOWS_1251,
	PLUMA_ENCODING_WINDOWS_1252,
	PLUMA_ENCODING_WINDOWS_1253,
	PLUMA_ENCODING_WINDOWS_1254,
	PLUMA_ENCODING_WINDOWS_1255,
	PLUMA_ENCODING_WINDOWS_1256,
	PLUMA_ENCODING_WINDOWS_1257,
	PLUMA_ENCODING_WINDOWS_1258,

	/* End of the table-backed range; also the row count of encodings[]. */
	PLUMA_ENCODING_LAST,

	/* Not table indices: used by utf8_encoding and unknown_encoding. */
	PLUMA_ENCODING_UTF_8,
	PLUMA_ENCODING_UNKNOWN

} PlumaEncodingIndex;
135
136 static const PlumaEncoding utf8_encoding = {
137 PLUMA_ENCODING_UTF_8,
138 "UTF-8",
139 N_("Unicode")
140 };
141
142 /* initialized in pluma_encoding_lazy_init() */
143 static PlumaEncoding unknown_encoding = {
144 PLUMA_ENCODING_UNKNOWN,
145 NULL,
146 NULL
147 };
148
149 static const PlumaEncoding encodings [] = {
150
151 { PLUMA_ENCODING_ISO_8859_1,
152 "ISO-8859-1", N_("Western") },
153 { PLUMA_ENCODING_ISO_8859_2,
154 "ISO-8859-2", N_("Central European") },
155 { PLUMA_ENCODING_ISO_8859_3,
156 "ISO-8859-3", N_("South European") },
157 { PLUMA_ENCODING_ISO_8859_4,
158 "ISO-8859-4", N_("Baltic") },
159 { PLUMA_ENCODING_ISO_8859_5,
160 "ISO-8859-5", N_("Cyrillic") },
161 { PLUMA_ENCODING_ISO_8859_6,
162 "ISO-8859-6", N_("Arabic") },
163 { PLUMA_ENCODING_ISO_8859_7,
164 "ISO-8859-7", N_("Greek") },
165 { PLUMA_ENCODING_ISO_8859_8,
166 "ISO-8859-8", N_("Hebrew Visual") },
167 { PLUMA_ENCODING_ISO_8859_9,
168 "ISO-8859-9", N_("Turkish") },
169 { PLUMA_ENCODING_ISO_8859_10,
170 "ISO-8859-10", N_("Nordic") },
171 { PLUMA_ENCODING_ISO_8859_13,
172 "ISO-8859-13", N_("Baltic") },
173 { PLUMA_ENCODING_ISO_8859_14,
174 "ISO-8859-14", N_("Celtic") },
175 { PLUMA_ENCODING_ISO_8859_15,
176 "ISO-8859-15", N_("Western") },
177 { PLUMA_ENCODING_ISO_8859_16,
178 "ISO-8859-16", N_("Romanian") },
179
180 { PLUMA_ENCODING_UTF_7,
181 "UTF-7", N_("Unicode") },
182 { PLUMA_ENCODING_UTF_16,
183 "UTF-16", N_("Unicode") },
184 { PLUMA_ENCODING_UTF_16_BE,
185 "UTF-16BE", N_("Unicode") },
186 { PLUMA_ENCODING_UTF_16_LE,
187 "UTF-16LE", N_("Unicode") },
188 { PLUMA_ENCODING_UTF_32,
189 "UTF-32", N_("Unicode") },
190 { PLUMA_ENCODING_UCS_2,
191 "UCS-2", N_("Unicode") },
192 { PLUMA_ENCODING_UCS_4,
193 "UCS-4", N_("Unicode") },
194
195 { PLUMA_ENCODING_ARMSCII_8,
196 "ARMSCII-8", N_("Armenian") },
197 { PLUMA_ENCODING_BIG5,
198 "BIG5", N_("Chinese Traditional") },
199 { PLUMA_ENCODING_BIG5_HKSCS,
200 "BIG5-HKSCS", N_("Chinese Traditional") },
201 { PLUMA_ENCODING_CP_866,
202 "CP866", N_("Cyrillic/Russian") },
203
204 { PLUMA_ENCODING_EUC_JP,
205 "EUC-JP", N_("Japanese") },
206 { PLUMA_ENCODING_EUC_JP_MS,
207 "EUC-JP-MS", N_("Japanese") },
208 { PLUMA_ENCODING_CP932,
209 "CP932", N_("Japanese") },
210
211 { PLUMA_ENCODING_EUC_KR,
212 "EUC-KR", N_("Korean") },
213 { PLUMA_ENCODING_EUC_TW,
214 "EUC-TW", N_("Chinese Traditional") },
215
216 { PLUMA_ENCODING_GB18030,
217 "GB18030", N_("Chinese Simplified") },
218 { PLUMA_ENCODING_GB2312,
219 "GB2312", N_("Chinese Simplified") },
220 { PLUMA_ENCODING_GBK,
221 "GBK", N_("Chinese Simplified") },
222 { PLUMA_ENCODING_GEOSTD8,
223 "GEORGIAN-ACADEMY", N_("Georgian") }, /* FIXME GEOSTD8 ? */
224
225 { PLUMA_ENCODING_IBM_850,
226 "IBM850", N_("Western") },
227 { PLUMA_ENCODING_IBM_852,
228 "IBM852", N_("Central European") },
229 { PLUMA_ENCODING_IBM_855,
230 "IBM855", N_("Cyrillic") },
231 { PLUMA_ENCODING_IBM_857,
232 "IBM857", N_("Turkish") },
233 { PLUMA_ENCODING_IBM_862,
234 "IBM862", N_("Hebrew") },
235 { PLUMA_ENCODING_IBM_864,
236 "IBM864", N_("Arabic") },
237
238 { PLUMA_ENCODING_ISO_2022_JP,
239 "ISO-2022-JP", N_("Japanese") },
240 { PLUMA_ENCODING_ISO_2022_KR,
241 "ISO-2022-KR", N_("Korean") },
242 { PLUMA_ENCODING_ISO_IR_111,
243 "ISO-IR-111", N_("Cyrillic") },
244 { PLUMA_ENCODING_JOHAB,
245 "JOHAB", N_("Korean") },
246 { PLUMA_ENCODING_KOI8_R,
247 "KOI8R", N_("Cyrillic") },
248 { PLUMA_ENCODING_KOI8__R,
249 "KOI8-R", N_("Cyrillic") },
250 { PLUMA_ENCODING_KOI8_U,
251 "KOI8U", N_("Cyrillic/Ukrainian") },
252
253 { PLUMA_ENCODING_SHIFT_JIS,
254 "SHIFT_JIS", N_("Japanese") },
255 { PLUMA_ENCODING_TCVN,
256 "TCVN", N_("Vietnamese") },
257 { PLUMA_ENCODING_TIS_620,
258 "TIS-620", N_("Thai") },
259 { PLUMA_ENCODING_UHC,
260 "UHC", N_("Korean") },
261 { PLUMA_ENCODING_VISCII,
262 "VISCII", N_("Vietnamese") },
263
264 { PLUMA_ENCODING_WINDOWS_1250,
265 "WINDOWS-1250", N_("Central European") },
266 { PLUMA_ENCODING_WINDOWS_1251,
267 "WINDOWS-1251", N_("Cyrillic") },
268 { PLUMA_ENCODING_WINDOWS_1252,
269 "WINDOWS-1252", N_("Western") },
270 { PLUMA_ENCODING_WINDOWS_1253,
271 "WINDOWS-1253", N_("Greek") },
272 { PLUMA_ENCODING_WINDOWS_1254,
273 "WINDOWS-1254", N_("Turkish") },
274 { PLUMA_ENCODING_WINDOWS_1255,
275 "WINDOWS-1255", N_("Hebrew") },
276 { PLUMA_ENCODING_WINDOWS_1256,
277 "WINDOWS-1256", N_("Arabic") },
278 { PLUMA_ENCODING_WINDOWS_1257,
279 "WINDOWS-1257", N_("Baltic") },
280 { PLUMA_ENCODING_WINDOWS_1258,
281 "WINDOWS-1258", N_("Vietnamese") }
282 };
283
284 static void
pluma_encoding_lazy_init(void)285 pluma_encoding_lazy_init (void)
286 {
287 static gboolean initialized = FALSE;
288 const gchar *locale_charset;
289
290 if (initialized)
291 return;
292
293 if (g_get_charset (&locale_charset) == FALSE)
294 {
295 unknown_encoding.charset = g_strdup (locale_charset);
296 }
297
298 initialized = TRUE;
299 }
300
301 const PlumaEncoding *
pluma_encoding_get_from_charset(const gchar * charset)302 pluma_encoding_get_from_charset (const gchar *charset)
303 {
304 gint i;
305
306 g_return_val_if_fail (charset != NULL, NULL);
307
308 pluma_encoding_lazy_init ();
309
310 if (charset == NULL)
311 return NULL;
312
313 if (g_ascii_strcasecmp (charset, "UTF-8") == 0)
314 return pluma_encoding_get_utf8 ();
315
316 i = 0;
317 while (i < PLUMA_ENCODING_LAST)
318 {
319 if (g_ascii_strcasecmp (charset, encodings[i].charset) == 0)
320 return &encodings[i];
321
322 ++i;
323 }
324
325 if (unknown_encoding.charset != NULL)
326 {
327 if (g_ascii_strcasecmp (charset, unknown_encoding.charset) == 0)
328 return &unknown_encoding;
329 }
330
331 return NULL;
332 }
333
334 const PlumaEncoding *
pluma_encoding_get_from_index(gint idx)335 pluma_encoding_get_from_index (gint idx)
336 {
337 g_return_val_if_fail (idx >= 0, NULL);
338
339 if (idx >= PLUMA_ENCODING_LAST)
340 return NULL;
341
342 pluma_encoding_lazy_init ();
343
344 return &encodings[idx];
345 }
346
347 const PlumaEncoding *
pluma_encoding_get_utf8(void)348 pluma_encoding_get_utf8 (void)
349 {
350 pluma_encoding_lazy_init ();
351
352 return &utf8_encoding;
353 }
354
355 const PlumaEncoding *
pluma_encoding_get_current(void)356 pluma_encoding_get_current (void)
357 {
358 static gboolean initialized = FALSE;
359 static const PlumaEncoding *locale_encoding = NULL;
360
361 const gchar *locale_charset;
362
363 pluma_encoding_lazy_init ();
364
365 if (initialized != FALSE)
366 return locale_encoding;
367
368 if (g_get_charset (&locale_charset) == FALSE)
369 {
370 g_return_val_if_fail (locale_charset != NULL, &utf8_encoding);
371
372 locale_encoding = pluma_encoding_get_from_charset (locale_charset);
373 }
374 else
375 {
376 locale_encoding = &utf8_encoding;
377 }
378
379 if (locale_encoding == NULL)
380 {
381 locale_encoding = &unknown_encoding;
382 }
383
384 g_return_val_if_fail (locale_encoding != NULL, NULL);
385
386 initialized = TRUE;
387
388 return locale_encoding;
389 }
390
391 gchar *
pluma_encoding_to_string(const PlumaEncoding * enc)392 pluma_encoding_to_string (const PlumaEncoding* enc)
393 {
394 g_return_val_if_fail (enc != NULL, NULL);
395
396 pluma_encoding_lazy_init ();
397
398 g_return_val_if_fail (enc->charset != NULL, NULL);
399
400 if (enc->name != NULL)
401 {
402 return g_strdup_printf ("%s (%s)", _(enc->name), enc->charset);
403 }
404 else
405 {
406 if (g_ascii_strcasecmp (enc->charset, "ANSI_X3.4-1968") == 0)
407 return g_strdup_printf ("US-ASCII (%s)", enc->charset);
408 else
409 return g_strdup (enc->charset);
410 }
411 }
412
413 const gchar *
pluma_encoding_get_charset(const PlumaEncoding * enc)414 pluma_encoding_get_charset (const PlumaEncoding* enc)
415 {
416 g_return_val_if_fail (enc != NULL, NULL);
417
418 pluma_encoding_lazy_init ();
419
420 g_return_val_if_fail (enc->charset != NULL, NULL);
421
422 return enc->charset;
423 }
424
425 const gchar *
pluma_encoding_get_name(const PlumaEncoding * enc)426 pluma_encoding_get_name (const PlumaEncoding* enc)
427 {
428 g_return_val_if_fail (enc != NULL, NULL);
429
430 pluma_encoding_lazy_init ();
431
432 return (enc->name == NULL) ? _("Unknown") : _(enc->name);
433 }
434
/* These are to make language bindings happy. Since Encodings are
 * const, copy() just returns the same pointer and free() does
 * nothing. */
438
439 PlumaEncoding *
pluma_encoding_copy(const PlumaEncoding * enc)440 pluma_encoding_copy (const PlumaEncoding *enc)
441 {
442 g_return_val_if_fail (enc != NULL, NULL);
443
444 return (PlumaEncoding *) enc;
445 }
446
/*
 * Boxed-type "free" callback. It releases nothing: the only effect is the
 * argument check, which fails when @enc is NULL.
 */
void
pluma_encoding_free (PlumaEncoding *enc)
{
	g_return_if_fail (enc != NULL);
}
452
453 /**
454 * pluma_encoding_get_type:
455 *
456 * Retrieves the GType object which is associated with the
457 * #PlumaEncoding class.
458 *
459 * Return value: the GType associated with #PlumaEncoding.
460 **/
461 GType
pluma_encoding_get_type(void)462 pluma_encoding_get_type (void)
463 {
464 static GType our_type = 0;
465
466 if (!our_type)
467 our_type = g_boxed_type_register_static (
468 "PlumaEncoding",
469 (GBoxedCopyFunc) pluma_encoding_copy,
470 (GBoxedFreeFunc) pluma_encoding_free);
471
472 return our_type;
473 }
474
475 static gboolean
data_exists(GSList * list,const gpointer data)476 data_exists (GSList *list, const gpointer data)
477 {
478 while (list != NULL)
479 {
480 if (list->data == data)
481 return TRUE;
482
483 list = g_slist_next (list);
484 }
485
486 return FALSE;
487 }
488
489 GSList *
_pluma_encoding_strv_to_list(const gchar * const * enc_str)490 _pluma_encoding_strv_to_list (const gchar * const *enc_str)
491 {
492 GSList *res = NULL;
493 gchar **p;
494 const PlumaEncoding *enc;
495
496 for (p = (gchar **)enc_str; p != NULL && *p != NULL; p++)
497 {
498 const gchar *charset = *p;
499
500 if (strcmp (charset, "CURRENT") == 0)
501 g_get_charset (&charset);
502
503 g_return_val_if_fail (charset != NULL, NULL);
504 enc = pluma_encoding_get_from_charset (charset);
505
506 if (enc != NULL)
507 {
508 if (!data_exists (res, (gpointer)enc))
509 res = g_slist_prepend (res, (gpointer)enc);
510
511 }
512 }
513
514 return g_slist_reverse (res);
515 }
516
517 gchar **
_pluma_encoding_list_to_strv(const GSList * enc_list)518 _pluma_encoding_list_to_strv (const GSList *enc_list)
519 {
520 GSList *l;
521 GPtrArray *array;
522
523 array = g_ptr_array_sized_new (g_slist_length ((GSList *)enc_list) + 1);
524
525 for (l = (GSList *)enc_list; l != NULL; l = g_slist_next (l))
526 {
527 const PlumaEncoding *enc;
528 const gchar *charset;
529
530 enc = (const PlumaEncoding *)l->data;
531
532 charset = pluma_encoding_get_charset (enc);
533 g_return_val_if_fail (charset != NULL, NULL);
534
535 g_ptr_array_add (array, g_strdup (charset));
536 }
537
538 g_ptr_array_add (array, NULL);
539
540 return (gchar **)g_ptr_array_free (array, FALSE);
541 }
542
543