1 /*
2  * Copyright © 2004 Noah Levitt
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms of the GNU General Public License as published by the
6  * Free Software Foundation; either version 3 of the License, or (at your
7  * option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful, but
10  * WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License along
15  * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 #include <config.h>
20 
21 #include <gtk/gtk.h>
22 #include <string.h>
23 
24 #include <glib/gi18n-lib.h>
25 
26 #include "gucharmap.h"
27 #include "gucharmap-private.h"
28 
29 #include "unicode-names.h"
30 #include "unicode-blocks.h"
31 #include "unicode-nameslist.h"
32 #include "unicode-categories.h"
33 #include "unicode-versions.h"
34 #include "unicode-unihan.h"
35 
36 /* constants for hangul (de)composition, see UAX #15 */
37 #define SBase 0xAC00
38 #define LCount 19
39 #define VCount 21
40 #define TCount 28
41 #define NCount (VCount * TCount)
42 #define SCount (LCount * NCount)
43 
44 static const gchar JAMO_L_TABLE[][4] = {
45   "G", "GG", "N", "D", "DD", "R", "M", "B", "BB",
46   "S", "SS", "", "J", "JJ", "C", "K", "T", "P", "H"
47 };
48 
49 static const gchar JAMO_V_TABLE[][4] = {
50   "A", "AE", "YA", "YAE", "EO", "E", "YEO", "YE", "O",
51   "WA", "WAE", "OE", "YO", "U", "WEO", "WE", "WI",
52   "YU", "EU", "YI", "I"
53 };
54 
55 static const gchar JAMO_T_TABLE[][4] = {
56   "", "G", "GG", "GS", "N", "NJ", "NH", "D", "L", "LG", "LM",
57   "LB", "LS", "LT", "LP", "LH", "M", "B", "BS",
58   "S", "SS", "NG", "J", "C", "K", "T", "P", "H"
59 };
60 
61 const gchar *
gucharmap_get_unicode_name(gunichar wc)62 gucharmap_get_unicode_name (gunichar wc)
63 {
64   static gchar buf[64];
65 
66   _gucharmap_intl_ensure_initialized ();
67 
68   if ((wc >= 0x3400 && wc <= 0x4dbf)       /* CJK Unified Ideographs Extension A */
69       || (wc >= 0x4e00 && wc <= 0x9fff)    /* CJK Unified Ideographs             */
70       || (wc >= 0x20000 && wc <= 0x2a6df)  /* CJK Unified Ideographs Extension B */
71       || (wc >= 0x2a700 && wc <= 0x2b738)  /* CJK Unified Ideographs Extension C */
72       || (wc >= 0x2b740 && wc <= 0x2b81d)  /* CJK Unified Ideographs Extension D */
73       || (wc >= 0x2b820 && wc <= 0x2cea1)  /* CJK Unified Ideographs Extension E */
74       || (wc >= 0x2ceb0 && wc <= 0x2ebe0)  /* CJK Unified Ideographs Extension F */
75       || (wc >= 0x30000 && wc <= 0x3134a)) /* CJK Unified Ideographs Extension G */
76     {
77       g_snprintf (buf, sizeof (buf), "CJK UNIFIED IDEOGRAPH-%04X", wc);
78       return buf;
79     }
80   else if ((wc >= 0xf900 && wc <= 0xfaff) || /* CJK Compatibility Ideographs            */
81            (wc >= 0x2f800 && wc <= 0x2fa1d)) /* CJK Compatibility Ideographs Supplement */
82     {
83       g_snprintf (buf, sizeof (buf), "CJK COMPATIBILITY IDEOGRAPH-%04X", wc);
84       return buf;
85   }
86   else if ((wc >= 0x17000 && wc <= 0x187f7) || /* Tangut            */
87            (wc >= 0x18d00 && wc <= 0x18d08))   /* Tangut Supplement */
88     {
89       g_snprintf (buf, sizeof (buf), "TANGUT IDEOGRAPH-%05X", wc);
90       return buf;
91   }
92   else if (wc >= 0x18800 && wc <= 0x18aff) {
93       g_snprintf (buf, sizeof (buf), "TANGUT COMPONENT-%03u", wc - 0x18800 + 1);
94       return buf;
95   }
96   else if (wc >= 0x18b00 && wc <= 0x18cd5) {
97       g_snprintf (buf, sizeof (buf), "KHITAN SMALL SCRIPT CHARACTER-%05X", wc);
98       return buf;
99   }
100   else if (wc >= 0xac00 && wc <= 0xd7af)
101     {
102       /* compute hangul syllable name as per UAX #15 */
103       gint SIndex = wc - SBase;
104       gint LIndex, VIndex, TIndex;
105 
106       if (SIndex < 0 || SIndex >= SCount)
107         return "";
108 
109       LIndex = SIndex / NCount;
110       VIndex = (SIndex % NCount) / TCount;
111       TIndex = SIndex % TCount;
112 
113       g_snprintf (buf, sizeof (buf), "HANGUL SYLLABLE %s%s%s",
114                   JAMO_L_TABLE[LIndex], JAMO_V_TABLE[VIndex], JAMO_T_TABLE[TIndex]);
115 
116       return buf;
117     }
118   else if (wc >= 0xD800 && wc <= 0xDB7F)
119     return _("<Non Private Use High Surrogate>");
120   else if (wc >= 0xDB80 && wc <= 0xDBFF)
121     return _("<Private Use High Surrogate>");
122   else if (wc >= 0xDC00 && wc <= 0xDFFF)
123     return _("<Low Surrogate>");
124   else if (wc >= 0xE000 && wc <= 0xF8FF)
125     return _("<Private Use>");
126   else if (wc >= 0xF0000 && wc <= 0xFFFFD)
127     return _("<Plane 15 Private Use>");
128   else if (wc >= 0x100000 && wc <= 0x10FFFD)
129     return _("<Plane 16 Private Use>");
130   else
131     {
132       const gchar *x = gucharmap_get_unicode_data_name (wc);
133       if (x == NULL)
134         return _("<not assigned>");
135       else
136         return x;
137     }
138 }
139 
140 const gchar *
gucharmap_get_unicode_category_name(gunichar wc)141 gucharmap_get_unicode_category_name (gunichar wc)
142 {
143   _gucharmap_intl_ensure_initialized ();
144 
145   switch (gucharmap_unichar_type (wc))
146     {
147       case G_UNICODE_CONTROL: return _("Other, Control");
148       case G_UNICODE_FORMAT: return _("Other, Format");
149       case G_UNICODE_UNASSIGNED: return _("Other, Not Assigned");
150       case G_UNICODE_PRIVATE_USE: return _("Other, Private Use");
151       case G_UNICODE_SURROGATE: return _("Other, Surrogate");
152       case G_UNICODE_LOWERCASE_LETTER: return _("Letter, Lowercase");
153       case G_UNICODE_MODIFIER_LETTER: return _("Letter, Modifier");
154       case G_UNICODE_OTHER_LETTER: return _("Letter, Other");
155       case G_UNICODE_TITLECASE_LETTER: return _("Letter, Titlecase");
156       case G_UNICODE_UPPERCASE_LETTER: return _("Letter, Uppercase");
157       case G_UNICODE_COMBINING_MARK: return _("Mark, Spacing Combining");
158       case G_UNICODE_ENCLOSING_MARK: return _("Mark, Enclosing");
159       case G_UNICODE_NON_SPACING_MARK: return _("Mark, Non-Spacing");
160       case G_UNICODE_DECIMAL_NUMBER: return _("Number, Decimal Digit");
161       case G_UNICODE_LETTER_NUMBER: return _("Number, Letter");
162       case G_UNICODE_OTHER_NUMBER: return _("Number, Other");
163       case G_UNICODE_CONNECT_PUNCTUATION: return _("Punctuation, Connector");
164       case G_UNICODE_DASH_PUNCTUATION: return _("Punctuation, Dash");
165       case G_UNICODE_CLOSE_PUNCTUATION: return _("Punctuation, Close");
166       case G_UNICODE_FINAL_PUNCTUATION: return _("Punctuation, Final Quote");
167       case G_UNICODE_INITIAL_PUNCTUATION: return _("Punctuation, Initial Quote");
168       case G_UNICODE_OTHER_PUNCTUATION: return _("Punctuation, Other");
169       case G_UNICODE_OPEN_PUNCTUATION: return _("Punctuation, Open");
170       case G_UNICODE_CURRENCY_SYMBOL: return _("Symbol, Currency");
171       case G_UNICODE_MODIFIER_SYMBOL: return _("Symbol, Modifier");
172       case G_UNICODE_MATH_SYMBOL: return _("Symbol, Math");
173       case G_UNICODE_OTHER_SYMBOL: return _("Symbol, Other");
174       case G_UNICODE_LINE_SEPARATOR: return _("Separator, Line");
175       case G_UNICODE_PARAGRAPH_SEPARATOR: return _("Separator, Paragraph");
176       case G_UNICODE_SPACE_SEPARATOR: return _("Separator, Space");
177       default: return "";
178     }
179 }
180 
181 /* does a binary search on unicode_names */
182 const gchar *
gucharmap_get_unicode_data_name(gunichar uc)183 gucharmap_get_unicode_data_name (gunichar uc)
184 {
185   gint min = 0;
186   gint mid;
187   gint max = G_N_ELEMENTS(unicode_names) - 1;
188 
189   if (uc < unicode_names[0].index || uc > unicode_names[max].index)
190     return "";
191 
192   while (max >= min)
193     {
194       mid = (min + max) / 2;
195       if (uc > unicode_names[mid].index)
196         min = mid + 1;
197       else if (uc < unicode_names[mid].index)
198         max = mid - 1;
199       else
200         return unicode_name_get_name(&unicode_names[mid]);
201     }
202 
203   return NULL;
204 }
205 
206 gint
gucharmap_get_unicode_data_name_count(void)207 gucharmap_get_unicode_data_name_count (void)
208 {
209   return G_N_ELEMENTS (unicode_names);
210 }
211 
212 /* does a binary search on unicode_versions */
213 GucharmapUnicodeVersion
gucharmap_get_unicode_version(gunichar uc)214 gucharmap_get_unicode_version (gunichar uc)
215 {
216   gint min = 0;
217   gint mid;
218   gint max = G_N_ELEMENTS (unicode_versions) - 1;
219 
220   if (uc < unicode_versions[0].start || uc > unicode_versions[max].end)
221     return GUCHARMAP_UNICODE_VERSION_UNASSIGNED;
222 
223   while (max >= min)
224     {
225       mid = (min + max) / 2;
226 
227       if (uc > unicode_versions[mid].end)
228         min = mid + 1;
229       else if (uc < unicode_versions[mid].start)
230         max = mid - 1;
231       else if ((uc >= unicode_versions[mid].start) && (uc <= unicode_versions[mid].end))
232         return unicode_versions[mid].version;
233     }
234 
235   return GUCHARMAP_UNICODE_VERSION_UNASSIGNED;
236 }
237 
238 const gchar *
gucharmap_unicode_version_to_string(GucharmapUnicodeVersion version)239 gucharmap_unicode_version_to_string (GucharmapUnicodeVersion version)
240 {
241   g_return_val_if_fail (version >= GUCHARMAP_UNICODE_VERSION_UNASSIGNED &&
242                         version <= GUCHARMAP_UNICODE_VERSION_LATEST, NULL);
243 
244   if (G_UNLIKELY (version == GUCHARMAP_UNICODE_VERSION_UNASSIGNED))
245     return NULL;
246 
247   return unicode_version_strings + unicode_version_string_offsets[version - 1];
248 }
249 
250 gint
gucharmap_get_unihan_count(void)251 gucharmap_get_unihan_count (void)
252 {
253   return G_N_ELEMENTS (unihan);
254 }
255 
256 /* does a binary search; also caches most recent, since it will often be
257  * called in succession on the same character */
258 static const Unihan *
_get_unihan(gunichar uc)259 _get_unihan (gunichar uc)
260 {
261   static gunichar most_recent_searched;
262   static const Unihan *most_recent_result;
263   gint min = 0;
264   gint mid;
265   gint max = G_N_ELEMENTS(unihan) - 1;
266 
267 
268   if (uc < unihan[0].index || uc > unihan[max].index)
269     return NULL;
270 
271   if (uc == most_recent_searched)
272     return most_recent_result;
273 
274   most_recent_searched = uc;
275 
276   while (max >= min)
277     {
278       mid = (min + max) / 2;
279       if (uc > unihan[mid].index)
280         min = mid + 1;
281       else if (uc < unihan[mid].index)
282         max = mid - 1;
283       else
284         {
285           most_recent_result = unihan + mid;
286           return unihan + mid;
287         }
288     }
289 
290   most_recent_result = NULL;
291   return NULL;
292 }
293 
294 /* does a binary search; also caches most recent, since it will often be
295  * called in succession on the same character */
296 static const NamesList *
get_nameslist(gunichar uc)297 get_nameslist (gunichar uc)
298 {
299   static gunichar most_recent_searched;
300   static const NamesList *most_recent_result;
301   gint min = 0;
302   gint mid;
303   gint max = G_N_ELEMENTS (names_list) - 1;
304 
305   if (uc < names_list[0].index || uc > names_list[max].index)
306     return NULL;
307 
308   if (uc == most_recent_searched)
309     return most_recent_result;
310 
311   most_recent_searched = uc;
312 
313   while (max >= min)
314     {
315       mid = (min + max) / 2;
316       if (uc > names_list[mid].index)
317         min = mid + 1;
318       else if (uc < names_list[mid].index)
319         max = mid - 1;
320       else
321         {
322           most_recent_result = names_list + mid;
323           return names_list + mid;
324         }
325     }
326 
327   most_recent_result = NULL;
328   return NULL;
329 }
330 
331 G_GNUC_INTERNAL gboolean
_gucharmap_unicode_has_nameslist_entry(gunichar uc)332 _gucharmap_unicode_has_nameslist_entry (gunichar uc)
333 {
334   return get_nameslist (uc) != NULL;
335 }
336 
337 /* returns newly allocated array of gunichar terminated with -1 */
338 gunichar *
gucharmap_get_nameslist_exes(gunichar uc)339 gucharmap_get_nameslist_exes (gunichar uc)
340 {
341   const NamesList *nl;
342   gunichar *exes;
343   gunichar i, count;
344 
345   nl = get_nameslist (uc);
346 
347   if (nl == NULL || nl->exes_index == -1)
348     return NULL;
349 
350   /* count the number of exes */
351   for (i = 0;  names_list_exes[nl->exes_index + i].index == uc;  i++);
352   count = i;
353 
354   exes = g_malloc ((count + 1) * sizeof (gunichar));
355   for (i = 0;  i < count;  i++)
356     exes[i] = names_list_exes[nl->exes_index + i].value;
357   exes[count] = (gunichar)(-1);
358 
359   return exes;
360 }
361 
362 /**
363  * gucharmap_get_nameslist_equals:
364  * @uc: a gunichar
365  *
366  * Returns: (transfer container): newly allocated null-terminated array of gchar*
367  * the items are const, but the array should be freed by the caller
368  */
369 const gchar **
gucharmap_get_nameslist_equals(gunichar uc)370 gucharmap_get_nameslist_equals (gunichar uc)
371 {
372   const NamesList *nl;
373   const gchar **equals;
374   gunichar i, count;
375 
376   nl = get_nameslist (uc);
377 
378   if (nl == NULL || nl->equals_index == -1)
379     return NULL;
380 
381   /* count the number of equals */
382   for (i = 0;  names_list_equals[nl->equals_index + i].index == uc;  i++);
383   count = i;
384 
385   equals = g_malloc ((count + 1) * sizeof (gchar *));
386   for (i = 0;  i < count;  i++)
387     equals[i] = names_list_equals_strings + names_list_equals[nl->equals_index + i].string_index;
388   equals[count] = NULL;
389 
390   return equals;
391 }
392 
393 /**
394  * gucharmap_get_nameslist_stars:
395  * @uc: a #gunichar
396  *
397  * Returns: (transfer container): newly allocated null-terminated array of gchar*
398  * the items are const, but the array should be freed by the caller
399  */
400 const gchar **
gucharmap_get_nameslist_stars(gunichar uc)401 gucharmap_get_nameslist_stars (gunichar uc)
402 {
403   const NamesList *nl;
404   const gchar **stars;
405   gunichar i, count;
406 
407   nl = get_nameslist (uc);
408 
409   if (nl == NULL || nl->stars_index == -1)
410     return NULL;
411 
412   /* count the number of stars */
413   for (i = 0;  names_list_stars[nl->stars_index + i].index == uc;  i++);
414   count = i;
415 
416   stars = g_malloc ((count + 1) * sizeof (gchar *));
417   for (i = 0;  i < count;  i++)
418     stars[i] = names_list_stars_strings + names_list_stars[nl->stars_index + i].string_index;
419   stars[count] = NULL;
420 
421   return stars;
422 }
423 
424 /**
425  * gucharmap_get_nameslist_pounds:
426  * @uc: a #gunichar
427  *
428  * Returns: (transfer container): newly allocated null-terminated array of gchar*
429  * the items are const, but the array should be freed by the caller
430  */
431 const gchar **
gucharmap_get_nameslist_pounds(gunichar uc)432 gucharmap_get_nameslist_pounds (gunichar uc)
433 {
434   const NamesList *nl;
435   const gchar **pounds;
436   gunichar i, count;
437 
438   nl = get_nameslist (uc);
439 
440   if (nl == NULL || nl->pounds_index == -1)
441     return NULL;
442 
443   /* count the number of pounds */
444   for (i = 0;  names_list_pounds[nl->pounds_index + i].index == uc;  i++);
445   count = i;
446 
447   pounds = g_malloc ((count + 1) * sizeof (gchar *));
448   for (i = 0;  i < count;  i++)
449     pounds[i] = names_list_pounds_strings + names_list_pounds[nl->pounds_index + i].string_index;
450   pounds[count] = NULL;
451 
452   return pounds;
453 }
454 
455 /**
456  * gucharmap_get_nameslist_colons:
457  * @uc: a #gunichar
458  *
459  * Returns: (transfer container): newly allocated null-terminated array of gchar*
460  * the items are const, but the array should be freed by the caller
461  */
462 const gchar **
gucharmap_get_nameslist_colons(gunichar uc)463 gucharmap_get_nameslist_colons (gunichar uc)
464 {
465   const NamesList *nl;
466   const gchar **colons;
467   gunichar i, count;
468 
469   nl = get_nameslist (uc);
470 
471   if (nl == NULL || nl->colons_index == -1)
472     return NULL;
473 
474   /* count the number of colons */
475   for (i = 0;  names_list_colons[nl->colons_index + i].index == uc;  i++);
476   count = i;
477 
478   colons = g_malloc ((count + 1) * sizeof (gchar *));
479   for (i = 0;  i < count;  i++)
480     colons[i] = names_list_colons_strings + names_list_colons[nl->colons_index + i].string_index;
481   colons[count] = NULL;
482 
483   return colons;
484 }
485 
486 /* Wrapper, in case we want to support a newer unicode version than glib */
487 gboolean
gucharmap_unichar_validate(gunichar ch)488 gucharmap_unichar_validate (gunichar ch)
489 {
490   return g_unichar_validate (ch);
491 }
492 
493 /**
494  * gucharmap_unichar_to_printable_utf8:
495  * @uc: a unicode character
496  * @outbuf: output buffer, must have at least 10 bytes of space.
497  *          If %NULL, the length will be computed and returned
498  *          and nothing will be written to @outbuf.
499  *
500  * Converts a single character to UTF-8 suitable for rendering. Check the
501  * source to see what this means. ;-)
502  *
503  *
504  * Return value: number of bytes written
505  **/
506 gint
gucharmap_unichar_to_printable_utf8(gunichar uc,gchar * outbuf)507 gucharmap_unichar_to_printable_utf8 (gunichar uc, gchar *outbuf)
508 {
509   /* Unicode Standard 3.2, section 2.6, "By convention, diacritical marks
510    * used by the Unicode Standard may be exhibited in (apparent) isolation
511    * by applying them to U+0020 SPACE or to U+00A0 NO BREAK SPACE." */
512 
513   /* 17:10 < owen> noah: I'm *not* claiming that what Pango does currently
514    *               is right, but convention isn't a requirement. I think
515    *               it's probably better to do the Uniscribe thing and put
516    *               the lone combining mark on a dummy character and require
517    *               ZWJ
518    * 17:11 < noah> owen: do you mean that i should put a ZWJ in there, or
519    *               that pango will do that?
520    * 17:11 < owen> noah: I mean, you should (assuming some future more
521    *               capable version of Pango) put it in there
522    */
523 
524   if (! gucharmap_unichar_validate (uc) || (! gucharmap_unichar_isgraph (uc)
525       && gucharmap_unichar_type (uc) != G_UNICODE_PRIVATE_USE))
526     return 0;
527   else if (gucharmap_unichar_type (uc) == G_UNICODE_COMBINING_MARK
528       || gucharmap_unichar_type (uc) == G_UNICODE_ENCLOSING_MARK
529       || gucharmap_unichar_type (uc) == G_UNICODE_NON_SPACING_MARK)
530     {
531       gint x;
532 
533       outbuf[0] = ' ';
534       outbuf[1] = '\xe2'; /* ZERO */
535       outbuf[2] = '\x80'; /* WIDTH */
536       outbuf[3] = '\x8d'; /* JOINER (0x200D) */
537 
538       x = g_unichar_to_utf8 (uc, outbuf + 4);
539 
540       return x + 4;
541     }
542   else
543     return g_unichar_to_utf8 (uc, outbuf);
544 }
545 
546 /**
547  * gucharmap_unichar_type:
548  * @uc: a Unicode character
549  *
550  * Classifies a Unicode character by type.
551  *
552  * Return value: the type of the character.
553  **/
554 GUnicodeType
gucharmap_unichar_type(gunichar uc)555 gucharmap_unichar_type (gunichar uc)
556 {
557   gint min = 0;
558   gint mid;
559   gint max = sizeof (unicode_categories) / sizeof (UnicodeCategory) - 1;
560 
561   if (uc < unicode_categories[0].start || uc > unicode_categories[max].end)
562     return G_UNICODE_UNASSIGNED;
563 
564   while (max >= min)
565     {
566       mid = (min + max) / 2;
567       if (uc > unicode_categories[mid].end)
568         min = mid + 1;
569       else if (uc < unicode_categories[mid].start)
570         max = mid - 1;
571       else
572         return unicode_categories[mid].category;
573     }
574 
575   return G_UNICODE_UNASSIGNED;
576 }
577 
578 /**
579  * gucharmap_unichar_isdefined:
580  * @uc: a Unicode character
581  *
582  * Determines if a given character is assigned in the Unicode
583  * standard.
584  *
585  * Return value: %TRUE if the character has an assigned value
586  **/
587 gboolean
gucharmap_unichar_isdefined(gunichar uc)588 gucharmap_unichar_isdefined (gunichar uc)
589 {
590   return gucharmap_unichar_type (uc) != G_UNICODE_UNASSIGNED;
591 }
592 
593 /**
594  * gucharmap_unichar_isgraph:
595  * @uc: a Unicode character
596  *
597  * Determines whether a character is printable and not a space
598  * (returns %FALSE for control characters, format characters, and
599  * spaces). g_unichar_isprint() is similar, but returns %TRUE for
600  * spaces. Given some UTF-8 text, obtain a character value with
601  * g_utf8_get_char().
602  *
603  * Return value: %TRUE if @c is printable unless it's a space
604  **/
605 gboolean
gucharmap_unichar_isgraph(gunichar uc)606 gucharmap_unichar_isgraph (gunichar uc)
607 {
608   GUnicodeType t = gucharmap_unichar_type (uc);
609 
610   /* From http://www.unicode.org/versions/Unicode9.0.0/ch09.pdf, p16
611    * "Unlike most other format control characters, however, they should be
612    *  rendered with a visible glyph, even in circumstances where no suitable
613    *  digit or sequence of digits follows them in logical order."
614    * There the standard talks about the ar signs spanning numbers, but
615    * I think this should apply to all Prepended_Concatenation_Mark format
616    * characters.
617    * Instead of parsing the corresponding data file, just hardcode the
618    * (few!) existing characters here.
619    */
620   if (t == G_UNICODE_FORMAT)
621     return (uc >= 0x0600 && uc <= 0x0605) ||
622 	   uc == 0x06DD ||
623            uc == 0x070F ||
624            uc == 0x08E2 ||
625            uc == 0x110BD;
626 
627   return (t != G_UNICODE_CONTROL
628           && t != G_UNICODE_UNASSIGNED
629           && t != G_UNICODE_PRIVATE_USE
630           && t != G_UNICODE_SURROGATE
631           && t != G_UNICODE_SPACE_SEPARATOR);
632 }
633 
634 static gunichar
get_first_non_underscore_char(const char * str)635 get_first_non_underscore_char (const char *str)
636 {
637   const char *p;
638 
639   if (!str)
640     return 0;
641 
642   for (p = str; p && *p; p = g_utf8_find_next_char (p, NULL))
643     {
644       gunichar ch;
645 
646       ch = g_utf8_get_char (p);
647       if (g_unichar_isalpha (ch))
648         return ch;
649     }
650 
651   return 0;
652 }
653 
654 /**
655  * gucharmap_unicode_get_locale_character:
656  *
657  * Determines a character that's commonly used in the current
658  * locale's script.
659  *
660  * Returns: a unicode character
661  */
662 gunichar
gucharmap_unicode_get_locale_character(void)663 gucharmap_unicode_get_locale_character (void)
664 {
665   GtkStockItem item;
666   if (!gtk_stock_lookup (GTK_STOCK_FIND, &item))
667     return 0;
668 
669   return get_first_non_underscore_char (item.label);
670 }
671