1 /*
2 * Copyright © 2004 Noah Levitt
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License as published by the
6 * Free Software Foundation; either version 3 of the License, or (at your
7 * option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19 #include <config.h>
20
21 #include <gtk/gtk.h>
22 #include <string.h>
23
24 #include <glib/gi18n-lib.h>
25
26 #include "gucharmap.h"
27 #include "gucharmap-private.h"
28
29 #include "unicode-names.h"
30 #include "unicode-blocks.h"
31 #include "unicode-nameslist.h"
32 #include "unicode-categories.h"
33 #include "unicode-versions.h"
34 #include "unicode-unihan.h"
35
36 /* constants for hangul (de)composition, see UAX #15 */
37 #define SBase 0xAC00
38 #define LCount 19
39 #define VCount 21
40 #define TCount 28
41 #define NCount (VCount * TCount)
42 #define SCount (LCount * NCount)
43
44 static const gchar JAMO_L_TABLE[][4] = {
45 "G", "GG", "N", "D", "DD", "R", "M", "B", "BB",
46 "S", "SS", "", "J", "JJ", "C", "K", "T", "P", "H"
47 };
48
49 static const gchar JAMO_V_TABLE[][4] = {
50 "A", "AE", "YA", "YAE", "EO", "E", "YEO", "YE", "O",
51 "WA", "WAE", "OE", "YO", "U", "WEO", "WE", "WI",
52 "YU", "EU", "YI", "I"
53 };
54
55 static const gchar JAMO_T_TABLE[][4] = {
56 "", "G", "GG", "GS", "N", "NJ", "NH", "D", "L", "LG", "LM",
57 "LB", "LS", "LT", "LP", "LH", "M", "B", "BS",
58 "S", "SS", "NG", "J", "C", "K", "T", "P", "H"
59 };
60
61 const gchar *
gucharmap_get_unicode_name(gunichar wc)62 gucharmap_get_unicode_name (gunichar wc)
63 {
64 static gchar buf[64];
65
66 _gucharmap_intl_ensure_initialized ();
67
68 if ((wc >= 0x3400 && wc <= 0x4dbf) /* CJK Unified Ideographs Extension A */
69 || (wc >= 0x4e00 && wc <= 0x9fff) /* CJK Unified Ideographs */
70 || (wc >= 0x20000 && wc <= 0x2a6df) /* CJK Unified Ideographs Extension B */
71 || (wc >= 0x2a700 && wc <= 0x2b738) /* CJK Unified Ideographs Extension C */
72 || (wc >= 0x2b740 && wc <= 0x2b81d) /* CJK Unified Ideographs Extension D */
73 || (wc >= 0x2b820 && wc <= 0x2cea1) /* CJK Unified Ideographs Extension E */
74 || (wc >= 0x2ceb0 && wc <= 0x2ebe0) /* CJK Unified Ideographs Extension F */
75 || (wc >= 0x30000 && wc <= 0x3134a)) /* CJK Unified Ideographs Extension G */
76 {
77 g_snprintf (buf, sizeof (buf), "CJK UNIFIED IDEOGRAPH-%04X", wc);
78 return buf;
79 }
80 else if ((wc >= 0xf900 && wc <= 0xfaff) || /* CJK Compatibility Ideographs */
81 (wc >= 0x2f800 && wc <= 0x2fa1d)) /* CJK Compatibility Ideographs Supplement */
82 {
83 g_snprintf (buf, sizeof (buf), "CJK COMPATIBILITY IDEOGRAPH-%04X", wc);
84 return buf;
85 }
86 else if ((wc >= 0x17000 && wc <= 0x187f7) || /* Tangut */
87 (wc >= 0x18d00 && wc <= 0x18d08)) /* Tangut Supplement */
88 {
89 g_snprintf (buf, sizeof (buf), "TANGUT IDEOGRAPH-%05X", wc);
90 return buf;
91 }
92 else if (wc >= 0x18800 && wc <= 0x18aff) {
93 g_snprintf (buf, sizeof (buf), "TANGUT COMPONENT-%03u", wc - 0x18800 + 1);
94 return buf;
95 }
96 else if (wc >= 0x18b00 && wc <= 0x18cd5) {
97 g_snprintf (buf, sizeof (buf), "KHITAN SMALL SCRIPT CHARACTER-%05X", wc);
98 return buf;
99 }
100 else if (wc >= 0xac00 && wc <= 0xd7af)
101 {
102 /* compute hangul syllable name as per UAX #15 */
103 gint SIndex = wc - SBase;
104 gint LIndex, VIndex, TIndex;
105
106 if (SIndex < 0 || SIndex >= SCount)
107 return "";
108
109 LIndex = SIndex / NCount;
110 VIndex = (SIndex % NCount) / TCount;
111 TIndex = SIndex % TCount;
112
113 g_snprintf (buf, sizeof (buf), "HANGUL SYLLABLE %s%s%s",
114 JAMO_L_TABLE[LIndex], JAMO_V_TABLE[VIndex], JAMO_T_TABLE[TIndex]);
115
116 return buf;
117 }
118 else if (wc >= 0xD800 && wc <= 0xDB7F)
119 return _("<Non Private Use High Surrogate>");
120 else if (wc >= 0xDB80 && wc <= 0xDBFF)
121 return _("<Private Use High Surrogate>");
122 else if (wc >= 0xDC00 && wc <= 0xDFFF)
123 return _("<Low Surrogate>");
124 else if (wc >= 0xE000 && wc <= 0xF8FF)
125 return _("<Private Use>");
126 else if (wc >= 0xF0000 && wc <= 0xFFFFD)
127 return _("<Plane 15 Private Use>");
128 else if (wc >= 0x100000 && wc <= 0x10FFFD)
129 return _("<Plane 16 Private Use>");
130 else
131 {
132 const gchar *x = gucharmap_get_unicode_data_name (wc);
133 if (x == NULL)
134 return _("<not assigned>");
135 else
136 return x;
137 }
138 }
139
140 const gchar *
gucharmap_get_unicode_category_name(gunichar wc)141 gucharmap_get_unicode_category_name (gunichar wc)
142 {
143 _gucharmap_intl_ensure_initialized ();
144
145 switch (gucharmap_unichar_type (wc))
146 {
147 case G_UNICODE_CONTROL: return _("Other, Control");
148 case G_UNICODE_FORMAT: return _("Other, Format");
149 case G_UNICODE_UNASSIGNED: return _("Other, Not Assigned");
150 case G_UNICODE_PRIVATE_USE: return _("Other, Private Use");
151 case G_UNICODE_SURROGATE: return _("Other, Surrogate");
152 case G_UNICODE_LOWERCASE_LETTER: return _("Letter, Lowercase");
153 case G_UNICODE_MODIFIER_LETTER: return _("Letter, Modifier");
154 case G_UNICODE_OTHER_LETTER: return _("Letter, Other");
155 case G_UNICODE_TITLECASE_LETTER: return _("Letter, Titlecase");
156 case G_UNICODE_UPPERCASE_LETTER: return _("Letter, Uppercase");
157 case G_UNICODE_COMBINING_MARK: return _("Mark, Spacing Combining");
158 case G_UNICODE_ENCLOSING_MARK: return _("Mark, Enclosing");
159 case G_UNICODE_NON_SPACING_MARK: return _("Mark, Non-Spacing");
160 case G_UNICODE_DECIMAL_NUMBER: return _("Number, Decimal Digit");
161 case G_UNICODE_LETTER_NUMBER: return _("Number, Letter");
162 case G_UNICODE_OTHER_NUMBER: return _("Number, Other");
163 case G_UNICODE_CONNECT_PUNCTUATION: return _("Punctuation, Connector");
164 case G_UNICODE_DASH_PUNCTUATION: return _("Punctuation, Dash");
165 case G_UNICODE_CLOSE_PUNCTUATION: return _("Punctuation, Close");
166 case G_UNICODE_FINAL_PUNCTUATION: return _("Punctuation, Final Quote");
167 case G_UNICODE_INITIAL_PUNCTUATION: return _("Punctuation, Initial Quote");
168 case G_UNICODE_OTHER_PUNCTUATION: return _("Punctuation, Other");
169 case G_UNICODE_OPEN_PUNCTUATION: return _("Punctuation, Open");
170 case G_UNICODE_CURRENCY_SYMBOL: return _("Symbol, Currency");
171 case G_UNICODE_MODIFIER_SYMBOL: return _("Symbol, Modifier");
172 case G_UNICODE_MATH_SYMBOL: return _("Symbol, Math");
173 case G_UNICODE_OTHER_SYMBOL: return _("Symbol, Other");
174 case G_UNICODE_LINE_SEPARATOR: return _("Separator, Line");
175 case G_UNICODE_PARAGRAPH_SEPARATOR: return _("Separator, Paragraph");
176 case G_UNICODE_SPACE_SEPARATOR: return _("Separator, Space");
177 default: return "";
178 }
179 }
180
181 /* does a binary search on unicode_names */
182 const gchar *
gucharmap_get_unicode_data_name(gunichar uc)183 gucharmap_get_unicode_data_name (gunichar uc)
184 {
185 gint min = 0;
186 gint mid;
187 gint max = G_N_ELEMENTS(unicode_names) - 1;
188
189 if (uc < unicode_names[0].index || uc > unicode_names[max].index)
190 return "";
191
192 while (max >= min)
193 {
194 mid = (min + max) / 2;
195 if (uc > unicode_names[mid].index)
196 min = mid + 1;
197 else if (uc < unicode_names[mid].index)
198 max = mid - 1;
199 else
200 return unicode_name_get_name(&unicode_names[mid]);
201 }
202
203 return NULL;
204 }
205
206 gint
gucharmap_get_unicode_data_name_count(void)207 gucharmap_get_unicode_data_name_count (void)
208 {
209 return G_N_ELEMENTS (unicode_names);
210 }
211
212 /* does a binary search on unicode_versions */
213 GucharmapUnicodeVersion
gucharmap_get_unicode_version(gunichar uc)214 gucharmap_get_unicode_version (gunichar uc)
215 {
216 gint min = 0;
217 gint mid;
218 gint max = G_N_ELEMENTS (unicode_versions) - 1;
219
220 if (uc < unicode_versions[0].start || uc > unicode_versions[max].end)
221 return GUCHARMAP_UNICODE_VERSION_UNASSIGNED;
222
223 while (max >= min)
224 {
225 mid = (min + max) / 2;
226
227 if (uc > unicode_versions[mid].end)
228 min = mid + 1;
229 else if (uc < unicode_versions[mid].start)
230 max = mid - 1;
231 else if ((uc >= unicode_versions[mid].start) && (uc <= unicode_versions[mid].end))
232 return unicode_versions[mid].version;
233 }
234
235 return GUCHARMAP_UNICODE_VERSION_UNASSIGNED;
236 }
237
238 const gchar *
gucharmap_unicode_version_to_string(GucharmapUnicodeVersion version)239 gucharmap_unicode_version_to_string (GucharmapUnicodeVersion version)
240 {
241 g_return_val_if_fail (version >= GUCHARMAP_UNICODE_VERSION_UNASSIGNED &&
242 version <= GUCHARMAP_UNICODE_VERSION_LATEST, NULL);
243
244 if (G_UNLIKELY (version == GUCHARMAP_UNICODE_VERSION_UNASSIGNED))
245 return NULL;
246
247 return unicode_version_strings + unicode_version_string_offsets[version - 1];
248 }
249
250 gint
gucharmap_get_unihan_count(void)251 gucharmap_get_unihan_count (void)
252 {
253 return G_N_ELEMENTS (unihan);
254 }
255
256 /* does a binary search; also caches most recent, since it will often be
257 * called in succession on the same character */
258 static const Unihan *
_get_unihan(gunichar uc)259 _get_unihan (gunichar uc)
260 {
261 static gunichar most_recent_searched;
262 static const Unihan *most_recent_result;
263 gint min = 0;
264 gint mid;
265 gint max = G_N_ELEMENTS(unihan) - 1;
266
267
268 if (uc < unihan[0].index || uc > unihan[max].index)
269 return NULL;
270
271 if (uc == most_recent_searched)
272 return most_recent_result;
273
274 most_recent_searched = uc;
275
276 while (max >= min)
277 {
278 mid = (min + max) / 2;
279 if (uc > unihan[mid].index)
280 min = mid + 1;
281 else if (uc < unihan[mid].index)
282 max = mid - 1;
283 else
284 {
285 most_recent_result = unihan + mid;
286 return unihan + mid;
287 }
288 }
289
290 most_recent_result = NULL;
291 return NULL;
292 }
293
294 /* does a binary search; also caches most recent, since it will often be
295 * called in succession on the same character */
296 static const NamesList *
get_nameslist(gunichar uc)297 get_nameslist (gunichar uc)
298 {
299 static gunichar most_recent_searched;
300 static const NamesList *most_recent_result;
301 gint min = 0;
302 gint mid;
303 gint max = G_N_ELEMENTS (names_list) - 1;
304
305 if (uc < names_list[0].index || uc > names_list[max].index)
306 return NULL;
307
308 if (uc == most_recent_searched)
309 return most_recent_result;
310
311 most_recent_searched = uc;
312
313 while (max >= min)
314 {
315 mid = (min + max) / 2;
316 if (uc > names_list[mid].index)
317 min = mid + 1;
318 else if (uc < names_list[mid].index)
319 max = mid - 1;
320 else
321 {
322 most_recent_result = names_list + mid;
323 return names_list + mid;
324 }
325 }
326
327 most_recent_result = NULL;
328 return NULL;
329 }
330
331 G_GNUC_INTERNAL gboolean
_gucharmap_unicode_has_nameslist_entry(gunichar uc)332 _gucharmap_unicode_has_nameslist_entry (gunichar uc)
333 {
334 return get_nameslist (uc) != NULL;
335 }
336
337 /* returns newly allocated array of gunichar terminated with -1 */
338 gunichar *
gucharmap_get_nameslist_exes(gunichar uc)339 gucharmap_get_nameslist_exes (gunichar uc)
340 {
341 const NamesList *nl;
342 gunichar *exes;
343 gunichar i, count;
344
345 nl = get_nameslist (uc);
346
347 if (nl == NULL || nl->exes_index == -1)
348 return NULL;
349
350 /* count the number of exes */
351 for (i = 0; names_list_exes[nl->exes_index + i].index == uc; i++);
352 count = i;
353
354 exes = g_malloc ((count + 1) * sizeof (gunichar));
355 for (i = 0; i < count; i++)
356 exes[i] = names_list_exes[nl->exes_index + i].value;
357 exes[count] = (gunichar)(-1);
358
359 return exes;
360 }
361
362 /**
363 * gucharmap_get_nameslist_equals:
364 * @uc: a gunichar
365 *
366 * Returns: (transfer container): newly allocated null-terminated array of gchar*
367 * the items are const, but the array should be freed by the caller
368 */
369 const gchar **
gucharmap_get_nameslist_equals(gunichar uc)370 gucharmap_get_nameslist_equals (gunichar uc)
371 {
372 const NamesList *nl;
373 const gchar **equals;
374 gunichar i, count;
375
376 nl = get_nameslist (uc);
377
378 if (nl == NULL || nl->equals_index == -1)
379 return NULL;
380
381 /* count the number of equals */
382 for (i = 0; names_list_equals[nl->equals_index + i].index == uc; i++);
383 count = i;
384
385 equals = g_malloc ((count + 1) * sizeof (gchar *));
386 for (i = 0; i < count; i++)
387 equals[i] = names_list_equals_strings + names_list_equals[nl->equals_index + i].string_index;
388 equals[count] = NULL;
389
390 return equals;
391 }
392
393 /**
394 * gucharmap_get_nameslist_stars:
395 * @uc: a #gunichar
396 *
397 * Returns: (transfer container): newly allocated null-terminated array of gchar*
398 * the items are const, but the array should be freed by the caller
399 */
400 const gchar **
gucharmap_get_nameslist_stars(gunichar uc)401 gucharmap_get_nameslist_stars (gunichar uc)
402 {
403 const NamesList *nl;
404 const gchar **stars;
405 gunichar i, count;
406
407 nl = get_nameslist (uc);
408
409 if (nl == NULL || nl->stars_index == -1)
410 return NULL;
411
412 /* count the number of stars */
413 for (i = 0; names_list_stars[nl->stars_index + i].index == uc; i++);
414 count = i;
415
416 stars = g_malloc ((count + 1) * sizeof (gchar *));
417 for (i = 0; i < count; i++)
418 stars[i] = names_list_stars_strings + names_list_stars[nl->stars_index + i].string_index;
419 stars[count] = NULL;
420
421 return stars;
422 }
423
424 /**
425 * gucharmap_get_nameslist_pounds:
426 * @uc: a #gunichar
427 *
428 * Returns: (transfer container): newly allocated null-terminated array of gchar*
429 * the items are const, but the array should be freed by the caller
430 */
431 const gchar **
gucharmap_get_nameslist_pounds(gunichar uc)432 gucharmap_get_nameslist_pounds (gunichar uc)
433 {
434 const NamesList *nl;
435 const gchar **pounds;
436 gunichar i, count;
437
438 nl = get_nameslist (uc);
439
440 if (nl == NULL || nl->pounds_index == -1)
441 return NULL;
442
443 /* count the number of pounds */
444 for (i = 0; names_list_pounds[nl->pounds_index + i].index == uc; i++);
445 count = i;
446
447 pounds = g_malloc ((count + 1) * sizeof (gchar *));
448 for (i = 0; i < count; i++)
449 pounds[i] = names_list_pounds_strings + names_list_pounds[nl->pounds_index + i].string_index;
450 pounds[count] = NULL;
451
452 return pounds;
453 }
454
455 /**
456 * gucharmap_get_nameslist_colons:
457 * @uc: a #gunichar
458 *
459 * Returns: (transfer container): newly allocated null-terminated array of gchar*
460 * the items are const, but the array should be freed by the caller
461 */
462 const gchar **
gucharmap_get_nameslist_colons(gunichar uc)463 gucharmap_get_nameslist_colons (gunichar uc)
464 {
465 const NamesList *nl;
466 const gchar **colons;
467 gunichar i, count;
468
469 nl = get_nameslist (uc);
470
471 if (nl == NULL || nl->colons_index == -1)
472 return NULL;
473
474 /* count the number of colons */
475 for (i = 0; names_list_colons[nl->colons_index + i].index == uc; i++);
476 count = i;
477
478 colons = g_malloc ((count + 1) * sizeof (gchar *));
479 for (i = 0; i < count; i++)
480 colons[i] = names_list_colons_strings + names_list_colons[nl->colons_index + i].string_index;
481 colons[count] = NULL;
482
483 return colons;
484 }
485
486 /* Wrapper, in case we want to support a newer unicode version than glib */
487 gboolean
gucharmap_unichar_validate(gunichar ch)488 gucharmap_unichar_validate (gunichar ch)
489 {
490 return g_unichar_validate (ch);
491 }
492
493 /**
494 * gucharmap_unichar_to_printable_utf8:
495 * @uc: a unicode character
496 * @outbuf: output buffer, must have at least 10 bytes of space.
497 * If %NULL, the length will be computed and returned
498 * and nothing will be written to @outbuf.
499 *
500 * Converts a single character to UTF-8 suitable for rendering. Check the
501 * source to see what this means. ;-)
502 *
503 *
504 * Return value: number of bytes written
505 **/
506 gint
gucharmap_unichar_to_printable_utf8(gunichar uc,gchar * outbuf)507 gucharmap_unichar_to_printable_utf8 (gunichar uc, gchar *outbuf)
508 {
509 /* Unicode Standard 3.2, section 2.6, "By convention, diacritical marks
510 * used by the Unicode Standard may be exhibited in (apparent) isolation
511 * by applying them to U+0020 SPACE or to U+00A0 NO BREAK SPACE." */
512
513 /* 17:10 < owen> noah: I'm *not* claiming that what Pango does currently
514 * is right, but convention isn't a requirement. I think
515 * it's probably better to do the Uniscribe thing and put
516 * the lone combining mark on a dummy character and require
517 * ZWJ
518 * 17:11 < noah> owen: do you mean that i should put a ZWJ in there, or
519 * that pango will do that?
520 * 17:11 < owen> noah: I mean, you should (assuming some future more
521 * capable version of Pango) put it in there
522 */
523
524 if (! gucharmap_unichar_validate (uc) || (! gucharmap_unichar_isgraph (uc)
525 && gucharmap_unichar_type (uc) != G_UNICODE_PRIVATE_USE))
526 return 0;
527 else if (gucharmap_unichar_type (uc) == G_UNICODE_COMBINING_MARK
528 || gucharmap_unichar_type (uc) == G_UNICODE_ENCLOSING_MARK
529 || gucharmap_unichar_type (uc) == G_UNICODE_NON_SPACING_MARK)
530 {
531 gint x;
532
533 outbuf[0] = ' ';
534 outbuf[1] = '\xe2'; /* ZERO */
535 outbuf[2] = '\x80'; /* WIDTH */
536 outbuf[3] = '\x8d'; /* JOINER (0x200D) */
537
538 x = g_unichar_to_utf8 (uc, outbuf + 4);
539
540 return x + 4;
541 }
542 else
543 return g_unichar_to_utf8 (uc, outbuf);
544 }
545
546 /**
547 * gucharmap_unichar_type:
548 * @uc: a Unicode character
549 *
550 * Classifies a Unicode character by type.
551 *
552 * Return value: the type of the character.
553 **/
554 GUnicodeType
gucharmap_unichar_type(gunichar uc)555 gucharmap_unichar_type (gunichar uc)
556 {
557 gint min = 0;
558 gint mid;
559 gint max = sizeof (unicode_categories) / sizeof (UnicodeCategory) - 1;
560
561 if (uc < unicode_categories[0].start || uc > unicode_categories[max].end)
562 return G_UNICODE_UNASSIGNED;
563
564 while (max >= min)
565 {
566 mid = (min + max) / 2;
567 if (uc > unicode_categories[mid].end)
568 min = mid + 1;
569 else if (uc < unicode_categories[mid].start)
570 max = mid - 1;
571 else
572 return unicode_categories[mid].category;
573 }
574
575 return G_UNICODE_UNASSIGNED;
576 }
577
578 /**
579 * gucharmap_unichar_isdefined:
580 * @uc: a Unicode character
581 *
582 * Determines if a given character is assigned in the Unicode
583 * standard.
584 *
585 * Return value: %TRUE if the character has an assigned value
586 **/
587 gboolean
gucharmap_unichar_isdefined(gunichar uc)588 gucharmap_unichar_isdefined (gunichar uc)
589 {
590 return gucharmap_unichar_type (uc) != G_UNICODE_UNASSIGNED;
591 }
592
593 /**
594 * gucharmap_unichar_isgraph:
595 * @uc: a Unicode character
596 *
597 * Determines whether a character is printable and not a space
598 * (returns %FALSE for control characters, format characters, and
599 * spaces). g_unichar_isprint() is similar, but returns %TRUE for
600 * spaces. Given some UTF-8 text, obtain a character value with
601 * g_utf8_get_char().
602 *
603 * Return value: %TRUE if @c is printable unless it's a space
604 **/
605 gboolean
gucharmap_unichar_isgraph(gunichar uc)606 gucharmap_unichar_isgraph (gunichar uc)
607 {
608 GUnicodeType t = gucharmap_unichar_type (uc);
609
610 /* From http://www.unicode.org/versions/Unicode9.0.0/ch09.pdf, p16
611 * "Unlike most other format control characters, however, they should be
612 * rendered with a visible glyph, even in circumstances where no suitable
613 * digit or sequence of digits follows them in logical order."
614 * There the standard talks about the ar signs spanning numbers, but
615 * I think this should apply to all Prepended_Concatenation_Mark format
616 * characters.
617 * Instead of parsing the corresponding data file, just hardcode the
618 * (few!) existing characters here.
619 */
620 if (t == G_UNICODE_FORMAT)
621 return (uc >= 0x0600 && uc <= 0x0605) ||
622 uc == 0x06DD ||
623 uc == 0x070F ||
624 uc == 0x08E2 ||
625 uc == 0x110BD;
626
627 return (t != G_UNICODE_CONTROL
628 && t != G_UNICODE_UNASSIGNED
629 && t != G_UNICODE_PRIVATE_USE
630 && t != G_UNICODE_SURROGATE
631 && t != G_UNICODE_SPACE_SEPARATOR);
632 }
633
634 static gunichar
get_first_non_underscore_char(const char * str)635 get_first_non_underscore_char (const char *str)
636 {
637 const char *p;
638
639 if (!str)
640 return 0;
641
642 for (p = str; p && *p; p = g_utf8_find_next_char (p, NULL))
643 {
644 gunichar ch;
645
646 ch = g_utf8_get_char (p);
647 if (g_unichar_isalpha (ch))
648 return ch;
649 }
650
651 return 0;
652 }
653
654 /**
655 * gucharmap_unicode_get_locale_character:
656 *
657 * Determines a character that's commonly used in the current
658 * locale's script.
659 *
660 * Returns: a unicode character
661 */
662 gunichar
gucharmap_unicode_get_locale_character(void)663 gucharmap_unicode_get_locale_character (void)
664 {
665 GtkStockItem item;
666 if (!gtk_stock_lookup (GTK_STOCK_FIND, &item))
667 return 0;
668
669 return get_first_non_underscore_char (item.label);
670 }
671