1 /* GTK - The GIMP Toolkit
2  * Copyright (C) 2015 Takao Fujiwara <takao.fujiwara1@gmail.com>
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with this library. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include <gdk/gdk.h>
19 #include <glib.h>
20 #include <glib/gprintf.h>
21 #include <glib/gstdio.h>
22 #include <locale.h>
23 #include <stdlib.h>
24 #include <string.h>
25 
26 #include "gtkcomposetable.h"
27 #include "gtkimcontextsimple.h"
28 
29 
30 #define GTK_COMPOSE_TABLE_MAGIC "GtkComposeTable"
31 #define GTK_COMPOSE_TABLE_VERSION (2)
32 
33 /* Maximum length of sequences we parse */
34 
35 #define MAX_COMPOSE_LEN 20
36 
37 typedef struct {
38   gunichar *sequence;
39   char *value;
40 } GtkComposeData;
41 
42 
43 static void
gtk_compose_data_free(GtkComposeData * compose_data)44 gtk_compose_data_free (GtkComposeData *compose_data)
45 {
46   g_free (compose_data->sequence);
47   g_free (compose_data->value);
48   g_slice_free (GtkComposeData, compose_data);
49 }
50 
51 static void
gtk_compose_list_element_free(GtkComposeData * compose_data,gpointer data)52 gtk_compose_list_element_free (GtkComposeData *compose_data, gpointer data)
53 {
54   gtk_compose_data_free (compose_data);
55 }
56 
57 static gboolean
is_codepoint(const char * str)58 is_codepoint (const char *str)
59 {
60   int i;
61 
62   /* 'U' is not code point but 'U00C0' is code point */
63   if (str[0] == '\0' || str[0] != 'U' || str[1] == '\0')
64     return FALSE;
65 
66   for (i = 1; str[i] != '\0'; i++)
67     {
68       if (!g_ascii_isxdigit (str[i]))
69         return FALSE;
70     }
71 
72   return TRUE;
73 }
74 
75 static gboolean
parse_compose_value(GtkComposeData * compose_data,const char * val,const char * line)76 parse_compose_value (GtkComposeData *compose_data,
77                      const char     *val,
78                      const char     *line)
79 {
80   const char *p;
81   GString *value;
82   gunichar ch;
83   char *endp;
84 
85   if (val[0] != '"')
86     {
87       g_warning ("Only strings supported after ':': %s: %s", val, line);
88       goto fail;
89     }
90 
91   value = g_string_new ("");
92 
93   p = val + 1;
94   while (*p)
95     {
96       if (*p == '\0')
97         {
98           g_warning ("Missing closing '\"': %s: %s", val, line);
99           goto fail;
100         }
101       else if (*p == '\"')
102         {
103           break;
104         }
105       else if (*p == '\\')
106         {
107           if (p[1] == '"')
108             {
109               g_string_append_c (value, '"');
110               p += 2;
111             }
112           else if (p[1] == '\\')
113             {
114               g_string_append_c (value, '\\');
115               p += 2;
116             }
117           else if (p[1] >= '0' && p[1] < '8')
118             {
119               ch = g_ascii_strtoll (p + 1, &endp, 8);
120               if (ch == 0)
121                 {
122                   g_warning ("Invalid escape sequence: %s: %s", val, line);
123                   goto fail;
124                 }
125               g_string_append_unichar (value, ch);
126               p = endp;
127             }
128           else if (p[1] == 'x' || p[1] == 'X')
129             {
130               ch = g_ascii_strtoll (p + 2, &endp, 16);
131               if (ch == 0)
132                 {
133                   g_warning ("Invalid escape sequence: %s: %s", val, line);
134                   goto fail;
135                 }
136               g_string_append_unichar (value, ch);
137               p = endp;
138             }
139           else
140             {
141               g_warning ("Invalid escape sequence: %s: %s", val, line);
142               goto fail;
143             }
144         }
145       else
146         {
147           ch = g_utf8_get_char (p);
148           g_string_append_unichar (value, ch);
149           p = g_utf8_next_char (p);
150         }
151     }
152 
153   compose_data->value = g_string_free (value, FALSE);
154 
155   return TRUE;
156 
157 fail:
158   return FALSE;
159 }
160 
161 static gboolean
parse_compose_sequence(GtkComposeData * compose_data,const char * seq,const char * line)162 parse_compose_sequence (GtkComposeData *compose_data,
163                         const char     *seq,
164                         const char     *line)
165 {
166   char **words = g_strsplit (seq, "<", -1);
167   int i;
168   int n = 0;
169 
170   if (g_strv_length (words) < 2)
171     {
172       g_warning ("key sequence format is <a> <b>...: %s", line);
173       goto fail;
174     }
175 
176   for (i = 1; words[i] != NULL; i++)
177     {
178       char *start = words[i];
179       char *end = strchr (words[i], '>');
180       char *match;
181       gunichar codepoint;
182 
183       if (words[i][0] == '\0')
184              continue;
185 
186       if (start == NULL || end == NULL || end <= start)
187         {
188           g_warning ("key sequence format is <a> <b>...: %s", line);
189           goto fail;
190         }
191 
192       match = g_strndup (start, end - start);
193 
194       if (compose_data->sequence == NULL)
195         compose_data->sequence = g_malloc (sizeof (gunichar) * 2);
196       else
197         compose_data->sequence = g_realloc (compose_data->sequence, sizeof (gunichar) * (n + 2));
198 
199       if (is_codepoint (match))
200         {
201           codepoint = (gunichar) g_ascii_strtoll (match + 1, NULL, 16);
202           compose_data->sequence[n] = codepoint;
203           compose_data->sequence[n + 1] = 0;
204         }
205       else
206         {
207           codepoint = (gunichar) gdk_keyval_from_name (match);
208           compose_data->sequence[n] = codepoint;
209           compose_data->sequence[n + 1] = 0;
210         }
211 
212       if (codepoint == GDK_KEY_VoidSymbol)
213         g_warning ("Could not get code point of keysym %s", match);
214       g_free (match);
215       n++;
216     }
217 
218   g_strfreev (words);
219   if (0 == n || n > MAX_COMPOSE_LEN)
220     {
221       g_warning ("Suspicious compose sequence length (%d). Are you sure this is right?: %s",
222                  n, line);
223       return FALSE;
224     }
225 
226   return TRUE;
227 
228 fail:
229   g_strfreev (words);
230   return FALSE;
231 }
232 
233 static void
parse_compose_line(GList ** compose_list,const char * line)234 parse_compose_line (GList       **compose_list,
235                     const char   *line)
236 {
237   char **components = NULL;
238   GtkComposeData *compose_data = NULL;
239 
240   if (line[0] == '\0' || line[0] == '#')
241     return;
242 
243   if (g_str_has_prefix (line, "include "))
244     return;
245 
246   components = g_strsplit (line, ":", 2);
247 
248   if (components[1] == NULL)
249     {
250       g_warning ("No delimiter ':': %s", line);
251       goto fail;
252     }
253 
254   compose_data = g_slice_new0 (GtkComposeData);
255 
256   if (!parse_compose_sequence (compose_data, g_strstrip (components[0]), line))
257     goto fail;
258 
259   if (!parse_compose_value (compose_data, g_strstrip (components[1]), line))
260     goto fail;
261 
262   g_strfreev (components);
263 
264   *compose_list = g_list_append (*compose_list, compose_data);
265 
266   return;
267 
268 fail:
269   g_strfreev (components);
270   if (compose_data)
271     gtk_compose_data_free (compose_data);
272 }
273 
274 extern const GtkComposeTableCompact gtk_compose_table_compact;
275 
276 static GList *
gtk_compose_list_parse_file(const char * compose_file)277 gtk_compose_list_parse_file (const char *compose_file)
278 {
279   char *contents = NULL;
280   char **lines = NULL;
281   gsize length = 0;
282   GError *error = NULL;
283   GList *compose_list = NULL;
284   int i;
285 
286   if (!g_file_get_contents (compose_file, &contents, &length, &error))
287     {
288       g_warning ("%s", error->message);
289       g_error_free (error);
290       return NULL;
291     }
292 
293   lines = g_strsplit (contents, "\n", -1);
294   g_free (contents);
295   for (i = 0; lines[i] != NULL; i++)
296     parse_compose_line (&compose_list, lines[i]);
297   g_strfreev (lines);
298 
299   return compose_list;
300 }
301 
302 static GList *
gtk_compose_list_check_duplicated(GList * compose_list)303 gtk_compose_list_check_duplicated (GList *compose_list)
304 {
305   GList *list;
306   GList *removed_list = NULL;
307   GtkComposeData *compose_data;
308 
309   for (list = compose_list; list != NULL; list = list->next)
310     {
311       static guint16 keysyms[MAX_COMPOSE_LEN + 1];
312       int i;
313       int n_compose = 0;
314       gboolean compose_finish;
315       gunichar output_char;
316       char buf[8] = { 0, };
317 
318       compose_data = list->data;
319 
320       for (i = 0; i < MAX_COMPOSE_LEN + 1; i++)
321         keysyms[i] = 0;
322 
323       for (i = 0; i < MAX_COMPOSE_LEN + 1; i++)
324         {
325           gunichar codepoint = compose_data->sequence[i];
326           keysyms[i] = (guint16) codepoint;
327 
328           if (codepoint == 0)
329             break;
330 
331           n_compose++;
332         }
333 
334       if (gtk_compose_table_compact_check (&gtk_compose_table_compact,
335                                            keysyms, n_compose,
336                                            &compose_finish,
337                                            NULL,
338                                            &output_char) &&
339           compose_finish)
340         {
341           g_unichar_to_utf8 (output_char, buf);
342           if (strcmp (compose_data->value, buf) == 0)
343             removed_list = g_list_prepend (removed_list, compose_data);
344         }
345       else if (gtk_check_algorithmically (keysyms, n_compose, &output_char))
346         {
347           g_unichar_to_utf8 (output_char, buf);
348           if (strcmp (compose_data->value, buf) == 0)
349             removed_list = g_list_prepend (removed_list, compose_data);
350         }
351     }
352 
353   for (list = removed_list; list != NULL; list = list->next)
354     {
355       compose_data = list->data;
356       compose_list = g_list_remove (compose_list, compose_data);
357       gtk_compose_data_free (compose_data);
358     }
359 
360   g_list_free (removed_list);
361 
362   return compose_list;
363 }
364 
365 static GList *
gtk_compose_list_check_uint16(GList * compose_list)366 gtk_compose_list_check_uint16 (GList *compose_list)
367 {
368   GList *list;
369   GList *removed_list = NULL;
370   GtkComposeData *compose_data;
371 
372   for (list = compose_list; list != NULL; list = list->next)
373     {
374       int i;
375 
376       compose_data = list->data;
377       for (i = 0; i < MAX_COMPOSE_LEN; i++)
378         {
379           gunichar codepoint = compose_data->sequence[i];
380 
381           if (codepoint == 0)
382             break;
383 
384           if (codepoint > 0xffff)
385             {
386               removed_list = g_list_prepend (removed_list, compose_data);
387               break;
388             }
389         }
390     }
391 
392   for (list = removed_list; list != NULL; list = list->next)
393     {
394       compose_data = list->data;
395       compose_list = g_list_remove (compose_list, compose_data);
396       gtk_compose_data_free (compose_data);
397     }
398 
399   g_list_free (removed_list);
400 
401   return compose_list;
402 }
403 
404 static GList *
gtk_compose_list_format_for_gtk(GList * compose_list,int * p_max_compose_len,int * p_n_index_stride)405 gtk_compose_list_format_for_gtk (GList *compose_list,
406                                  int   *p_max_compose_len,
407                                  int   *p_n_index_stride)
408 {
409   GList *list;
410   GtkComposeData *compose_data;
411   int max_compose_len = 0;
412   int i;
413   gunichar codepoint;
414 
415   for (list = compose_list; list != NULL; list = list->next)
416     {
417       compose_data = list->data;
418       for (i = 0; i < MAX_COMPOSE_LEN + 1; i++)
419         {
420           codepoint = compose_data->sequence[i];
421           if (codepoint == 0)
422             {
423               if (max_compose_len < i)
424                 max_compose_len = i;
425               break;
426             }
427         }
428     }
429 
430   if (p_max_compose_len)
431     *p_max_compose_len = max_compose_len;
432   if (p_n_index_stride)
433     *p_n_index_stride = max_compose_len + 2;
434 
435   return compose_list;
436 }
437 
438 static int
gtk_compose_data_compare(gpointer a,gpointer b,gpointer data)439 gtk_compose_data_compare (gpointer a,
440                           gpointer b,
441                           gpointer data)
442 {
443   GtkComposeData *compose_data_a = a;
444   GtkComposeData *compose_data_b = b;
445   int max_compose_len = GPOINTER_TO_INT (data);
446   int i;
447   for (i = 0; i < max_compose_len; i++)
448     {
449       gunichar code_a = compose_data_a->sequence[i];
450       gunichar code_b = compose_data_b->sequence[i];
451 
452       if (code_a != code_b)
453         return code_a - code_b;
454     }
455 
456   return 0;
457 }
458 
459 /* Implemented from g_str_hash() */
460 static guint32
gtk_compose_table_data_hash(gconstpointer v,int length)461 gtk_compose_table_data_hash (gconstpointer v, int length)
462 {
463   const guint16 *p, *head;
464   unsigned char c;
465   guint32 h = 5381;
466 
467   for (p = v, head = v; (p - head) < length; p++)
468     {
469       c = 0x00ff & (*p >> 8);
470       h = (h << 5) + h + c;
471       c = 0x00ff & *p;
472       h = (h << 5) + h + c;
473     }
474 
475   return h;
476 }
477 
478 static char *
gtk_compose_hash_get_cache_path(guint32 hash)479 gtk_compose_hash_get_cache_path (guint32 hash)
480 {
481   char *basename = NULL;
482   char *dir = NULL;
483   char *path = NULL;
484 
485   basename = g_strdup_printf ("%08x.cache", hash);
486 
487   dir = g_build_filename (g_get_user_cache_dir (), "gtk-3.0", "compose", NULL);
488   path = g_build_filename (dir, basename, NULL);
489   if (g_mkdir_with_parents (dir, 0755) != 0)
490     {
491       g_warning ("Failed to mkdir %s", dir);
492       g_free (path);
493       path = NULL;
494     }
495 
496   g_free (dir);
497   g_free (basename);
498 
499   return path;
500 }
501 
502 static char *
gtk_compose_table_serialize(GtkComposeTable * compose_table,gsize * count)503 gtk_compose_table_serialize (GtkComposeTable *compose_table,
504                              gsize           *count)
505 {
506   char *p, *contents;
507   gsize length, total_length;
508   guint16 bytes;
509   const char *header = GTK_COMPOSE_TABLE_MAGIC;
510   const guint16 version = GTK_COMPOSE_TABLE_VERSION;
511   guint16 max_seq_len = compose_table->max_seq_len;
512   guint16 index_stride = max_seq_len + 2;
513   guint16 n_seqs = compose_table->n_seqs;
514   guint16 n_chars = compose_table->n_chars;
515   guint32 i;
516 
517   g_return_val_if_fail (compose_table != NULL, NULL);
518   g_return_val_if_fail (max_seq_len > 0, NULL);
519   g_return_val_if_fail (index_stride > 0, NULL);
520 
521   length = strlen (header);
522   total_length = length + sizeof (guint16) * (4 + index_stride * n_seqs) + n_chars;
523   if (count)
524     *count = total_length;
525 
526   p = contents = g_malloc (total_length);
527 
528   memcpy (p, header, length);
529   p += length;
530 
531 #define APPEND_GUINT16(elt) \
532   bytes = GUINT16_TO_BE (elt); \
533   memcpy (p, &bytes, sizeof (guint16)); \
534   p += sizeof (guint16);
535 
536   APPEND_GUINT16 (version);
537   APPEND_GUINT16 (max_seq_len);
538   APPEND_GUINT16 (n_seqs);
539   APPEND_GUINT16 (n_chars);
540 
541   for (i = 0; i < (guint32) index_stride * n_seqs; i++)
542     {
543       APPEND_GUINT16 (compose_table->data[i]);
544     }
545 
546   if (compose_table->n_chars > 0)
547     memcpy (p, compose_table->char_data, compose_table->n_chars);
548 
549 #undef APPEND_GUINT16
550 
551   return contents;
552 }
553 
554 static int
gtk_compose_table_find(gconstpointer data1,gconstpointer data2)555 gtk_compose_table_find (gconstpointer data1,
556                         gconstpointer data2)
557 {
558   const GtkComposeTable *compose_table = (const GtkComposeTable *) data1;
559   guint32 hash = (guint32) GPOINTER_TO_INT (data2);
560   return compose_table->id != hash;
561 }
562 
563 static GtkComposeTable *
gtk_compose_table_load_cache(const char * compose_file)564 gtk_compose_table_load_cache (const char *compose_file)
565 {
566   guint32 hash;
567   char *path = NULL;
568   char *contents = NULL;
569   char *p;
570   GStatBuf original_buf;
571   GStatBuf cache_buf;
572   gsize total_length;
573   GError *error = NULL;
574   guint16 bytes;
575   guint16 version;
576   guint16 max_seq_len;
577   guint16 index_stride;
578   guint16 n_seqs;
579   guint16 n_chars;
580   guint32 i;
581   guint16 *gtk_compose_seqs = NULL;
582   GtkComposeTable *retval;
583   char *char_data = NULL;
584 
585   hash = g_str_hash (compose_file);
586   if ((path = gtk_compose_hash_get_cache_path (hash)) == NULL)
587     return NULL;
588   if (!g_file_test (path, G_FILE_TEST_EXISTS))
589     goto out_load_cache;
590 
591   g_stat (path, &cache_buf);
592   g_lstat (compose_file, &original_buf);
593   if (original_buf.st_mtime > cache_buf.st_mtime)
594     goto out_load_cache;
595   g_stat (compose_file, &original_buf);
596   if (original_buf.st_mtime > cache_buf.st_mtime)
597     goto out_load_cache;
598   if (!g_file_get_contents (path, &contents, &total_length, &error))
599     {
600       g_warning ("Failed to get cache content %s: %s", path, error->message);
601       g_error_free (error);
602       goto out_load_cache;
603     }
604 
605 #define GET_GUINT16(elt) \
606   memcpy (&bytes, p, sizeof (guint16)); \
607   elt = GUINT16_FROM_BE (bytes); \
608   p += sizeof (guint16);
609 
610   p = contents;
611   if (g_ascii_strncasecmp (p, GTK_COMPOSE_TABLE_MAGIC,
612                            strlen (GTK_COMPOSE_TABLE_MAGIC)) != 0)
613     {
614       g_warning ("The file is not a GtkComposeTable cache file %s", path);
615       goto out_load_cache;
616     }
617 
618   p += strlen (GTK_COMPOSE_TABLE_MAGIC);
619   if (p - contents > total_length)
620     {
621       g_warning ("Broken cache content %s at head", path);
622       goto out_load_cache;
623     }
624 
625   GET_GUINT16 (version);
626   if (version != GTK_COMPOSE_TABLE_VERSION)
627     {
628       g_warning ("cache version is different %u != %u",
629                  version, GTK_COMPOSE_TABLE_VERSION);
630       goto out_load_cache;
631     }
632 
633   GET_GUINT16 (max_seq_len);
634   GET_GUINT16 (n_seqs);
635   GET_GUINT16 (n_chars);
636 
637   if (max_seq_len == 0 || n_seqs == 0)
638     {
639       g_warning ("cache size is not correct %d %d", max_seq_len, n_seqs);
640       goto out_load_cache;
641     }
642 
643   index_stride = max_seq_len + 2;
644   gtk_compose_seqs = g_new0 (guint16, n_seqs * index_stride);
645 
646   for (i = 0; i < (guint32) index_stride * n_seqs; i++)
647     {
648       GET_GUINT16 (gtk_compose_seqs[i]);
649     }
650 
651   if (n_chars > 0)
652     {
653       char_data = g_new (char, n_chars + 1);
654       memcpy (char_data, p, n_chars);
655       char_data[n_chars] = '\0';
656     }
657 
658   retval = g_new0 (GtkComposeTable, 1);
659   retval->data = gtk_compose_seqs;
660   retval->max_seq_len = max_seq_len;
661   retval->n_seqs = n_seqs;
662   retval->char_data = char_data;
663   retval->n_chars = n_chars;
664   retval->id = hash;
665 
666   g_free (contents);
667   g_free (path);
668 
669   return retval;
670 
671 #undef GET_GUINT16
672 
673 out_load_cache:
674   g_free (gtk_compose_seqs);
675   g_free (char_data);
676   g_free (contents);
677   g_free (path);
678   return NULL;
679 }
680 
681 static void
gtk_compose_table_save_cache(GtkComposeTable * compose_table)682 gtk_compose_table_save_cache (GtkComposeTable *compose_table)
683 {
684   char *path = NULL;
685   char *contents = NULL;
686   GError *error = NULL;
687   gsize length = 0;
688 
689   if ((path = gtk_compose_hash_get_cache_path (compose_table->id)) == NULL)
690     return;
691 
692   contents = gtk_compose_table_serialize (compose_table, &length);
693   if (contents == NULL)
694     {
695       g_warning ("Failed to serialize compose table %s", path);
696       goto out_save_cache;
697     }
698   if (!g_file_set_contents (path, contents, length, &error))
699     {
700       g_warning ("Failed to save compose table %s: %s", path, error->message);
701       g_error_free (error);
702       goto out_save_cache;
703     }
704 
705 out_save_cache:
706   g_free (contents);
707   g_free (path);
708 }
709 
710 static GtkComposeTable *
gtk_compose_table_new_with_list(GList * compose_list,int max_compose_len,int n_index_stride,guint32 hash)711 gtk_compose_table_new_with_list (GList   *compose_list,
712                                  int      max_compose_len,
713                                  int      n_index_stride,
714                                  guint32  hash)
715 {
716   guint length;
717   guint n = 0;
718   int i, j;
719   guint16 *gtk_compose_seqs = NULL;
720   GList *list;
721   GtkComposeData *compose_data;
722   GtkComposeTable *retval = NULL;
723   gunichar codepoint;
724   GString *char_data;
725 
726   g_return_val_if_fail (compose_list != NULL, NULL);
727 
728   length = g_list_length (compose_list);
729 
730   gtk_compose_seqs = g_new0 (guint16, length * n_index_stride);
731 
732   char_data = g_string_new ("");
733 
734   for (list = compose_list; list != NULL; list = list->next)
735     {
736       compose_data = list->data;
737       for (i = 0; i < max_compose_len; i++)
738         {
739           if (compose_data->sequence[i] == 0)
740             {
741               for (j = i; j < max_compose_len; j++)
742                 gtk_compose_seqs[n++] = 0;
743               break;
744             }
745           gtk_compose_seqs[n++] = (guint16) compose_data->sequence[i];
746         }
747 
748       if (g_utf8_strlen (compose_data->value, -1) > 1)
749         {
750           if (char_data->len > 0)
751             g_string_append_c (char_data, 0);
752 
753           codepoint = char_data->len | (1 << 31);
754 
755           g_string_append (char_data, compose_data->value);
756         }
757       else
758         {
759           codepoint = g_utf8_get_char (compose_data->value);
760           g_assert ((codepoint & (1 << 31)) == 0);
761         }
762 
763       gtk_compose_seqs[n++] = (codepoint & 0xffff0000) >> 16;
764       gtk_compose_seqs[n++] = codepoint & 0xffff;
765     }
766 
767   retval = g_new0 (GtkComposeTable, 1);
768   retval->data = gtk_compose_seqs;
769   retval->max_seq_len = max_compose_len;
770   retval->n_seqs = length;
771   retval->id = hash;
772   retval->n_chars = char_data->len;
773   retval->char_data = g_string_free (char_data, FALSE);
774 
775   return retval;
776 }
777 
778 GtkComposeTable *
gtk_compose_table_new_with_file(const char * compose_file)779 gtk_compose_table_new_with_file (const char *compose_file)
780 {
781   GList *compose_list = NULL;
782   GtkComposeTable *compose_table;
783   int max_compose_len = 0;
784   int n_index_stride = 0;
785 
786   g_assert (compose_file != NULL);
787 
788   compose_list = gtk_compose_list_parse_file (compose_file);
789   if (compose_list == NULL)
790     return NULL;
791   compose_list = gtk_compose_list_check_duplicated (compose_list);
792   compose_list = gtk_compose_list_check_uint16 (compose_list);
793   compose_list = gtk_compose_list_format_for_gtk (compose_list,
794                                                   &max_compose_len,
795                                                   &n_index_stride);
796   compose_list = g_list_sort_with_data (compose_list,
797                                         (GCompareDataFunc) gtk_compose_data_compare,
798                                         GINT_TO_POINTER (max_compose_len));
799   if (compose_list == NULL)
800     {
801       g_warning ("compose file %s does not include any keys besides keys in en-us compose file", compose_file);
802       return NULL;
803     }
804 
805   compose_table = gtk_compose_table_new_with_list (compose_list,
806                                                    max_compose_len,
807                                                    n_index_stride,
808                                                    g_str_hash (compose_file));
809   g_list_free_full (compose_list, (GDestroyNotify) gtk_compose_list_element_free);
810   return compose_table;
811 }
812 
813 GSList *
gtk_compose_table_list_add_array(GSList * compose_tables,const guint16 * data,int max_seq_len,int n_seqs)814 gtk_compose_table_list_add_array (GSList        *compose_tables,
815                                   const guint16 *data,
816                                   int            max_seq_len,
817                                   int            n_seqs)
818 {
819   guint32 hash;
820   GtkComposeTable *compose_table;
821   gsize n_index_stride;
822   gsize length;
823   int i;
824   guint16 *gtk_compose_seqs = NULL;
825 
826   g_return_val_if_fail (data != NULL, compose_tables);
827   g_return_val_if_fail (max_seq_len >= 0, compose_tables);
828   g_return_val_if_fail (n_seqs >= 0, compose_tables);
829 
830   n_index_stride = max_seq_len + 2;
831   if (!g_size_checked_mul (&length, n_index_stride, n_seqs))
832     {
833       g_critical ("Overflow in the compose sequences");
834       return compose_tables;
835     }
836 
837   hash = gtk_compose_table_data_hash (data, length);
838 
839   if (g_slist_find_custom (compose_tables, GINT_TO_POINTER (hash), gtk_compose_table_find) != NULL)
840     return compose_tables;
841 
842   gtk_compose_seqs = g_new0 (guint16, length);
843   for (i = 0; i < length; i++)
844     gtk_compose_seqs[i] = data[i];
845 
846   compose_table = g_new (GtkComposeTable, 1);
847   compose_table->data = gtk_compose_seqs;
848   compose_table->max_seq_len = max_seq_len;
849   compose_table->n_seqs = n_seqs;
850   compose_table->id = hash;
851   compose_table->char_data = NULL;
852   compose_table->n_chars = 0;
853 
854   return g_slist_prepend (compose_tables, compose_table);
855 }
856 
857 GSList *
gtk_compose_table_list_add_file(GSList * compose_tables,const char * compose_file)858 gtk_compose_table_list_add_file (GSList     *compose_tables,
859                                  const char *compose_file)
860 {
861   guint32 hash;
862   GtkComposeTable *compose_table;
863 
864   g_return_val_if_fail (compose_file != NULL, compose_tables);
865 
866   hash = g_str_hash (compose_file);
867   if (g_slist_find_custom (compose_tables, GINT_TO_POINTER (hash), gtk_compose_table_find) != NULL)
868     return compose_tables;
869 
870   compose_table = gtk_compose_table_load_cache (compose_file);
871   if (compose_table != NULL)
872     return g_slist_prepend (compose_tables, compose_table);
873 
874   if ((compose_table = gtk_compose_table_new_with_file (compose_file)) == NULL)
875     return compose_tables;
876 
877   gtk_compose_table_save_cache (compose_table);
878   return g_slist_prepend (compose_tables, compose_table);
879 }
880 
881 static int
compare_seq(const void * key,const void * value)882 compare_seq (const void *key, const void *value)
883 {
884   int i = 0;
885   const guint16 *keysyms = key;
886   const guint16 *seq = value;
887 
888   while (keysyms[i])
889     {
890       if (keysyms[i] < seq[i])
891         return -1;
892       else if (keysyms[i] > seq[i])
893         return 1;
894 
895       i++;
896     }
897 
898   return 0;
899 }
900 
901 /*
902  * gtk_compose_table_check:
903  * @table: the table to check
904  * @compose_buffer: the key vals to match
905  * @n_compose: number of non-zero key vals in @compose_buffer
906  * @compose_finish: (out): return location for whether there may be longer matches
907  * @compose_match: (out): return location for whether there is a match
908  * @output: (out) (caller-allocates): return location for the match values
909  *
910  * Looks for matches for a key sequence in @table.
911  *
912  * Returns: %TRUE if there were any matches, %FALSE otherwise
913  */
914 gboolean
gtk_compose_table_check(const GtkComposeTable * table,const guint16 * compose_buffer,int n_compose,gboolean * compose_finish,gboolean * compose_match,GString * output)915 gtk_compose_table_check (const GtkComposeTable *table,
916                          const guint16         *compose_buffer,
917                          int                    n_compose,
918                          gboolean              *compose_finish,
919                          gboolean              *compose_match,
920                          GString               *output)
921 {
922   int row_stride = table->max_seq_len + 2;
923   guint16 *seq;
924 
925   *compose_finish = FALSE;
926   *compose_match = FALSE;
927 
928   g_string_set_size (output, 0);
929 
930   /* Will never match, if the sequence in the compose buffer is longer
931    * than the sequences in the table.  Further, compare_seq (key, val)
932    * will overrun val if key is longer than val.
933    */
934   if (n_compose > table->max_seq_len)
935     return FALSE;
936 
937   seq = bsearch (compose_buffer,
938                  table->data, table->n_seqs,
939                  sizeof (guint16) * row_stride,
940                  compare_seq);
941 
942   if (seq)
943     {
944       guint16 *prev_seq;
945 
946       /* Back up to the first sequence that matches to make sure
947        * we find the exact match if there is one.
948        */
949       while (seq > table->data)
950         {
951           prev_seq = seq - row_stride;
952           if (compare_seq (compose_buffer, prev_seq) != 0)
953             break;
954           seq = prev_seq;
955         }
956 
957       if (n_compose == table->max_seq_len ||
958           seq[n_compose] == 0) /* complete sequence */
959         {
960           guint16 *next_seq;
961           gunichar value;
962 
963           value = (seq[table->max_seq_len] << 16) | seq[table->max_seq_len + 1];
964           if ((value & (1 << 31)) != 0)
965             g_string_append (output, &table->char_data[value & ~(1 << 31)]);
966           else
967             g_string_append_unichar (output, value);
968 
969           *compose_match = TRUE;
970 
971           /* We found a tentative match. See if there are any longer
972            * sequences containing this subsequence
973            */
974           next_seq = seq + row_stride;
975           if (next_seq < table->data + row_stride * table->n_seqs)
976             {
977               if (compare_seq (compose_buffer, next_seq) == 0)
978                 return TRUE;
979             }
980 
981           *compose_finish = TRUE;
982           return TRUE;
983         }
984 
985       return TRUE;
986     }
987 
988   return FALSE;
989 }
990 
991 static int
compare_seq_index(const void * key,const void * value)992 compare_seq_index (const void *key, const void *value)
993 {
994   const guint16 *keysyms = key;
995   const guint16 *seq = value;
996 
997   if (keysyms[0] < seq[0])
998     return -1;
999   else if (keysyms[0] > seq[0])
1000     return 1;
1001 
1002   return 0;
1003 }
1004 
1005 gboolean
gtk_compose_table_compact_check(const GtkComposeTableCompact * table,const guint16 * compose_buffer,int n_compose,gboolean * compose_finish,gboolean * compose_match,gunichar * output_char)1006 gtk_compose_table_compact_check (const GtkComposeTableCompact  *table,
1007                                  const guint16                 *compose_buffer,
1008                                  int                            n_compose,
1009                                  gboolean                      *compose_finish,
1010                                  gboolean                      *compose_match,
1011                                  gunichar                      *output_char)
1012 {
1013   int row_stride;
1014   guint16 *seq_index;
1015   guint16 *seq;
1016   int i;
1017   gboolean match;
1018   gunichar value;
1019 
1020   if (compose_finish)
1021     *compose_finish = FALSE;
1022   if (compose_match)
1023     *compose_match = FALSE;
1024   if (output_char)
1025     *output_char = 0;
1026 
1027   /* Will never match, if the sequence in the compose buffer is longer
1028    * than the sequences in the table.  Further, compare_seq (key, val)
1029    * will overrun val if key is longer than val.
1030    */
1031   if (n_compose > table->max_seq_len)
1032     return FALSE;
1033 
1034   seq_index = bsearch (compose_buffer,
1035                        table->data,
1036                        table->n_index_size,
1037                        sizeof (guint16) * table->n_index_stride,
1038                        compare_seq_index);
1039 
1040   if (!seq_index)
1041     return FALSE;
1042 
1043   if (n_compose == 1)
1044     return TRUE;
1045 
1046   seq = NULL;
1047   match = FALSE;
1048   value = 0;
1049 
1050   for (i = n_compose - 1; i < table->max_seq_len; i++)
1051     {
1052       row_stride = i + 1;
1053 
1054       if (seq_index[i + 1] - seq_index[i] > 0)
1055         {
1056           seq = bsearch (compose_buffer + 1,
1057                          table->data + seq_index[i],
1058                          (seq_index[i + 1] - seq_index[i]) / row_stride,
1059                          sizeof (guint16) *  row_stride,
1060                          compare_seq);
1061 
1062           if (seq)
1063             {
1064               if (i == n_compose - 1)
1065                 {
1066                   value = seq[row_stride - 1];
1067                   match = TRUE;
1068                 }
1069               else
1070                 {
1071                   if (output_char)
1072                     *output_char = value;
1073                   if (match)
1074                     {
1075                       if (compose_match)
1076                         *compose_match = TRUE;
1077                     }
1078 
1079                   return TRUE;
1080                 }
1081             }
1082         }
1083     }
1084 
1085   if (match)
1086     {
1087       if (compose_match)
1088         *compose_match = TRUE;
1089       if (compose_finish)
1090         *compose_finish = TRUE;
1091       if (output_char)
1092         *output_char = value;
1093 
1094       return TRUE;
1095     }
1096 
1097   return FALSE;
1098 }
1099 
1100 /* Checks if a keysym is a dead key.
1101  * Dead key keysym values are defined in ../gdk/gdkkeysyms.h and the
1102  * first is GDK_KEY_dead_grave. As X.Org is updated, more dead keys
1103  * are added and we need to update the upper limit.
1104  */
1105 #define IS_DEAD_KEY(k) \
1106     ((k) >= GDK_KEY_dead_grave && (k) <= GDK_KEY_dead_greek)
1107 
1108 /* This function receives a sequence of Unicode characters and tries to
1109  * normalize it (NFC). We check for the case where the resulting string
1110  * has length 1 (single character).
1111  * NFC normalisation normally rearranges diacritic marks, unless these
1112  * belong to the same Canonical Combining Class.
1113  * If they belong to the same canonical combining class, we produce all
1114  * permutations of the diacritic marks, then attempt to normalize.
1115  */
1116 static gboolean
check_normalize_nfc(gunichar * combination_buffer,int n_compose)1117 check_normalize_nfc (gunichar *combination_buffer,
1118                      int       n_compose)
1119 {
1120   gunichar *combination_buffer_temp;
1121   char *combination_utf8_temp = NULL;
1122   char *nfc_temp = NULL;
1123   int n_combinations;
1124   gunichar temp_swap;
1125   int i;
1126 
1127   combination_buffer_temp = g_alloca (n_compose * sizeof (gunichar));
1128 
1129   n_combinations = 1;
1130 
1131   for (i = 1; i < n_compose; i++)
1132      n_combinations *= i;
1133 
1134   /* Xorg reuses dead_tilde for the perispomeni diacritic mark.
1135    * We check if base character belongs to Greek Unicode block,
1136    * and if so, we replace tilde with perispomeni.
1137    */
1138   if (combination_buffer[0] >= 0x390 && combination_buffer[0] <= 0x3FF)
1139     {
1140       for (i = 1; i < n_compose; i++ )
1141         if (combination_buffer[i] == 0x303)
1142           combination_buffer[i] = 0x342;
1143     }
1144 
1145   memcpy (combination_buffer_temp, combination_buffer, n_compose * sizeof (gunichar) );
1146 
1147   for (i = 0; i < n_combinations; i++)
1148     {
1149       g_unicode_canonical_ordering (combination_buffer_temp, n_compose);
1150       combination_utf8_temp = g_ucs4_to_utf8 (combination_buffer_temp, n_compose, NULL, NULL, NULL);
1151       nfc_temp = g_utf8_normalize (combination_utf8_temp, -1, G_NORMALIZE_NFC);
1152 
1153       if (g_utf8_strlen (nfc_temp, -1) == 1)
1154         {
1155           memcpy (combination_buffer, combination_buffer_temp, n_compose * sizeof (gunichar) );
1156 
1157           g_free (combination_utf8_temp);
1158           g_free (nfc_temp);
1159 
1160           return TRUE;
1161         }
1162 
1163       g_free (combination_utf8_temp);
1164       g_free (nfc_temp);
1165 
1166       if (n_compose > 2)
1167         {
1168           temp_swap = combination_buffer_temp[i % (n_compose - 1) + 1];
1169           combination_buffer_temp[i % (n_compose - 1) + 1] = combination_buffer_temp[(i+1) % (n_compose - 1) + 1];
1170           combination_buffer_temp[(i+1) % (n_compose - 1) + 1] = temp_swap;
1171         }
1172       else
1173         break;
1174     }
1175 
1176   return FALSE;
1177 }
1178 
1179 gboolean
gtk_check_algorithmically(const guint16 * compose_buffer,int n_compose,gunichar * output_char)1180 gtk_check_algorithmically (const guint16 *compose_buffer,
1181                            int            n_compose,
1182                            gunichar      *output_char)
1183 
1184 {
1185   int i;
1186   gunichar *combination_buffer;
1187   char *combination_utf8, *nfc;
1188 
1189   combination_buffer = alloca (sizeof (gunichar) * (n_compose + 1));
1190 
1191   if (output_char)
1192     *output_char = 0;
1193 
1194   for (i = 0; i < n_compose && IS_DEAD_KEY (compose_buffer[i]); i++)
1195     ;
1196 
1197   /* Allow at most 2 dead keys */
1198   if (i > 2)
1199     return FALSE;
1200 
1201   /* Can't combine if there's no base character */
1202   if (i == n_compose)
1203     return TRUE;
1204 
1205   if (i > 0 && i == n_compose - 1)
1206     {
1207       combination_buffer[0] = gdk_keyval_to_unicode (compose_buffer[i]);
1208       combination_buffer[n_compose] = 0;
1209       i--;
1210       while (i >= 0)
1211         {
1212           switch (compose_buffer[i])
1213             {
1214 #define CASE(keysym, unicode) \
1215             case GDK_KEY_dead_##keysym: combination_buffer[i+1] = unicode; break
1216 
1217             CASE (grave, 0x0300);
1218             CASE (acute, 0x0301);
1219             CASE (circumflex, 0x0302);
1220             CASE (tilde, 0x0303);       /* Also used with perispomeni, 0x342. */
1221             CASE (macron, 0x0304);
1222             CASE (breve, 0x0306);
1223             CASE (abovedot, 0x0307);
1224             CASE (diaeresis, 0x0308);
1225             CASE (abovering, 0x30A);
1226             CASE (hook, 0x0309);
1227             CASE (doubleacute, 0x030B);
1228             CASE (caron, 0x030C);
1229             CASE (cedilla, 0x0327);
1230             CASE (ogonek, 0x0328);      /* Legacy use for dasia, 0x314.*/
1231             CASE (iota, 0x0345);
1232             CASE (voiced_sound, 0x3099);        /* Per Markus Kuhn keysyms.txt file. */
1233             CASE (semivoiced_sound, 0x309A);    /* Per Markus Kuhn keysyms.txt file. */
1234             CASE (belowdot, 0x0323);
1235             CASE (horn, 0x031B);        /* Legacy use for psili, 0x313 (or 0x343). */
1236             CASE (stroke, 0x335);
1237             CASE (abovecomma, 0x0313);  /* Equivalent to psili */
1238             CASE (abovereversedcomma, 0x0314); /* Equivalent to dasia */
1239             CASE (doublegrave, 0x30F);
1240             CASE (belowring, 0x325);
1241             CASE (belowmacron, 0x331);
1242             CASE (belowcircumflex, 0x32D);
1243             CASE (belowtilde, 0x330);
1244             CASE (belowbreve, 0x32e);
1245             CASE (belowdiaeresis, 0x324);
1246             CASE (invertedbreve, 0x32f);
1247             CASE (belowcomma, 0x326);
1248             CASE (lowline, 0x332);
1249             CASE (aboveverticalline, 0x30D);
1250             CASE (belowverticalline, 0x329);
1251             CASE (longsolidusoverlay, 0x338);
1252             CASE (a, 0x363);
1253             CASE (A, 0x363);
1254             CASE (e, 0x364);
1255             CASE (E, 0x364);
1256             CASE (i, 0x365);
1257             CASE (I, 0x365);
1258             CASE (o, 0x366);
1259             CASE (O, 0x366);
1260             CASE (u, 0x367);
1261             CASE (U, 0x367);
1262             CASE (small_schwa, 0x1DEA);
1263             CASE (capital_schwa, 0x1DEA);
1264 #undef CASE
1265             default:
1266               combination_buffer[i+1] = gdk_keyval_to_unicode (compose_buffer[i]);
1267             }
1268           i--;
1269         }
1270 
1271       /* If the buffer normalizes to a single character, then modify the order
1272        * of combination_buffer accordingly, if necessary, and return TRUE.
1273        */
1274       if (check_normalize_nfc (combination_buffer, n_compose))
1275         {
1276           combination_utf8 = g_ucs4_to_utf8 (combination_buffer, -1, NULL, NULL, NULL);
1277           nfc = g_utf8_normalize (combination_utf8, -1, G_NORMALIZE_NFC);
1278 
1279           if (output_char)
1280             *output_char = g_utf8_get_char (nfc);
1281 
1282           g_free (combination_utf8);
1283           g_free (nfc);
1284 
1285           return TRUE;
1286         }
1287     }
1288 
1289   return FALSE;
1290 }
1291 
1292