1 /*
2 * Copyright (C) 2008 Red Hat, Inc.
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 *
18 * Author(s):
19 * Behdad Esfahbod
20 */
21
22 #include <config.h>
23
24 #include "vteunistr.h"
25
26 #include <string.h>
27
28
29 /* Overview:
30 *
31 * The way vteunistr is implemented is very simple: Unicode only defines
32 * codepoints less than 0x110000. That leaves plenty of room in a guint32 to
33 * use for other things. So, whenever our "string" contains only one Unicode
34 * character, we use its code as our vteunistr. Otherwise, we register the
35 * string in a central registry and assign a unique number to it and use that.
36 * This number is "our own private internal non-unicode code for this
37 * sequence of characters".
38 *
39 * The rest of the problem would be how to efficiently implement this
40 * registry. It does *NOT* really have to be efficient at all, as it will
41 * only be accessed in case of combining marks. And the strings are pretty
42 * short (two or three characters). But our implementation is quite efficient
43 * anyway.
44 *
45 * The access pattern of using vteunistr's is that we have a vteunistr in a
46 * terminal cell, a new gunichar comes in and we decide to combine with it,
47 * and we combine them and get a new vteunistr. So, that is exactly how we
48 * encode vteunistr's: all we need to know about a vteunistr to be able to
49 * reconstruct its string is the vteunistr and the gunichar that joined to
50 * form it. That's what VteUnistrDecomp is. That is the decomposition.
51 *
52 * We start giving new vteunistr's unique numbers starting at
53 * %VTE_UNISTR_START+1 and going up. We keep the decompositions in a GArray,
54 * called unistr_decomp. The first entry of the array is unused (that's why
55 * we start from %VTE_UNISTR_START plus one). The decomposition table provides
56 * enough information to efficiently answer questions like "what's the first
57 * gunichar in this vteunistr?", "what's the sequence of gunichar's in this
58 * vteunistr?", and "how many gunichar's are there in this vteunistr?".
59 *
60 * We still do not have any efficient way to construct new vteunistr's though.
61 * Given a vteunistr and a gunichar, we have to walk over the entire
62 * decomposition table to see if we have already registered (encoded) this
63 * combination. To make that operation fast, we use a reverse map, that is,
64 * a GHashTable named unistr_comp. The hash table maps a decomposition to its
65 * encoded vteunistr value. The value obviously fits in a pointer and does
66 * not need memory allocation. We also want to avoid allocating memory for
67 * the key of the hash table entries, as we already have those decompositions
68 * in the memory in the unistr_decomp array. We cannot use direct pointers
69 * though as when growing, the GArray may resize and move to a new memory
70 * buffer, rendering all our pointers invalid. For this reason, we keep the
71 * index into the array as our hash table key. When we want to perform a
72 * lookup in the hash table, we insert the decomposition that we are searching
73 * for as item zero in the unistr_decomp table, then lookup for an equal entry
74 * of it in the hash table. Finally, if the hash lookup fails, we add the new
75 * decomposition to the lookup array and the hash table, and return the newly
76 * encoded vteunistr value.
77 */
78
79 #define VTE_UNISTR_START 0x80000000
80
81 static vteunistr unistr_next = VTE_UNISTR_START + 1;
82
83 struct VteUnistrDecomp {
84 vteunistr prefix;
85 gunichar suffix;
86 };
87
88 GArray *unistr_decomp;
89 GHashTable *unistr_comp;
90
91 #define DECOMP_FROM_INDEX(i) g_array_index (unistr_decomp, struct VteUnistrDecomp, (i))
92 #define DECOMP_FROM_UNISTR(s) DECOMP_FROM_INDEX ((s) - VTE_UNISTR_START)
93
94 static guint
unistr_comp_hash(gconstpointer key)95 unistr_comp_hash (gconstpointer key)
96 {
97 struct VteUnistrDecomp *decomp;
98 decomp = &DECOMP_FROM_INDEX (GPOINTER_TO_UINT (key));
99 return decomp->prefix ^ decomp->suffix;
100 }
101
102 static gboolean
unistr_comp_equal(gconstpointer a,gconstpointer b)103 unistr_comp_equal (gconstpointer a, gconstpointer b)
104 {
105 return 0 == memcmp (&DECOMP_FROM_INDEX (GPOINTER_TO_UINT (a)),
106 &DECOMP_FROM_INDEX (GPOINTER_TO_UINT (b)),
107 sizeof (struct VteUnistrDecomp));
108 }
109
110 vteunistr
_vte_unistr_append_unichar(vteunistr s,gunichar c)111 _vte_unistr_append_unichar (vteunistr s, gunichar c)
112 {
113 struct VteUnistrDecomp decomp;
114 vteunistr ret = 0;
115
116 decomp.prefix = s;
117 decomp.suffix = c;
118
119 if (G_UNLIKELY (!unistr_decomp)) {
120 unistr_decomp = g_array_new (FALSE, TRUE, sizeof (struct VteUnistrDecomp));
121 g_array_set_size (unistr_decomp, 1);
122 unistr_comp = g_hash_table_new (unistr_comp_hash, unistr_comp_equal);
123 } else {
124 DECOMP_FROM_INDEX (0) = decomp;
125 ret = GPOINTER_TO_UINT (g_hash_table_lookup (unistr_comp, GUINT_TO_POINTER (0)));
126 }
127
128 if (G_UNLIKELY (!ret)) {
129 /* sanity check to avoid OOM */
130 if (G_UNLIKELY (_vte_unistr_strlen (s) > 10 || unistr_next - VTE_UNISTR_START > 100000))
131 return s;
132
133 ret = unistr_next++;
134 g_array_append_val (unistr_decomp, decomp);
135 g_hash_table_insert (unistr_comp,
136 GUINT_TO_POINTER (ret - VTE_UNISTR_START),
137 GUINT_TO_POINTER (ret));
138 }
139
140 return ret;
141 }
142
143 gunichar
_vte_unistr_get_base(vteunistr s)144 _vte_unistr_get_base (vteunistr s)
145 {
146 g_return_val_if_fail (s < unistr_next, s);
147 while (G_UNLIKELY (s >= VTE_UNISTR_START))
148 s = DECOMP_FROM_UNISTR (s).prefix;
149 return (gunichar) s;
150 }
151
152 void
_vte_unistr_append_to_string(vteunistr s,GString * gs)153 _vte_unistr_append_to_string (vteunistr s, GString *gs)
154 {
155 g_return_if_fail (s < unistr_next);
156 if (G_UNLIKELY (s >= VTE_UNISTR_START)) {
157 struct VteUnistrDecomp *decomp;
158 decomp = &DECOMP_FROM_UNISTR (s);
159 _vte_unistr_append_to_string (decomp->prefix, gs);
160 s = decomp->suffix;
161 }
162 g_string_append_unichar (gs, (gunichar) s);
163 }
164
165 int
_vte_unistr_strlen(vteunistr s)166 _vte_unistr_strlen (vteunistr s)
167 {
168 int len = 1;
169 g_return_val_if_fail (s < unistr_next, len);
170 while (G_UNLIKELY (s >= VTE_UNISTR_START)) {
171 s = DECOMP_FROM_UNISTR (s).prefix;
172 len++;
173 }
174 return len;
175 }
176