1 /*
2 * Copyright (c) 2003
3 * David Leonard. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of David Leonard nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
20 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
21 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
23 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
27 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 #if HAVE_CONFIG_H
32 # include <config.h>
33 #endif
34
35 #if STDC_HEADERS
36 # include <stdio.h>
37 #endif
38
39 #include <see/type.h>
40 #include <see/string.h>
41 #include <see/try.h>
42 #include <see/mem.h>
43 #include <see/intern.h>
44 #include <see/error.h>
45 #include <see/interpreter.h>
46 #include <see/system.h>
47 #include <see/object.h>
48
49 #include "stringdefs.h"
50 #include "dprint.h"
51
52 /*
53 * Internalised strings.
54 *
55 * 'Interning' a string means to replace it with a unique pointer
56 * to a previously stored string.
57 * When a previously unseen string is interned, a copy is made.
58 * The result is that string comparison becomes a pointer comparison
59 * (much faster) and is primarily used for identifiers.
60 *
61 * This module uses a three-level intern strategy: the first level is the
62 * static list of library strings generated by string.defs. The second level
63 * is the application-wide "global" intern table, which applications must set
64 * up early and not change after the creation of any interpreter. The
65 * third level is the interpreter-local intern cache.
66 *
67 * This strategy allow the sharing of application static strings,
68 * while avoiding the need for mutual exclusion techniques between
69 * interpreters (since the library and application static strings are
70 * read-only).
71 */
72
73 #define HASHTABSZ 257
74 #define HASHLENMAX 8 /* prefix of string hashed on */
75
76 struct intern { /* element in the intern hash table */
77 struct intern *next;
78 struct SEE_string *string;
79 };
80 typedef struct intern *(intern_tab_t[HASHTABSZ]);
81
82 /* Prototypes */
83 static struct intern * make(struct SEE_interpreter *, struct SEE_string *);
84 static unsigned int hash(const struct SEE_string *);
85 static struct intern ** find(intern_tab_t *, struct SEE_string *,
86 unsigned int);
87 static int internalized(struct SEE_interpreter *interp,
88 const struct SEE_string *s);
89 static void global_init(void);
90
91 /** System-wide intern table */
92 static intern_tab_t global_intern_tab;
93 static int global_intern_tab_initialized;
94
95 #ifndef NDEBUG
96 static int global_intern_tab_locked = 0;
97 int SEE_debug_intern;
98 #endif
99
100 #ifndef NDEBUG
101 static int
string_only_contains_ascii(s)102 string_only_contains_ascii(s)
103 const char *s;
104 {
105 for (; *s; s++)
106 if (*s & 0x80)
107 return 0;
108 return 1;
109 }
110 #endif
111
112 /**
113 * Make an intern entry in the hash table containing the string s,
114 * and flag s as being interned.
115 */
116 static struct intern *
make(interp,s)117 make(interp, s)
118 struct SEE_interpreter *interp; /* may be NULL */
119 struct SEE_string *s;
120 {
121 struct intern *i;
122
123 i = SEE_NEW(interp, struct intern);
124 i->string = s;
125 s->flags |= SEE_STRING_FLAG_INTERNED;
126 i->next = NULL;
127 return i;
128 }
129
130 /** Compute the hash value of a UTF-16 string */
131 static unsigned int
hash(s)132 hash(s)
133 const struct SEE_string *s;
134 {
135 unsigned int j, h = 0;
136 for (j = 0; j < HASHLENMAX && j < s->length; j++)
137 h = (h << 1) ^ s->data[j];
138 return h % HASHTABSZ;
139 }
140
141 /**
142 * Compute the hash value of an ASCII string.
143 * Returns the same value as if the ASCII string had been converted to a
144 * UTF-16 string and passed to hash().
145 * Assumes the input string is indeed ASCII (bytes in 0x00..0x7f).
146 */
147 static unsigned int
hash_ascii(s,lenret)148 hash_ascii(s, lenret)
149 const char *s;
150 unsigned int *lenret;
151 {
152 unsigned int j, h = 0;
153 const char *t;
154
155 for (j = 0, t = s; j < HASHLENMAX && *t; j++, t++)
156 h = (h << 1) ^ *t;
157 while (*t) t++;
158 *lenret = t - s;
159 return h % HASHTABSZ;
160 }
161
162 /** Find an interned string */
163 static struct intern **
find(intern_tab,s,hash)164 find(intern_tab, s, hash)
165 intern_tab_t *intern_tab;
166 struct SEE_string *s;
167 unsigned int hash;
168 {
169 struct intern **x;
170
171 x = &(*intern_tab)[hash];
172 while (*x && SEE_string_cmp((*x)->string, s) != 0)
173 x = &((*x)->next);
174 return x;
175 }
176
177 /**
178 * Returns true if a SEE_string matches the ASCII string s.
179 * Assumes the string s is ASCII (0x00..0x7f).
180 */
181 static int
ascii_eq(str,s)182 ascii_eq(str, s)
183 struct SEE_string *str;
184 const char *s;
185 {
186 unsigned int len = str->length;
187 SEE_char_t *c = str->data;
188
189 while (len--)
190 if (!*s)
191 return 0;
192 else if (*c++ != *s++)
193 return 0;
194 return *s == 0;
195 }
196
197 /** Find an interned ASCII string */
198 static struct intern **
find_ascii(intern_tab,s,hash)199 find_ascii(intern_tab, s, hash)
200 intern_tab_t *intern_tab;
201 const char *s;
202 unsigned int hash;
203 {
204 struct intern **x;
205
206 x = &(*intern_tab)[hash];
207 while (*x && !ascii_eq((*x)->string, s))
208 x = &((*x)->next);
209 return x;
210 }
211
212 /** Create an interpreter-local intern table */
213 void
_SEE_intern_init(interp)214 _SEE_intern_init(interp)
215 struct SEE_interpreter *interp;
216 {
217 intern_tab_t *intern_tab;
218 unsigned int i;
219
220 global_init();
221 #ifndef NDEBUG
222 global_intern_tab_locked = 1;
223 #endif
224
225 intern_tab = SEE_NEW(interp, intern_tab_t);
226 for (i = 0; i < HASHTABSZ; i++)
227 (*intern_tab)[i] = NULL;
228
229 interp->intern_tab = intern_tab;
230 }
231
232 /* Returns true if the string is already internalized */
233 static int
internalized(interp,s)234 internalized(interp, s)
235 struct SEE_interpreter *interp;
236 const struct SEE_string *s;
237 {
238 /*
239 * A string is internalized if
240 * - is already internalized in this interpreter or the global hash
241 * - is one of the static resource strings
242 */
243 return
244 ((!s->interpreter || s->interpreter == interp) &&
245 (s->flags & SEE_STRING_FLAG_INTERNED)) ||
246 (s >= STRn(0) && s < STRn(SEE_nstringtab));
247 }
248
249 /**
250 * Intern a string relative to an interpreter. Also reads the global table
251 * Note that a different pointer to s is *ALWAYS* returned unless s was
252 * originally returned by SEE_intern. In other words, on the first occurrence
253 * of a string, it is duplicated. This makes it safe to intern strings that
254 * will be later grown. However, you MUST not alter content of strings
255 * returned from this function.
256 */
257 struct SEE_string *
SEE_intern(interp,s)258 SEE_intern(interp, s)
259 struct SEE_interpreter *interp;
260 struct SEE_string *s;
261 {
262 struct intern **x;
263 unsigned int h;
264 #ifndef NDEBUG
265 const char *where = NULL;
266 # define WHERE(f) do { if (SEE_debug_intern) where=f; } while (0)
267 #else
268 # define WHERE(f) /* nothing */
269 #endif
270
271 /* Allow interning NULL to lessen the number of error checks */
272 if (!s)
273 return NULL;
274
275 if (internalized(interp, s)) {
276 #ifndef NDEBUG
277 if (SEE_debug_intern) {
278 dprintf("INTERN ");
279 dprints(s);
280 dprintf(" -> %p [interned]\n", s);
281 }
282 #endif
283 return s;
284 }
285
286 /* If the string is from another interpreter, then it must
287 * have been intern'd already. This is to prevent race conditions
288 * with string whose content is changing. */
289 SEE_ASSERT(interp, !s->interpreter || s->interpreter == interp ||
290 (s->flags & SEE_STRING_FLAG_INTERNED));
291
292 /* Look in system-wide intern table first */
293 h = hash(s);
294 x = find(&global_intern_tab, s, h);
295 WHERE("global");
296 if (!*x) {
297 x = find(interp->intern_tab, s, h);
298 WHERE("local");
299 if (!*x) {
300 *x = make(interp, _SEE_string_dup_fix(interp, s));
301 WHERE("new");
302 }
303 }
304 #ifndef NDEBUG
305 if (SEE_debug_intern) {
306 dprintf("INTERN ");
307 dprints(s);
308 dprintf(" -> %p [%s h=%d]\n", (*x)->string, where, h);
309 }
310 #endif
311 return (*x)->string;
312
313 }
314
315 /** Efficiently converts an ASCII C string into an internalised SEE string */
316 struct SEE_string *
SEE_intern_ascii(interp,s)317 SEE_intern_ascii(interp, s)
318 struct SEE_interpreter *interp;
319 const char *s;
320 {
321 struct SEE_string *str;
322 const char *t;
323 SEE_char_t *c;
324 unsigned int h, len;
325 struct intern **x;
326 #ifndef NDEBUG
327 const char *where = NULL;
328 #endif
329
330 SEE_ASSERT(interp, s != NULL);
331 SEE_ASSERT(interp, string_only_contains_ascii(s));
332
333 h = hash_ascii(s, &len);
334 x = find_ascii(&global_intern_tab, s, h);
335 WHERE("global");
336 if (!*x) {
337 x = find_ascii(interp->intern_tab, s, h);
338 WHERE("local");
339 if (!*x) {
340 WHERE("new");
341 str = SEE_NEW(interp, struct SEE_string);
342 str->length = len;
343 str->data = SEE_NEW_STRING_ARRAY(interp, SEE_char_t, len);
344 for (c = str->data, t = s; *t;)
345 *c++ = *t++;
346 str->interpreter = interp;
347 str->stringclass = NULL;
348 str->flags = 0;
349 SEE_ASSERT(interp, hash(str) == h);
350 *x = make(interp, str);
351 }
352 }
353 #ifndef NDEBUG
354 if (SEE_debug_intern)
355 dprintf("INTERN %s -> %p [%s h=%d ascii]\n",
356 s, (*x)->string, where, h);
357 #endif
358 return (*x)->string;
359 }
360
361 /*
362 * Interns a string, and frees the original string.
363 */
364 void
SEE_intern_and_free(interp,sp)365 SEE_intern_and_free(interp, sp)
366 struct SEE_interpreter *interp;
367 struct SEE_string **sp;
368 {
369 struct SEE_string *is;
370
371 is = SEE_intern(interp, *sp);
372 SEE_ASSERT(interp, is != *sp);
373 #ifndef NDEBUG
374 if (SEE_debug_intern) {
375 dprintf("INTERN ");
376 dprints(*sp);
377 dprintf(" -> %p [hit & free]\n", is);
378 }
379 #endif
380 SEE_string_free(interp, sp);
381 *sp = is;
382 }
383
384 static void
global_init()385 global_init()
386 {
387 unsigned int i, h;
388 struct intern **x;
389
390 if (global_intern_tab_initialized)
391 return;
392
393 /* Add all the predefined strings to the global intern table */
394 for (i = 0; i < SEE_nstringtab; i++) {
395 h = hash(STRn(i));
396 x = find(&global_intern_tab, STRn(i), h);
397 if (*x == NULL)
398 *x = make(NULL, STRn(i));
399 }
400 global_intern_tab_initialized = 1;
401 }
402
403 /**
404 * Adds an ASCII string into the system-wide intern table if
405 * not already there.
406 * Should not be called after any interpeters are created.
407 */
408 struct SEE_string *
SEE_intern_global(s)409 SEE_intern_global(s)
410 const char *s;
411 {
412 struct SEE_string *str;
413 const char *t;
414 SEE_char_t *c;
415 unsigned int h, len;
416 struct intern **x;
417
418 #ifndef NDEBUG
419 if (global_intern_tab_locked)
420 SEE_ABORT(NULL, "SEE_intern_global: table is now read-only");
421 #endif
422 global_init();
423
424 h = hash_ascii(s, &len);
425 x = find_ascii(&global_intern_tab, s, h);
426 if (*x) return (*x)->string;
427
428 str = SEE_NEW(NULL, struct SEE_string);
429 str->length = len;
430 str->data = SEE_NEW_STRING_ARRAY(NULL, SEE_char_t, len);
431 for (c = str->data, t = s; *t;)
432 *c++ = *t++;
433 str->interpreter = NULL;
434 str->stringclass = NULL;
435 str->flags = 0;
436 *x = make(NULL, str);
437 return (*x)->string;
438 }
439
440 /**
441 * Raises an assertion failure if the passed string is not internalised.
442 * This function used by the SEE_OBJECT_*() macros.
443 */
444 struct SEE_string *
_SEE_intern_assert(interp,s)445 _SEE_intern_assert(interp, s)
446 struct SEE_interpreter *interp;
447 struct SEE_string *s;
448 {
449 #ifndef NDEBUG
450 if (s)
451 SEE_ASSERT(interp, internalized(interp, s));
452 #endif
453 return s;
454 }
455