1 /*************************************************************************/
2 /* Copyright (c) 2004 */
3 /* Daniel Sleator, David Temperley, and John Lafferty */
4 /* All rights reserved */
5 /* */
6 /* Use of the link grammar parsing system is subject to the terms of the */
7 /* license set forth in the LICENSE file included with this software. */
8 /* This license allows free redistribution and use in source and binary */
9 /* forms, with or without modification, subject to certain conditions. */
10 /* */
11 /*************************************************************************/
12
13 #include <stdint.h> // uintptr_t
14
15 #include "const-prime.h"
16 #include "memory-pool.h"
17 #include "string-set.h"
18 #include "utilities.h"
19
20 /**
21 * Suppose you have a program that generates strings and keeps pointers to them.
22 The program never needs to change these strings once they're generated.
23 If it generates the same string again, then it can reuse the one it
24 generated before. This is what this package supports.
25
26 String_set is the object. The functions are:
27
28 char * string_set_add(char * source_string, String_set * ss);
29 This function returns a pointer to a string with the same
30 contents as the source_string. If that string is already
31 in the table, then it uses that copy, otherwise it generates
32 and inserts a new one.
33
34 char * string_set_lookup(char * source_string, String_set * ss);
35 This function returns a pointer to a string with the same
36 contents as the source_string. If that string is not already
37 in the table, returns NULL;
38
39 String_set * string_set_create(void);
40 Create a new empty String_set.
41
42 string_set_delete(String_set *ss);
43 Free all the space associated with this string set.
44
45 The implementation uses probed hashing (i.e. not bucket).
46 */
47
48 #define STR_POOL
49 #define MEM_POOL_INIT (8*1024)
50 #define MEM_POOL_INCR (16*1024)
51 struct str_mem_pool_s
52 {
53 str_mem_pool *prev;
54 size_t size;
55 char block[0];
56 };
57
hash_string(const char * str,const String_set * ss)58 static unsigned int hash_string(const char *str, const String_set *ss)
59 {
60 unsigned int accum = 0;
61 for (;*str != '\0'; str++)
62 accum = (139 * accum) + (unsigned char)*str;
63 return accum;
64 }
65
66 #ifdef STR_POOL
ss_pool_alloc(size_t pool_size_add,String_set * ss)67 static void ss_pool_alloc(size_t pool_size_add, String_set *ss)
68 {
69 str_mem_pool *new_mem_pool = malloc(pool_size_add);
70 new_mem_pool->size = pool_size_add;
71
72 new_mem_pool->prev = ss->string_pool;
73 ss->string_pool = new_mem_pool;
74 ss->alloc_next = ss->string_pool->block;
75
76 ss->pool_free_count = pool_size_add - sizeof(str_mem_pool);
77 ASAN_POISON_MEMORY_REGION(ss->string_pool+sizeof(str_mem_pool), ss->pool_free_count);
78 }
79
ss_pool_delete(String_set * ss)80 static void ss_pool_delete(String_set *ss)
81 {
82 str_mem_pool *m, *m_prev;
83
84 for (m = ss->string_pool; m != NULL; m = m_prev)
85 {
86 ASAN_UNPOISON_MEMORY_REGION(m, m->size);
87 m_prev = m->prev;
88 free(m);
89 }
90 }
91
92 #define STR_ALIGNMENT 16
93
ss_stralloc(size_t str_size,String_set * ss)94 static char *ss_stralloc(size_t str_size, String_set *ss)
95 {
96 ss->pool_free_count -= str_size;
97 if (ss->pool_free_count < 0)
98 ss_pool_alloc((str_size&MEM_POOL_INCR) + MEM_POOL_INCR, ss);
99
100 char *str_address = ss->alloc_next;
101 ASAN_UNPOISON_MEMORY_REGION(str_address, str_size);
102 ss->alloc_next += str_size;
103 ss->alloc_next = (char *)ALIGN((uintptr_t)ss->alloc_next, STR_ALIGNMENT);
104 size_t total_size = str_size + (ss->alloc_next - str_address);
105
106 ss->pool_free_count -= total_size;
107 return str_address;
108 }
109 #else
110 #define ss_stralloc(x, ss) malloc(x)
111 #define ss_pool_alloc(a, b)
112 #endif
113
string_set_create(void)114 String_set * string_set_create(void)
115 {
116 String_set *ss;
117 ss = (String_set *) malloc(sizeof(String_set));
118 ss->prime_idx = 0;
119 ss->size = s_prime[ss->prime_idx];
120 ss->mod_func = prime_mod_func[ss->prime_idx];
121 ss->table = malloc(ss->size * sizeof(ss_slot));
122 memset(ss->table, 0, ss->size*sizeof(ss_slot));
123 ss->count = 0;
124 ss->string_pool = NULL;
125 ss_pool_alloc(MEM_POOL_INIT, ss);
126 ss->available_count = MAX_STRING_SET_TABLE_SIZE(ss->size);
127
128 return ss;
129 }
130
place_found(const char * str,const ss_slot * slot,unsigned int hash,String_set * ss)131 static bool place_found(const char *str, const ss_slot *slot, unsigned int hash,
132 String_set *ss)
133 {
134 if (slot->str == NULL) return true;
135 if (hash != slot->hash) return false;
136 return (strcmp(slot->str, str) == 0);
137 }
138
139 /**
140 * lookup the given string in the table. Return an index
141 * to the place it is, or the place where it should be.
142 */
find_place(const char * str,unsigned int h,String_set * ss)143 static unsigned int find_place(const char *str, unsigned int h, String_set *ss)
144 {
145 unsigned int coll_num = 0;
146 unsigned int key = ss->mod_func(h);
147
148 /* Quadratic probing. */
149 while (!place_found(str, &ss->table[key], h, ss))
150 {
151 key += 2 * ++coll_num - 1;
152 if (key >= ss->size) key = ss->mod_func(key);
153 }
154
155 return key;
156 }
157
grow_table(String_set * ss)158 static void grow_table(String_set *ss)
159 {
160 String_set old;
161 size_t i;
162 unsigned int p;
163
164 old = *ss;
165 ss->prime_idx++;
166 ss->size = s_prime[ss->prime_idx];
167 ss->mod_func = prime_mod_func[ss->prime_idx];
168 ss->table = malloc(ss->size * sizeof(ss_slot));
169 memset(ss->table, 0, ss->size*sizeof(ss_slot));
170 for (i=0; i<old.size; i++)
171 {
172 if (old.table[i].str != NULL)
173 {
174 p = find_place(old.table[i].str, old.table[i].hash, ss);
175 ss->table[p] = old.table[i];
176 }
177 }
178 ss->available_count = MAX_STRING_SET_TABLE_SIZE(ss->size);
179
180 /* printf("growing from %zu to %zu\n", old.size, ss->size); */
181 free(old.table);
182 }
183
string_set_add(const char * source_string,String_set * ss)184 const char * string_set_add(const char * source_string, String_set * ss)
185 {
186 assert(source_string != NULL, "STRING_SET: Can't insert a null string");
187
188 unsigned int h = hash_string(source_string, ss);
189 unsigned int p = find_place(source_string, h, ss);
190
191 if (ss->table[p].str != NULL) return ss->table[p].str;
192
193 size_t len = strlen(source_string) + 1;
194 char *str;
195
196 #ifdef DEBUG
197 /* Store the String_set structure address for debug verifications */
198 size_t mlen = (len&~(sizeof(ss)-1)) + 2*sizeof(ss);
199 str = ss_stralloc(mlen, ss);
200 *(String_set **)&str[mlen-sizeof(ss)] = ss;
201 #else /* !DEBUG */
202 str = ss_stralloc(len, ss);
203 #endif /* DEBUG */
204 memcpy(str, source_string, len);
205 ss->table[p].str = str;
206 ss->table[p].hash = h;
207 ss->count++;
208 ss->available_count--;
209
210 /* We just added it to the table. */
211 if (ss->available_count == 0) grow_table(ss); /* Too full */
212
213 return str;
214 }
215
string_set_lookup(const char * source_string,String_set * ss)216 const char * string_set_lookup(const char * source_string, String_set * ss)
217 {
218 unsigned int h = hash_string(source_string, ss);
219 unsigned int p = find_place(source_string, h, ss);
220
221 return ss->table[p].str;
222 }
223
string_set_delete(String_set * ss)224 void string_set_delete(String_set *ss)
225 {
226 if (ss == NULL) return;
227
228 #ifdef STR_POOL
229 ss_pool_delete(ss);
230 #else
231 size_t i;
232
233 for (i=0; i<ss->size; i++)
234 {
235 if (ss->table[i].str != NULL) free((void *)ss->table[i].str);
236 }
237 #endif /* STR_POOL */
238
239 free(ss->table);
240 free(ss);
241 }
242