1 /*************************************************************************/
2 /* Copyright (c) 2004                                                    */
3 /* Daniel Sleator, David Temperley, and John Lafferty                    */
4 /* All rights reserved                                                   */
5 /*                                                                       */
6 /* Use of the link grammar parsing system is subject to the terms of the */
7 /* license set forth in the LICENSE file included with this software.    */
8 /* This license allows free redistribution and use in source and binary  */
9 /* forms, with or without modification, subject to certain conditions.   */
10 /*                                                                       */
11 /*************************************************************************/
12 
13 #include <stdint.h>                     // uintptr_t
14 
15 #include "const-prime.h"
16 #include "memory-pool.h"
17 #include "string-set.h"
18 #include "utilities.h"
19 
20 /**
21  * Suppose you have a program that generates strings and keeps pointers to them.
22    The program never needs to change these strings once they're generated.
23    If it generates the same string again, then it can reuse the one it
24    generated before.  This is what this package supports.
25 
26    String_set is the object.  The functions are:
27 
28    char * string_set_add(char * source_string, String_set * ss);
29      This function returns a pointer to a string with the same
30      contents as the source_string.  If that string is already
31      in the table, then it uses that copy, otherwise it generates
32      and inserts a new one.
33 
34    char * string_set_lookup(char * source_string, String_set * ss);
35      This function returns a pointer to a string with the same
36      contents as the source_string.  If that string is not already
37      in the table, returns NULL;
38 
39    String_set * string_set_create(void);
40      Create a new empty String_set.
41 
42    string_set_delete(String_set *ss);
43      Free all the space associated with this string set.
44 
45    The implementation uses probed hashing (i.e. not bucket).
46  */
47 
48 #define STR_POOL
49 #define MEM_POOL_INIT (8*1024)
50 #define MEM_POOL_INCR (16*1024)
51 struct str_mem_pool_s
52 {
53 	str_mem_pool *prev;
54 	size_t size;
55 	char block[0];
56 };
57 
hash_string(const char * str,const String_set * ss)58 static unsigned int hash_string(const char *str, const String_set *ss)
59 {
60 	unsigned int accum = 0;
61 	for (;*str != '\0'; str++)
62 		accum = (139 * accum) + (unsigned char)*str;
63 	return accum;
64 }
65 
66 #ifdef STR_POOL
ss_pool_alloc(size_t pool_size_add,String_set * ss)67 static void ss_pool_alloc(size_t pool_size_add, String_set *ss)
68 {
69 	str_mem_pool *new_mem_pool = malloc(pool_size_add);
70 	new_mem_pool->size = pool_size_add;
71 
72 	new_mem_pool->prev = ss->string_pool;
73 	ss->string_pool = new_mem_pool;
74 	ss->alloc_next = ss->string_pool->block;
75 
76 	ss->pool_free_count = pool_size_add - sizeof(str_mem_pool);
77 	ASAN_POISON_MEMORY_REGION(ss->string_pool+sizeof(str_mem_pool), ss->pool_free_count);
78 }
79 
ss_pool_delete(String_set * ss)80 static void ss_pool_delete(String_set *ss)
81 {
82 	str_mem_pool *m, *m_prev;
83 
84 	for (m = ss->string_pool; m != NULL; m = m_prev)
85 	{
86 		ASAN_UNPOISON_MEMORY_REGION(m, m->size);
87 		m_prev = m->prev;
88 		free(m);
89 	}
90 }
91 
92 #define STR_ALIGNMENT 16
93 
ss_stralloc(size_t str_size,String_set * ss)94 static char *ss_stralloc(size_t str_size, String_set *ss)
95 {
96 	ss->pool_free_count -= str_size;
97 	if (ss->pool_free_count < 0)
98 		ss_pool_alloc((str_size&MEM_POOL_INCR) + MEM_POOL_INCR, ss);
99 
100 	char *str_address = ss->alloc_next;
101 	ASAN_UNPOISON_MEMORY_REGION(str_address, str_size);
102 	ss->alloc_next += str_size;
103 	ss->alloc_next = (char *)ALIGN((uintptr_t)ss->alloc_next, STR_ALIGNMENT);
104 	size_t total_size = str_size + (ss->alloc_next - str_address);
105 
106 	ss->pool_free_count -= total_size;
107 	return str_address;
108 }
109 #else
110 #define ss_stralloc(x, ss) malloc(x)
111 #define ss_pool_alloc(a, b)
112 #endif
113 
string_set_create(void)114 String_set * string_set_create(void)
115 {
116 	String_set *ss;
117 	ss = (String_set *) malloc(sizeof(String_set));
118 	ss->prime_idx = 0;
119 	ss->size = s_prime[ss->prime_idx];
120 	ss->mod_func = prime_mod_func[ss->prime_idx];
121 	ss->table = malloc(ss->size * sizeof(ss_slot));
122 	memset(ss->table, 0, ss->size*sizeof(ss_slot));
123 	ss->count = 0;
124 	ss->string_pool = NULL;
125 	ss_pool_alloc(MEM_POOL_INIT, ss);
126 	ss->available_count = MAX_STRING_SET_TABLE_SIZE(ss->size);
127 
128 	return ss;
129 }
130 
place_found(const char * str,const ss_slot * slot,unsigned int hash,String_set * ss)131 static bool place_found(const char *str, const ss_slot *slot, unsigned int hash,
132                          String_set *ss)
133 {
134 	if (slot->str == NULL) return true;
135 	if (hash != slot->hash) return false;
136 	return (strcmp(slot->str, str) == 0);
137 }
138 
139 /**
140  * lookup the given string in the table.  Return an index
141  * to the place it is, or the place where it should be.
142  */
find_place(const char * str,unsigned int h,String_set * ss)143 static unsigned int find_place(const char *str, unsigned int h, String_set *ss)
144 {
145 	unsigned int coll_num = 0;
146 	unsigned int key = ss->mod_func(h);
147 
148 	/* Quadratic probing. */
149 	while (!place_found(str, &ss->table[key], h, ss))
150 	{
151 		key += 2 * ++coll_num - 1;
152 		if (key >= ss->size) key = ss->mod_func(key);
153 	}
154 
155 	return key;
156 }
157 
grow_table(String_set * ss)158 static void grow_table(String_set *ss)
159 {
160 	String_set old;
161 	size_t i;
162 	unsigned int p;
163 
164 	old = *ss;
165 	ss->prime_idx++;
166 	ss->size = s_prime[ss->prime_idx];
167 	ss->mod_func = prime_mod_func[ss->prime_idx];
168 	ss->table = malloc(ss->size * sizeof(ss_slot));
169 	memset(ss->table, 0, ss->size*sizeof(ss_slot));
170 	for (i=0; i<old.size; i++)
171 	{
172 		if (old.table[i].str != NULL)
173 		{
174 			p = find_place(old.table[i].str, old.table[i].hash, ss);
175 			ss->table[p] = old.table[i];
176 		}
177 	}
178 	ss->available_count = MAX_STRING_SET_TABLE_SIZE(ss->size);
179 
180 	/* printf("growing from %zu to %zu\n", old.size, ss->size); */
181 	free(old.table);
182 }
183 
string_set_add(const char * source_string,String_set * ss)184 const char * string_set_add(const char * source_string, String_set * ss)
185 {
186 	assert(source_string != NULL, "STRING_SET: Can't insert a null string");
187 
188 	unsigned int h = hash_string(source_string, ss);
189 	unsigned int p = find_place(source_string, h, ss);
190 
191 	if (ss->table[p].str != NULL) return ss->table[p].str;
192 
193 	size_t len = strlen(source_string) + 1;
194 	char *str;
195 
196 #ifdef DEBUG
197 	/* Store the String_set structure address for debug verifications */
198 	size_t mlen = (len&~(sizeof(ss)-1)) + 2*sizeof(ss);
199 	str = ss_stralloc(mlen, ss);
200 	*(String_set **)&str[mlen-sizeof(ss)] = ss;
201 #else /* !DEBUG */
202 	str = ss_stralloc(len, ss);
203 #endif /* DEBUG */
204 	memcpy(str, source_string, len);
205 	ss->table[p].str = str;
206 	ss->table[p].hash = h;
207 	ss->count++;
208 	ss->available_count--;
209 
210 	/* We just added it to the table. */
211 	if (ss->available_count == 0) grow_table(ss); /* Too full */
212 
213 	return str;
214 }
215 
string_set_lookup(const char * source_string,String_set * ss)216 const char * string_set_lookup(const char * source_string, String_set * ss)
217 {
218 	unsigned int h = hash_string(source_string, ss);
219 	unsigned int p = find_place(source_string, h, ss);
220 
221 	return ss->table[p].str;
222 }
223 
string_set_delete(String_set * ss)224 void string_set_delete(String_set *ss)
225 {
226 	if (ss == NULL) return;
227 
228 #ifdef STR_POOL
229 	ss_pool_delete(ss);
230 #else
231 	size_t i;
232 
233 	for (i=0; i<ss->size; i++)
234 	{
235 		if (ss->table[i].str != NULL) free((void *)ss->table[i].str);
236 	}
237 #endif /* STR_POOL */
238 
239 	free(ss->table);
240 	free(ss);
241 }
242