1
2 #include "OVLexicon.h"
3 #include "OVOneToOne.h"
4 #include "OVHeapArray.h"
5 #include "OVreturns.h"
6
7
8 /* should only be accessed by special methods */
9
10 typedef struct {
11 ov_word offset;
12 ov_word next; /* NOTE: 1-based, 0 is the sentinel */
13 ov_word ref_cnt;
14 ov_uword hash;
15 ov_size size;
16 } lex_entry;
17
18 struct _OVLexicon {
19 OVHeap *heap;
20 OVOneToOne *up; /* maps hash_key to index of first zero-based entry */
21 lex_entry *entry; /* maintained as a 1-based index */
22 ov_uword n_entry, n_active;
23 ov_char8 *data;
24 ov_uword data_size;
25 ov_uword data_unused;
26 ov_word free_index; /* NOTE: 1-based, 0 is the sentinel */
27 };
28
OVLexicon_GetNActive(OVLexicon * uk)29 ov_uword OVLexicon_GetNActive(OVLexicon * uk)
30 {
31 return uk->n_active;
32 }
33
OVLexicon_New(OVHeap * heap)34 OVLexicon *OVLexicon_New(OVHeap * heap)
35 {
36 OVLexicon *I = NULL;
37 if(heap) {
38 I = OVHeap_ALLOC(heap, OVLexicon);
39 if(I) {
40 I->heap = heap;
41 I->up = OVOneToOne_New(heap);
42 if(!(I->up)) {
43 OVLexicon_DEL_AUTO_NULL(I);
44 }
45 }
46 }
47 return I;
48 }
49
OVLexicon_Del(OVLexicon * I)50 void OVLexicon_Del(OVLexicon * I)
51 {
52 if(I) {
53 OVOneToOne_DEL_AUTO_NULL(I->up);
54 if(I->entry) {
55 I->entry++; /* allow for 1-based indexing */
56 OVHeapArray_FREE_AUTO_NULL(I->entry);
57 }
58 OVHeapArray_FREE_AUTO_NULL(I->data);
59 OVHeap_Free(I->heap, I);
60 }
61 }
62
OVLexicon_CheckStorage(OVLexicon * uk,ov_word entry_size,ov_size data_size)63 static OVstatus OVLexicon_CheckStorage(OVLexicon * uk, ov_word entry_size,
64 ov_size data_size)
65 {
66 if(!uk->entry) { /* auto zero to initialize ref_cnt fields */
67 uk->entry = OVHeapArray_CALLOC(uk->heap, lex_entry, entry_size);
68 if(!uk->entry) {
69 return_OVstatus_OUT_OF_MEMORY;
70 }
71 uk->entry--; /* allow for 1-based indexing */
72
73 } else {
74 uk->entry++; /* allow for 1-based indexing */
75 if(!OVHeapArray_CHECK(uk->entry, lex_entry, (ov_size) (entry_size - 1))) {
76 return_OVstatus_OUT_OF_MEMORY;
77 }
78 uk->entry--; /* allow for 1-based indexing */
79 }
80
81 if(!uk->data) {
82 uk->data = OVHeapArray_MALLOC(uk->heap, ov_char8, data_size);
83 if(!uk->data) {
84 return_OVstatus_OUT_OF_MEMORY;
85 }
86 } else if(!OVHeapArray_CHECK(uk->data, ov_char8, data_size - 1)) {
87 return_OVstatus_OUT_OF_MEMORY;
88 }
89 return_OVstatus_SUCCESS;
90 }
91
92 /*============================================================================
93 * _GetCStringHash -- returns a djb2 string hash key of the input
94 * PARAMS
95 * string str
96 * RETURNS
97 * string hash key
98 */
_GetCStringHash(ov_uchar8 * str)99 static ov_word _GetCStringHash(ov_uchar8 * str)
100 {
101 const ov_uchar8 *p = str;
102 ov_word x;
103 ov_size len = 0;
104 ov_uchar8 c;
105
106 x = *p << 7;
107 while((c = *(p++))) {
108 x = (x << 5) + x + c; /* djb2 (G5 = 2.8, P3 = 18.7, P4=2.9) FASTEST OVERALL */
109 len++;
110 }
111 x ^= len;
112 return x;
113 }
114
OVLexicon_Pack(OVLexicon * uk)115 OVstatus OVLexicon_Pack(OVLexicon * uk)
116 {
117
118 if(uk->entry && uk->data && uk->n_entry && uk->data_unused) {
119 ov_size new_count = 0;
120 ov_size new_size = 0;
121
122 { /* compute storage requirements */
123 ov_size a;
124 ov_size n_entry = uk->n_entry;
125 lex_entry *cur_entry = uk->entry + 1; /* NOTE: 1-based array */
126 for(a = 0; a < n_entry; a++) {
127 if(cur_entry->ref_cnt > 0) {
128 new_size += cur_entry->size;
129 new_count++;
130 }
131 cur_entry++;
132 }
133 }
134 if((!new_count) && (!new_size)) { /* if lexicon is completely empty, then purge both */
135 uk->entry++; /* alloc for 1-based indexing */
136 OVHeapArray_FREE_AUTO_NULL(uk->entry);
137 OVHeapArray_FREE_AUTO_NULL(uk->data);
138 OVOneToOne_Reset(uk->up);
139 uk->n_entry = 0;
140 uk->n_active = 0;
141 uk->data_unused = 0;
142 uk->data_size = 0;
143 uk->free_index = 0;
144 } else { /* otherwise, pack the string fields, and track the free entries */
145 OVstatus status;
146 ov_char8 *old_data = uk->data;
147 uk->data = NULL;
148
149 if(OVreturn_IS_ERROR(status = OVLexicon_CheckStorage(uk, uk->n_entry, new_size))) {
150 uk->data = old_data;
151 return status;
152 }
153
154 { /* copy data */
155 ov_word a;
156 ov_size n_entry = uk->n_entry, new_size = 0;
157 lex_entry *cur_entry = uk->entry + 1; /* NOTE: 1-based array */
158 ov_char8 *data = old_data;
159 ov_char8 *new_data = uk->data;
160 ov_word free_index = 0;
161 ov_size entry_size;
162
163 for(a = 1; a <= (ov_word) n_entry; a++) {
164 if(cur_entry->ref_cnt > 0) {
165 entry_size = cur_entry->size;
166 memcpy(new_data, data + cur_entry->offset, entry_size);
167 cur_entry->offset = new_size;
168 new_size += entry_size;
169 new_data += entry_size;
170 } else {
171 /* remember for later */
172 cur_entry->next = free_index;
173 cur_entry->ref_cnt = 0;
174 free_index = a;
175 }
176 cur_entry++;
177 }
178
179 OVHeapArray_FREE(old_data);
180 uk->data_unused = 0;
181 uk->data_size = new_size;
182 uk->free_index = free_index;
183
184 }
185 }
186 }
187 return_OVstatus_SUCCESS;
188 }
189
OVLexicon_DecRef(OVLexicon * uk,ov_word id)190 OVstatus OVLexicon_DecRef(OVLexicon * uk, ov_word id)
191 {
192 if((!uk->entry) || (id < 1) || (id > (ov_word) uk->n_entry)) { /* range checking */
193 if(id)
194 printf("OVLexicon_DecRef-Warning: key %zd not found, this might be a bug\n", id);
195 return_OVstatus_NOT_FOUND;
196 } else {
197 lex_entry *cur_entry = uk->entry + id;
198 ov_word ref_cnt = (--cur_entry->ref_cnt);
199 if(ref_cnt < 0) {
200 printf("OVLexicon_DecRef-Warning: key %zd with ref_cnt %zd, this might be a bug\n", id, ref_cnt);
201 return_OVstatus_INVALID_REF_CNT;
202 } else if(!ref_cnt) {
203 OVreturn_word result = OVOneToOne_GetForward(uk->up, cur_entry->hash);
204 if(OVreturn_IS_OK(result)) {
205 ov_word index = result.word;
206 if(index != id) {
207 lex_entry *entry = uk->entry;
208 ov_word next;
209 while(index && (next = entry[index].next) != id)
210 index = next;
211 if(index)
212 entry[index].next = entry[id].next; /* excise from list */
213 } else {
214 /* remove entry from OneToOne */
215 OVOneToOne_DelReverse(uk->up, id);
216 /* if non-terminal, then add next entry to OneToOne */
217 if(cur_entry->next) {
218 OVOneToOne_Set(uk->up, cur_entry->hash, cur_entry->next); /* NOTE: no error checking performed! */
219 }
220 }
221 }
222 uk->data_unused += cur_entry->size;
223 uk->n_active--;
224 if(uk->data_unused >= (uk->data_size >> 1)) /* if 50% unutilized, then pack */
225 OVLexicon_Pack(uk);
226 }
227 return_OVstatus_SUCCESS;
228 }
229 }
230
OVLexicon_IncRef(OVLexicon * uk,ov_word id)231 OVstatus OVLexicon_IncRef(OVLexicon * uk, ov_word id)
232 {
233 if((!uk->entry) || (id < 1) || (id > (ov_word) uk->n_entry)) { /* range checking */
234 return_OVstatus_NOT_FOUND;
235 } else {
236 lex_entry *entry = uk->entry + id;
237 ov_word ref_cnt = (++entry->ref_cnt);
238 if(ref_cnt < 2) { /* was reference count zero or less? */
239 /* safety precauations */
240 entry->ref_cnt = 0;
241 entry->size = 0;
242 entry->offset = 0;
243 return_OVstatus_INVALID_REF_CNT;
244 } else {
245 return_OVstatus_SUCCESS;
246 }
247 }
248 }
249
OVLexicon_GetFromCString(OVLexicon * uk,const ov_char8 * str)250 OVreturn_word OVLexicon_GetFromCString(OVLexicon * uk, const ov_char8 * str)
251 {
252 ov_word hash = _GetCStringHash((ov_uchar8 *) str);
253 OVreturn_word search = OVOneToOne_GetForward(uk->up, hash);
254 ov_word index = 0;
255 ov_word cur_index = 0;
256 ov_char8 *c;
257
258 if(OVreturn_IS_OK(search)) {
259 /* the hash key has been registered, so follow the chain
260 * looking for a match... */
261 ov_char8 *data = uk->data;
262 lex_entry *entry = uk->entry, *entry_ptr;
263 cur_index = (index = search.word);
264 while(index) { /* found */
265 c = data + (entry_ptr = (entry + index))->offset;
266 if(strcmp(c, str) != 0) { /* verify match */
267 index = entry_ptr->next; /* not a match? keep looking */
268 } else {
269 break;
270 }
271 }
272 }
273 if(!index) {
274 /* no match was found (or hash is new) so add this string to the lexicon */
275 ov_size st_size = strlen(str) + 1;
276 lex_entry *entry, *entry_ptr, *cur_entry_ptr;
277 OVstatus status;
278 {
279 ov_size new_size = uk->data_size + st_size;
280 ov_size new_n_entry = uk->n_entry;
281 if(!uk->free_index)
282 new_n_entry++;
283 /* allocate storage as necessary */
284 if(OVreturn_IS_ERROR(status = OVLexicon_CheckStorage(uk, new_n_entry, new_size))) {
285 OVreturn_word result;
286 result.status = status.status;
287 result.word = 0;
288 return result;
289 }
290 }
291 /* WARNING: uk->entry, uk->data_size, and uk->data may have changed! */
292
293 if(!uk->free_index)
294 index = (++uk->n_entry); /* NOTE: 1-based indices */
295 else {
296 index = uk->free_index;
297 uk->free_index = uk->entry[index].next;
298 }
299 uk->n_active++;
300
301 if(!cur_index) {
302 /* if the hash key was new, add it to the OneToOne, and setup entry */
303 if(OVreturn_IS_ERROR(status = OVOneToOne_Set(uk->up, hash, index))) {
304 OVreturn_word result;
305 uk->entry[index].next = uk->free_index; /* record this as a free entry */
306 uk->free_index = index;
307 uk->n_active--;
308 result.status = status.status;
309 result.word = 0;
310 return result;
311 }
312 entry_ptr = uk->entry + index;
313 entry_ptr->next = 0;
314
315 } else {
316 /* otherwise, simply add this entry on to the list in position 2 */
317
318 cur_entry_ptr = (entry = uk->entry) + cur_index;
319 entry_ptr = entry + index;
320 entry_ptr->next = cur_entry_ptr->next;
321 cur_entry_ptr->next = index;
322 }
323 /* increase the counts and sizes to accomodate this new entry & copy info */
324 entry_ptr->hash = hash;
325 entry_ptr->size = st_size;
326 entry_ptr->offset = uk->data_size;
327 entry_ptr->ref_cnt++; /* increase ref_cnt */
328 strcpy(uk->data + uk->data_size, str);
329 uk->data_size += st_size;
330 } else {
331 lex_entry *entry_ptr;
332 entry_ptr = uk->entry + index;
333 entry_ptr->ref_cnt++; /* increase ref_cnt */
334 }
335 {
336 OVreturn_word result = { OVstatus_SUCCESS };
337 result.word = index;
338 return result;
339 }
340 }
341
OVLexicon_BorrowFromCString(OVLexicon * uk,const ov_char8 * str)342 OVreturn_word OVLexicon_BorrowFromCString(OVLexicon * uk, const ov_char8 * str)
343 {
344 /* get hash token for this word */
345 ov_word hash = _GetCStringHash((ov_uchar8 *) str);
346 OVreturn_word search = OVOneToOne_GetForward(uk->up, hash);
347 ov_word index = 0;
348 ov_char8 *c;
349
350 /* error is something like OVstatus_NULL_PTR, OVstatus_NOT_FOUND; see ov/src/OVreturns.h */
351 if(OVreturn_IS_ERROR(search)) {
352 return search;
353 } else {
354 /* the hash key has been registered, so follow the chain
355 * looking for a match... */
356 ov_char8 *data = uk->data;
357 lex_entry *entry = uk->entry, *entry_ptr;
358 index = search.word;
359 while(index) { /* found */
360 c = data + (entry_ptr = (entry + index))->offset;
361 if(strcmp(c, str) != 0) { /* verify match */
362 index = entry_ptr->next; /* not a match? keep looking */
363 } else {
364 break;
365 }
366 }
367 }
368
369 if(!index) {
370 OVreturn_word result = { OVstatus_NOT_FOUND };
371 result.word = index;
372 return result;
373 } else {
374 OVreturn_word result = { OVstatus_SUCCESS };
375 result.word = index;
376 return result;
377 }
378 }
379
OVLexicon_FetchCString(OVLexicon * uk,ov_word id)380 ov_char8 *OVLexicon_FetchCString(OVLexicon * uk, ov_word id)
381 {
382 if(id <= (ov_word) uk->n_entry)
383 return uk->data + uk->entry[id].offset;
384 return NULL;
385 }
386