1 //
2 // Copyright (C) 2011-2017 Nick Gasson
3 //
4 // This program is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // This program is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License
15 // along with this program. If not, see <http://www.gnu.org/licenses/>.
16 //
17
18 #include "util.h"
19 #include "fbuf.h"
20 #include "ident.h"
21
22 #include <assert.h>
23 #include <stdbool.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <stdint.h>
27 #include <ctype.h>
28
29 #define MAP_DEPTH 3
30
31 typedef struct clist clist_t;
32 typedef struct trie trie_t;
33
34 struct clist {
35 char value;
36 trie_t *down;
37 clist_t *left;
38 clist_t *right;
39 };
40
41 struct trie {
42 char value;
43 uint16_t write_gen;
44 uint16_t depth;
45 uint32_t write_index;
46 trie_t *up;
47 clist_t *list;
48 trie_t *map[0];
49 };
50
51 struct ident_rd_ctx {
52 fbuf_t *file;
53 size_t cache_sz;
54 size_t cache_alloc;
55 ident_t *cache;
56 };
57
58 struct ident_wr_ctx {
59 fbuf_t *file;
60 uint32_t next_index;
61 uint16_t generation;
62 };
63
64 typedef struct {
65 trie_t trie;
66 trie_t *map[256];
67 } root_t;
68
69 static root_t root = {
70 {
71 .value = '\0',
72 .write_gen = 0,
73 .write_index = 0,
74 .depth = 1,
75 .up = NULL
76 }
77 };
78
alloc_node(char ch,trie_t * prev)79 static trie_t *alloc_node(char ch, trie_t *prev)
80 {
81 const size_t mapsz = (prev->depth < MAP_DEPTH) ? 256 * sizeof(trie_t *) : 0;
82
83 trie_t *t = xmalloc(sizeof(trie_t) + mapsz);
84 t->value = ch;
85 t->depth = prev->depth + 1;
86 t->up = prev;
87 t->write_gen = 0;
88 t->list = NULL;
89
90 if (mapsz > 0)
91 memset(t->map, '\0', mapsz);
92
93 if (prev->depth <= MAP_DEPTH)
94 prev->map[(unsigned char)ch] = t;
95 else {
96 clist_t *c = xmalloc(sizeof(clist_t));
97 c->value = ch;
98 c->down = t;
99 c->left = NULL;
100 c->right = NULL;
101
102 clist_t *it, **where;
103 for (it = prev->list, where = &(prev->list);
104 it != NULL;
105 where = (ch < it->value ? &(it->left) : &(it->right)),
106 it = *where)
107 ;
108
109 *where = c;
110 }
111
112 return t;
113 }
114
build_trie(const char * str,trie_t * prev,trie_t ** end)115 static void build_trie(const char *str, trie_t *prev, trie_t **end)
116 {
117 assert(*str != '\0');
118 assert(prev != NULL);
119
120 trie_t *t = alloc_node(*str, prev);
121
122 if (*(++str) == '\0')
123 *end = t;
124 else
125 build_trie(str, t, end);
126 }
127
search_node(trie_t * t,char ch)128 static clist_t *search_node(trie_t *t, char ch)
129 {
130 clist_t *it;
131 for (it = t->list;
132 (it != NULL) && (it->value != ch);
133 it = (ch < it->value ? it->left : it->right))
134 ;
135
136 return it;
137 }
138
search_trie(const char ** str,trie_t * t,trie_t ** end)139 static bool search_trie(const char **str, trie_t *t, trie_t **end)
140 {
141 assert(**str != '\0');
142 assert(t != NULL);
143
144 trie_t *next = NULL;
145
146 if (t->depth <= MAP_DEPTH)
147 next = t->map[(unsigned char)**str];
148 else {
149 clist_t *it = search_node(t, **str);
150 next = (it != NULL) ? it->down : NULL;
151 }
152
153 if (next == NULL) {
154 *end = t;
155 return false;
156 }
157 else {
158 (*str)++;
159
160 if (**str == '\0') {
161 *end = next;
162 return true;
163 }
164 else
165 return search_trie(str, next, end);
166 }
167 }
168
ident_new(const char * str)169 ident_t ident_new(const char *str)
170 {
171 assert(str != NULL);
172 assert(*str != '\0');
173
174 trie_t *result;
175 if (!search_trie(&str, &(root.trie), &result))
176 build_trie(str, result, &result);
177
178 return result;
179 }
180
ident_interned(const char * str)181 bool ident_interned(const char *str)
182 {
183 assert(str != NULL);
184 assert(*str != '\0');
185
186 trie_t *result;
187 return search_trie(&str, &(root.trie), &result);
188 }
189
istr(ident_t ident)190 const char *istr(ident_t ident)
191 {
192 if (ident == NULL)
193 return NULL;
194
195 char *p = get_fmt_buf(ident->depth) + ident->depth - 1;
196 *p = '\0';
197
198 trie_t *it;
199 for (it = ident; it->value != '\0'; it = it->up)
200 *(--p) = it->value;
201
202 return p;
203 }
204
ident_write_begin(fbuf_t * f)205 ident_wr_ctx_t ident_write_begin(fbuf_t *f)
206 {
207 static uint16_t ident_wr_gen = 1;
208 assert(ident_wr_gen > 0);
209
210 struct ident_wr_ctx *ctx = xmalloc(sizeof(struct ident_wr_ctx));
211 ctx->file = f;
212 ctx->generation = ident_wr_gen++;
213 ctx->next_index = 0;
214
215 return ctx;
216 }
217
ident_write_end(ident_wr_ctx_t ctx)218 void ident_write_end(ident_wr_ctx_t ctx)
219 {
220 free(ctx);
221 }
222
ident_write(ident_t ident,ident_wr_ctx_t ctx)223 void ident_write(ident_t ident, ident_wr_ctx_t ctx)
224 {
225 if (ident == NULL) {
226 write_u32(UINT32_MAX, ctx->file);
227 write_u8(0, ctx->file);
228 }
229 else if (ident->write_gen == ctx->generation)
230 write_u32(ident->write_index, ctx->file);
231 else {
232 write_u32(UINT32_MAX, ctx->file);
233 write_raw(istr(ident), ident->depth, ctx->file);
234
235 ident->write_gen = ctx->generation;
236 ident->write_index = ctx->next_index++;
237
238 assert(ctx->next_index != UINT32_MAX);
239 }
240 }
241
ident_read_begin(fbuf_t * f)242 ident_rd_ctx_t ident_read_begin(fbuf_t *f)
243 {
244 struct ident_rd_ctx *ctx = xmalloc(sizeof(struct ident_rd_ctx));
245 ctx->file = f;
246 ctx->cache_alloc = 256;
247 ctx->cache_sz = 0;
248 ctx->cache = xmalloc(ctx->cache_alloc * sizeof(ident_t));
249
250 return ctx;
251 }
252
ident_read_end(ident_rd_ctx_t ctx)253 void ident_read_end(ident_rd_ctx_t ctx)
254 {
255 free(ctx->cache);
256 free(ctx);
257 }
258
ident_read(ident_rd_ctx_t ctx)259 ident_t ident_read(ident_rd_ctx_t ctx)
260 {
261 const uint32_t index = read_u32(ctx->file);
262 if (index == UINT32_MAX) {
263 if (ctx->cache_sz == ctx->cache_alloc) {
264 ctx->cache_alloc *= 2;
265 ctx->cache = xrealloc(ctx->cache, ctx->cache_alloc * sizeof(ident_t));
266 }
267
268 trie_t *p = &(root.trie);
269 char ch;
270 while ((ch = read_u8(ctx->file)) != '\0') {
271 trie_t *next = NULL;
272 if (p->depth <= MAP_DEPTH)
273 next = p->map[(unsigned char)ch];
274 else {
275 clist_t *it = search_node(p, ch);
276 next = (it != NULL) ? it->down : NULL;
277 }
278
279 if (next != NULL)
280 p = next;
281 else
282 p = alloc_node(ch, p);
283 }
284
285 if (p == &(root.trie))
286 return NULL;
287 else {
288 ctx->cache[ctx->cache_sz++] = p;
289 return p;
290 }
291 }
292 else if (likely(index < ctx->cache_sz))
293 return ctx->cache[index];
294 else
295 fatal("ident index in %s is corrupt: index=%d cache_sz=%d",
296 fbuf_file_name(ctx->file), index, (int)ctx->cache_sz);
297 }
298
ident_uniq(const char * prefix)299 ident_t ident_uniq(const char *prefix)
300 {
301 static int counter = 0;
302
303 const char *start = prefix;
304 trie_t *end;
305 if (search_trie(&start, &(root.trie), &end)) {
306 const size_t len = strlen(prefix) + 16;
307 char buf[len];
308 snprintf(buf, len, "%s%d", prefix, counter++);
309
310 return ident_new(buf);
311 }
312 else {
313 trie_t *result;
314 build_trie(start, end, &result);
315 return result;
316 }
317 }
318
ident_prefix(ident_t a,ident_t b,char sep)319 ident_t ident_prefix(ident_t a, ident_t b, char sep)
320 {
321 if (a == NULL)
322 return b;
323 else if (b == NULL)
324 return a;
325
326 trie_t *result;
327
328 if (sep != '\0') {
329 // Append separator
330 const char sep_str[] = { sep, '\0' };
331 const char *p_sep_str = sep_str;
332 if (!search_trie(&p_sep_str, a, &result))
333 build_trie(p_sep_str, result, &result);
334 }
335 else
336 result = a;
337
338 // Append b
339 const char *bstr = istr(b);
340 if (!search_trie(&bstr, result, &result))
341 build_trie(bstr, result, &result);
342
343 return result;
344 }
345
ident_strip(ident_t a,ident_t b)346 ident_t ident_strip(ident_t a, ident_t b)
347 {
348 assert(a != NULL);
349 assert(b != NULL);
350
351 while (a->value == b->value && b->value != '\0') {
352 a = a->up;
353 b = b->up;
354 }
355
356 return (b->value == '\0' ? a : NULL);
357 }
358
ident_char(ident_t i,unsigned n)359 char ident_char(ident_t i, unsigned n)
360 {
361 if (i == NULL)
362 return '\0';
363 else if (n == 0)
364 return i->value;
365 else
366 return ident_char(i->up, n - 1);
367 }
368
ident_len(ident_t i)369 size_t ident_len(ident_t i)
370 {
371 if (i == NULL || i->value == '\0')
372 return 0;
373 else
374 return ident_len(i->up) + 1;
375 }
376
ident_suffix_until(ident_t i,char c,ident_t shared,char escape)377 ident_t ident_suffix_until(ident_t i, char c, ident_t shared, char escape)
378 {
379 assert(i != NULL);
380
381 bool escaping = false;
382 ident_t r = i;
383 while (i->value != '\0' && i->up != shared) {
384 if (!escaping && i->value == c)
385 r = i->up;
386 else if (i->value == escape)
387 escaping = !escaping;
388 i = i->up;
389 }
390
391 return r;
392 }
393
ident_until(ident_t i,char c)394 ident_t ident_until(ident_t i, char c)
395 {
396 return ident_suffix_until(i, c, NULL, '\0');
397 }
398
ident_runtil(ident_t i,char c)399 ident_t ident_runtil(ident_t i, char c)
400 {
401 assert(i != NULL);
402
403 for (ident_t r = i; r->value != '\0'; r = r->up) {
404 if (r->value == c)
405 return r->up;
406 }
407
408 return i;
409 }
410
ident_from(ident_t i,char c)411 ident_t ident_from(ident_t i, char c)
412 {
413 assert(i != NULL);
414
415 char buf[i->depth + 1];
416 char *p = buf + i->depth;
417 *p-- = '\0';
418
419 char *from = NULL;
420 while (i->value != '\0') {
421 if (i->value == c)
422 from = p + 1;
423 *p-- = i->value;
424 i = i->up;
425 }
426
427 return (from == NULL) ? NULL : ident_new(from);
428 }
429
ident_rfrom(ident_t i,char c)430 ident_t ident_rfrom(ident_t i, char c)
431 {
432 assert(i != NULL);
433
434 char buf[i->depth + 1];
435 char *p = buf + i->depth;
436 *p-- = '\0';
437
438 while (i->value != '\0') {
439 if (i->value == c)
440 return ident_new(p + 1);
441 *p-- = i->value;
442 i = i->up;
443 }
444
445 return NULL;
446 }
447
icmp(ident_t i,const char * s)448 bool icmp(ident_t i, const char *s)
449 {
450 assert(i != NULL);
451
452 trie_t *result;
453 if (!search_trie(&s, &(root.trie), &result))
454 return false;
455 else
456 return result == i;
457 }
458
ident_compare(ident_t a,ident_t b)459 int ident_compare(ident_t a, ident_t b)
460 {
461 if (a->up == b->up)
462 return a->value - b->value;
463 else if (a->depth > b->depth) {
464 int cmp = ident_compare(a->up, b);
465 return cmp == 0 ? a->value : cmp;
466 }
467 else if (b->depth > a->depth) {
468 int cmp = ident_compare(a, b->up);
469 return cmp == 0 ? 0 - b->value : cmp;
470 }
471 else
472 return ident_compare(a->up, b->up);
473 }
474
ident_glob_walk(const trie_t * i,const char * g,const char * const end)475 static bool ident_glob_walk(const trie_t *i, const char *g,
476 const char *const end)
477 {
478 if (i->value == '\0')
479 return (g < end);
480 else if (g < end)
481 return false;
482 else if (*g == '*')
483 return ident_glob_walk(i->up, g, end)
484 || ident_glob_walk(i->up, g - 1, end);
485 else if (i->value == *g)
486 return ident_glob_walk(i->up, g - 1, end);
487 else
488 return false;
489 }
490
ident_glob(ident_t i,const char * glob,int length)491 bool ident_glob(ident_t i, const char *glob, int length)
492 {
493 assert(i != NULL);
494
495 if (length < 0)
496 length = strlen(glob);
497
498 return ident_glob_walk(i, glob + length - 1, glob);
499 }
500
ident_contains(ident_t i,const char * search)501 bool ident_contains(ident_t i, const char *search)
502 {
503 assert(i != NULL);
504
505 for (ident_t r = i; r->value != '\0'; r = r->up) {
506 for (const char *p = search; *p != '\0'; p++) {
507 if (r->value == *p)
508 return true;
509 }
510 }
511
512 return false;
513 }
514
ident_downcase(ident_t i)515 ident_t ident_downcase(ident_t i)
516 {
517 // TODO: this could be implemented more efficiently
518
519 if (i == NULL)
520 return NULL;
521
522 char *p = get_fmt_buf(i->depth) + i->depth - 1;
523 *p = '\0';
524
525 trie_t *it;
526 for (it = i; it->value != '\0'; it = it->up)
527 *(--p) = tolower((int)it->value);
528
529 return ident_new(p);
530 }
531
ident_list_add(ident_list_t ** list,ident_t i)532 void ident_list_add(ident_list_t **list, ident_t i)
533 {
534 ident_list_t *c = xmalloc(sizeof(ident_list_t));
535 c->ident = i;
536 c->next = *list;
537
538 *list = c;
539 }
540
ident_list_push(ident_list_t ** list,ident_t i)541 void ident_list_push(ident_list_t **list, ident_t i)
542 {
543 ident_list_t *c = xmalloc(sizeof(ident_list_t));
544 c->ident = i;
545 c->next = NULL;
546
547 if (*list == NULL)
548 *list = c;
549 else {
550 ident_list_t *it;
551 for (it = *list; it->next != NULL; it = it->next)
552 ;
553 it->next = c;
554 }
555 }
556
ident_list_free(ident_list_t * list)557 void ident_list_free(ident_list_t *list)
558 {
559 ident_list_t *it = list;
560 while (it != NULL) {
561 ident_list_t *next = it->next;
562 free(it);
563 it = next;
564 }
565 }
566
_ident_list_cleanup(ident_list_t ** list)567 void _ident_list_cleanup(ident_list_t **list)
568 {
569 ident_list_free(*list);
570 *list = NULL;
571 }
572
ident_list_find(const ident_list_t * list,ident_t i)573 bool ident_list_find(const ident_list_t *list, ident_t i)
574 {
575 for (; list != NULL; list = list->next) {
576 if (list->ident == i)
577 return true;
578 }
579
580 return false;
581 }
582