1 //
2 //  Copyright (C) 2011-2017  Nick Gasson
3 //
4 //  This program is free software: you can redistribute it and/or modify
5 //  it under the terms of the GNU General Public License as published by
6 //  the Free Software Foundation, either version 3 of the License, or
7 //  (at your option) any later version.
8 //
9 //  This program is distributed in the hope that it will be useful,
10 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
11 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 //  GNU General Public License for more details.
13 //
14 //  You should have received a copy of the GNU General Public License
15 //  along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 //
17 
18 #include "util.h"
19 #include "fbuf.h"
20 #include "ident.h"
21 
22 #include <assert.h>
23 #include <stdbool.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <stdint.h>
27 #include <ctype.h>
28 
29 #define MAP_DEPTH 3
30 
31 typedef struct clist clist_t;
32 typedef struct trie  trie_t;
33 
34 struct clist {
35    char     value;
36    trie_t  *down;
37    clist_t *left;
38    clist_t *right;
39 };
40 
41 struct trie {
42    char      value;
43    uint16_t  write_gen;
44    uint16_t  depth;
45    uint32_t  write_index;
46    trie_t   *up;
47    clist_t  *list;
48    trie_t   *map[0];
49 };
50 
51 struct ident_rd_ctx {
52    fbuf_t  *file;
53    size_t   cache_sz;
54    size_t   cache_alloc;
55    ident_t *cache;
56 };
57 
58 struct ident_wr_ctx {
59    fbuf_t   *file;
60    uint32_t  next_index;
61    uint16_t  generation;
62 };
63 
64 typedef struct {
65    trie_t  trie;
66    trie_t *map[256];
67 } root_t;
68 
69 static root_t root = {
70    {
71       .value       = '\0',
72       .write_gen   = 0,
73       .write_index = 0,
74       .depth       = 1,
75       .up          = NULL
76    }
77 };
78 
alloc_node(char ch,trie_t * prev)79 static trie_t *alloc_node(char ch, trie_t *prev)
80 {
81    const size_t mapsz = (prev->depth < MAP_DEPTH) ? 256 * sizeof(trie_t *) : 0;
82 
83    trie_t *t = xmalloc(sizeof(trie_t) + mapsz);
84    t->value     = ch;
85    t->depth     = prev->depth + 1;
86    t->up        = prev;
87    t->write_gen = 0;
88    t->list      = NULL;
89 
90    if (mapsz > 0)
91       memset(t->map, '\0', mapsz);
92 
93    if (prev->depth <= MAP_DEPTH)
94       prev->map[(unsigned char)ch] = t;
95    else {
96       clist_t *c = xmalloc(sizeof(clist_t));
97       c->value    = ch;
98       c->down     = t;
99       c->left     = NULL;
100       c->right    = NULL;
101 
102       clist_t *it, **where;
103       for (it = prev->list, where = &(prev->list);
104            it != NULL;
105            where = (ch < it->value ? &(it->left) : &(it->right)),
106               it = *where)
107          ;
108 
109       *where = c;
110    }
111 
112    return t;
113 }
114 
build_trie(const char * str,trie_t * prev,trie_t ** end)115 static void build_trie(const char *str, trie_t *prev, trie_t **end)
116 {
117    assert(*str != '\0');
118    assert(prev != NULL);
119 
120    trie_t *t = alloc_node(*str, prev);
121 
122    if (*(++str) == '\0')
123       *end = t;
124    else
125       build_trie(str, t, end);
126 }
127 
search_node(trie_t * t,char ch)128 static clist_t *search_node(trie_t *t, char ch)
129 {
130    clist_t *it;
131    for (it = t->list;
132         (it != NULL) && (it->value != ch);
133         it = (ch < it->value ? it->left : it->right))
134       ;
135 
136    return it;
137 }
138 
search_trie(const char ** str,trie_t * t,trie_t ** end)139 static bool search_trie(const char **str, trie_t *t, trie_t **end)
140 {
141    assert(**str != '\0');
142    assert(t != NULL);
143 
144    trie_t *next = NULL;
145 
146    if (t->depth <= MAP_DEPTH)
147       next = t->map[(unsigned char)**str];
148    else {
149       clist_t *it = search_node(t, **str);
150       next = (it != NULL) ? it->down : NULL;
151    }
152 
153    if (next == NULL) {
154       *end = t;
155       return false;
156    }
157    else {
158       (*str)++;
159 
160       if (**str == '\0') {
161          *end = next;
162          return true;
163       }
164       else
165          return search_trie(str, next, end);
166    }
167 }
168 
ident_new(const char * str)169 ident_t ident_new(const char *str)
170 {
171    assert(str != NULL);
172    assert(*str != '\0');
173 
174    trie_t *result;
175    if (!search_trie(&str, &(root.trie), &result))
176       build_trie(str, result, &result);
177 
178    return result;
179 }
180 
ident_interned(const char * str)181 bool ident_interned(const char *str)
182 {
183    assert(str != NULL);
184    assert(*str != '\0');
185 
186    trie_t *result;
187    return search_trie(&str, &(root.trie), &result);
188 }
189 
istr(ident_t ident)190 const char *istr(ident_t ident)
191 {
192    if (ident == NULL)
193       return NULL;
194 
195    char *p = get_fmt_buf(ident->depth) + ident->depth - 1;
196    *p = '\0';
197 
198    trie_t *it;
199    for (it = ident; it->value != '\0'; it = it->up)
200       *(--p) = it->value;
201 
202    return p;
203 }
204 
ident_write_begin(fbuf_t * f)205 ident_wr_ctx_t ident_write_begin(fbuf_t *f)
206 {
207    static uint16_t ident_wr_gen = 1;
208    assert(ident_wr_gen > 0);
209 
210    struct ident_wr_ctx *ctx = xmalloc(sizeof(struct ident_wr_ctx));
211    ctx->file       = f;
212    ctx->generation = ident_wr_gen++;
213    ctx->next_index = 0;
214 
215    return ctx;
216 }
217 
ident_write_end(ident_wr_ctx_t ctx)218 void ident_write_end(ident_wr_ctx_t ctx)
219 {
220    free(ctx);
221 }
222 
ident_write(ident_t ident,ident_wr_ctx_t ctx)223 void ident_write(ident_t ident, ident_wr_ctx_t ctx)
224 {
225    if (ident == NULL) {
226       write_u32(UINT32_MAX, ctx->file);
227       write_u8(0, ctx->file);
228    }
229    else if (ident->write_gen == ctx->generation)
230       write_u32(ident->write_index, ctx->file);
231    else {
232       write_u32(UINT32_MAX, ctx->file);
233       write_raw(istr(ident), ident->depth, ctx->file);
234 
235       ident->write_gen   = ctx->generation;
236       ident->write_index = ctx->next_index++;
237 
238       assert(ctx->next_index != UINT32_MAX);
239    }
240 }
241 
ident_read_begin(fbuf_t * f)242 ident_rd_ctx_t ident_read_begin(fbuf_t *f)
243 {
244    struct ident_rd_ctx *ctx = xmalloc(sizeof(struct ident_rd_ctx));
245    ctx->file        = f;
246    ctx->cache_alloc = 256;
247    ctx->cache_sz    = 0;
248    ctx->cache       = xmalloc(ctx->cache_alloc * sizeof(ident_t));
249 
250    return ctx;
251 }
252 
ident_read_end(ident_rd_ctx_t ctx)253 void ident_read_end(ident_rd_ctx_t ctx)
254 {
255    free(ctx->cache);
256    free(ctx);
257 }
258 
ident_read(ident_rd_ctx_t ctx)259 ident_t ident_read(ident_rd_ctx_t ctx)
260 {
261    const uint32_t index = read_u32(ctx->file);
262    if (index == UINT32_MAX) {
263       if (ctx->cache_sz == ctx->cache_alloc) {
264          ctx->cache_alloc *= 2;
265          ctx->cache = xrealloc(ctx->cache, ctx->cache_alloc * sizeof(ident_t));
266       }
267 
268       trie_t *p = &(root.trie);
269       char ch;
270       while ((ch = read_u8(ctx->file)) != '\0') {
271          trie_t *next = NULL;
272          if (p->depth <= MAP_DEPTH)
273             next = p->map[(unsigned char)ch];
274          else {
275             clist_t *it = search_node(p, ch);
276             next = (it != NULL) ? it->down : NULL;
277          }
278 
279          if (next != NULL)
280             p = next;
281          else
282             p = alloc_node(ch, p);
283       }
284 
285       if (p == &(root.trie))
286          return NULL;
287       else {
288          ctx->cache[ctx->cache_sz++] = p;
289          return p;
290       }
291    }
292    else if (likely(index < ctx->cache_sz))
293       return ctx->cache[index];
294    else
295       fatal("ident index in %s is corrupt: index=%d cache_sz=%d",
296             fbuf_file_name(ctx->file), index, (int)ctx->cache_sz);
297 }
298 
ident_uniq(const char * prefix)299 ident_t ident_uniq(const char *prefix)
300 {
301    static int counter = 0;
302 
303    const char *start = prefix;
304    trie_t *end;
305    if (search_trie(&start, &(root.trie), &end)) {
306       const size_t len = strlen(prefix) + 16;
307       char buf[len];
308       snprintf(buf, len, "%s%d", prefix, counter++);
309 
310       return ident_new(buf);
311    }
312    else {
313       trie_t *result;
314       build_trie(start, end, &result);
315       return result;
316    }
317 }
318 
ident_prefix(ident_t a,ident_t b,char sep)319 ident_t ident_prefix(ident_t a, ident_t b, char sep)
320 {
321    if (a == NULL)
322       return b;
323    else if (b == NULL)
324       return a;
325 
326    trie_t *result;
327 
328    if (sep != '\0') {
329       // Append separator
330       const char sep_str[] = { sep, '\0' };
331       const char *p_sep_str = sep_str;
332       if (!search_trie(&p_sep_str, a, &result))
333          build_trie(p_sep_str, result, &result);
334    }
335    else
336       result = a;
337 
338    // Append b
339    const char *bstr = istr(b);
340    if (!search_trie(&bstr, result, &result))
341       build_trie(bstr, result, &result);
342 
343    return result;
344 }
345 
ident_strip(ident_t a,ident_t b)346 ident_t ident_strip(ident_t a, ident_t b)
347 {
348    assert(a != NULL);
349    assert(b != NULL);
350 
351    while (a->value == b->value && b->value != '\0') {
352       a = a->up;
353       b = b->up;
354    }
355 
356    return (b->value == '\0' ? a : NULL);
357 }
358 
ident_char(ident_t i,unsigned n)359 char ident_char(ident_t i, unsigned n)
360 {
361    if (i == NULL)
362       return '\0';
363    else if (n == 0)
364       return i->value;
365    else
366       return ident_char(i->up, n - 1);
367 }
368 
ident_len(ident_t i)369 size_t ident_len(ident_t i)
370 {
371    if (i == NULL || i->value == '\0')
372       return 0;
373    else
374       return ident_len(i->up) + 1;
375 }
376 
ident_suffix_until(ident_t i,char c,ident_t shared,char escape)377 ident_t ident_suffix_until(ident_t i, char c, ident_t shared, char escape)
378 {
379    assert(i != NULL);
380 
381    bool escaping = false;
382    ident_t r = i;
383    while (i->value != '\0' && i->up != shared) {
384       if (!escaping && i->value == c)
385          r = i->up;
386       else if (i->value == escape)
387          escaping = !escaping;
388       i = i->up;
389    }
390 
391    return r;
392 }
393 
ident_until(ident_t i,char c)394 ident_t ident_until(ident_t i, char c)
395 {
396    return ident_suffix_until(i, c, NULL, '\0');
397 }
398 
ident_runtil(ident_t i,char c)399 ident_t ident_runtil(ident_t i, char c)
400 {
401    assert(i != NULL);
402 
403    for (ident_t r = i; r->value != '\0'; r = r->up) {
404       if (r->value == c)
405          return r->up;
406    }
407 
408    return i;
409 }
410 
ident_from(ident_t i,char c)411 ident_t ident_from(ident_t i, char c)
412 {
413    assert(i != NULL);
414 
415    char buf[i->depth + 1];
416    char *p = buf + i->depth;
417    *p-- = '\0';
418 
419    char *from = NULL;
420    while (i->value != '\0') {
421       if (i->value == c)
422          from = p + 1;
423       *p-- = i->value;
424       i = i->up;
425    }
426 
427    return (from == NULL) ? NULL : ident_new(from);
428 }
429 
ident_rfrom(ident_t i,char c)430 ident_t ident_rfrom(ident_t i, char c)
431 {
432    assert(i != NULL);
433 
434    char buf[i->depth + 1];
435    char *p = buf + i->depth;
436    *p-- = '\0';
437 
438    while (i->value != '\0') {
439       if (i->value == c)
440          return ident_new(p + 1);
441       *p-- = i->value;
442       i = i->up;
443    }
444 
445    return NULL;
446 }
447 
icmp(ident_t i,const char * s)448 bool icmp(ident_t i, const char *s)
449 {
450    assert(i != NULL);
451 
452    trie_t *result;
453    if (!search_trie(&s, &(root.trie), &result))
454       return false;
455    else
456       return result == i;
457 }
458 
ident_compare(ident_t a,ident_t b)459 int ident_compare(ident_t a, ident_t b)
460 {
461    if (a->up == b->up)
462       return a->value - b->value;
463    else if (a->depth > b->depth) {
464       int cmp = ident_compare(a->up, b);
465       return cmp == 0 ? a->value : cmp;
466    }
467    else if (b->depth > a->depth) {
468       int cmp = ident_compare(a, b->up);
469       return cmp == 0 ? 0 - b->value : cmp;
470    }
471    else
472       return ident_compare(a->up, b->up);
473 }
474 
ident_glob_walk(const trie_t * i,const char * g,const char * const end)475 static bool ident_glob_walk(const trie_t *i, const char *g,
476                             const char *const end)
477 {
478    if (i->value == '\0')
479       return (g < end);
480    else if (g < end)
481       return false;
482    else if (*g == '*')
483       return ident_glob_walk(i->up, g, end)
484          || ident_glob_walk(i->up, g - 1, end);
485    else if (i->value == *g)
486       return ident_glob_walk(i->up, g - 1, end);
487    else
488       return false;
489 }
490 
ident_glob(ident_t i,const char * glob,int length)491 bool ident_glob(ident_t i, const char *glob, int length)
492 {
493    assert(i != NULL);
494 
495    if (length < 0)
496       length = strlen(glob);
497 
498    return ident_glob_walk(i, glob + length - 1, glob);
499 }
500 
ident_contains(ident_t i,const char * search)501 bool ident_contains(ident_t i, const char *search)
502 {
503    assert(i != NULL);
504 
505    for (ident_t r = i; r->value != '\0'; r = r->up) {
506       for (const char *p = search; *p != '\0'; p++) {
507          if (r->value == *p)
508             return true;
509       }
510    }
511 
512    return false;
513 }
514 
ident_downcase(ident_t i)515 ident_t ident_downcase(ident_t i)
516 {
517    // TODO: this could be implemented more efficiently
518 
519    if (i == NULL)
520       return NULL;
521 
522    char *p = get_fmt_buf(i->depth) + i->depth - 1;
523    *p = '\0';
524 
525    trie_t *it;
526    for (it = i; it->value != '\0'; it = it->up)
527       *(--p) = tolower((int)it->value);
528 
529    return ident_new(p);
530 }
531 
ident_list_add(ident_list_t ** list,ident_t i)532 void ident_list_add(ident_list_t **list, ident_t i)
533 {
534    ident_list_t *c = xmalloc(sizeof(ident_list_t));
535    c->ident = i;
536    c->next  = *list;
537 
538    *list = c;
539 }
540 
ident_list_push(ident_list_t ** list,ident_t i)541 void ident_list_push(ident_list_t **list, ident_t i)
542 {
543    ident_list_t *c = xmalloc(sizeof(ident_list_t));
544    c->ident = i;
545    c->next  = NULL;
546 
547    if (*list == NULL)
548       *list = c;
549    else {
550       ident_list_t *it;
551       for (it = *list; it->next != NULL; it = it->next)
552          ;
553       it->next = c;
554    }
555 }
556 
ident_list_free(ident_list_t * list)557 void ident_list_free(ident_list_t *list)
558 {
559    ident_list_t *it = list;
560    while (it != NULL) {
561       ident_list_t *next = it->next;
562       free(it);
563       it = next;
564    }
565 }
566 
_ident_list_cleanup(ident_list_t ** list)567 void _ident_list_cleanup(ident_list_t **list)
568 {
569    ident_list_free(*list);
570    *list = NULL;
571 }
572 
ident_list_find(const ident_list_t * list,ident_t i)573 bool ident_list_find(const ident_list_t *list, ident_t i)
574 {
575    for (; list != NULL; list = list->next) {
576       if (list->ident == i)
577          return true;
578    }
579 
580    return false;
581 }
582