1 #ifndef __TRIEMAP_H__
2 #define __TRIEMAP_H__
3 
4 #include <stdint.h>
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <string.h>
8 #include <stdbool.h>
9 
10 #include "util/arr.h"
11 
12 typedef uint16_t tm_len_t;
13 
14 #define TM_NODE_DELETED 0x01
15 #define TM_NODE_TERMINAL 0x02
16 #define TM_NODE_SORTED 0x04
17 
18 /* This special pointer is returned when TrieMap_Find cannot find anything */
19 extern void *TRIEMAP_NOTFOUND;
20 
21 #pragma pack(1)
22 
23 /* TrieMapNode represents a single node in a trie. The actual size of it is
24  * bigger, as the children are allocated after str[].
25  * The value pointer is optional, and NULL can be used if you are just
26  * interested in the triemap as a set for strings
27  */
28 typedef struct {
29   // the string length of this node. can be 0
30   tm_len_t len;
31   // the number of child nodes
32   tm_len_t numChildren : 9;
33 
34   uint8_t flags : 7;
35 
36   void *value;
37 
38   // the string of the current node
39   char str[];
40   // ... here come the first letters of each child childChars[]
41   // ... now come the children, to be accessed with __trieMapNode_children
42 } TrieMapNode;
43 
44 #pragma pack()
45 
46 typedef struct {
47   TrieMapNode *root;
48   size_t cardinality;  // number of terms
49   size_t size;         // number of nodes
50 } TrieMap;
51 
52 TrieMap *NewTrieMap();
53 
54 typedef void *(*TrieMapReplaceFunc)(void *oldval, void *newval);
55 
56 /* Add a new string to a trie. Returns 1 if the key is new to the trie or 0 if
57  * it already existed.
58  *
59  * If value is given, it is saved as a pyaload inside the trie node.
60  * If the key already exists, we replace the old value with the new value, using
61  * free() to free the old value.
62  *
63  * If cb is given, instead of replacing and freeing, we call the callback with
64  * the old and new value, and the function should return the value to set in the
65  * node, and take care of freeing any unwanted pointers. The returned value
66  * can be NULL and doesn't have to be either the old or new value.
67  */
68 int TrieMap_Add(TrieMap *t, char *str, tm_len_t len, void *value, TrieMapReplaceFunc cb);
69 
70 /* Find the entry with a given string and length, and return its value, even if
71  * that was NULL.
72  *
73  * NOTE: If the key does not exist in the trie, we return the special
74  * constant value TRIEMAP_NOTFOUND, so checking if the key exists is done by
75  * comparing to it, becase NULL can be a valid result.
76  */
77 void *TrieMap_Find(TrieMap *t, char *str, tm_len_t len);
78 
79 /* Find nodes that have a given prefix. Results are placed in an array.
80  */
81 int TrieMap_FindPrefixes(TrieMap *t, const char *str, tm_len_t len, arrayof(void*) *results);
82 
83 /* Mark a node as deleted. It also optimizes the trie by merging nodes if
84  * needed. If freeCB is given, it will be used to free the value of the deleted
85  * node. If it doesn't, we simply call free() */
86 int TrieMap_Delete(TrieMap *t, char *str, tm_len_t len, void (*freeCB)(void *));
87 
88 /* Free the trie's root and all its children recursively. If freeCB is given, we
89  * call it to free individual payload values. If not, free() is used instead. */
90 void TrieMap_Free(TrieMap *t, void (*freeCB)(void *));
91 
92 /* Get a random key from the trie by doing a random walk down and up the tree
93  * for a minimum number of steps. Returns 0 if the tree is empty and we couldn't
94  * find a random node.
95  * Assign's the key to str and saves its len (the key is NOT null terminated).
96  * NOTE: It is the caller's responsibility to free the key string
97  */
98 int TrieMap_RandomKey(TrieMap *t, char **str, tm_len_t *len, void **ptr);
99 
100 /* Get the value of a random element under a specific prefix. NULL if the prefix was not found */
101 void *TrieMap_RandomValueByPrefix(TrieMap *t, const char *prefix, tm_len_t pflen);
102 
103 size_t TrieMap_MemUsage(TrieMap *t);
104 
105 /**************  Iterator API  - not ported from the textual trie yet
106  * ***********/
107 /* trie iterator stack node. for internal use only */
108 typedef struct {
109   int state;
110   TrieMapNode *n;
111   tm_len_t stringOffset;
112   tm_len_t childOffset;
113 } __tmi_stackNode;
114 
115 typedef struct {
116   char *buf;
117   tm_len_t bufLen;
118   tm_len_t bufOffset;
119 
120   __tmi_stackNode *stack;
121   tm_len_t stackOffset;
122   tm_len_t stackCap;
123 
124   const char *prefix;
125   tm_len_t prefixLen;
126   int inSuffix;
127 } TrieMapIterator;
128 
129 void __tmi_Push(TrieMapIterator *it, TrieMapNode *node);
130 void __tmi_Pop(TrieMapIterator *it);
131 
132 /* Iterate the trie for all the suffixes of a given prefix. This returns an
133  * iterator object even if the prefix was not found, and subsequent calls to
134  * TrieMapIterator_Next are needed to get the results from the iteration. If the
135  * prefix is not found, the first call to next will return 0 */
136 TrieMapIterator *TrieMap_Iterate(TrieMap *t, const char *prefix, tm_len_t prefixLen);
137 
138 /* Free a trie iterator */
139 void TrieMapIterator_Free(TrieMapIterator *it);
140 
141 /* Iterate to the next matching entry in the trie. Returns 1 if we can continue,
142  * or 0 if we're done and should exit */
143 int TrieMapIterator_Next(TrieMapIterator *it, char **ptr, tm_len_t *len, void **value);
144 
145 typedef void(TrieMapRangeCallback)(const char *, size_t, void *, void *);
146 
147 void TrieMap_IterateRange(TrieMap *trie, const char *min, int minlen, bool includeMin,
148                           const char *max, int maxlen, bool includeMax,
149                           TrieMapRangeCallback callback, void *ctx);
150 
151 #endif
152