1 /* Rax -- A radix tree implementation.
2  *
3  * Copyright (c) 2017-2018, Salvatore Sanfilippo <antirez at gmail dot com>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  *   * Redistributions of source code must retain the above copyright notice,
10  *     this list of conditions and the following disclaimer.
11  *   * Redistributions in binary form must reproduce the above copyright
12  *     notice, this list of conditions and the following disclaimer in the
13  *     documentation and/or other materials provided with the distribution.
14  *   * Neither the name of Redis nor the names of its contributors may be used
15  *     to endorse or promote products derived from this software without
16  *     specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 #ifndef RAX_H
32 #define RAX_H
33 
34 #include <stdint.h>
35 
36 /* Representation of a radix tree as implemented in this file, that contains
37  * the strings "foo", "foobar" and "footer" after the insertion of each
38  * word. When the node represents a key inside the radix tree, we write it
39  * between [], otherwise it is written between ().
40  *
41  * This is the vanilla representation:
42  *
43  *              (f) ""
44  *                \
45  *                (o) "f"
46  *                  \
47  *                  (o) "fo"
48  *                    \
49  *                  [t   b] "foo"
50  *                  /     \
51  *         "foot" (e)     (a) "foob"
52  *                /         \
53  *      "foote" (r)         (r) "fooba"
54  *              /             \
55  *    "footer" []             [] "foobar"
56  *
57  * However, this implementation implements a very common optimization where
58  * successive nodes having a single child are "compressed" into the node
59  * itself as a string of characters, each representing a next-level child,
60  * and only the link to the node representing the last character node is
61  * provided inside the representation. So the above representation is turend
62  * into:
63  *
64  *                  ["foo"] ""
65  *                     |
66  *                  [t   b] "foo"
67  *                  /     \
68  *        "foot" ("er")    ("ar") "foob"
69  *                 /          \
70  *       "footer" []          [] "foobar"
71  *
72  * However this optimization makes the implementation a bit more complex.
73  * For instance if a key "first" is added in the above radix tree, a
74  * "node splitting" operation is needed, since the "foo" prefix is no longer
75  * composed of nodes having a single child one after the other. This is the
76  * above tree and the resulting node splitting after this event happens:
77  *
78  *
79  *                    (f) ""
80  *                    /
81  *                 (i o) "f"
82  *                 /   \
83  *    "firs"  ("rst")  (o) "fo"
84  *              /        \
85  *    "first" []       [t   b] "foo"
86  *                     /     \
87  *           "foot" ("er")    ("ar") "foob"
88  *                    /          \
89  *          "footer" []          [] "foobar"
90  *
91  * Similarly after deletion, if a new chain of nodes having a single child
92  * is created (the chain must also not include nodes that represent keys),
93  * it must be compressed back into a single node.
94  *
95  */
96 
97 #define RAX_NODE_MAX_SIZE ((1<<29)-1)
98 typedef struct raxNode {
99     uint32_t iskey:1;     /* Does this node contain a key? */
100     uint32_t isnull:1;    /* Associated value is NULL (don't store it). */
101     uint32_t iscompr:1;   /* Node is compressed. */
102     uint32_t size:29;     /* Number of children, or compressed string len. */
103     /* Data layout is as follows:
104      *
105      * If node is not compressed we have 'size' bytes, one for each children
106      * character, and 'size' raxNode pointers, point to each child node.
107      * Note how the character is not stored in the children but in the
108      * edge of the parents:
109      *
110      * [header iscompr=0][abc][a-ptr][b-ptr][c-ptr](value-ptr?)
111      *
112      * if node is compressed (iscompr bit is 1) the node has 1 children.
113      * In that case the 'size' bytes of the string stored immediately at
114      * the start of the data section, represent a sequence of successive
115      * nodes linked one after the other, for which only the last one in
116      * the sequence is actually represented as a node, and pointed to by
117      * the current compressed node.
118      *
119      * [header iscompr=1][xyz][z-ptr](value-ptr?)
120      *
121      * Both compressed and not compressed nodes can represent a key
122      * with associated data in the radix tree at any level (not just terminal
123      * nodes).
124      *
125      * If the node has an associated key (iskey=1) and is not NULL
126      * (isnull=0), then after the raxNode pointers poiting to the
127      * children, an additional value pointer is present (as you can see
128      * in the representation above as "value-ptr" field).
129      */
130     unsigned char data[];
131 } raxNode;
132 
133 typedef struct rax {
134     raxNode *head;
135     uint64_t numele;
136     uint64_t numnodes;
137 } rax;
138 
139 /* Stack data structure used by raxLowWalk() in order to, optionally, return
140  * a list of parent nodes to the caller. The nodes do not have a "parent"
141  * field for space concerns, so we use the auxiliary stack when needed. */
142 #define RAX_STACK_STATIC_ITEMS 32
143 typedef struct raxStack {
144     void **stack; /* Points to static_items or an heap allocated array. */
145     size_t items, maxitems; /* Number of items contained and total space. */
146     /* Up to RAXSTACK_STACK_ITEMS items we avoid to allocate on the heap
147      * and use this static array of pointers instead. */
148     void *static_items[RAX_STACK_STATIC_ITEMS];
149     int oom; /* True if pushing into this stack failed for OOM at some point. */
150 } raxStack;
151 
152 /* Optional callback used for iterators and be notified on each rax node,
153  * including nodes not representing keys. If the callback returns true
154  * the callback changed the node pointer in the iterator structure, and the
155  * iterator implementation will have to replace the pointer in the radix tree
156  * internals. This allows the callback to reallocate the node to perform
157  * very special operations, normally not needed by normal applications.
158  *
159  * This callback is used to perform very low level analysis of the radix tree
160  * structure, scanning each possible node (but the root node), or in order to
161  * reallocate the nodes to reduce the allocation fragmentation (this is the
162  * Redis application for this callback).
163  *
164  * This is currently only supported in forward iterations (raxNext) */
165 typedef int (*raxNodeCallback)(raxNode **noderef);
166 
167 /* Radix tree iterator state is encapsulated into this data structure. */
168 #define RAX_ITER_STATIC_LEN 128
169 #define RAX_ITER_JUST_SEEKED (1<<0) /* Iterator was just seeked. Return current
170                                        element for the first iteration and
171                                        clear the flag. */
172 #define RAX_ITER_EOF (1<<1)    /* End of iteration reached. */
173 #define RAX_ITER_SAFE (1<<2)   /* Safe iterator, allows operations while
174                                   iterating. But it is slower. */
175 typedef struct raxIterator {
176     int flags;
177     rax *rt;                /* Radix tree we are iterating. */
178     unsigned char *key;     /* The current string. */
179     void *data;             /* Data associated to this key. */
180     size_t key_len;         /* Current key length. */
181     size_t key_max;         /* Max key len the current key buffer can hold. */
182     unsigned char key_static_string[RAX_ITER_STATIC_LEN];
183     raxNode *node;          /* Current node. Only for unsafe iteration. */
184     raxStack stack;         /* Stack used for unsafe iteration. */
185     raxNodeCallback node_cb; /* Optional node callback. Normally set to NULL. */
186 } raxIterator;
187 
188 /* A special pointer returned for not found items. */
189 extern void *raxNotFound;
190 
191 /* Exported API. */
192 rax *raxNew(void);
193 int raxInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old);
194 int raxTryInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old);
195 int raxRemove(rax *rax, unsigned char *s, size_t len, void **old);
196 void *raxFind(rax *rax, unsigned char *s, size_t len);
197 void raxFree(rax *rax);
198 void raxFreeWithCallback(rax *rax, void (*free_callback)(void*));
199 void raxStart(raxIterator *it, rax *rt);
200 int raxSeek(raxIterator *it, const char *op, unsigned char *ele, size_t len);
201 int raxNext(raxIterator *it);
202 int raxPrev(raxIterator *it);
203 int raxRandomWalk(raxIterator *it, size_t steps);
204 int raxCompare(raxIterator *iter, const char *op, unsigned char *key, size_t key_len);
205 void raxStop(raxIterator *it);
206 int raxEOF(raxIterator *it);
207 void raxShow(rax *rax);
208 uint64_t raxSize(rax *rax);
209 unsigned long raxTouch(raxNode *n);
210 void raxSetDebugMsg(int onoff);
211 
212 /* Internal API. May be used by the node callback in order to access rax nodes
213  * in a low level way, so this function is exported as well. */
214 void raxSetData(raxNode *n, void *data);
215 
216 #endif
217