1 /*
2  * Elastic Binary Trees - macros for Indirect Multi-Byte data nodes.
3  * Version 6.0.6
4  * (C) 2002-2011 - Willy Tarreau <w@1wt.eu>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation, version 2.1
9  * exclusively.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #ifndef _EBIMTREE_H
22 #define _EBIMTREE_H
23 
24 #include <string.h>
25 #include "ebtree.h"
26 #include "ebpttree.h"
27 
28 /* These functions and macros rely on Pointer nodes and use the <key> entry as
29  * a pointer to an indirect key. Most operations are performed using ebpt_*.
30  */
31 
/* The following functions are not inlined by default. Only their prototypes
 * are given here; their bodies live in ebimtree.c, which simply relies on the
 * inline versions (__ebim_lookup() and __ebim_insert()) defined in this file.
 */
/* Non-inlined lookup of the <len> bytes pointed to by <x> in tree <root>. */
REGPRM3 struct ebpt_node *ebim_lookup(struct eb_root *root, const void *x, unsigned int len);
/* Non-inlined insertion of node <new>, whose key is <len> bytes long, into tree <root>. */
REGPRM3 struct ebpt_node *ebim_insert(struct eb_root *root, struct ebpt_node *new, unsigned int len);
37 
38 /* Find the first occurrence of a key of a least <len> bytes matching <x> in the
39  * tree <root>. The caller is responsible for ensuring that <len> will not exceed
40  * the common parts between the tree's keys and <x>. In case of multiple matches,
41  * the leftmost node is returned. This means that this function can be used to
42  * lookup string keys by prefix if all keys in the tree are zero-terminated. If
43  * no match is found, NULL is returned. Returns first node if <len> is zero.
44  */
45 static forceinline struct ebpt_node *
__ebim_lookup(struct eb_root * root,const void * x,unsigned int len)46 __ebim_lookup(struct eb_root *root, const void *x, unsigned int len)
47 {
48 	struct ebpt_node *node;
49 	eb_troot_t *troot;
50 	int pos, side;
51 	int node_bit;
52 
53 	troot = root->b[EB_LEFT];
54 	if (unlikely(troot == NULL))
55 		goto ret_null;
56 
57 	if (unlikely(len == 0))
58 		goto walk_down;
59 
60 	pos = 0;
61 	while (1) {
62 		if (eb_gettag(troot) == EB_LEAF) {
63 			node = container_of(eb_untag(troot, EB_LEAF),
64 					    struct ebpt_node, node.branches);
65 			if (memcmp(node->key + pos, x, len) != 0)
66 				goto ret_null;
67 			else
68 				goto ret_node;
69 		}
70 		node = container_of(eb_untag(troot, EB_NODE),
71 				    struct ebpt_node, node.branches);
72 
73 		node_bit = node->node.bit;
74 		if (node_bit < 0) {
75 			/* We have a dup tree now. Either it's for the same
76 			 * value, and we walk down left, or it's a different
77 			 * one and we don't have our key.
78 			 */
79 			if (memcmp(node->key + pos, x, len) != 0)
80 				goto ret_null;
81 			else
82 				goto walk_left;
83 		}
84 
85 		/* OK, normal data node, let's walk down. We check if all full
86 		 * bytes are equal, and we start from the last one we did not
87 		 * completely check. We stop as soon as we reach the last byte,
88 		 * because we must decide to go left/right or abort.
89 		 */
90 		node_bit = ~node_bit + (pos << 3) + 8; // = (pos<<3) + (7 - node_bit)
91 		if (node_bit < 0) {
92 			/* This surprising construction gives better performance
93 			 * because gcc does not try to reorder the loop. Tested to
94 			 * be fine with 2.95 to 4.2.
95 			 */
96 			while (1) {
97 				if (*(unsigned char*)(node->key + pos++) ^ *(unsigned char*)(x++))
98 					goto ret_null; /* more than one full byte is different */
99 				if (--len == 0)
100 					goto walk_left; /* return first node if all bytes matched */
101 				node_bit += 8;
102 				if (node_bit >= 0)
103 					break;
104 			}
105 		}
106 
107 		/* here we know that only the last byte differs, so node_bit < 8.
108 		 * We have 2 possibilities :
109 		 *   - more than the last bit differs => return NULL
110 		 *   - walk down on side = (x[pos] >> node_bit) & 1
111 		 */
112 		side = *(unsigned char *)x >> node_bit;
113 		if (((*(unsigned char*)(node->key + pos) >> node_bit) ^ side) > 1)
114 			goto ret_null;
115 		side &= 1;
116 		troot = node->node.branches.b[side];
117 	}
118  walk_left:
119 	troot = node->node.branches.b[EB_LEFT];
120  walk_down:
121 	while (eb_gettag(troot) != EB_LEAF)
122 		troot = (eb_untag(troot, EB_NODE))->b[EB_LEFT];
123 	node = container_of(eb_untag(troot, EB_LEAF),
124 			    struct ebpt_node, node.branches);
125  ret_node:
126 	return node;
127  ret_null:
128 	return NULL;
129 }
130 
131 /* Insert ebpt_node <new> into subtree starting at node root <root>.
132  * Only new->key needs be set with the key. The ebpt_node is returned.
133  * If root->b[EB_RGHT]==1, the tree may only contain unique keys. The
134  * len is specified in bytes.
135  */
136 static forceinline struct ebpt_node *
__ebim_insert(struct eb_root * root,struct ebpt_node * new,unsigned int len)137 __ebim_insert(struct eb_root *root, struct ebpt_node *new, unsigned int len)
138 {
139 	struct ebpt_node *old;
140 	unsigned int side;
141 	eb_troot_t *troot;
142 	eb_troot_t *root_right;
143 	int diff;
144 	int bit;
145 	int old_node_bit;
146 
147 	side = EB_LEFT;
148 	troot = root->b[EB_LEFT];
149 	root_right = root->b[EB_RGHT];
150 	if (unlikely(troot == NULL)) {
151 		/* Tree is empty, insert the leaf part below the left branch */
152 		root->b[EB_LEFT] = eb_dotag(&new->node.branches, EB_LEAF);
153 		new->node.leaf_p = eb_dotag(root, EB_LEFT);
154 		new->node.node_p = NULL; /* node part unused */
155 		return new;
156 	}
157 
158 	len <<= 3;
159 
160 	/* The tree descent is fairly easy :
161 	 *  - first, check if we have reached a leaf node
162 	 *  - second, check if we have gone too far
163 	 *  - third, reiterate
164 	 * Everywhere, we use <new> for the node node we are inserting, <root>
165 	 * for the node we attach it to, and <old> for the node we are
166 	 * displacing below <new>. <troot> will always point to the future node
167 	 * (tagged with its type). <side> carries the side the node <new> is
168 	 * attached to below its parent, which is also where previous node
169 	 * was attached.
170 	 */
171 
172 	bit = 0;
173 	while (1) {
174 		if (unlikely(eb_gettag(troot) == EB_LEAF)) {
175 			eb_troot_t *new_left, *new_rght;
176 			eb_troot_t *new_leaf, *old_leaf;
177 
178 			old = container_of(eb_untag(troot, EB_LEAF),
179 					    struct ebpt_node, node.branches);
180 
181 			new_left = eb_dotag(&new->node.branches, EB_LEFT);
182 			new_rght = eb_dotag(&new->node.branches, EB_RGHT);
183 			new_leaf = eb_dotag(&new->node.branches, EB_LEAF);
184 			old_leaf = eb_dotag(&old->node.branches, EB_LEAF);
185 
186 			new->node.node_p = old->node.leaf_p;
187 
188 			/* Right here, we have 3 possibilities :
189 			 * - the tree does not contain the key, and we have
190 			 *   new->key < old->key. We insert new above old, on
191 			 *   the left ;
192 			 *
193 			 * - the tree does not contain the key, and we have
194 			 *   new->key > old->key. We insert new above old, on
195 			 *   the right ;
196 			 *
197 			 * - the tree does contain the key, which implies it
198 			 *   is alone. We add the new key next to it as a
199 			 *   first duplicate.
200 			 *
201 			 * The last two cases can easily be partially merged.
202 			 */
203 			bit = equal_bits(new->key, old->key, bit, len);
204 
205 			/* Note: we can compare more bits than the current node's because as
206 			 * long as they are identical, we know we descend along the correct
207 			 * side. However we don't want to start to compare past the end.
208 			 */
209 			diff = 0;
210 			if (((unsigned)bit >> 3) < len)
211 				diff = cmp_bits(new->key, old->key, bit);
212 
213 			if (diff < 0) {
214 				new->node.leaf_p = new_left;
215 				old->node.leaf_p = new_rght;
216 				new->node.branches.b[EB_LEFT] = new_leaf;
217 				new->node.branches.b[EB_RGHT] = old_leaf;
218 			} else {
219 				/* we may refuse to duplicate this key if the tree is
220 				 * tagged as containing only unique keys.
221 				 */
222 				if (diff == 0 && eb_gettag(root_right))
223 					return old;
224 
225 				/* new->key >= old->key, new goes the right */
226 				old->node.leaf_p = new_left;
227 				new->node.leaf_p = new_rght;
228 				new->node.branches.b[EB_LEFT] = old_leaf;
229 				new->node.branches.b[EB_RGHT] = new_leaf;
230 
231 				if (diff == 0) {
232 					new->node.bit = -1;
233 					root->b[side] = eb_dotag(&new->node.branches, EB_NODE);
234 					return new;
235 				}
236 			}
237 			break;
238 		}
239 
240 		/* OK we're walking down this link */
241 		old = container_of(eb_untag(troot, EB_NODE),
242 				   struct ebpt_node, node.branches);
243 		old_node_bit = old->node.bit;
244 
245 		/* Stop going down when we don't have common bits anymore. We
246 		 * also stop in front of a duplicates tree because it means we
247 		 * have to insert above. Note: we can compare more bits than
248 		 * the current node's because as long as they are identical, we
249 		 * know we descend along the correct side.
250 		 */
251 		if (old_node_bit < 0) {
252 			/* we're above a duplicate tree, we must compare till the end */
253 			bit = equal_bits(new->key, old->key, bit, len);
254 			goto dup_tree;
255 		}
256 		else if (bit < old_node_bit) {
257 			bit = equal_bits(new->key, old->key, bit, old_node_bit);
258 		}
259 
260 		if (bit < old_node_bit) { /* we don't have all bits in common */
261 			/* The tree did not contain the key, so we insert <new> before the node
262 			 * <old>, and set ->bit to designate the lowest bit position in <new>
263 			 * which applies to ->branches.b[].
264 			 */
265 			eb_troot_t *new_left, *new_rght;
266 			eb_troot_t *new_leaf, *old_node;
267 
268 		dup_tree:
269 			new_left = eb_dotag(&new->node.branches, EB_LEFT);
270 			new_rght = eb_dotag(&new->node.branches, EB_RGHT);
271 			new_leaf = eb_dotag(&new->node.branches, EB_LEAF);
272 			old_node = eb_dotag(&old->node.branches, EB_NODE);
273 
274 			new->node.node_p = old->node.node_p;
275 
276 			/* Note: we can compare more bits than the current node's because as
277 			 * long as they are identical, we know we descend along the correct
278 			 * side. However we don't want to start to compare past the end.
279 			 */
280 			diff = 0;
281 			if (((unsigned)bit >> 3) < len)
282 				diff = cmp_bits(new->key, old->key, bit);
283 
284 			if (diff < 0) {
285 				new->node.leaf_p = new_left;
286 				old->node.node_p = new_rght;
287 				new->node.branches.b[EB_LEFT] = new_leaf;
288 				new->node.branches.b[EB_RGHT] = old_node;
289 			}
290 			else if (diff > 0) {
291 				old->node.node_p = new_left;
292 				new->node.leaf_p = new_rght;
293 				new->node.branches.b[EB_LEFT] = old_node;
294 				new->node.branches.b[EB_RGHT] = new_leaf;
295 			}
296 			else {
297 				struct eb_node *ret;
298 				ret = eb_insert_dup(&old->node, &new->node);
299 				return container_of(ret, struct ebpt_node, node);
300 			}
301 			break;
302 		}
303 
304 		/* walk down */
305 		root = &old->node.branches;
306 		side = (((unsigned char *)new->key)[old_node_bit >> 3] >> (~old_node_bit & 7)) & 1;
307 		troot = root->b[side];
308 	}
309 
310 	/* Ok, now we are inserting <new> between <root> and <old>. <old>'s
311 	 * parent is already set to <new>, and the <root>'s branch is still in
312 	 * <side>. Update the root's leaf till we have it. Note that we can also
313 	 * find the side by checking the side of new->node.node_p.
314 	 */
315 
316 	/* We need the common higher bits between new->key and old->key.
317 	 * This number of bits is already in <bit>.
318 	 */
319 	new->node.bit = bit;
320 	root->b[side] = eb_dotag(&new->node.branches, EB_NODE);
321 	return new;
322 }
323 
324 #endif /* _EBIMTREE_H */
325