1 /*
2  * tree234.c: reasonably generic counted 2-3-4 tree routines.
3  *
4  * This file is copyright 1999-2001 Simon Tatham.
5  *
6  * Permission is hereby granted, free of charge, to any person
7  * obtaining a copy of this software and associated documentation
8  * files (the "Software"), to deal in the Software without
9  * restriction, including without limitation the rights to use,
10  * copy, modify, merge, publish, distribute, sublicense, and/or
11  * sell copies of the Software, and to permit persons to whom the
12  * Software is furnished to do so, subject to the following
13  * conditions:
14  *
15  * The above copyright notice and this permission notice shall be
16  * included in all copies or substantial portions of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21  * NONINFRINGEMENT.  IN NO EVENT SHALL SIMON TATHAM BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
23  * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25  * SOFTWARE.
26  */
27 
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <assert.h>
31 
32 #include "tree234.h"
33 
34 #define smalloc malloc
35 #define sfree free
36 
37 #define snew(typ) ( (typ *) smalloc (sizeof (typ)) )
38 
39 #ifdef TEST
40 #define LOG(x) (printf x)
41 #else
42 #define LOG(x)
43 #endif
44 
45 typedef struct node234_Tag node234;
46 
47 struct tree234_Tag {
48     node234 *root;
49     cmpfn234 cmp;
50 };
51 
52 struct node234_Tag {
53     node234 *parent;
54     node234 *kids[4];
55     int counts[4];
56     void *elems[3];
57 };
58 
59 /*
60  * Create a 2-3-4 tree.
61  */
newtree234(cmpfn234 cmp)62 tree234 *newtree234(cmpfn234 cmp) {
63     tree234 *ret = snew(tree234);
64     LOG(("created tree %p\n", ret));
65     ret->root = NULL;
66     ret->cmp = cmp;
67     return ret;
68 }
69 
70 /*
71  * Free a 2-3-4 tree (not including freeing the elements).
72  */
freenode234(node234 * n)73 static void freenode234(node234 *n) {
74     if (!n)
75 	return;
76     freenode234(n->kids[0]);
77     freenode234(n->kids[1]);
78     freenode234(n->kids[2]);
79     freenode234(n->kids[3]);
80     sfree(n);
81 }
freetree234(tree234 * t)82 void freetree234(tree234 *t) {
83     freenode234(t->root);
84     sfree(t);
85 }
86 
87 /*
88  * Internal function to count a node.
89  */
countnode234(node234 * n)90 static int countnode234(node234 *n) {
91     int count = 0;
92     int i;
93     if (!n)
94 	return 0;
95     for (i = 0; i < 4; i++)
96 	count += n->counts[i];
97     for (i = 0; i < 3; i++)
98 	if (n->elems[i])
99 	    count++;
100     return count;
101 }
102 
103 /*
104  * Count the elements in a tree.
105  */
count234(tree234 * t)106 int count234(tree234 *t) {
107     if (t->root)
108 	return countnode234(t->root);
109     else
110 	return 0;
111 }
112 
113 /*
114  * Propagate a node overflow up a tree until it stops. Returns 0 or
115  * 1, depending on whether the root had to be split or not.
116  */
add234_insert(node234 * left,void * e,node234 * right,node234 ** root,node234 * n,int ki)117 static int add234_insert(node234 *left, void *e, node234 *right,
118 			 node234 **root, node234 *n, int ki) {
119     int lcount, rcount;
120     /*
121      * We need to insert the new left/element/right set in n at
122      * child position ki.
123      */
124     lcount = countnode234(left);
125     rcount = countnode234(right);
126     while (n) {
127 	LOG(("  at %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
128 	     n,
129 	     n->kids[0], n->counts[0], n->elems[0],
130 	     n->kids[1], n->counts[1], n->elems[1],
131 	     n->kids[2], n->counts[2], n->elems[2],
132 	     n->kids[3], n->counts[3]));
133 	LOG(("  need to insert %p/%d \"%s\" %p/%d at position %d\n",
134 	     left, lcount, e, right, rcount, ki));
135 	if (n->elems[1] == NULL) {
136 	    /*
137 	     * Insert in a 2-node; simple.
138 	     */
139 	    if (ki == 0) {
140 		LOG(("  inserting on left of 2-node\n"));
141 		n->kids[2] = n->kids[1];     n->counts[2] = n->counts[1];
142 		n->elems[1] = n->elems[0];
143 		n->kids[1] = right;          n->counts[1] = rcount;
144 		n->elems[0] = e;
145 		n->kids[0] = left;           n->counts[0] = lcount;
146 	    } else { /* ki == 1 */
147 		LOG(("  inserting on right of 2-node\n"));
148 		n->kids[2] = right;          n->counts[2] = rcount;
149 		n->elems[1] = e;
150 		n->kids[1] = left;           n->counts[1] = lcount;
151 	    }
152 	    if (n->kids[0]) n->kids[0]->parent = n;
153 	    if (n->kids[1]) n->kids[1]->parent = n;
154 	    if (n->kids[2]) n->kids[2]->parent = n;
155 	    LOG(("  done\n"));
156 	    break;
157 	} else if (n->elems[2] == NULL) {
158 	    /*
159 	     * Insert in a 3-node; simple.
160 	     */
161 	    if (ki == 0) {
162 		LOG(("  inserting on left of 3-node\n"));
163 		n->kids[3] = n->kids[2];    n->counts[3] = n->counts[2];
164 		n->elems[2] = n->elems[1];
165 		n->kids[2] = n->kids[1];    n->counts[2] = n->counts[1];
166 		n->elems[1] = n->elems[0];
167 		n->kids[1] = right;         n->counts[1] = rcount;
168 		n->elems[0] = e;
169 		n->kids[0] = left;          n->counts[0] = lcount;
170 	    } else if (ki == 1) {
171 		LOG(("  inserting in middle of 3-node\n"));
172 		n->kids[3] = n->kids[2];    n->counts[3] = n->counts[2];
173 		n->elems[2] = n->elems[1];
174 		n->kids[2] = right;         n->counts[2] = rcount;
175 		n->elems[1] = e;
176 		n->kids[1] = left;          n->counts[1] = lcount;
177 	    } else { /* ki == 2 */
178 		LOG(("  inserting on right of 3-node\n"));
179 		n->kids[3] = right;         n->counts[3] = rcount;
180 		n->elems[2] = e;
181 		n->kids[2] = left;          n->counts[2] = lcount;
182 	    }
183 	    if (n->kids[0]) n->kids[0]->parent = n;
184 	    if (n->kids[1]) n->kids[1]->parent = n;
185 	    if (n->kids[2]) n->kids[2]->parent = n;
186 	    if (n->kids[3]) n->kids[3]->parent = n;
187 	    LOG(("  done\n"));
188 	    break;
189 	} else {
190 	    node234 *m = snew(node234);
191 	    m->parent = n->parent;
192 	    LOG(("  splitting a 4-node; created new node %p\n", m));
193 	    /*
194 	     * Insert in a 4-node; split into a 2-node and a
195 	     * 3-node, and move focus up a level.
196 	     *
197 	     * I don't think it matters which way round we put the
198 	     * 2 and the 3. For simplicity, we'll put the 3 first
199 	     * always.
200 	     */
201 	    if (ki == 0) {
202 		m->kids[0] = left;          m->counts[0] = lcount;
203 		m->elems[0] = e;
204 		m->kids[1] = right;         m->counts[1] = rcount;
205 		m->elems[1] = n->elems[0];
206 		m->kids[2] = n->kids[1];    m->counts[2] = n->counts[1];
207 		e = n->elems[1];
208 		n->kids[0] = n->kids[2];    n->counts[0] = n->counts[2];
209 		n->elems[0] = n->elems[2];
210 		n->kids[1] = n->kids[3];    n->counts[1] = n->counts[3];
211 	    } else if (ki == 1) {
212 		m->kids[0] = n->kids[0];    m->counts[0] = n->counts[0];
213 		m->elems[0] = n->elems[0];
214 		m->kids[1] = left;          m->counts[1] = lcount;
215 		m->elems[1] = e;
216 		m->kids[2] = right;         m->counts[2] = rcount;
217 		e = n->elems[1];
218 		n->kids[0] = n->kids[2];    n->counts[0] = n->counts[2];
219 		n->elems[0] = n->elems[2];
220 		n->kids[1] = n->kids[3];    n->counts[1] = n->counts[3];
221 	    } else if (ki == 2) {
222 		m->kids[0] = n->kids[0];    m->counts[0] = n->counts[0];
223 		m->elems[0] = n->elems[0];
224 		m->kids[1] = n->kids[1];    m->counts[1] = n->counts[1];
225 		m->elems[1] = n->elems[1];
226 		m->kids[2] = left;          m->counts[2] = lcount;
227 		/* e = e; */
228 		n->kids[0] = right;         n->counts[0] = rcount;
229 		n->elems[0] = n->elems[2];
230 		n->kids[1] = n->kids[3];    n->counts[1] = n->counts[3];
231 	    } else { /* ki == 3 */
232 		m->kids[0] = n->kids[0];    m->counts[0] = n->counts[0];
233 		m->elems[0] = n->elems[0];
234 		m->kids[1] = n->kids[1];    m->counts[1] = n->counts[1];
235 		m->elems[1] = n->elems[1];
236 		m->kids[2] = n->kids[2];    m->counts[2] = n->counts[2];
237 		n->kids[0] = left;          n->counts[0] = lcount;
238 		n->elems[0] = e;
239 		n->kids[1] = right;         n->counts[1] = rcount;
240 		e = n->elems[2];
241 	    }
242 	    m->kids[3] = n->kids[3] = n->kids[2] = NULL;
243 	    m->counts[3] = n->counts[3] = n->counts[2] = 0;
244 	    m->elems[2] = n->elems[2] = n->elems[1] = NULL;
245 	    if (m->kids[0]) m->kids[0]->parent = m;
246 	    if (m->kids[1]) m->kids[1]->parent = m;
247 	    if (m->kids[2]) m->kids[2]->parent = m;
248 	    if (n->kids[0]) n->kids[0]->parent = n;
249 	    if (n->kids[1]) n->kids[1]->parent = n;
250 	    LOG(("  left (%p): %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", m,
251 		 m->kids[0], m->counts[0], m->elems[0],
252 		 m->kids[1], m->counts[1], m->elems[1],
253 		 m->kids[2], m->counts[2]));
254 	    LOG(("  right (%p): %p/%d \"%s\" %p/%d\n", n,
255 		 n->kids[0], n->counts[0], n->elems[0],
256 		 n->kids[1], n->counts[1]));
257 	    left = m;  lcount = countnode234(left);
258 	    right = n; rcount = countnode234(right);
259 	}
260 	if (n->parent)
261 	    ki = (n->parent->kids[0] == n ? 0 :
262 		  n->parent->kids[1] == n ? 1 :
263 		  n->parent->kids[2] == n ? 2 : 3);
264 	n = n->parent;
265     }
266 
267     /*
268      * If we've come out of here by `break', n will still be
269      * non-NULL and all we need to do is go back up the tree
270      * updating counts. If we've come here because n is NULL, we
271      * need to create a new root for the tree because the old one
272      * has just split into two. */
273     if (n) {
274 	while (n->parent) {
275 	    int count = countnode234(n);
276 	    int childnum;
277 	    childnum = (n->parent->kids[0] == n ? 0 :
278 			n->parent->kids[1] == n ? 1 :
279 			n->parent->kids[2] == n ? 2 : 3);
280 	    n->parent->counts[childnum] = count;
281 	    n = n->parent;
282 	}
283 	return 0;		       /* root unchanged */
284     } else {
285 	LOG(("  root is overloaded, split into two\n"));
286 	(*root) = snew(node234);
287 	(*root)->kids[0] = left;     (*root)->counts[0] = lcount;
288 	(*root)->elems[0] = e;
289 	(*root)->kids[1] = right;    (*root)->counts[1] = rcount;
290 	(*root)->elems[1] = NULL;
291 	(*root)->kids[2] = NULL;     (*root)->counts[2] = 0;
292 	(*root)->elems[2] = NULL;
293 	(*root)->kids[3] = NULL;     (*root)->counts[3] = 0;
294 	(*root)->parent = NULL;
295 	if ((*root)->kids[0]) (*root)->kids[0]->parent = (*root);
296 	if ((*root)->kids[1]) (*root)->kids[1]->parent = (*root);
297 	LOG(("  new root is %p/%d \"%s\" %p/%d\n",
298 	     (*root)->kids[0], (*root)->counts[0],
299 	     (*root)->elems[0],
300 	     (*root)->kids[1], (*root)->counts[1]));
301 	return 1;		       /* root moved */
302     }
303 }
304 
305 /*
306  * Add an element e to a 2-3-4 tree t. Returns e on success, or if
307  * an existing element compares equal, returns that.
308  */
add234_internal(tree234 * t,void * e,int index)309 static void *add234_internal(tree234 *t, void *e, int index) {
310     node234 *n;
311     int ki;
312     void *orig_e = e;
313     int c;
314 
315     LOG(("adding element \"%s\" to tree %p\n", e, t));
316     if (t->root == NULL) {
317 	t->root = snew(node234);
318 	t->root->elems[1] = t->root->elems[2] = NULL;
319 	t->root->kids[0] = t->root->kids[1] = NULL;
320 	t->root->kids[2] = t->root->kids[3] = NULL;
321 	t->root->counts[0] = t->root->counts[1] = 0;
322 	t->root->counts[2] = t->root->counts[3] = 0;
323 	t->root->parent = NULL;
324 	t->root->elems[0] = e;
325 	LOG(("  created root %p\n", t->root));
326 	return orig_e;
327     }
328 
329     n = t->root;
330     while (n) {
331 	LOG(("  node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
332 	     n,
333 	     n->kids[0], n->counts[0], n->elems[0],
334 	     n->kids[1], n->counts[1], n->elems[1],
335 	     n->kids[2], n->counts[2], n->elems[2],
336 	     n->kids[3], n->counts[3]));
337 	if (index >= 0) {
338 	    if (!n->kids[0]) {
339 		/*
340 		 * Leaf node. We want to insert at kid position
341 		 * equal to the index:
342 		 *
343 		 *   0 A 1 B 2 C 3
344 		 */
345 		ki = index;
346 	    } else {
347 		/*
348 		 * Internal node. We always descend through it (add
349 		 * always starts at the bottom, never in the
350 		 * middle).
351 		 */
352 		if (index <= n->counts[0]) {
353 		    ki = 0;
354 		} else if (index -= n->counts[0] + 1, index <= n->counts[1]) {
355 		    ki = 1;
356 		} else if (index -= n->counts[1] + 1, index <= n->counts[2]) {
357 		    ki = 2;
358 		} else if (index -= n->counts[2] + 1, index <= n->counts[3]) {
359 		    ki = 3;
360 		} else
361 		    return NULL;       /* error: index out of range */
362 	    }
363 	} else {
364 	    if ((c = t->cmp(e, n->elems[0])) < 0)
365 		ki = 0;
366 	    else if (c == 0)
367 		return n->elems[0];	       /* already exists */
368 	    else if (n->elems[1] == NULL || (c = t->cmp(e, n->elems[1])) < 0)
369 		ki = 1;
370 	    else if (c == 0)
371 		return n->elems[1];	       /* already exists */
372 	    else if (n->elems[2] == NULL || (c = t->cmp(e, n->elems[2])) < 0)
373 		ki = 2;
374 	    else if (c == 0)
375 		return n->elems[2];	       /* already exists */
376 	    else
377 		ki = 3;
378 	}
379 	LOG(("  moving to child %d (%p)\n", ki, n->kids[ki]));
380 	if (!n->kids[ki])
381 	    break;
382 	n = n->kids[ki];
383     }
384 
385     add234_insert(NULL, e, NULL, &t->root, n, ki);
386 
387     return orig_e;
388 }
389 
add234(tree234 * t,void * e)390 void *add234(tree234 *t, void *e) {
391     if (!t->cmp)		       /* tree is unsorted */
392 	return NULL;
393 
394     return add234_internal(t, e, -1);
395 }
addpos234(tree234 * t,void * e,int index)396 void *addpos234(tree234 *t, void *e, int index) {
397     if (index < 0 ||		       /* index out of range */
398 	t->cmp)			       /* tree is sorted */
399 	return NULL;		       /* return failure */
400 
401     return add234_internal(t, e, index);  /* this checks the upper bound */
402 }
403 
404 /*
405  * Look up the element at a given numeric index in a 2-3-4 tree.
406  * Returns NULL if the index is out of range.
407  */
index234(tree234 * t,int index)408 void *index234(tree234 *t, int index) {
409     node234 *n;
410 
411     if (!t->root)
412 	return NULL;		       /* tree is empty */
413 
414     if (index < 0 || index >= countnode234(t->root))
415 	return NULL;		       /* out of range */
416 
417     n = t->root;
418 
419     while (n) {
420 	if (index < n->counts[0])
421 	    n = n->kids[0];
422 	else if (index -= n->counts[0] + 1, index < 0)
423 	    return n->elems[0];
424 	else if (index < n->counts[1])
425 	    n = n->kids[1];
426 	else if (index -= n->counts[1] + 1, index < 0)
427 	    return n->elems[1];
428 	else if (index < n->counts[2])
429 	    n = n->kids[2];
430 	else if (index -= n->counts[2] + 1, index < 0)
431 	    return n->elems[2];
432 	else
433 	    n = n->kids[3];
434     }
435 
436     /* We shouldn't ever get here. I wonder how we did. */
437     return NULL;
438 }
439 
440 /*
441  * Find an element e in a sorted 2-3-4 tree t. Returns NULL if not
442  * found. e is always passed as the first argument to cmp, so cmp
443  * can be an asymmetric function if desired. cmp can also be passed
444  * as NULL, in which case the compare function from the tree proper
445  * will be used.
446  */
findrelpos234(tree234 * t,void * e,cmpfn234 cmp,int relation,int * index)447 void *findrelpos234(tree234 *t, void *e, cmpfn234 cmp,
448 		    int relation, int *index) {
449     node234 *n;
450     void *ret;
451     int c;
452     int idx, ecount, kcount, cmpret;
453 
454     if (t->root == NULL)
455 	return NULL;
456 
457     if (cmp == NULL)
458 	cmp = t->cmp;
459 
460     n = t->root;
461     /*
462      * Attempt to find the element itself.
463      */
464     idx = 0;
465     ecount = -1;
466     /*
467      * Prepare a fake `cmp' result if e is NULL.
468      */
469     cmpret = 0;
470     if (e == NULL) {
471 	assert(relation == REL234_LT || relation == REL234_GT);
472 	if (relation == REL234_LT)
473 	    cmpret = +1;	       /* e is a max: always greater */
474 	else if (relation == REL234_GT)
475 	    cmpret = -1;	       /* e is a min: always smaller */
476     }
477     while (1) {
478 	for (kcount = 0; kcount < 4; kcount++) {
479 	    if (kcount >= 3 || n->elems[kcount] == NULL ||
480 		(c = cmpret ? cmpret : cmp(e, n->elems[kcount])) < 0) {
481 		break;
482 	    }
483 	    if (n->kids[kcount]) idx += n->counts[kcount];
484 	    if (c == 0) {
485 		ecount = kcount;
486 		break;
487 	    }
488 	    idx++;
489 	}
490 	if (ecount >= 0)
491 	    break;
492 	if (n->kids[kcount])
493 	    n = n->kids[kcount];
494 	else
495 	    break;
496     }
497 
498     if (ecount >= 0) {
499 	/*
500 	 * We have found the element we're looking for. It's
501 	 * n->elems[ecount], at tree index idx. If our search
502 	 * relation is EQ, LE or GE we can now go home.
503 	 */
504 	if (relation != REL234_LT && relation != REL234_GT) {
505 	    if (index) *index = idx;
506 	    return n->elems[ecount];
507 	}
508 
509 	/*
510 	 * Otherwise, we'll do an indexed lookup for the previous
511 	 * or next element. (It would be perfectly possible to
512 	 * implement these search types in a non-counted tree by
513 	 * going back up from where we are, but far more fiddly.)
514 	 */
515 	if (relation == REL234_LT)
516 	    idx--;
517 	else
518 	    idx++;
519     } else {
520 	/*
521 	 * We've found our way to the bottom of the tree and we
522 	 * know where we would insert this node if we wanted to:
523 	 * we'd put it in in place of the (empty) subtree
524 	 * n->kids[kcount], and it would have index idx
525 	 *
526 	 * But the actual element isn't there. So if our search
527 	 * relation is EQ, we're doomed.
528 	 */
529 	if (relation == REL234_EQ)
530 	    return NULL;
531 
532 	/*
533 	 * Otherwise, we must do an index lookup for index idx-1
534 	 * (if we're going left - LE or LT) or index idx (if we're
535 	 * going right - GE or GT).
536 	 */
537 	if (relation == REL234_LT || relation == REL234_LE) {
538 	    idx--;
539 	}
540     }
541 
542     /*
543      * We know the index of the element we want; just call index234
544      * to do the rest. This will return NULL if the index is out of
545      * bounds, which is exactly what we want.
546      */
547     ret = index234(t, idx);
548     if (ret && index) *index = idx;
549     return ret;
550 }
find234(tree234 * t,void * e,cmpfn234 cmp)551 void *find234(tree234 *t, void *e, cmpfn234 cmp) {
552     return findrelpos234(t, e, cmp, REL234_EQ, NULL);
553 }
findrel234(tree234 * t,void * e,cmpfn234 cmp,int relation)554 void *findrel234(tree234 *t, void *e, cmpfn234 cmp, int relation) {
555     return findrelpos234(t, e, cmp, relation, NULL);
556 }
findpos234(tree234 * t,void * e,cmpfn234 cmp,int * index)557 void *findpos234(tree234 *t, void *e, cmpfn234 cmp, int *index) {
558     return findrelpos234(t, e, cmp, REL234_EQ, index);
559 }
560 
561 /*
562  * Tree transformation used in delete and split: move a subtree
563  * right, from child ki of a node to the next child. Update k and
564  * index so that they still point to the same place in the
565  * transformed tree. Assumes the destination child is not full, and
566  * that the source child does have a subtree to spare. Can cope if
567  * the destination child is undersized.
568  *
569  *                . C .                     . B .
570  *               /     \     ->            /     \
571  * [more] a A b B c   d D e      [more] a A b   c C d D e
572  *
573  *                 . C .                     . B .
574  *                /     \    ->             /     \
575  *  [more] a A b B c     d        [more] a A b   c C d
576  */
trans234_subtree_right(node234 * n,int ki,int * k,int * index)577 static void trans234_subtree_right(node234 *n, int ki, int *k, int *index) {
578     node234 *src, *dest;
579     int i, srclen, adjust;
580 
581     src = n->kids[ki];
582     dest = n->kids[ki+1];
583 
584     LOG(("  trans234_subtree_right(%p, %d):\n", n, ki));
585     LOG(("    parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
586 	 n,
587 	 n->kids[0], n->counts[0], n->elems[0],
588 	 n->kids[1], n->counts[1], n->elems[1],
589 	 n->kids[2], n->counts[2], n->elems[2],
590 	 n->kids[3], n->counts[3]));
591     LOG(("    src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
592 	 src,
593 	 src->kids[0], src->counts[0], src->elems[0],
594 	 src->kids[1], src->counts[1], src->elems[1],
595 	 src->kids[2], src->counts[2], src->elems[2],
596 	 src->kids[3], src->counts[3]));
597     LOG(("    dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
598 	 dest,
599 	 dest->kids[0], dest->counts[0], dest->elems[0],
600 	 dest->kids[1], dest->counts[1], dest->elems[1],
601 	 dest->kids[2], dest->counts[2], dest->elems[2],
602 	 dest->kids[3], dest->counts[3]));
603     /*
604      * Move over the rest of the destination node to make space.
605      */
606     dest->kids[3] = dest->kids[2];    dest->counts[3] = dest->counts[2];
607     dest->elems[2] = dest->elems[1];
608     dest->kids[2] = dest->kids[1];    dest->counts[2] = dest->counts[1];
609     dest->elems[1] = dest->elems[0];
610     dest->kids[1] = dest->kids[0];    dest->counts[1] = dest->counts[0];
611 
612     /* which element to move over */
613     i = (src->elems[2] ? 2 : src->elems[1] ? 1 : 0);
614 
615     dest->elems[0] = n->elems[ki];
616     n->elems[ki] = src->elems[i];
617     src->elems[i] = NULL;
618 
619     dest->kids[0] = src->kids[i+1];   dest->counts[0] = src->counts[i+1];
620     src->kids[i+1] = NULL;            src->counts[i+1] = 0;
621 
622     if (dest->kids[0]) dest->kids[0]->parent = dest;
623 
624     adjust = dest->counts[0] + 1;
625 
626     n->counts[ki] -= adjust;
627     n->counts[ki+1] += adjust;
628 
629     srclen = n->counts[ki];
630 
631     if (k) {
632 	LOG(("    before: k,index = %d,%d\n", (*k), (*index)));
633 	if ((*k) == ki && (*index) > srclen) {
634 	    (*index) -= srclen + 1;
635 	    (*k)++;
636 	} else if ((*k) == ki+1) {
637 	    (*index) += adjust;
638 	}
639 	LOG(("    after: k,index = %d,%d\n", (*k), (*index)));
640     }
641 
642     LOG(("    parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
643 	 n,
644 	 n->kids[0], n->counts[0], n->elems[0],
645 	 n->kids[1], n->counts[1], n->elems[1],
646 	 n->kids[2], n->counts[2], n->elems[2],
647 	 n->kids[3], n->counts[3]));
648     LOG(("    src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
649 	 src,
650 	 src->kids[0], src->counts[0], src->elems[0],
651 	 src->kids[1], src->counts[1], src->elems[1],
652 	 src->kids[2], src->counts[2], src->elems[2],
653 	 src->kids[3], src->counts[3]));
654     LOG(("    dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
655 	 dest,
656 	 dest->kids[0], dest->counts[0], dest->elems[0],
657 	 dest->kids[1], dest->counts[1], dest->elems[1],
658 	 dest->kids[2], dest->counts[2], dest->elems[2],
659 	 dest->kids[3], dest->counts[3]));
660 }
661 
662 /*
663  * Tree transformation used in delete and split: move a subtree
664  * left, from child ki of a node to the previous child. Update k
665  * and index so that they still point to the same place in the
666  * transformed tree. Assumes the destination child is not full, and
667  * that the source child does have a subtree to spare. Can cope if
668  * the destination child is undersized.
669  *
670  *      . B .                             . C .
671  *     /     \                ->         /     \
672  *  a A b   c C d D e [more]      a A b B c   d D e [more]
673  *
674  *     . A .                             . B .
675  *    /     \                 ->        /     \
676  *   a   b B c C d [more]            a A b   c C d [more]
677  */
trans234_subtree_left(node234 * n,int ki,int * k,int * index)678 static void trans234_subtree_left(node234 *n, int ki, int *k, int *index) {
679     node234 *src, *dest;
680     int i, adjust;
681 
682     src = n->kids[ki];
683     dest = n->kids[ki-1];
684 
685     LOG(("  trans234_subtree_left(%p, %d):\n", n, ki));
686     LOG(("    parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
687 	 n,
688 	 n->kids[0], n->counts[0], n->elems[0],
689 	 n->kids[1], n->counts[1], n->elems[1],
690 	 n->kids[2], n->counts[2], n->elems[2],
691 	 n->kids[3], n->counts[3]));
692     LOG(("    dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
693 	 dest,
694 	 dest->kids[0], dest->counts[0], dest->elems[0],
695 	 dest->kids[1], dest->counts[1], dest->elems[1],
696 	 dest->kids[2], dest->counts[2], dest->elems[2],
697 	 dest->kids[3], dest->counts[3]));
698     LOG(("    src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
699 	 src,
700 	 src->kids[0], src->counts[0], src->elems[0],
701 	 src->kids[1], src->counts[1], src->elems[1],
702 	 src->kids[2], src->counts[2], src->elems[2],
703 	 src->kids[3], src->counts[3]));
704 
705     /* where in dest to put it */
706     i = (dest->elems[1] ? 2 : dest->elems[0] ? 1 : 0);
707     dest->elems[i] = n->elems[ki-1];
708     n->elems[ki-1] = src->elems[0];
709 
710     dest->kids[i+1] = src->kids[0];   dest->counts[i+1] = src->counts[0];
711 
712     if (dest->kids[i+1]) dest->kids[i+1]->parent = dest;
713 
714     /*
715      * Move over the rest of the source node.
716      */
717     src->kids[0] = src->kids[1];      src->counts[0] = src->counts[1];
718     src->elems[0] = src->elems[1];
719     src->kids[1] = src->kids[2];      src->counts[1] = src->counts[2];
720     src->elems[1] = src->elems[2];
721     src->kids[2] = src->kids[3];      src->counts[2] = src->counts[3];
722     src->elems[2] = NULL;
723     src->kids[3] = NULL;              src->counts[3] = 0;
724 
725     adjust = dest->counts[i+1] + 1;
726 
727     n->counts[ki] -= adjust;
728     n->counts[ki-1] += adjust;
729 
730     if (k) {
731 	LOG(("    before: k,index = %d,%d\n", (*k), (*index)));
732 	if ((*k) == ki) {
733 	    (*index) -= adjust;
734 	    if ((*index) < 0) {
735 		(*index) += n->counts[ki-1] + 1;
736 		(*k)--;
737 	    }
738 	}
739 	LOG(("    after: k,index = %d,%d\n", (*k), (*index)));
740     }
741 
742     LOG(("    parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
743 	 n,
744 	 n->kids[0], n->counts[0], n->elems[0],
745 	 n->kids[1], n->counts[1], n->elems[1],
746 	 n->kids[2], n->counts[2], n->elems[2],
747 	 n->kids[3], n->counts[3]));
748     LOG(("    dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
749 	 dest,
750 	 dest->kids[0], dest->counts[0], dest->elems[0],
751 	 dest->kids[1], dest->counts[1], dest->elems[1],
752 	 dest->kids[2], dest->counts[2], dest->elems[2],
753 	 dest->kids[3], dest->counts[3]));
754     LOG(("    src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
755 	 src,
756 	 src->kids[0], src->counts[0], src->elems[0],
757 	 src->kids[1], src->counts[1], src->elems[1],
758 	 src->kids[2], src->counts[2], src->elems[2],
759 	 src->kids[3], src->counts[3]));
760 }
761 
762 /*
763  * Tree transformation used in delete and split: merge child nodes
764  * ki and ki+1 of a node. Update k and index so that they still
765  * point to the same place in the transformed tree. Assumes both
766  * children _are_ sufficiently small.
767  *
768  *      . B .                .
769  *     /     \     ->        |
770  *  a A b   c C d      a A b B c C d
771  *
772  * This routine can also cope with either child being undersized:
773  *
774  *     . A .                 .
775  *    /     \      ->        |
776  *   a     b B c         a A b B c
777  *
778  *    . A .                  .
779  *   /     \       ->        |
780  *  a   b B c C d      a A b B c C d
781  */
trans234_subtree_merge(node234 * n,int ki,int * k,int * index)782 static void trans234_subtree_merge(node234 *n, int ki, int *k, int *index) {
783     node234 *left, *right;
784     int i, leftlen, rightlen, lsize, rsize;
785 
786     left = n->kids[ki];               leftlen = n->counts[ki];
787     right = n->kids[ki+1];            rightlen = n->counts[ki+1];
788 
789     LOG(("  trans234_subtree_merge(%p, %d):\n", n, ki));
790     LOG(("    parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
791 	 n,
792 	 n->kids[0], n->counts[0], n->elems[0],
793 	 n->kids[1], n->counts[1], n->elems[1],
794 	 n->kids[2], n->counts[2], n->elems[2],
795 	 n->kids[3], n->counts[3]));
796     LOG(("    left %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
797 	 left,
798 	 left->kids[0], left->counts[0], left->elems[0],
799 	 left->kids[1], left->counts[1], left->elems[1],
800 	 left->kids[2], left->counts[2], left->elems[2],
801 	 left->kids[3], left->counts[3]));
802     LOG(("    right %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
803 	 right,
804 	 right->kids[0], right->counts[0], right->elems[0],
805 	 right->kids[1], right->counts[1], right->elems[1],
806 	 right->kids[2], right->counts[2], right->elems[2],
807 	 right->kids[3], right->counts[3]));
808 
809     assert(!left->elems[2] && !right->elems[2]);   /* neither is large! */
810     lsize = (left->elems[1] ? 2 : left->elems[0] ? 1 : 0);
811     rsize = (right->elems[1] ? 2 : right->elems[0] ? 1 : 0);
812 
813     left->elems[lsize] = n->elems[ki];
814 
815     for (i = 0; i < rsize+1; i++) {
816 	left->kids[lsize+1+i] = right->kids[i];
817 	left->counts[lsize+1+i] = right->counts[i];
818 	if (left->kids[lsize+1+i])
819 	    left->kids[lsize+1+i]->parent = left;
820 	if (i < rsize)
821 	    left->elems[lsize+1+i] = right->elems[i];
822     }
823 
824     n->counts[ki] += rightlen + 1;
825 
826     sfree(right);
827 
828     /*
829      * Move the rest of n up by one.
830      */
831     for (i = ki+1; i < 3; i++) {
832 	n->kids[i] = n->kids[i+1];
833 	n->counts[i] = n->counts[i+1];
834     }
835     for (i = ki; i < 2; i++) {
836 	n->elems[i] = n->elems[i+1];
837     }
838     n->kids[3] = NULL;
839     n->counts[3] = 0;
840     n->elems[2] = NULL;
841 
842     if (k) {
843 	LOG(("    before: k,index = %d,%d\n", (*k), (*index)));
844 	if ((*k) == ki+1) {
845 	    (*k)--;
846 	    (*index) += leftlen + 1;
847 	} else if ((*k) > ki+1) {
848 	    (*k)--;
849 	}
850 	LOG(("    after: k,index = %d,%d\n", (*k), (*index)));
851     }
852 
853     LOG(("    parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
854 	 n,
855 	 n->kids[0], n->counts[0], n->elems[0],
856 	 n->kids[1], n->counts[1], n->elems[1],
857 	 n->kids[2], n->counts[2], n->elems[2],
858 	 n->kids[3], n->counts[3]));
859     LOG(("    merged %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
860 	 left,
861 	 left->kids[0], left->counts[0], left->elems[0],
862 	 left->kids[1], left->counts[1], left->elems[1],
863 	 left->kids[2], left->counts[2], left->elems[2],
864 	 left->kids[3], left->counts[3]));
865 
866 }
867 
868 /*
869  * Delete an element e in a 2-3-4 tree. Does not free the element,
870  * merely removes all links to it from the tree nodes.
871  */
delpos234_internal(tree234 * t,int index)872 static void *delpos234_internal(tree234 *t, int index) {
873     node234 *n;
874     void *retval;
875     int ki, i;
876 
877     retval = NULL;
878 
879     n = t->root;		       /* by assumption this is non-NULL */
880     LOG(("deleting item %d from tree %p\n", index, t));
881     while (1) {
882 	node234 *sub;
883 
884 	LOG(("  node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d index=%d\n",
885 	     n,
886 	     n->kids[0], n->counts[0], n->elems[0],
887 	     n->kids[1], n->counts[1], n->elems[1],
888 	     n->kids[2], n->counts[2], n->elems[2],
889 	     n->kids[3], n->counts[3],
890 	     index));
891 	if (index <= n->counts[0]) {
892 	    ki = 0;
893 	} else if (index -= n->counts[0]+1, index <= n->counts[1]) {
894 	    ki = 1;
895 	} else if (index -= n->counts[1]+1, index <= n->counts[2]) {
896 	    ki = 2;
897 	} else if (index -= n->counts[2]+1, index <= n->counts[3]) {
898 	    ki = 3;
899 	} else {
900 	    assert(0);		       /* can't happen */
901 	}
902 
903 	if (!n->kids[0])
904 	    break;		       /* n is a leaf node; we're here! */
905 
906 	/*
907 	 * Check to see if we've found our target element. If so,
908 	 * we must choose a new target (we'll use the old target's
909 	 * successor, which will be in a leaf), move it into the
910 	 * place of the old one, continue down to the leaf and
911 	 * delete the old copy of the new target.
912 	 */
913 	if (index == n->counts[ki]) {
914 	    node234 *m;
915 	    LOG(("  found element in internal node, index %d\n", ki));
916 	    assert(n->elems[ki]);      /* must be a kid _before_ an element */
917 	    ki++; index = 0;
918 	    for (m = n->kids[ki]; m->kids[0]; m = m->kids[0])
919 		continue;
920 	    LOG(("  replacing with element \"%s\" from leaf node %p\n",
921 		 m->elems[0], m));
922 	    retval = n->elems[ki-1];
923 	    n->elems[ki-1] = m->elems[0];
924 	}
925 
926 	/*
927 	 * Recurse down to subtree ki. If it has only one element,
928 	 * we have to do some transformation to start with.
929 	 */
930 	LOG(("  moving to subtree %d\n", ki));
931 	sub = n->kids[ki];
932 	if (!sub->elems[1]) {
933 	    LOG(("  subtree has only one element!\n"));
934 	    if (ki > 0 && n->kids[ki-1]->elems[1]) {
935 		/*
936 		 * Child ki has only one element, but child
937 		 * ki-1 has two or more. So we need to move a
938 		 * subtree from ki-1 to ki.
939 		 */
940 		trans234_subtree_right(n, ki-1, &ki, &index);
941 	    } else if (ki < 3 && n->kids[ki+1] &&
942 		       n->kids[ki+1]->elems[1]) {
943 		/*
944 		 * Child ki has only one element, but ki+1 has
945 		 * two or more. Move a subtree from ki+1 to ki.
946 		 */
947 		trans234_subtree_left(n, ki+1, &ki, &index);
948 	    } else {
949 		/*
950 		 * ki is small with only small neighbours. Pick a
951 		 * neighbour and merge with it.
952 		 */
953 		trans234_subtree_merge(n, ki>0 ? ki-1 : ki, &ki, &index);
954 		sub = n->kids[ki];
955 
956 		if (!n->elems[0]) {
957 		    /*
958 		     * The root is empty and needs to be
959 		     * removed.
960 		     */
961 		    LOG(("  shifting root!\n"));
962 		    t->root = sub;
963 		    sub->parent = NULL;
964 		    sfree(n);
965 		    n = NULL;
966 		}
967 	    }
968 	}
969 
970 	if (n)
971 	    n->counts[ki]--;
972 	n = sub;
973     }
974 
975     /*
976      * Now n is a leaf node, and ki marks the element number we
977      * want to delete. We've already arranged for the leaf to be
978      * bigger than minimum size, so let's just go to it.
979      */
980     assert(!n->kids[0]);
981     if (!retval)
982 	retval = n->elems[ki];
983 
984     for (i = ki; i < 2 && n->elems[i+1]; i++)
985 	n->elems[i] = n->elems[i+1];
986     n->elems[i] = NULL;
987 
988     /*
989      * It's just possible that we have reduced the leaf to zero
990      * size. This can only happen if it was the root - so destroy
991      * it and make the tree empty.
992      */
993     if (!n->elems[0]) {
994 	LOG(("  removed last element in tree, destroying empty root\n"));
995 	assert(n == t->root);
996 	sfree(n);
997 	t->root = NULL;
998     }
999 
1000     return retval;		       /* finished! */
1001 }
delpos234(tree234 * t,int index)1002 void *delpos234(tree234 *t, int index) {
1003     if (index < 0 || index >= countnode234(t->root))
1004 	return NULL;
1005     return delpos234_internal(t, index);
1006 }
del234(tree234 * t,void * e)1007 void *del234(tree234 *t, void *e) {
1008     int index;
1009     if (!findrelpos234(t, e, NULL, REL234_EQ, &index))
1010 	return NULL;		       /* it wasn't in there anyway */
1011     return delpos234_internal(t, index); /* it's there; delete it. */
1012 }
1013 
1014 /*
1015  * Join two subtrees together with a separator element between
1016  * them, given their relative height.
1017  *
1018  * (Height<0 means the left tree is shorter, >0 means the right
1019  * tree is shorter, =0 means (duh) they're equal.)
1020  *
1021  * It is assumed that any checks needed on the ordering criterion
1022  * have _already_ been done.
1023  *
1024  * The value returned in `height' is 0 or 1 depending on whether the
1025  * resulting tree is the same height as the original larger one, or
1026  * one higher.
1027  */
join234_internal(node234 * left,void * sep,node234 * right,int * height)1028 static node234 *join234_internal(node234 *left, void *sep,
1029 				 node234 *right, int *height) {
1030     node234 *root, *node;
1031     int relht = *height;
1032     int ki;
1033 
1034     LOG(("  join: joining %p \"%s\" %p, relative height is %d\n",
1035 	 left, sep, right, relht));
1036     if (relht == 0) {
1037 	/*
1038 	 * The trees are the same height. Create a new one-element
1039 	 * root containing the separator and pointers to the two
1040 	 * nodes.
1041 	 */
1042 	node234 *newroot;
1043 	newroot = snew(node234);
1044 	newroot->kids[0] = left;     newroot->counts[0] = countnode234(left);
1045 	newroot->elems[0] = sep;
1046 	newroot->kids[1] = right;    newroot->counts[1] = countnode234(right);
1047 	newroot->elems[1] = NULL;
1048 	newroot->kids[2] = NULL;     newroot->counts[2] = 0;
1049 	newroot->elems[2] = NULL;
1050 	newroot->kids[3] = NULL;     newroot->counts[3] = 0;
1051 	newroot->parent = NULL;
1052 	if (left) left->parent = newroot;
1053 	if (right) right->parent = newroot;
1054 	*height = 1;
1055 	LOG(("  join: same height, brand new root\n"));
1056 	return newroot;
1057     }
1058 
1059     /*
1060      * This now works like the addition algorithm on the larger
1061      * tree. We're replacing a single kid pointer with two kid
1062      * pointers separated by an element; if that causes the node to
1063      * overload, we split it in two, move a separator element up to
1064      * the next node, and repeat.
1065      */
1066     if (relht < 0) {
1067 	/*
1068 	 * Left tree is shorter. Search down the right tree to find
1069 	 * the pointer we're inserting at.
1070 	 */
1071 	node = root = right;
1072 	while (++relht < 0) {
1073 	    node = node->kids[0];
1074 	}
1075 	ki = 0;
1076 	right = node->kids[ki];
1077     } else {
1078 	/*
1079 	 * Right tree is shorter; search down the left to find the
1080 	 * pointer we're inserting at.
1081 	 */
1082 	node = root = left;
1083 	while (--relht > 0) {
1084 	    if (node->elems[2])
1085 		node = node->kids[3];
1086 	    else if (node->elems[1])
1087 		node = node->kids[2];
1088 	    else
1089 		node = node->kids[1];
1090 	}
1091 	if (node->elems[2])
1092 	    ki = 3;
1093 	else if (node->elems[1])
1094 	    ki = 2;
1095 	else
1096 	    ki = 1;
1097 	left = node->kids[ki];
1098     }
1099 
1100     /*
1101      * Now proceed as for addition.
1102      */
1103     *height = add234_insert(left, sep, right, &root, node, ki);
1104 
1105     return root;
1106 }
height234(tree234 * t)1107 static int height234(tree234 *t) {
1108     int level = 0;
1109     node234 *n = t->root;
1110     while (n) {
1111 	level++;
1112 	n = n->kids[0];
1113     }
1114     return level;
1115 }
join234(tree234 * t1,tree234 * t2)1116 tree234 *join234(tree234 *t1, tree234 *t2) {
1117     int size2 = countnode234(t2->root);
1118     if (size2 > 0) {
1119 	void *element;
1120 	int relht;
1121 
1122 	if (t1->cmp) {
1123 	    element = index234(t2, 0);
1124 	    element = findrelpos234(t1, element, NULL, REL234_GE, NULL);
1125 	    if (element)
1126 		return NULL;
1127 	}
1128 
1129 	element = delpos234(t2, 0);
1130 	relht = height234(t1) - height234(t2);
1131 	t1->root = join234_internal(t1->root, element, t2->root, &relht);
1132 	t2->root = NULL;
1133     }
1134     return t1;
1135 }
join234r(tree234 * t1,tree234 * t2)1136 tree234 *join234r(tree234 *t1, tree234 *t2) {
1137     int size1 = countnode234(t1->root);
1138     if (size1 > 0) {
1139 	void *element;
1140 	int relht;
1141 
1142 	if (t2->cmp) {
1143 	    element = index234(t1, size1-1);
1144 	    element = findrelpos234(t2, element, NULL, REL234_LE, NULL);
1145 	    if (element)
1146 		return NULL;
1147 	}
1148 
1149 	element = delpos234(t1, size1-1);
1150 	relht = height234(t1) - height234(t2);
1151 	t2->root = join234_internal(t1->root, element, t2->root, &relht);
1152 	t1->root = NULL;
1153     }
1154     return t2;
1155 }
1156 
1157 /*
1158  * Split out the first <index> elements in a tree and return a
1159  * pointer to the root node. Leave the root node of the remainder
1160  * in t.
1161  */
split234_internal(tree234 * t,int index)1162 static node234 *split234_internal(tree234 *t, int index) {
1163     node234 *halves[2], *n, *sib, *sub;
1164     node234 *lparent, *rparent;
1165     int ki, pki, i, half, lcount, rcount;
1166 
1167     n = t->root;
1168     LOG(("splitting tree %p at point %d\n", t, index));
1169 
1170     /*
1171      * Easy special cases. After this we have also dealt completely
1172      * with the empty-tree case and we can assume the root exists.
1173      */
1174     if (index == 0)		       /* return nothing */
1175 	return NULL;
1176     if (index == countnode234(t->root)) {   /* return the whole tree */
1177 	node234 *ret = t->root;
1178 	t->root = NULL;
1179 	return ret;
1180     }
1181 
1182     /*
1183      * Search down the tree to find the split point.
1184      */
1185     lparent = rparent = NULL;
1186     while (n) {
1187 	LOG(("  node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d index=%d\n",
1188 	     n,
1189 	     n->kids[0], n->counts[0], n->elems[0],
1190 	     n->kids[1], n->counts[1], n->elems[1],
1191 	     n->kids[2], n->counts[2], n->elems[2],
1192 	     n->kids[3], n->counts[3],
1193 	     index));
1194 	lcount = index;
1195 	rcount = countnode234(n) - lcount;
1196 	if (index <= n->counts[0]) {
1197 	    ki = 0;
1198 	} else if (index -= n->counts[0]+1, index <= n->counts[1]) {
1199 	    ki = 1;
1200 	} else if (index -= n->counts[1]+1, index <= n->counts[2]) {
1201 	    ki = 2;
1202 	} else {
1203 	    index -= n->counts[2]+1;
1204 	    ki = 3;
1205 	}
1206 
1207 	LOG(("  splitting at subtree %d\n", ki));
1208 	sub = n->kids[ki];
1209 
1210 	LOG(("  splitting at child index %d\n", ki));
1211 
1212 	/*
1213 	 * Split the node, put halves[0] on the right of the left
1214 	 * one and halves[1] on the left of the right one, put the
1215 	 * new node pointers in halves[0] and halves[1], and go up
1216 	 * a level.
1217 	 */
1218 	sib = snew(node234);
1219 	for (i = 0; i < 3; i++) {
1220 	    if (i+ki < 3 && n->elems[i+ki]) {
1221 		sib->elems[i] = n->elems[i+ki];
1222 		sib->kids[i+1] = n->kids[i+ki+1];
1223 		if (sib->kids[i+1]) sib->kids[i+1]->parent = sib;
1224 		sib->counts[i+1] = n->counts[i+ki+1];
1225 		n->elems[i+ki] = NULL;
1226 		n->kids[i+ki+1] = NULL;
1227 		n->counts[i+ki+1] = 0;
1228 	    } else {
1229 		sib->elems[i] = NULL;
1230 		sib->kids[i+1] = NULL;
1231 		sib->counts[i+1] = 0;
1232 	    }
1233 	}
1234 	if (lparent) {
1235 	    lparent->kids[pki] = n;
1236 	    lparent->counts[pki] = lcount;
1237 	    n->parent = lparent;
1238 	    rparent->kids[0] = sib;
1239 	    rparent->counts[0] = rcount;
1240 	    sib->parent = rparent;
1241 	} else {
1242 	    halves[0] = n;
1243 	    n->parent = NULL;
1244 	    halves[1] = sib;
1245 	    sib->parent = NULL;
1246 	}
1247 	lparent = n;
1248 	rparent = sib;
1249 	pki = ki;
1250 	LOG(("  left node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
1251 	     n,
1252 	     n->kids[0], n->counts[0], n->elems[0],
1253 	     n->kids[1], n->counts[1], n->elems[1],
1254 	     n->kids[2], n->counts[2], n->elems[2],
1255 	     n->kids[3], n->counts[3]));
1256 	LOG(("  right node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
1257 	     sib,
1258 	     sib->kids[0], sib->counts[0], sib->elems[0],
1259 	     sib->kids[1], sib->counts[1], sib->elems[1],
1260 	     sib->kids[2], sib->counts[2], sib->elems[2],
1261 	     sib->kids[3], sib->counts[3]));
1262 
1263 	n = sub;
1264     }
1265 
1266     /*
1267      * We've come off the bottom here, so we've successfully split
1268      * the tree into two equally high subtrees. The only problem is
1269      * that some of the nodes down the fault line will be smaller
1270      * than the minimum permitted size. (Since this is a 2-3-4
1271      * tree, that means they'll be zero-element one-child nodes.)
1272      */
1273     LOG(("  fell off bottom, lroot is %p, rroot is %p\n",
1274 	 halves[0], halves[1]));
1275     lparent->counts[pki] = rparent->counts[0] = 0;
1276     lparent->kids[pki] = rparent->kids[0] = NULL;
1277 
1278     /*
1279      * So now we go back down the tree from each of the two roots,
1280      * fixing up undersize nodes.
1281      */
1282     for (half = 0; half < 2; half++) {
1283 	/*
1284 	 * Remove the root if it's undersize (it will contain only
1285 	 * one child pointer, so just throw it away and replace it
1286 	 * with its child). This might happen several times.
1287 	 */
1288 	while (halves[half] && !halves[half]->elems[0]) {
1289 	    LOG(("  root %p is undersize, throwing away\n", halves[half]));
1290 	    halves[half] = halves[half]->kids[0];
1291 	    sfree(halves[half]->parent);
1292 	    halves[half]->parent = NULL;
1293 	    LOG(("  new root is %p\n", halves[half]));
1294 	}
1295 
1296 	n = halves[half];
1297 	while (n) {
1298 	    void (*toward)(node234 *n, int ki, int *k, int *index);
1299 	    int ni, merge;
1300 
1301 	    /*
1302 	     * Now we have a potentially undersize node on the
1303 	     * right (if half==0) or left (if half==1). Sort it
1304 	     * out, by merging with a neighbour or by transferring
1305 	     * subtrees over. At this time we must also ensure that
1306 	     * nodes are bigger than minimum, in case we need an
1307 	     * element to merge two nodes below.
1308 	     */
1309 	    LOG(("  node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
1310 		 n,
1311 		 n->kids[0], n->counts[0], n->elems[0],
1312 		 n->kids[1], n->counts[1], n->elems[1],
1313 		 n->kids[2], n->counts[2], n->elems[2],
1314 		 n->kids[3], n->counts[3]));
1315 	    if (half == 1) {
1316 		ki = 0;		       /* the kid we're interested in */
1317 		ni = 1;		       /* the neighbour */
1318 		merge = 0;	       /* for merge: leftmost of the two */
1319 		toward = trans234_subtree_left;
1320 	    } else {
1321 		ki = (n->kids[3] ? 3 : n->kids[2] ? 2 : 1);
1322 		ni = ki-1;
1323 		merge = ni;
1324 		toward = trans234_subtree_right;
1325 	    }
1326 
1327 	    sub = n->kids[ki];
1328 	    if (sub && !sub->elems[1]) {
1329 		/*
1330 		 * This node is undersized or minimum-size. If we
1331 		 * can merge it with its neighbour, we do so;
1332 		 * otherwise we must be able to transfer subtrees
1333 		 * over to it until it is greater than minimum
1334 		 * size.
1335 		 */
1336 		int undersized = (!sub->elems[0]);
1337 		LOG(("  child %d is %ssize\n", ki,
1338 		     undersized ? "under" : "minimum-"));
1339 		LOG(("  neighbour is %s\n",
1340 		     n->kids[ni]->elems[2] ? "large" :
1341 		     n->kids[ni]->elems[1] ? "medium" : "small"));
1342 		if (!n->kids[ni]->elems[1] ||
1343 		    (undersized && !n->kids[ni]->elems[2])) {
1344 		    /*
1345 		     * Neighbour is small, or possibly neighbour is
1346 		     * medium and we are undersize.
1347 		     */
1348 		    trans234_subtree_merge(n, merge, NULL, NULL);
1349 		    sub = n->kids[merge];
1350 		    if (!n->elems[0]) {
1351 			/*
1352 			 * n is empty, and hence must have been the
1353 			 * root and needs to be removed.
1354 			 */
1355 			assert(!n->parent);
1356 			LOG(("  shifting root!\n"));
1357 			halves[half] = sub;
1358 			halves[half]->parent = NULL;
1359 			sfree(n);
1360 		    }
1361 		} else {
1362 		    /* Neighbour is big enough to move trees over. */
1363 		    toward(n, ni, NULL, NULL);
1364 		    if (undersized)
1365 			toward(n, ni, NULL, NULL);
1366 		}
1367 	    }
1368 	    n = sub;
1369 	}
1370     }
1371 
1372     t->root = halves[1];
1373     return halves[0];
1374 }
splitpos234(tree234 * t,int index,int before)1375 tree234 *splitpos234(tree234 *t, int index, int before) {
1376     tree234 *ret;
1377     node234 *n;
1378     int count;
1379 
1380     count = countnode234(t->root);
1381     if (index < 0 || index > count)
1382 	return NULL;		       /* error */
1383     ret = newtree234(t->cmp);
1384     n = split234_internal(t, index);
1385     if (before) {
1386 	/* We want to return the ones before the index. */
1387 	ret->root = n;
1388     } else {
1389 	/*
1390 	 * We want to keep the ones before the index and return the
1391 	 * ones after.
1392 	 */
1393 	ret->root = t->root;
1394 	t->root = n;
1395     }
1396     return ret;
1397 }
split234(tree234 * t,void * e,cmpfn234 cmp,int rel)1398 tree234 *split234(tree234 *t, void *e, cmpfn234 cmp, int rel) {
1399     int before;
1400     int index;
1401 
1402     assert(rel != REL234_EQ);
1403 
1404     if (rel == REL234_GT || rel == REL234_GE) {
1405 	before = 1;
1406 	rel = (rel == REL234_GT ? REL234_LE : REL234_LT);
1407     } else {
1408 	before = 0;
1409     }
1410     if (!findrelpos234(t, e, cmp, rel, &index))
1411 	index = 0;
1412 
1413     return splitpos234(t, index+1, before);
1414 }
1415 
copynode234(node234 * n,copyfn234 copyfn,void * copyfnstate)1416 static node234 *copynode234(node234 *n, copyfn234 copyfn, void *copyfnstate) {
1417     int i;
1418     node234 *n2 = snew(node234);
1419 
1420     for (i = 0; i < 3; i++) {
1421 	if (n->elems[i] && copyfn)
1422 	    n2->elems[i] = copyfn(copyfnstate, n->elems[i]);
1423 	else
1424 	    n2->elems[i] = n->elems[i];
1425     }
1426 
1427     for (i = 0; i < 4; i++) {
1428 	if (n->kids[i]) {
1429 	    n2->kids[i] = copynode234(n->kids[i], copyfn, copyfnstate);
1430 	    n2->kids[i]->parent = n2;
1431 	} else {
1432 	    n2->kids[i] = NULL;
1433 	}
1434 	n2->counts[i] = n->counts[i];
1435     }
1436 
1437     return n2;
1438 }
copytree234(tree234 * t,copyfn234 copyfn,void * copyfnstate)1439 tree234 *copytree234(tree234 *t, copyfn234 copyfn, void *copyfnstate) {
1440     tree234 *t2;
1441 
1442     t2 = newtree234(t->cmp);
1443     if (t->root) {
1444 	t2->root = copynode234(t->root, copyfn, copyfnstate);
1445 	t2->root->parent = NULL;
1446     } else
1447 	t2->root = NULL;
1448 
1449     return t2;
1450 }
1451 
1452 #ifdef TEST
1453 
1454 /*
1455  * Test code for the 2-3-4 tree. This code maintains an alternative
1456  * representation of the data in the tree, in an array (using the
1457  * obvious and slow insert and delete functions). After each tree
1458  * operation, the verify() function is called, which ensures all
1459  * the tree properties are preserved:
1460  *  - node->child->parent always equals node
1461  *  - tree->root->parent always equals NULL
1462  *  - number of kids == 0 or number of elements + 1;
1463  *  - tree has the same depth everywhere
1464  *  - every node has at least one element
1465  *  - subtree element counts are accurate
1466  *  - any NULL kid pointer is accompanied by a zero count
1467  *  - in a sorted tree: ordering property between elements of a
1468  *    node and elements of its children is preserved
1469  * and also ensures the list represented by the tree is the same
1470  * list it should be. (This last check also doubly verifies the
1471  * ordering properties, because the `same list it should be' is by
1472  * definition correctly ordered. It also ensures all nodes are
1473  * distinct, because the enum functions would get caught in a loop
1474  * if not.)
1475  */
1476 
1477 #include <stdarg.h>
1478 
1479 #define srealloc realloc
1480 
1481 /*
1482  * Error reporting function.
1483  */
error(char * fmt,...)1484 void error(char *fmt, ...) {
1485     va_list ap;
1486     printf("ERROR: ");
1487     va_start(ap, fmt);
1488     vfprintf(stdout, fmt, ap);
1489     va_end(ap);
1490     printf("\n");
1491 }
1492 
1493 /* The array representation of the data. */
1494 void **array;
1495 int arraylen, arraysize;
1496 cmpfn234 cmp;
1497 
1498 /* The tree representation of the same data. */
1499 tree234 *tree;
1500 
1501 /*
1502  * Routines to provide a diagnostic printout of a tree. Currently
1503  * relies on every element in the tree being a one-character string
1504  * :-)
1505  */
1506 typedef struct {
1507     char **levels;
1508 } dispctx;
1509 
dispnode(node234 * n,int level,dispctx * ctx)1510 int dispnode(node234 *n, int level, dispctx *ctx) {
1511     if (level == 0) {
1512 	int xpos = strlen(ctx->levels[0]);
1513 	int len;
1514 
1515 	if (n->elems[2])
1516 	    len = sprintf(ctx->levels[0]+xpos, " %s%s%s",
1517 			  n->elems[0], n->elems[1], n->elems[2]);
1518 	else if (n->elems[1])
1519 	    len = sprintf(ctx->levels[0]+xpos, " %s%s",
1520 			  n->elems[0], n->elems[1]);
1521 	else
1522 	    len = sprintf(ctx->levels[0]+xpos, " %s",
1523 			  n->elems[0]);
1524 	return xpos + 1 + (len-1) / 2;
1525     } else {
1526 	int xpos[4], nkids;
1527 	int nodelen, mypos, myleft, x, i;
1528 
1529 	xpos[0] = dispnode(n->kids[0], level-3, ctx);
1530 	xpos[1] = dispnode(n->kids[1], level-3, ctx);
1531 	nkids = 2;
1532 	if (n->kids[2]) {
1533 	    xpos[2] = dispnode(n->kids[2], level-3, ctx);
1534 	    nkids = 3;
1535 	}
1536 	if (n->kids[3]) {
1537 	    xpos[3] = dispnode(n->kids[3], level-3, ctx);
1538 	    nkids = 4;
1539 	}
1540 
1541 	if (nkids == 4)
1542 	    mypos = (xpos[1] + xpos[2]) / 2;
1543 	else if (nkids == 3)
1544 	    mypos = xpos[1];
1545 	else
1546 	    mypos = (xpos[0] + xpos[1]) / 2;
1547 	nodelen = nkids * 2 - 1;
1548 	myleft = mypos - ((nodelen-1)/2);
1549 	assert(myleft >= xpos[0]);
1550 	assert(myleft + nodelen-1 <= xpos[nkids-1]);
1551 
1552 	x = strlen(ctx->levels[level]);
1553 	while (x <= xpos[0] && x < myleft)
1554 	    ctx->levels[level][x++] = ' ';
1555 	while (x < myleft)
1556 	    ctx->levels[level][x++] = '_';
1557 	if (nkids==4)
1558 	    x += sprintf(ctx->levels[level]+x, ".%s.%s.%s.",
1559 			 n->elems[0], n->elems[1], n->elems[2]);
1560 	else if (nkids==3)
1561 	    x += sprintf(ctx->levels[level]+x, ".%s.%s.",
1562 			 n->elems[0], n->elems[1]);
1563 	else
1564 	    x += sprintf(ctx->levels[level]+x, ".%s.",
1565 			 n->elems[0]);
1566 	while (x < xpos[nkids-1])
1567 	    ctx->levels[level][x++] = '_';
1568 	ctx->levels[level][x] = '\0';
1569 
1570 	x = strlen(ctx->levels[level-1]);
1571 	for (i = 0; i < nkids; i++) {
1572 	    int rpos, pos;
1573 	    rpos = xpos[i];
1574 	    if (i > 0 && i < nkids-1)
1575 		pos = myleft + 2*i;
1576 	    else
1577 		pos = rpos;
1578 	    if (rpos < pos)
1579 		rpos++;
1580 	    while (x < pos && x < rpos)
1581 		ctx->levels[level-1][x++] = ' ';
1582 	    if (x == pos)
1583 		ctx->levels[level-1][x++] = '|';
1584 	    while (x < pos || x < rpos)
1585 		ctx->levels[level-1][x++] = '_';
1586 	    if (x == pos)
1587 		ctx->levels[level-1][x++] = '|';
1588 	}
1589 	ctx->levels[level-1][x] = '\0';
1590 
1591 	x = strlen(ctx->levels[level-2]);
1592 	for (i = 0; i < nkids; i++) {
1593 	    int rpos = xpos[i];
1594 
1595 	    while (x < rpos)
1596 		ctx->levels[level-2][x++] = ' ';
1597 	    ctx->levels[level-2][x++] = '|';
1598 	}
1599 	ctx->levels[level-2][x] = '\0';
1600 
1601 	return mypos;
1602     }
1603 }
1604 
disptree(tree234 * t)1605 void disptree(tree234 *t) {
1606     dispctx ctx;
1607     char *leveldata;
1608     int width = count234(t);
1609     int ht = height234(t) * 3 - 2;
1610     int i;
1611 
1612     if (!t->root) {
1613 	printf("[empty tree]\n");
1614     }
1615 
1616     leveldata = smalloc(ht * (width+2));
1617     ctx.levels = smalloc(ht * sizeof(char *));
1618     for (i = 0; i < ht; i++) {
1619 	ctx.levels[i] = leveldata + i * (width+2);
1620 	ctx.levels[i][0] = '\0';
1621     }
1622 
1623     (void) dispnode(t->root, ht-1, &ctx);
1624 
1625     for (i = ht; i-- ;)
1626 	printf("%s\n", ctx.levels[i]);
1627 
1628     sfree(ctx.levels);
1629     sfree(leveldata);
1630 }
1631 
1632 typedef struct {
1633     int treedepth;
1634     int elemcount;
1635 } chkctx;
1636 
chknode(chkctx * ctx,int level,node234 * node,void * lowbound,void * highbound)1637 int chknode(chkctx *ctx, int level, node234 *node,
1638 	    void *lowbound, void *highbound) {
1639     int nkids, nelems;
1640     int i;
1641     int count;
1642 
1643     /* Count the non-NULL kids. */
1644     for (nkids = 0; nkids < 4 && node->kids[nkids]; nkids++);
1645     /* Ensure no kids beyond the first NULL are non-NULL. */
1646     for (i = nkids; i < 4; i++)
1647         if (node->kids[i]) {
1648             error("node %p: nkids=%d but kids[%d] non-NULL",
1649                    node, nkids, i);
1650         } else if (node->counts[i]) {
1651             error("node %p: kids[%d] NULL but count[%d]=%d nonzero",
1652                    node, i, i, node->counts[i]);
1653 	}
1654 
1655     /* Count the non-NULL elements. */
1656     for (nelems = 0; nelems < 3 && node->elems[nelems]; nelems++);
1657     /* Ensure no elements beyond the first NULL are non-NULL. */
1658     for (i = nelems; i < 3; i++)
1659         if (node->elems[i]) {
1660             error("node %p: nelems=%d but elems[%d] non-NULL",
1661                    node, nelems, i);
1662         }
1663 
1664     if (nkids == 0) {
1665         /*
1666          * If nkids==0, this is a leaf node; verify that the tree
1667          * depth is the same everywhere.
1668          */
1669         if (ctx->treedepth < 0)
1670             ctx->treedepth = level;    /* we didn't know the depth yet */
1671         else if (ctx->treedepth != level)
1672             error("node %p: leaf at depth %d, previously seen depth %d",
1673                    node, level, ctx->treedepth);
1674     } else {
1675         /*
1676          * If nkids != 0, then it should be nelems+1, unless nelems
1677          * is 0 in which case nkids should also be 0 (and so we
1678          * shouldn't be in this condition at all).
1679          */
1680         int shouldkids = (nelems ? nelems+1 : 0);
1681         if (nkids != shouldkids) {
1682             error("node %p: %d elems should mean %d kids but has %d",
1683                    node, nelems, shouldkids, nkids);
1684         }
1685     }
1686 
1687     /*
1688      * nelems should be at least 1.
1689      */
1690     if (nelems == 0) {
1691         error("node %p: no elems", node, nkids);
1692     }
1693 
1694     /*
1695      * Add nelems to the running element count of the whole tree.
1696      */
1697     ctx->elemcount += nelems;
1698 
1699     /*
1700      * Check ordering property: all elements should be strictly >
1701      * lowbound, strictly < highbound, and strictly < each other in
1702      * sequence. (lowbound and highbound are NULL at edges of tree
1703      * - both NULL at root node - and NULL is considered to be <
1704      * everything and > everything. IYSWIM.)
1705      */
1706     if (cmp) {
1707 	for (i = -1; i < nelems; i++) {
1708 	    void *lower = (i == -1 ? lowbound : node->elems[i]);
1709 	    void *higher = (i+1 == nelems ? highbound : node->elems[i+1]);
1710 	    if (lower && higher && cmp(lower, higher) >= 0) {
1711 		error("node %p: kid comparison [%d=%s,%d=%s] failed",
1712 		      node, i, lower, i+1, higher);
1713 	    }
1714 	}
1715     }
1716 
1717     /*
1718      * Check parent pointers: all non-NULL kids should have a
1719      * parent pointer coming back to this node.
1720      */
1721     for (i = 0; i < nkids; i++)
1722         if (node->kids[i]->parent != node) {
1723             error("node %p kid %d: parent ptr is %p not %p",
1724                    node, i, node->kids[i]->parent, node);
1725         }
1726 
1727 
1728     /*
1729      * Now (finally!) recurse into subtrees.
1730      */
1731     count = nelems;
1732 
1733     for (i = 0; i < nkids; i++) {
1734         void *lower = (i == 0 ? lowbound : node->elems[i-1]);
1735         void *higher = (i >= nelems ? highbound : node->elems[i]);
1736 	int subcount = chknode(ctx, level+1, node->kids[i], lower, higher);
1737 	if (node->counts[i] != subcount) {
1738 	    error("node %p kid %d: count says %d, subtree really has %d",
1739 		  node, i, node->counts[i], subcount);
1740 	}
1741         count += subcount;
1742     }
1743 
1744     return count;
1745 }
1746 
verifytree(tree234 * tree,void ** array,int arraylen)1747 void verifytree(tree234 *tree, void **array, int arraylen) {
1748     chkctx ctx;
1749     int i;
1750     void *p;
1751 
1752     ctx.treedepth = -1;                /* depth unknown yet */
1753     ctx.elemcount = 0;                 /* no elements seen yet */
1754     /*
1755      * Verify validity of tree properties.
1756      */
1757     if (tree->root) {
1758 	if (tree->root->parent != NULL)
1759 	    error("root->parent is %p should be null", tree->root->parent);
1760         chknode(&ctx, 0, tree->root, NULL, NULL);
1761     }
1762     printf("tree depth: %d\n", ctx.treedepth);
1763     /*
1764      * Enumerate the tree and ensure it matches up to the array.
1765      */
1766     for (i = 0; NULL != (p = index234(tree, i)); i++) {
1767         if (i >= arraylen)
1768             error("tree contains more than %d elements", arraylen);
1769         if (array[i] != p)
1770             error("enum at position %d: array says %s, tree says %s",
1771                    i, array[i], p);
1772     }
1773     if (ctx.elemcount != i) {
1774         error("tree really contains %d elements, enum gave %d",
1775                ctx.elemcount, i);
1776     }
1777     if (i < arraylen) {
1778         error("enum gave only %d elements, array has %d", i, arraylen);
1779     }
1780     i = count234(tree);
1781     if (ctx.elemcount != i) {
1782         error("tree really contains %d elements, count234 gave %d",
1783 	      ctx.elemcount, i);
1784     }
1785 }
verify(void)1786 void verify(void) { verifytree(tree, array, arraylen); }
1787 
internal_addtest(void * elem,int index,void * realret)1788 void internal_addtest(void *elem, int index, void *realret) {
1789     int i, j;
1790     void *retval;
1791 
1792     if (arraysize < arraylen+1) {
1793         arraysize = arraylen+1+256;
1794         array = (array == NULL ? smalloc(arraysize*sizeof(*array)) :
1795                  srealloc(array, arraysize*sizeof(*array)));
1796     }
1797 
1798     i = index;
1799     /* now i points to the first element >= elem */
1800     retval = elem;                  /* expect elem returned (success) */
1801     for (j = arraylen; j > i; j--)
1802 	array[j] = array[j-1];
1803     array[i] = elem;                /* add elem to array */
1804     arraylen++;
1805 
1806     if (realret != retval) {
1807         error("add: retval was %p expected %p", realret, retval);
1808     }
1809 
1810     verify();
1811 }
1812 
addtest(void * elem)1813 void addtest(void *elem) {
1814     int i;
1815     void *realret;
1816 
1817     realret = add234(tree, elem);
1818 
1819     i = 0;
1820     while (i < arraylen && cmp(elem, array[i]) > 0)
1821         i++;
1822     if (i < arraylen && !cmp(elem, array[i])) {
1823         void *retval = array[i];       /* expect that returned not elem */
1824 	if (realret != retval) {
1825 	    error("add: retval was %p expected %p", realret, retval);
1826 	}
1827     } else
1828 	internal_addtest(elem, i, realret);
1829 }
1830 
addpostest(void * elem,int i)1831 void addpostest(void *elem, int i) {
1832     void *realret;
1833 
1834     realret = addpos234(tree, elem, i);
1835 
1836     internal_addtest(elem, i, realret);
1837 }
1838 
delpostest(int i)1839 void delpostest(int i) {
1840     int index = i;
1841     void *elem = array[i], *ret;
1842 
1843     /* i points to the right element */
1844     while (i < arraylen-1) {
1845 	array[i] = array[i+1];
1846 	i++;
1847     }
1848     arraylen--;			       /* delete elem from array */
1849 
1850     if (tree->cmp)
1851 	ret = del234(tree, elem);
1852     else
1853 	ret = delpos234(tree, index);
1854 
1855     if (ret != elem) {
1856 	error("del returned %p, expected %p", ret, elem);
1857     }
1858 
1859     verify();
1860 }
1861 
deltest(void * elem)1862 void deltest(void *elem) {
1863     int i;
1864 
1865     i = 0;
1866     while (i < arraylen && cmp(elem, array[i]) > 0)
1867         i++;
1868     if (i >= arraylen || cmp(elem, array[i]) != 0)
1869         return;                        /* don't do it! */
1870     delpostest(i);
1871 }
1872 
1873 /* A sample data set and test utility. Designed for pseudo-randomness,
1874  * and yet repeatability. */
1875 
1876 /*
1877  * This random number generator uses the `portable implementation'
1878  * given in ANSI C99 draft N869. It assumes `unsigned' is 32 bits;
1879  * change it if not.
1880  */
randomnumber(unsigned * seed)1881 int randomnumber(unsigned *seed) {
1882     *seed *= 1103515245;
1883     *seed += 12345;
1884     return ((*seed) / 65536) % 32768;
1885 }
1886 
mycmp(void * av,void * bv)1887 int mycmp(void *av, void *bv) {
1888     char const *a = (char const *)av;
1889     char const *b = (char const *)bv;
1890     return strcmp(a, b);
1891 }
1892 
1893 #define lenof(x) ( sizeof((x)) / sizeof(*(x)) )
1894 
1895 char *strings[] = {
1896     "0", "2", "3", "I", "K", "d", "H", "J", "Q", "N", "n", "q", "j", "i",
1897     "7", "G", "F", "D", "b", "x", "g", "B", "e", "v", "V", "T", "f", "E",
1898     "S", "8", "A", "k", "X", "p", "C", "R", "a", "o", "r", "O", "Z", "u",
1899     "6", "1", "w", "L", "P", "M", "c", "U", "h", "9", "t", "5", "W", "Y",
1900     "m", "s", "l", "4",
1901 #if 0
1902     "a", "ab", "absque", "coram", "de",
1903     "palam", "clam", "cum", "ex", "e",
1904     "sine", "tenus", "pro", "prae",
1905     "banana", "carrot", "cabbage", "broccoli", "onion", "zebra",
1906     "penguin", "blancmange", "pangolin", "whale", "hedgehog",
1907     "giraffe", "peanut", "bungee", "foo", "bar", "baz", "quux",
1908     "murfl", "spoo", "breen", "flarn", "octothorpe",
1909     "snail", "tiger", "elephant", "octopus", "warthog", "armadillo",
1910     "aardvark", "wyvern", "dragon", "elf", "dwarf", "orc", "goblin",
1911     "pixie", "basilisk", "warg", "ape", "lizard", "newt", "shopkeeper",
1912     "wand", "ring", "amulet"
1913 #endif
1914 };
1915 
1916 #define NSTR lenof(strings)
1917 
findtest(void)1918 void findtest(void) {
1919     static const int rels[] = {
1920 	REL234_EQ, REL234_GE, REL234_LE, REL234_LT, REL234_GT
1921     };
1922     static const char *const relnames[] = {
1923 	"EQ", "GE", "LE", "LT", "GT"
1924     };
1925     int i, j, rel, index;
1926     char *p, *ret, *realret, *realret2;
1927     int lo, hi, mid, c;
1928 
1929     for (i = 0; i < (int)NSTR; i++) {
1930 	p = strings[i];
1931 	for (j = 0; j < (int)(sizeof(rels)/sizeof(*rels)); j++) {
1932 	    rel = rels[j];
1933 
1934 	    lo = 0; hi = arraylen-1;
1935 	    while (lo <= hi) {
1936 		mid = (lo + hi) / 2;
1937 		c = strcmp(p, array[mid]);
1938 		if (c < 0)
1939 		    hi = mid-1;
1940 		else if (c > 0)
1941 		    lo = mid+1;
1942 		else
1943 		    break;
1944 	    }
1945 
1946 	    if (c == 0) {
1947 		if (rel == REL234_LT)
1948 		    ret = (mid > 0 ? array[--mid] : NULL);
1949 		else if (rel == REL234_GT)
1950 		    ret = (mid < arraylen-1 ? array[++mid] : NULL);
1951 		else
1952 		    ret = array[mid];
1953 	    } else {
1954 		assert(lo == hi+1);
1955 		if (rel == REL234_LT || rel == REL234_LE) {
1956 		    mid = hi;
1957 		    ret = (hi >= 0 ? array[hi] : NULL);
1958 		} else if (rel == REL234_GT || rel == REL234_GE) {
1959 		    mid = lo;
1960 		    ret = (lo < arraylen ? array[lo] : NULL);
1961 		} else
1962 		    ret = NULL;
1963 	    }
1964 
1965 	    realret = findrelpos234(tree, p, NULL, rel, &index);
1966 	    if (realret != ret) {
1967 		error("find(\"%s\",%s) gave %s should be %s",
1968 		      p, relnames[j], realret, ret);
1969 	    }
1970 	    if (realret && index != mid) {
1971 		error("find(\"%s\",%s) gave %d should be %d",
1972 		      p, relnames[j], index, mid);
1973 	    }
1974 	    if (realret && rel == REL234_EQ) {
1975 		realret2 = index234(tree, index);
1976 		if (realret2 != realret) {
1977 		    error("find(\"%s\",%s) gave %s(%d) but %d -> %s",
1978 			  p, relnames[j], realret, index, index, realret2);
1979 		}
1980 	    }
1981 #if 0
1982 	    printf("find(\"%s\",%s) gave %s(%d)\n", p, relnames[j],
1983 		   realret, index);
1984 #endif
1985 	}
1986     }
1987 
1988     realret = findrelpos234(tree, NULL, NULL, REL234_GT, &index);
1989     if (arraylen && (realret != array[0] || index != 0)) {
1990 	error("find(NULL,GT) gave %s(%d) should be %s(0)",
1991 	      realret, index, array[0]);
1992     } else if (!arraylen && (realret != NULL)) {
1993 	error("find(NULL,GT) gave %s(%d) should be NULL",
1994 	      realret, index);
1995     }
1996 
1997     realret = findrelpos234(tree, NULL, NULL, REL234_LT, &index);
1998     if (arraylen && (realret != array[arraylen-1] || index != arraylen-1)) {
1999 	error("find(NULL,LT) gave %s(%d) should be %s(0)",
2000 	      realret, index, array[arraylen-1]);
2001     } else if (!arraylen && (realret != NULL)) {
2002 	error("find(NULL,LT) gave %s(%d) should be NULL",
2003 	      realret, index);
2004     }
2005 }
2006 
splittest(tree234 * tree,void ** array,int arraylen)2007 void splittest(tree234 *tree, void **array, int arraylen) {
2008     int i;
2009     tree234 *tree3, *tree4;
2010     for (i = 0; i <= arraylen; i++) {
2011 	tree3 = copytree234(tree, NULL, NULL);
2012 	tree4 = splitpos234(tree3, i, 0);
2013 	verifytree(tree3, array, i);
2014 	verifytree(tree4, array+i, arraylen-i);
2015 	join234(tree3, tree4);
2016 	freetree234(tree4);	       /* left empty by join */
2017 	verifytree(tree3, array, arraylen);
2018 	freetree234(tree3);
2019     }
2020 }
2021 
main(void)2022 int main(void) {
2023     int in[NSTR];
2024     int i, j, k;
2025     int tworoot, tmplen;
2026     unsigned seed = 0;
2027     tree234 *tree2, *tree3, *tree4;
2028     int c;
2029 
2030     setvbuf(stdout, NULL, _IOLBF, 0);
2031 
2032     for (i = 0; i < (int)NSTR; i++) in[i] = 0;
2033     array = NULL;
2034     arraylen = arraysize = 0;
2035     tree = newtree234(mycmp);
2036     cmp = mycmp;
2037 
2038     verify();
2039     for (i = 0; i < 10000; i++) {
2040         j = randomnumber(&seed);
2041         j %= NSTR;
2042         printf("trial: %d\n", i);
2043         if (in[j]) {
2044             printf("deleting %s (%d)\n", strings[j], j);
2045             deltest(strings[j]);
2046             in[j] = 0;
2047         } else {
2048             printf("adding %s (%d)\n", strings[j], j);
2049             addtest(strings[j]);
2050             in[j] = 1;
2051         }
2052 	disptree(tree);
2053 	findtest();
2054     }
2055 
2056     while (arraylen > 0) {
2057         j = randomnumber(&seed);
2058         j %= arraylen;
2059         deltest(array[j]);
2060     }
2061 
2062     freetree234(tree);
2063 
2064     /*
2065      * Now try an unsorted tree. We don't really need to test
2066      * delpos234 because we know del234 is based on it, so it's
2067      * already been tested in the above sorted-tree code; but for
2068      * completeness we'll use it to tear down our unsorted tree
2069      * once we've built it.
2070      */
2071     tree = newtree234(NULL);
2072     cmp = NULL;
2073     verify();
2074     for (i = 0; i < 1000; i++) {
2075 	printf("trial: %d\n", i);
2076 	j = randomnumber(&seed);
2077 	j %= NSTR;
2078 	k = randomnumber(&seed);
2079 	k %= count234(tree)+1;
2080 	printf("adding string %s at index %d\n", strings[j], k);
2081 	addpostest(strings[j], k);
2082     }
2083 
2084     /*
2085      * While we have this tree in its full form, we'll take a copy
2086      * of it to use in split and join testing.
2087      */
2088     tree2 = copytree234(tree, NULL, NULL);
2089     verifytree(tree2, array, arraylen);/* check the copy is accurate */
2090     /*
2091      * Split tests. Split the tree at every possible point and
2092      * check the resulting subtrees.
2093      */
2094     tworoot = (!tree2->root->elems[1]);/* see if it has a 2-root */
2095     splittest(tree2, array, arraylen);
2096     /*
2097      * Now do the split test again, but on a tree that has a 2-root
2098      * (if the previous one didn't) or doesn't (if the previous one
2099      * did).
2100      */
2101     tmplen = arraylen;
2102     while ((!tree2->root->elems[1]) == tworoot) {
2103 	delpos234(tree2, --tmplen);
2104     }
2105     printf("now trying splits on second tree\n");
2106     splittest(tree2, array, tmplen);
2107     freetree234(tree2);
2108 
2109     /*
2110      * Back to the main testing of uncounted trees.
2111      */
2112     while (count234(tree) > 0) {
2113 	printf("cleanup: tree size %d\n", count234(tree));
2114 	j = randomnumber(&seed);
2115 	j %= count234(tree);
2116 	printf("deleting string %s from index %d\n", (char *)array[j], j);
2117 	delpostest(j);
2118     }
2119     freetree234(tree);
2120 
2121     /*
2122      * Finally, do some testing on split/join on _sorted_ trees. At
2123      * the same time, we'll be testing split on very small trees.
2124      */
2125     tree = newtree234(mycmp);
2126     cmp = mycmp;
2127     arraylen = 0;
2128     for (i = 0; i < 17; i++) {
2129 	tree2 = copytree234(tree, NULL, NULL);
2130 	splittest(tree2, array, arraylen);
2131 	freetree234(tree2);
2132 	if (i < 16)
2133 	    addtest(strings[i]);
2134     }
2135     freetree234(tree);
2136 
2137     /*
2138      * Test silly cases of join: join(emptytree, emptytree), and
2139      * also ensure join correctly spots when sorted trees fail the
2140      * ordering constraint.
2141      */
2142     tree = newtree234(mycmp);
2143     tree2 = newtree234(mycmp);
2144     tree3 = newtree234(mycmp);
2145     tree4 = newtree234(mycmp);
2146     assert(mycmp(strings[0], strings[1]) < 0);   /* just in case :-) */
2147     add234(tree2, strings[1]);
2148     add234(tree4, strings[0]);
2149     array[0] = strings[0];
2150     array[1] = strings[1];
2151     verifytree(tree, array, 0);
2152     verifytree(tree2, array+1, 1);
2153     verifytree(tree3, array, 0);
2154     verifytree(tree4, array, 1);
2155 
2156     /*
2157      * So:
2158      *  - join(tree,tree3) should leave both tree and tree3 unchanged.
2159      *  - joinr(tree,tree2) should leave both tree and tree2 unchanged.
2160      *  - join(tree4,tree3) should leave both tree3 and tree4 unchanged.
2161      *  - join(tree, tree2) should move the element from tree2 to tree.
2162      *  - joinr(tree4, tree3) should move the element from tree4 to tree3.
2163      *  - join(tree,tree3) should return NULL and leave both unchanged.
2164      *  - join(tree3,tree) should work and create a bigger tree in tree3.
2165      */
2166     assert(tree == join234(tree, tree3));
2167     verifytree(tree, array, 0);
2168     verifytree(tree3, array, 0);
2169     assert(tree2 == join234r(tree, tree2));
2170     verifytree(tree, array, 0);
2171     verifytree(tree2, array+1, 1);
2172     assert(tree4 == join234(tree4, tree3));
2173     verifytree(tree3, array, 0);
2174     verifytree(tree4, array, 1);
2175     assert(tree == join234(tree, tree2));
2176     verifytree(tree, array+1, 1);
2177     verifytree(tree2, array, 0);
2178     assert(tree3 == join234r(tree4, tree3));
2179     verifytree(tree3, array, 1);
2180     verifytree(tree4, array, 0);
2181     assert(NULL == join234(tree, tree3));
2182     verifytree(tree, array+1, 1);
2183     verifytree(tree3, array, 1);
2184     assert(tree3 == join234(tree3, tree));
2185     verifytree(tree3, array, 2);
2186     verifytree(tree, array, 0);
2187 
2188     return 0;
2189 }
2190 
2191 #endif
2192 
2193 #if 0 /* sorted list of strings might be useful */
2194 {
2195     "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x",
2196 }
2197 #endif
2198