1 /*
2  * tree234.c: reasonably generic counted 2-3-4 tree routines.
3  *
4  * This file is copyright 1999-2001 Simon Tatham.
5  *
6  * Permission is hereby granted, free of charge, to any person
7  * obtaining a copy of this software and associated documentation
8  * files (the "Software"), to deal in the Software without
9  * restriction, including without limitation the rights to use,
10  * copy, modify, merge, publish, distribute, sublicense, and/or
11  * sell copies of the Software, and to permit persons to whom the
12  * Software is furnished to do so, subject to the following
13  * conditions:
14  *
15  * The above copyright notice and this permission notice shall be
16  * included in all copies or substantial portions of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21  * NONINFRINGEMENT.  IN NO EVENT SHALL SIMON TATHAM BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
23  * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25  * SOFTWARE.
26  */
27 
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <assert.h>
31 
32 #include "defs.h"
33 #include "tree234.h"
34 
35 #ifdef TEST
36 #define LOG(x) (printf x)
37 #define snew(type) ((type *)malloc(sizeof(type)))
38 #define snewn(n, type) ((type *)malloc((n) * sizeof(type)))
39 #define sresize(ptr, n, type)                                         \
40     ((type *)realloc(sizeof((type *)0 == (ptr)) ? (ptr) : (ptr),      \
41                      (n) * sizeof(type)))
42 #define sfree(ptr) free(ptr)
43 #else
44 #include "puttymem.h"
45 #define LOG(x)
46 #endif
47 
48 typedef struct node234_Tag node234;
49 
50 struct tree234_Tag {
51     node234 *root;
52     cmpfn234 cmp;
53 };
54 
55 struct node234_Tag {
56     node234 *parent;
57     node234 *kids[4];
58     int counts[4];
59     void *elems[3];
60 };
61 
62 /*
63  * Create a 2-3-4 tree.
64  */
newtree234(cmpfn234 cmp)65 tree234 *newtree234(cmpfn234 cmp)
66 {
67     tree234 *ret = snew(tree234);
68     LOG(("created tree %p\n", ret));
69     ret->root = NULL;
70     ret->cmp = cmp;
71     return ret;
72 }
73 
74 /*
75  * Free a 2-3-4 tree (not including freeing the elements).
76  */
freenode234(node234 * n)77 static void freenode234(node234 * n)
78 {
79     if (!n)
80         return;
81     freenode234(n->kids[0]);
82     freenode234(n->kids[1]);
83     freenode234(n->kids[2]);
84     freenode234(n->kids[3]);
85     sfree(n);
86 }
87 
freetree234(tree234 * t)88 void freetree234(tree234 * t)
89 {
90     freenode234(t->root);
91     sfree(t);
92 }
93 
94 /*
95  * Internal function to count a node.
96  */
countnode234(node234 * n)97 static int countnode234(node234 * n)
98 {
99     int count = 0;
100     int i;
101     if (!n)
102         return 0;
103     for (i = 0; i < 4; i++)
104         count += n->counts[i];
105     for (i = 0; i < 3; i++)
106         if (n->elems[i])
107             count++;
108     return count;
109 }
110 
111 /*
112  * Internal function to return the number of elements in a node.
113  */
elements234(node234 * n)114 static int elements234(node234 *n)
115 {
116     int i;
117     for (i = 0; i < 3; i++)
118         if (!n->elems[i])
119             break;
120     return i;
121 }
122 
123 /*
124  * Count the elements in a tree.
125  */
count234(tree234 * t)126 int count234(tree234 * t)
127 {
128     if (t->root)
129         return countnode234(t->root);
130     else
131         return 0;
132 }
133 
134 /*
135  * Add an element e to a 2-3-4 tree t. Returns e on success, or if
136  * an existing element compares equal, returns that.
137  */
add234_internal(tree234 * t,void * e,int index)138 static void *add234_internal(tree234 * t, void *e, int index)
139 {
140     node234 *n, **np, *left, *right;
141     void *orig_e = e;
142     int c, lcount, rcount;
143 
144     LOG(("adding node %p to tree %p\n", e, t));
145     if (t->root == NULL) {
146         t->root = snew(node234);
147         t->root->elems[1] = t->root->elems[2] = NULL;
148         t->root->kids[0] = t->root->kids[1] = NULL;
149         t->root->kids[2] = t->root->kids[3] = NULL;
150         t->root->counts[0] = t->root->counts[1] = 0;
151         t->root->counts[2] = t->root->counts[3] = 0;
152         t->root->parent = NULL;
153         t->root->elems[0] = e;
154         LOG(("  created root %p\n", t->root));
155         return orig_e;
156     }
157 
158     n = NULL; /* placate gcc; will always be set below since t->root != NULL */
159     np = &t->root;
160     while (*np) {
161         int childnum;
162         n = *np;
163         LOG(("  node %p: %p/%d [%p] %p/%d [%p] %p/%d [%p] %p/%d\n",
164              n,
165              n->kids[0], n->counts[0], n->elems[0],
166              n->kids[1], n->counts[1], n->elems[1],
167              n->kids[2], n->counts[2], n->elems[2],
168              n->kids[3], n->counts[3]));
169         if (index >= 0) {
170             if (!n->kids[0]) {
171                 /*
172                  * Leaf node. We want to insert at kid position
173                  * equal to the index:
174                  *
175                  *   0 A 1 B 2 C 3
176                  */
177                 childnum = index;
178             } else {
179                 /*
180                  * Internal node. We always descend through it (add
181                  * always starts at the bottom, never in the
182                  * middle).
183                  */
184                 do {                   /* this is a do ... while (0) to allow `break' */
185                     if (index <= n->counts[0]) {
186                         childnum = 0;
187                         break;
188                     }
189                     index -= n->counts[0] + 1;
190                     if (index <= n->counts[1]) {
191                         childnum = 1;
192                         break;
193                     }
194                     index -= n->counts[1] + 1;
195                     if (index <= n->counts[2]) {
196                         childnum = 2;
197                         break;
198                     }
199                     index -= n->counts[2] + 1;
200                     if (index <= n->counts[3]) {
201                         childnum = 3;
202                         break;
203                     }
204                     return NULL;       /* error: index out of range */
205                 } while (0);
206             }
207         } else {
208             if ((c = t->cmp(e, n->elems[0])) < 0)
209                 childnum = 0;
210             else if (c == 0)
211                 return n->elems[0];    /* already exists */
212             else if (n->elems[1] == NULL
213                      || (c = t->cmp(e, n->elems[1])) < 0) childnum = 1;
214             else if (c == 0)
215                 return n->elems[1];    /* already exists */
216             else if (n->elems[2] == NULL
217                      || (c = t->cmp(e, n->elems[2])) < 0) childnum = 2;
218             else if (c == 0)
219                 return n->elems[2];    /* already exists */
220             else
221                 childnum = 3;
222         }
223         np = &n->kids[childnum];
224         LOG(("  moving to child %d (%p)\n", childnum, *np));
225     }
226 
227     /*
228      * We need to insert the new element in n at position np.
229      */
230     left = NULL;
231     lcount = 0;
232     right = NULL;
233     rcount = 0;
234     while (n) {
235         LOG(("  at %p: %p/%d [%p] %p/%d [%p] %p/%d [%p] %p/%d\n",
236              n,
237              n->kids[0], n->counts[0], n->elems[0],
238              n->kids[1], n->counts[1], n->elems[1],
239              n->kids[2], n->counts[2], n->elems[2],
240              n->kids[3], n->counts[3]));
241         LOG(("  need to insert %p/%d [%p] %p/%d at position %d\n",
242              left, lcount, e, right, rcount, (int)(np - n->kids)));
243         if (n->elems[1] == NULL) {
244             /*
245              * Insert in a 2-node; simple.
246              */
247             if (np == &n->kids[0]) {
248                 LOG(("  inserting on left of 2-node\n"));
249                 n->kids[2] = n->kids[1];
250                 n->counts[2] = n->counts[1];
251                 n->elems[1] = n->elems[0];
252                 n->kids[1] = right;
253                 n->counts[1] = rcount;
254                 n->elems[0] = e;
255                 n->kids[0] = left;
256                 n->counts[0] = lcount;
257             } else {                   /* np == &n->kids[1] */
258                 LOG(("  inserting on right of 2-node\n"));
259                 n->kids[2] = right;
260                 n->counts[2] = rcount;
261                 n->elems[1] = e;
262                 n->kids[1] = left;
263                 n->counts[1] = lcount;
264             }
265             if (n->kids[0])
266                 n->kids[0]->parent = n;
267             if (n->kids[1])
268                 n->kids[1]->parent = n;
269             if (n->kids[2])
270                 n->kids[2]->parent = n;
271             LOG(("  done\n"));
272             break;
273         } else if (n->elems[2] == NULL) {
274             /*
275              * Insert in a 3-node; simple.
276              */
277             if (np == &n->kids[0]) {
278                 LOG(("  inserting on left of 3-node\n"));
279                 n->kids[3] = n->kids[2];
280                 n->counts[3] = n->counts[2];
281                 n->elems[2] = n->elems[1];
282                 n->kids[2] = n->kids[1];
283                 n->counts[2] = n->counts[1];
284                 n->elems[1] = n->elems[0];
285                 n->kids[1] = right;
286                 n->counts[1] = rcount;
287                 n->elems[0] = e;
288                 n->kids[0] = left;
289                 n->counts[0] = lcount;
290             } else if (np == &n->kids[1]) {
291                 LOG(("  inserting in middle of 3-node\n"));
292                 n->kids[3] = n->kids[2];
293                 n->counts[3] = n->counts[2];
294                 n->elems[2] = n->elems[1];
295                 n->kids[2] = right;
296                 n->counts[2] = rcount;
297                 n->elems[1] = e;
298                 n->kids[1] = left;
299                 n->counts[1] = lcount;
300             } else {                   /* np == &n->kids[2] */
301                 LOG(("  inserting on right of 3-node\n"));
302                 n->kids[3] = right;
303                 n->counts[3] = rcount;
304                 n->elems[2] = e;
305                 n->kids[2] = left;
306                 n->counts[2] = lcount;
307             }
308             if (n->kids[0])
309                 n->kids[0]->parent = n;
310             if (n->kids[1])
311                 n->kids[1]->parent = n;
312             if (n->kids[2])
313                 n->kids[2]->parent = n;
314             if (n->kids[3])
315                 n->kids[3]->parent = n;
316             LOG(("  done\n"));
317             break;
318         } else {
319             node234 *m = snew(node234);
320             m->parent = n->parent;
321             LOG(("  splitting a 4-node; created new node %p\n", m));
322             /*
323              * Insert in a 4-node; split into a 2-node and a
324              * 3-node, and move focus up a level.
325              *
326              * I don't think it matters which way round we put the
327              * 2 and the 3. For simplicity, we'll put the 3 first
328              * always.
329              */
330             if (np == &n->kids[0]) {
331                 m->kids[0] = left;
332                 m->counts[0] = lcount;
333                 m->elems[0] = e;
334                 m->kids[1] = right;
335                 m->counts[1] = rcount;
336                 m->elems[1] = n->elems[0];
337                 m->kids[2] = n->kids[1];
338                 m->counts[2] = n->counts[1];
339                 e = n->elems[1];
340                 n->kids[0] = n->kids[2];
341                 n->counts[0] = n->counts[2];
342                 n->elems[0] = n->elems[2];
343                 n->kids[1] = n->kids[3];
344                 n->counts[1] = n->counts[3];
345             } else if (np == &n->kids[1]) {
346                 m->kids[0] = n->kids[0];
347                 m->counts[0] = n->counts[0];
348                 m->elems[0] = n->elems[0];
349                 m->kids[1] = left;
350                 m->counts[1] = lcount;
351                 m->elems[1] = e;
352                 m->kids[2] = right;
353                 m->counts[2] = rcount;
354                 e = n->elems[1];
355                 n->kids[0] = n->kids[2];
356                 n->counts[0] = n->counts[2];
357                 n->elems[0] = n->elems[2];
358                 n->kids[1] = n->kids[3];
359                 n->counts[1] = n->counts[3];
360             } else if (np == &n->kids[2]) {
361                 m->kids[0] = n->kids[0];
362                 m->counts[0] = n->counts[0];
363                 m->elems[0] = n->elems[0];
364                 m->kids[1] = n->kids[1];
365                 m->counts[1] = n->counts[1];
366                 m->elems[1] = n->elems[1];
367                 m->kids[2] = left;
368                 m->counts[2] = lcount;
369                 /* e = e; */
370                 n->kids[0] = right;
371                 n->counts[0] = rcount;
372                 n->elems[0] = n->elems[2];
373                 n->kids[1] = n->kids[3];
374                 n->counts[1] = n->counts[3];
375             } else {                   /* np == &n->kids[3] */
376                 m->kids[0] = n->kids[0];
377                 m->counts[0] = n->counts[0];
378                 m->elems[0] = n->elems[0];
379                 m->kids[1] = n->kids[1];
380                 m->counts[1] = n->counts[1];
381                 m->elems[1] = n->elems[1];
382                 m->kids[2] = n->kids[2];
383                 m->counts[2] = n->counts[2];
384                 n->kids[0] = left;
385                 n->counts[0] = lcount;
386                 n->elems[0] = e;
387                 n->kids[1] = right;
388                 n->counts[1] = rcount;
389                 e = n->elems[2];
390             }
391             m->kids[3] = n->kids[3] = n->kids[2] = NULL;
392             m->counts[3] = n->counts[3] = n->counts[2] = 0;
393             m->elems[2] = n->elems[2] = n->elems[1] = NULL;
394             if (m->kids[0])
395                 m->kids[0]->parent = m;
396             if (m->kids[1])
397                 m->kids[1]->parent = m;
398             if (m->kids[2])
399                 m->kids[2]->parent = m;
400             if (n->kids[0])
401                 n->kids[0]->parent = n;
402             if (n->kids[1])
403                 n->kids[1]->parent = n;
404             LOG(("  left (%p): %p/%d [%p] %p/%d [%p] %p/%d\n", m,
405                  m->kids[0], m->counts[0], m->elems[0],
406                  m->kids[1], m->counts[1], m->elems[1],
407                  m->kids[2], m->counts[2]));
408             LOG(("  right (%p): %p/%d [%p] %p/%d\n", n,
409                  n->kids[0], n->counts[0], n->elems[0],
410                  n->kids[1], n->counts[1]));
411             left = m;
412             lcount = countnode234(left);
413             right = n;
414             rcount = countnode234(right);
415         }
416         if (n->parent)
417             np = (n->parent->kids[0] == n ? &n->parent->kids[0] :
418                   n->parent->kids[1] == n ? &n->parent->kids[1] :
419                   n->parent->kids[2] == n ? &n->parent->kids[2] :
420                   &n->parent->kids[3]);
421         n = n->parent;
422     }
423 
424     /*
425      * If we've come out of here by `break', n will still be
426      * non-NULL and all we need to do is go back up the tree
427      * updating counts. If we've come here because n is NULL, we
428      * need to create a new root for the tree because the old one
429      * has just split into two. */
430     if (n) {
431         while (n->parent) {
432             int count = countnode234(n);
433             int childnum;
434             childnum = (n->parent->kids[0] == n ? 0 :
435                         n->parent->kids[1] == n ? 1 :
436                         n->parent->kids[2] == n ? 2 : 3);
437             n->parent->counts[childnum] = count;
438             n = n->parent;
439         }
440     } else {
441         LOG(("  root is overloaded, split into two\n"));
442         t->root = snew(node234);
443         t->root->kids[0] = left;
444         t->root->counts[0] = lcount;
445         t->root->elems[0] = e;
446         t->root->kids[1] = right;
447         t->root->counts[1] = rcount;
448         t->root->elems[1] = NULL;
449         t->root->kids[2] = NULL;
450         t->root->counts[2] = 0;
451         t->root->elems[2] = NULL;
452         t->root->kids[3] = NULL;
453         t->root->counts[3] = 0;
454         t->root->parent = NULL;
455         if (t->root->kids[0])
456             t->root->kids[0]->parent = t->root;
457         if (t->root->kids[1])
458             t->root->kids[1]->parent = t->root;
459         LOG(("  new root is %p/%d [%p] %p/%d\n",
460              t->root->kids[0], t->root->counts[0],
461              t->root->elems[0], t->root->kids[1], t->root->counts[1]));
462     }
463 
464     return orig_e;
465 }
466 
add234(tree234 * t,void * e)467 void *add234(tree234 * t, void *e)
468 {
469     if (!t->cmp)                       /* tree is unsorted */
470         return NULL;
471 
472     return add234_internal(t, e, -1);
473 }
addpos234(tree234 * t,void * e,int index)474 void *addpos234(tree234 * t, void *e, int index)
475 {
476     if (index < 0 ||                   /* index out of range */
477         t->cmp)                        /* tree is sorted */
478         return NULL;                   /* return failure */
479 
480     return add234_internal(t, e, index);        /* this checks the upper bound */
481 }
482 
483 /*
484  * Look up the element at a given numeric index in a 2-3-4 tree.
485  * Returns NULL if the index is out of range.
486  */
index234(tree234 * t,int index)487 void *index234(tree234 * t, int index)
488 {
489     node234 *n;
490 
491     if (!t->root)
492         return NULL;                   /* tree is empty */
493 
494     if (index < 0 || index >= countnode234(t->root))
495         return NULL;                   /* out of range */
496 
497     n = t->root;
498 
499     while (n) {
500         if (index < n->counts[0])
501             n = n->kids[0];
502         else if (index -= n->counts[0] + 1, index < 0)
503             return n->elems[0];
504         else if (index < n->counts[1])
505             n = n->kids[1];
506         else if (index -= n->counts[1] + 1, index < 0)
507             return n->elems[1];
508         else if (index < n->counts[2])
509             n = n->kids[2];
510         else if (index -= n->counts[2] + 1, index < 0)
511             return n->elems[2];
512         else
513             n = n->kids[3];
514     }
515 
516     /* We shouldn't ever get here. I wonder how we did. */
517     return NULL;
518 }
519 
520 /*
521  * Find an element e in a sorted 2-3-4 tree t. Returns NULL if not
522  * found. e is always passed as the first argument to cmp, so cmp
523  * can be an asymmetric function if desired. cmp can also be passed
524  * as NULL, in which case the compare function from the tree proper
525  * will be used.
526  */
findrelpos234(tree234 * t,void * e,cmpfn234 cmp,int relation,int * index)527 void *findrelpos234(tree234 * t, void *e, cmpfn234 cmp,
528                     int relation, int *index)
529 {
530     search234_state ss;
531     int reldir = (relation == REL234_LT || relation == REL234_LE ? -1 :
532                   relation == REL234_GT || relation == REL234_GE ? +1 : 0);
533     bool equal_permitted = (relation != REL234_LT && relation != REL234_GT);
534     void *toret;
535 
536     /* Only LT / GT relations are permitted with a null query element. */
537     assert(!(equal_permitted && !e));
538 
539     if (cmp == NULL)
540         cmp = t->cmp;
541 
542     search234_start(&ss, t);
543     while (ss.element) {
544         int cmpret;
545 
546         if (e) {
547             cmpret = cmp(e, ss.element);
548         } else {
549             cmpret = -reldir;          /* invent a fixed compare result */
550         }
551 
552         if (cmpret == 0) {
553             /*
554              * We've found an element that compares exactly equal to
555              * the query element.
556              */
557             if (equal_permitted) {
558                 /* If our search relation permits equality, we've
559                  * finished already. */
560                 if (index)
561                     *index = ss.index;
562                 return ss.element;
563             } else {
564                 /* Otherwise, pretend this element was slightly too
565                  * big/small, according to the direction of search. */
566                 cmpret = reldir;
567             }
568         }
569 
570         search234_step(&ss, cmpret);
571     }
572 
573     /*
574      * No element compares equal to the one we were after, but
575      * ss.index indicates the index that element would have if it were
576      * inserted.
577      *
578      * So if our search relation is EQ, we must simply return failure.
579      */
580     if (relation == REL234_EQ)
581         return NULL;
582 
583     /*
584      * Otherwise, we must do an index lookup for the previous index
585      * (if we're going left - LE or LT) or this index (if we're going
586      * right - GE or GT).
587      */
588     if (relation == REL234_LT || relation == REL234_LE) {
589         ss.index--;
590     }
591 
592     /*
593      * We know the index of the element we want; just call index234
594      * to do the rest. This will return NULL if the index is out of
595      * bounds, which is exactly what we want.
596      */
597     toret = index234(t, ss.index);
598     if (toret && index)
599         *index = ss.index;
600     return toret;
601 }
find234(tree234 * t,void * e,cmpfn234 cmp)602 void *find234(tree234 * t, void *e, cmpfn234 cmp)
603 {
604     return findrelpos234(t, e, cmp, REL234_EQ, NULL);
605 }
findrel234(tree234 * t,void * e,cmpfn234 cmp,int relation)606 void *findrel234(tree234 * t, void *e, cmpfn234 cmp, int relation)
607 {
608     return findrelpos234(t, e, cmp, relation, NULL);
609 }
findpos234(tree234 * t,void * e,cmpfn234 cmp,int * index)610 void *findpos234(tree234 * t, void *e, cmpfn234 cmp, int *index)
611 {
612     return findrelpos234(t, e, cmp, REL234_EQ, index);
613 }
614 
search234_start(search234_state * state,tree234 * t)615 void search234_start(search234_state *state, tree234 *t)
616 {
617     state->_node = t->root;
618     state->_base = 0; /* index of first element in this node's subtree */
619     state->_last = -1; /* indicate that this node is not previously visted */
620     search234_step(state, 0);
621 }
search234_step(search234_state * state,int direction)622 void search234_step(search234_state *state, int direction)
623 {
624     node234 *node = state->_node;
625     int i;
626 
627     if (!node) {
628         state->element = NULL;
629         state->index = 0;
630         return;
631     }
632 
633     if (state->_last != -1) {
634         /*
635          * We're already pointing at some element of a node, so we
636          * should restrict to the elements left or right of it,
637          * depending on the requested search direction.
638          */
639         assert(direction);
640         assert(node);
641 
642         if (direction > 0)
643             state->_lo = state->_last + 1;
644         else
645             state->_hi = state->_last - 1;
646 
647         if (state->_lo > state->_hi) {
648             /*
649              * We've run out of elements in this node, i.e. we've
650              * narrowed to nothing but a child pointer. Descend to
651              * that child, and update _base to the leftmost index of
652              * its subtree.
653              */
654             for (i = 0; i < state->_lo; i++)
655                 state->_base += 1 + node->counts[i];
656             state->_node = node = node->kids[state->_lo];
657             state->_last = -1;
658         }
659     }
660 
661     if (state->_last == -1) {
662         /*
663          * We've just entered a new node - either because of the above
664          * code, or because we were called from search234_start - and
665          * anything in that node is a viable answer.
666          */
667         state->_lo = 0;
668         state->_hi = node ? elements234(node)-1 : 0;
669     }
670 
671     /*
672      * Now we've got something we can return.
673      */
674     if (!node) {
675         state->element = NULL;
676         state->index = state->_base;
677     } else {
678         state->_last = (state->_lo + state->_hi) / 2;
679         state->element = node->elems[state->_last];
680         state->index = state->_base + state->_last;
681         for (i = 0; i <= state->_last; i++)
682             state->index += node->counts[i];
683     }
684 }
685 
686 /*
687  * Delete an element e in a 2-3-4 tree. Does not free the element,
688  * merely removes all links to it from the tree nodes.
689  */
delpos234_internal(tree234 * t,int index)690 static void *delpos234_internal(tree234 * t, int index)
691 {
692     node234 *n;
693     void *retval;
694     int ei = -1;
695 
696     retval = 0;
697 
698     n = t->root;
699     LOG(("deleting item %d from tree %p\n", index, t));
700     while (1) {
701         while (n) {
702             int ki;
703             node234 *sub;
704 
705             LOG(
706                 ("  node %p: %p/%d [%p] %p/%d [%p] %p/%d [%p] %p/%d index=%d\n",
707                  n, n->kids[0], n->counts[0], n->elems[0], n->kids[1],
708                  n->counts[1], n->elems[1], n->kids[2], n->counts[2],
709                  n->elems[2], n->kids[3], n->counts[3], index));
710             if (index < n->counts[0]) {
711                 ki = 0;
712             } else if (index -= n->counts[0] + 1, index < 0) {
713                 ei = 0;
714                 break;
715             } else if (index < n->counts[1]) {
716                 ki = 1;
717             } else if (index -= n->counts[1] + 1, index < 0) {
718                 ei = 1;
719                 break;
720             } else if (index < n->counts[2]) {
721                 ki = 2;
722             } else if (index -= n->counts[2] + 1, index < 0) {
723                 ei = 2;
724                 break;
725             } else {
726                 ki = 3;
727             }
728             /*
729              * Recurse down to subtree ki. If it has only one element,
730              * we have to do some transformation to start with.
731              */
732             LOG(("  moving to subtree %d\n", ki));
733             sub = n->kids[ki];
734             if (!sub->elems[1]) {
735                 LOG(("  subtree has only one element!\n"));
736                 if (ki > 0 && n->kids[ki - 1]->elems[1]) {
737                     /*
738                      * Case 3a, left-handed variant. Child ki has
739                      * only one element, but child ki-1 has two or
740                      * more. So we need to move a subtree from ki-1
741                      * to ki.
742                      *
743                      *                . C .                     . B .
744                      *               /     \     ->            /     \
745                      * [more] a A b B c   d D e      [more] a A b   c C d D e
746                      */
747                     node234 *sib = n->kids[ki - 1];
748                     int lastelem = (sib->elems[2] ? 2 :
749                                     sib->elems[1] ? 1 : 0);
750                     sub->kids[2] = sub->kids[1];
751                     sub->counts[2] = sub->counts[1];
752                     sub->elems[1] = sub->elems[0];
753                     sub->kids[1] = sub->kids[0];
754                     sub->counts[1] = sub->counts[0];
755                     sub->elems[0] = n->elems[ki - 1];
756                     sub->kids[0] = sib->kids[lastelem + 1];
757                     sub->counts[0] = sib->counts[lastelem + 1];
758                     if (sub->kids[0])
759                         sub->kids[0]->parent = sub;
760                     n->elems[ki - 1] = sib->elems[lastelem];
761                     sib->kids[lastelem + 1] = NULL;
762                     sib->counts[lastelem + 1] = 0;
763                     sib->elems[lastelem] = NULL;
764                     n->counts[ki] = countnode234(sub);
765                     LOG(("  case 3a left\n"));
766                     LOG(
767                         ("  index and left subtree count before adjustment: %d, %d\n",
768                          index, n->counts[ki - 1]));
769                     index += n->counts[ki - 1];
770                     n->counts[ki - 1] = countnode234(sib);
771                     index -= n->counts[ki - 1];
772                     LOG(
773                         ("  index and left subtree count after adjustment: %d, %d\n",
774                          index, n->counts[ki - 1]));
775                 } else if (ki < 3 && n->kids[ki + 1]
776                            && n->kids[ki + 1]->elems[1]) {
777                     /*
778                      * Case 3a, right-handed variant. ki has only
779                      * one element but ki+1 has two or more. Move a
780                      * subtree from ki+1 to ki.
781                      *
782                      *      . B .                             . C .
783                      *     /     \                ->         /     \
784                      *  a A b   c C d D e [more]      a A b B c   d D e [more]
785                      */
786                     node234 *sib = n->kids[ki + 1];
787                     int j;
788                     sub->elems[1] = n->elems[ki];
789                     sub->kids[2] = sib->kids[0];
790                     sub->counts[2] = sib->counts[0];
791                     if (sub->kids[2])
792                         sub->kids[2]->parent = sub;
793                     n->elems[ki] = sib->elems[0];
794                     sib->kids[0] = sib->kids[1];
795                     sib->counts[0] = sib->counts[1];
796                     for (j = 0; j < 2 && sib->elems[j + 1]; j++) {
797                         sib->kids[j + 1] = sib->kids[j + 2];
798                         sib->counts[j + 1] = sib->counts[j + 2];
799                         sib->elems[j] = sib->elems[j + 1];
800                     }
801                     sib->kids[j + 1] = NULL;
802                     sib->counts[j + 1] = 0;
803                     sib->elems[j] = NULL;
804                     n->counts[ki] = countnode234(sub);
805                     n->counts[ki + 1] = countnode234(sib);
806                     LOG(("  case 3a right\n"));
807                 } else {
808                     /*
809                      * Case 3b. ki has only one element, and has no
810                      * neighbour with more than one. So pick a
811                      * neighbour and merge it with ki, taking an
812                      * element down from n to go in the middle.
813                      *
814                      *      . B .                .
815                      *     /     \     ->        |
816                      *  a A b   c C d      a A b B c C d
817                      *
818                      * (Since at all points we have avoided
819                      * descending to a node with only one element,
820                      * we can be sure that n is not reduced to
821                      * nothingness by this move, _unless_ it was
822                      * the very first node, ie the root of the
823                      * tree. In that case we remove the now-empty
824                      * root and replace it with its single large
825                      * child as shown.)
826                      */
827                     node234 *sib;
828                     int j;
829 
830                     if (ki > 0) {
831                         ki--;
832                         index += n->counts[ki] + 1;
833                     }
834                     sib = n->kids[ki];
835                     sub = n->kids[ki + 1];
836 
837                     sub->kids[3] = sub->kids[1];
838                     sub->counts[3] = sub->counts[1];
839                     sub->elems[2] = sub->elems[0];
840                     sub->kids[2] = sub->kids[0];
841                     sub->counts[2] = sub->counts[0];
842                     sub->elems[1] = n->elems[ki];
843                     sub->kids[1] = sib->kids[1];
844                     sub->counts[1] = sib->counts[1];
845                     if (sub->kids[1])
846                         sub->kids[1]->parent = sub;
847                     sub->elems[0] = sib->elems[0];
848                     sub->kids[0] = sib->kids[0];
849                     sub->counts[0] = sib->counts[0];
850                     if (sub->kids[0])
851                         sub->kids[0]->parent = sub;
852 
853                     n->counts[ki + 1] = countnode234(sub);
854 
855                     sfree(sib);
856 
857                     /*
858                      * That's built the big node in sub. Now we
859                      * need to remove the reference to sib in n.
860                      */
861                     for (j = ki; j < 3 && n->kids[j + 1]; j++) {
862                         n->kids[j] = n->kids[j + 1];
863                         n->counts[j] = n->counts[j + 1];
864                         n->elems[j] = j < 2 ? n->elems[j + 1] : NULL;
865                     }
866                     n->kids[j] = NULL;
867                     n->counts[j] = 0;
868                     if (j < 3)
869                         n->elems[j] = NULL;
870                     LOG(("  case 3b ki=%d\n", ki));
871 
872                     if (!n->elems[0]) {
873                         /*
874                          * The root is empty and needs to be
875                          * removed.
876                          */
877                         LOG(("  shifting root!\n"));
878                         t->root = sub;
879                         sub->parent = NULL;
880                         sfree(n);
881                     }
882                 }
883             }
884             n = sub;
885         }
886         if (!retval)
887             retval = n->elems[ei];
888 
889         if (ei == -1)
890             return NULL;               /* although this shouldn't happen */
891 
892         /*
893          * Treat special case: this is the one remaining item in
894          * the tree. n is the tree root (no parent), has one
895          * element (no elems[1]), and has no kids (no kids[0]).
896          */
897         if (!n->parent && !n->elems[1] && !n->kids[0]) {
898             LOG(("  removed last element in tree\n"));
899             sfree(n);
900             t->root = NULL;
901             return retval;
902         }
903 
904         /*
905          * Now we have the element we want, as n->elems[ei], and we
906          * have also arranged for that element not to be the only
907          * one in its node. So...
908          */
909 
910         if (!n->kids[0] && n->elems[1]) {
911             /*
912              * Case 1. n is a leaf node with more than one element,
913              * so it's _really easy_. Just delete the thing and
914              * we're done.
915              */
916             int i;
917             LOG(("  case 1\n"));
918             for (i = ei; i < 2 && n->elems[i + 1]; i++)
919                 n->elems[i] = n->elems[i + 1];
920             n->elems[i] = NULL;
921             /*
922              * Having done that to the leaf node, we now go back up
923              * the tree fixing the counts.
924              */
925             while (n->parent) {
926                 int childnum;
927                 childnum = (n->parent->kids[0] == n ? 0 :
928                             n->parent->kids[1] == n ? 1 :
929                             n->parent->kids[2] == n ? 2 : 3);
930                 n->parent->counts[childnum]--;
931                 n = n->parent;
932             }
933             return retval;             /* finished! */
934         } else if (n->kids[ei]->elems[1]) {
935             /*
936              * Case 2a. n is an internal node, and the root of the
937              * subtree to the left of e has more than one element.
938              * So find the predecessor p to e (ie the largest node
939              * in that subtree), place it where e currently is, and
940              * then start the deletion process over again on the
941              * subtree with p as target.
942              */
943             node234 *m = n->kids[ei];
944             void *target;
945             LOG(("  case 2a\n"));
946             while (m->kids[0]) {
947                 m = (m->kids[3] ? m->kids[3] :
948                      m->kids[2] ? m->kids[2] :
949                      m->kids[1] ? m->kids[1] : m->kids[0]);
950             }
951             target = (m->elems[2] ? m->elems[2] :
952                       m->elems[1] ? m->elems[1] : m->elems[0]);
953             n->elems[ei] = target;
954             index = n->counts[ei] - 1;
955             n = n->kids[ei];
956         } else if (n->kids[ei + 1]->elems[1]) {
957             /*
958              * Case 2b, symmetric to 2a but s/left/right/ and
959              * s/predecessor/successor/. (And s/largest/smallest/).
960              */
961             node234 *m = n->kids[ei + 1];
962             void *target;
963             LOG(("  case 2b\n"));
964             while (m->kids[0]) {
965                 m = m->kids[0];
966             }
967             target = m->elems[0];
968             n->elems[ei] = target;
969             n = n->kids[ei + 1];
970             index = 0;
971         } else {
972             /*
973              * Case 2c. n is an internal node, and the subtrees to
974              * the left and right of e both have only one element.
975              * So combine the two subnodes into a single big node
976              * with their own elements on the left and right and e
977              * in the middle, then restart the deletion process on
978              * that subtree, with e still as target.
979              */
980             node234 *a = n->kids[ei], *b = n->kids[ei + 1];
981             int j;
982 
983             LOG(("  case 2c\n"));
984             a->elems[1] = n->elems[ei];
985             a->kids[2] = b->kids[0];
986             a->counts[2] = b->counts[0];
987             if (a->kids[2])
988                 a->kids[2]->parent = a;
989             a->elems[2] = b->elems[0];
990             a->kids[3] = b->kids[1];
991             a->counts[3] = b->counts[1];
992             if (a->kids[3])
993                 a->kids[3]->parent = a;
994             sfree(b);
995             n->counts[ei] = countnode234(a);
996             /*
997              * That's built the big node in a, and destroyed b. Now
998              * remove the reference to b (and e) in n.
999              */
1000             for (j = ei; j < 2 && n->elems[j + 1]; j++) {
1001                 n->elems[j] = n->elems[j + 1];
1002                 n->kids[j + 1] = n->kids[j + 2];
1003                 n->counts[j + 1] = n->counts[j + 2];
1004             }
1005             n->elems[j] = NULL;
1006             n->kids[j + 1] = NULL;
1007             n->counts[j + 1] = 0;
1008             /*
1009              * It's possible, in this case, that we've just removed
1010              * the only element in the root of the tree. If so,
1011              * shift the root.
1012              */
1013             if (n->elems[0] == NULL) {
1014                 LOG(("  shifting root!\n"));
1015                 t->root = a;
1016                 a->parent = NULL;
1017                 sfree(n);
1018             }
1019             /*
1020              * Now go round the deletion process again, with n
1021              * pointing at the new big node and e still the same.
1022              */
1023             n = a;
1024             index = a->counts[0] + a->counts[1] + 1;
1025         }
1026     }
1027 }
delpos234(tree234 * t,int index)1028 void *delpos234(tree234 * t, int index)
1029 {
1030     if (index < 0 || index >= countnode234(t->root))
1031         return NULL;
1032     return delpos234_internal(t, index);
1033 }
del234(tree234 * t,void * e)1034 void *del234(tree234 * t, void *e)
1035 {
1036     int index;
1037     if (!findrelpos234(t, e, NULL, REL234_EQ, &index))
1038         return NULL;                   /* it wasn't in there anyway */
1039     return delpos234_internal(t, index);        /* it's there; delete it. */
1040 }
1041 
1042 #ifdef TEST
1043 
1044 /*
1045  * Test code for the 2-3-4 tree. This code maintains an alternative
1046  * representation of the data in the tree, in an array (using the
1047  * obvious and slow insert and delete functions). After each tree
1048  * operation, the verify() function is called, which ensures all
1049  * the tree properties are preserved:
1050  *  - node->child->parent always equals node
1051  *  - tree->root->parent always equals NULL
1052  *  - number of kids == 0 or number of elements + 1;
1053  *  - tree has the same depth everywhere
1054  *  - every node has at least one element
1055  *  - subtree element counts are accurate
1056  *  - any NULL kid pointer is accompanied by a zero count
1057  *  - in a sorted tree: ordering property between elements of a
1058  *    node and elements of its children is preserved
1059  * and also ensures the list represented by the tree is the same
1060  * list it should be. (This last check also doubly verifies the
1061  * ordering properties, because the `same list it should be' is by
1062  * definition correctly ordered. It also ensures all nodes are
1063  * distinct, because the enum functions would get caught in a loop
1064  * if not.)
1065  */
1066 
1067 #include <stdarg.h>
1068 #include <string.h>
1069 
1070 int n_errors = 0;
1071 
1072 /*
1073  * Error reporting function.
1074  */
error(char * fmt,...)1075 PRINTF_LIKE(1, 2) void error(char *fmt, ...)
1076 {
1077     va_list ap;
1078     printf("ERROR: ");
1079     va_start(ap, fmt);
1080     vfprintf(stdout, fmt, ap);
1081     va_end(ap);
1082     printf("\n");
1083     n_errors++;
1084 }
1085 
1086 /* The array representation of the data. */
1087 void **array;
1088 int arraylen, arraysize;
1089 cmpfn234 cmp;
1090 
1091 /* The tree representation of the same data. */
1092 tree234 *tree;
1093 
1094 typedef struct {
1095     int treedepth;
1096     int elemcount;
1097 } chkctx;
1098 
chknode(chkctx * ctx,int level,node234 * node,void * lowbound,void * highbound)1099 int chknode(chkctx * ctx, int level, node234 * node,
1100             void *lowbound, void *highbound)
1101 {
1102     int nkids, nelems;
1103     int i;
1104     int count;
1105 
1106     /* Count the non-NULL kids. */
1107     for (nkids = 0; nkids < 4 && node->kids[nkids]; nkids++);
1108     /* Ensure no kids beyond the first NULL are non-NULL. */
1109     for (i = nkids; i < 4; i++)
1110         if (node->kids[i]) {
1111             error("node %p: nkids=%d but kids[%d] non-NULL",
1112                   node, nkids, i);
1113         } else if (node->counts[i]) {
1114             error("node %p: kids[%d] NULL but count[%d]=%d nonzero",
1115                   node, i, i, node->counts[i]);
1116         }
1117 
1118     /* Count the non-NULL elements. */
1119     for (nelems = 0; nelems < 3 && node->elems[nelems]; nelems++);
1120     /* Ensure no elements beyond the first NULL are non-NULL. */
1121     for (i = nelems; i < 3; i++)
1122         if (node->elems[i]) {
1123             error("node %p: nelems=%d but elems[%d] non-NULL",
1124                   node, nelems, i);
1125         }
1126 
1127     if (nkids == 0) {
1128         /*
1129          * If nkids==0, this is a leaf node; verify that the tree
1130          * depth is the same everywhere.
1131          */
1132         if (ctx->treedepth < 0)
1133             ctx->treedepth = level;    /* we didn't know the depth yet */
1134         else if (ctx->treedepth != level)
1135             error("node %p: leaf at depth %d, previously seen depth %d",
1136                   node, level, ctx->treedepth);
1137     } else {
1138         /*
1139          * If nkids != 0, then it should be nelems+1, unless nelems
1140          * is 0 in which case nkids should also be 0 (and so we
1141          * shouldn't be in this condition at all).
1142          */
1143         int shouldkids = (nelems ? nelems + 1 : 0);
1144         if (nkids != shouldkids) {
1145             error("node %p: %d elems should mean %d kids but has %d",
1146                   node, nelems, shouldkids, nkids);
1147         }
1148     }
1149 
1150     /*
1151      * nelems should be at least 1.
1152      */
1153     if (nelems == 0) {
1154         error("node %p: no elems", node, nkids);
1155     }
1156 
1157     /*
1158      * Add nelems to the running element count of the whole tree.
1159      */
1160     ctx->elemcount += nelems;
1161 
1162     /*
1163      * Check ordering property: all elements should be strictly >
1164      * lowbound, strictly < highbound, and strictly < each other in
1165      * sequence. (lowbound and highbound are NULL at edges of tree
1166      * - both NULL at root node - and NULL is considered to be <
1167      * everything and > everything. IYSWIM.)
1168      */
1169     if (cmp) {
1170         for (i = -1; i < nelems; i++) {
1171             void *lower = (i == -1 ? lowbound : node->elems[i]);
1172             void *higher =
1173                 (i + 1 == nelems ? highbound : node->elems[i + 1]);
1174             if (lower && higher && cmp(lower, higher) >= 0) {
1175                 error("node %p: kid comparison [%d=%s,%d=%s] failed",
1176                       node, i, lower, i + 1, higher);
1177             }
1178         }
1179     }
1180 
1181     /*
1182      * Check parent pointers: all non-NULL kids should have a
1183      * parent pointer coming back to this node.
1184      */
1185     for (i = 0; i < nkids; i++)
1186         if (node->kids[i]->parent != node) {
1187             error("node %p kid %d: parent ptr is %p not %p",
1188                   node, i, node->kids[i]->parent, node);
1189         }
1190 
1191 
1192     /*
1193      * Now (finally!) recurse into subtrees.
1194      */
1195     count = nelems;
1196 
1197     for (i = 0; i < nkids; i++) {
1198         void *lower = (i == 0 ? lowbound : node->elems[i - 1]);
1199         void *higher = (i >= nelems ? highbound : node->elems[i]);
1200         int subcount =
1201             chknode(ctx, level + 1, node->kids[i], lower, higher);
1202         if (node->counts[i] != subcount) {
1203             error("node %p kid %d: count says %d, subtree really has %d",
1204                   node, i, node->counts[i], subcount);
1205         }
1206         count += subcount;
1207     }
1208 
1209     return count;
1210 }
1211 
verify(void)1212 void verify(void)
1213 {
1214     chkctx ctx[1];
1215     int i;
1216     void *p;
1217 
1218     ctx->treedepth = -1;                /* depth unknown yet */
1219     ctx->elemcount = 0;                 /* no elements seen yet */
1220     /*
1221      * Verify validity of tree properties.
1222      */
1223     if (tree->root) {
1224         if (tree->root->parent != NULL)
1225             error("root->parent is %p should be null", tree->root->parent);
1226         chknode(&ctx, 0, tree->root, NULL, NULL);
1227     }
1228     printf("tree depth: %d\n", ctx->treedepth);
1229     /*
1230      * Enumerate the tree and ensure it matches up to the array.
1231      */
1232     for (i = 0; NULL != (p = index234(tree, i)); i++) {
1233         if (i >= arraylen)
1234             error("tree contains more than %d elements", arraylen);
1235         if (array[i] != p)
1236             error("enum at position %d: array says %s, tree says %s",
1237                   i, array[i], p);
1238     }
1239     if (ctx->elemcount != i) {
1240         error("tree really contains %d elements, enum gave %d",
1241               ctx->elemcount, i);
1242     }
1243     if (i < arraylen) {
1244         error("enum gave only %d elements, array has %d", i, arraylen);
1245     }
1246     i = count234(tree);
1247     if (ctx->elemcount != i) {
1248         error("tree really contains %d elements, count234 gave %d",
1249               ctx->elemcount, i);
1250     }
1251 }
1252 
internal_addtest(void * elem,int index,void * realret)1253 void internal_addtest(void *elem, int index, void *realret)
1254 {
1255     int i, j;
1256     void *retval;
1257 
1258     if (arraysize < arraylen + 1) {
1259         arraysize = arraylen + 1 + 256;
1260         array = sresize(array, arraysize, void *);
1261     }
1262 
1263     i = index;
1264     /* now i points to the first element >= elem */
1265     retval = elem;                     /* expect elem returned (success) */
1266     for (j = arraylen; j > i; j--)
1267         array[j] = array[j - 1];
1268     array[i] = elem;                   /* add elem to array */
1269     arraylen++;
1270 
1271     if (realret != retval) {
1272         error("add: retval was %p expected %p", realret, retval);
1273     }
1274 
1275     verify();
1276 }
1277 
addtest(void * elem)1278 void addtest(void *elem)
1279 {
1280     int i;
1281     void *realret;
1282 
1283     realret = add234(tree, elem);
1284 
1285     i = 0;
1286     while (i < arraylen && cmp(elem, array[i]) > 0)
1287         i++;
1288     if (i < arraylen && !cmp(elem, array[i])) {
1289         void *retval = array[i];       /* expect that returned not elem */
1290         if (realret != retval) {
1291             error("add: retval was %p expected %p", realret, retval);
1292         }
1293     } else
1294         internal_addtest(elem, i, realret);
1295 }
1296 
addpostest(void * elem,int i)1297 void addpostest(void *elem, int i)
1298 {
1299     void *realret;
1300 
1301     realret = addpos234(tree, elem, i);
1302 
1303     internal_addtest(elem, i, realret);
1304 }
1305 
delpostest(int i)1306 void delpostest(int i)
1307 {
1308     int index = i;
1309     void *elem = array[i], *ret;
1310 
1311     /* i points to the right element */
1312     while (i < arraylen - 1) {
1313         array[i] = array[i + 1];
1314         i++;
1315     }
1316     arraylen--;                        /* delete elem from array */
1317 
1318     if (tree->cmp)
1319         ret = del234(tree, elem);
1320     else
1321         ret = delpos234(tree, index);
1322 
1323     if (ret != elem) {
1324         error("del returned %p, expected %p", ret, elem);
1325     }
1326 
1327     verify();
1328 }
1329 
deltest(void * elem)1330 void deltest(void *elem)
1331 {
1332     int i;
1333 
1334     i = 0;
1335     while (i < arraylen && cmp(elem, array[i]) > 0)
1336         i++;
1337     if (i >= arraylen || cmp(elem, array[i]) != 0)
1338         return;                        /* don't do it! */
1339     delpostest(i);
1340 }
1341 
1342 /* A sample data set and test utility. Designed for pseudo-randomness,
1343  * and yet repeatability. */
1344 
1345 /*
1346  * This random number generator uses the `portable implementation'
1347  * given in ANSI C99 draft N869. It assumes `unsigned' is 32 bits;
1348  * change it if not.
1349  */
randomnumber(unsigned * seed)1350 int randomnumber(unsigned *seed)
1351 {
1352     *seed *= 1103515245;
1353     *seed += 12345;
1354     return ((*seed) / 65536) % 32768;
1355 }
1356 
mycmp(void * av,void * bv)1357 int mycmp(void *av, void *bv)
1358 {
1359     char const *a = (char const *) av;
1360     char const *b = (char const *) bv;
1361     return strcmp(a, b);
1362 }
1363 
1364 #define lenof(x) ( sizeof((x)) / sizeof(*(x)) )
1365 
1366 char *strings[] = {
1367     "a", "ab", "absque", "coram", "de",
1368     "palam", "clam", "cum", "ex", "e",
1369     "sine", "tenus", "pro", "prae",
1370     "banana", "carrot", "cabbage", "broccoli", "onion", "zebra",
1371     "penguin", "blancmange", "pangolin", "whale", "hedgehog",
1372     "giraffe", "peanut", "bungee", "foo", "bar", "baz", "quux",
1373     "murfl", "spoo", "breen", "flarn", "octothorpe",
1374     "snail", "tiger", "elephant", "octopus", "warthog", "armadillo",
1375     "aardvark", "wyvern", "dragon", "elf", "dwarf", "orc", "goblin",
1376     "pixie", "basilisk", "warg", "ape", "lizard", "newt", "shopkeeper",
1377     "wand", "ring", "amulet"
1378 };
1379 
1380 #define NSTR lenof(strings)
1381 
findtest(void)1382 int findtest(void)
1383 {
1384     const static int rels[] = {
1385         REL234_EQ, REL234_GE, REL234_LE, REL234_LT, REL234_GT
1386     };
1387     const static char *const relnames[] = {
1388         "EQ", "GE", "LE", "LT", "GT"
1389     };
1390     int i, j, rel, index;
1391     char *p, *ret, *realret, *realret2;
1392     int lo, hi, mid, c;
1393 
1394     for (i = 0; i < NSTR; i++) {
1395         p = strings[i];
1396         for (j = 0; j < sizeof(rels) / sizeof(*rels); j++) {
1397             rel = rels[j];
1398 
1399             lo = 0;
1400             hi = arraylen - 1;
1401             while (lo <= hi) {
1402                 mid = (lo + hi) / 2;
1403                 c = strcmp(p, array[mid]);
1404                 if (c < 0)
1405                     hi = mid - 1;
1406                 else if (c > 0)
1407                     lo = mid + 1;
1408                 else
1409                     break;
1410             }
1411 
1412             if (c == 0) {
1413                 if (rel == REL234_LT)
1414                     ret = (mid > 0 ? array[--mid] : NULL);
1415                 else if (rel == REL234_GT)
1416                     ret = (mid < arraylen - 1 ? array[++mid] : NULL);
1417                 else
1418                     ret = array[mid];
1419             } else {
1420                 assert(lo == hi + 1);
1421                 if (rel == REL234_LT || rel == REL234_LE) {
1422                     mid = hi;
1423                     ret = (hi >= 0 ? array[hi] : NULL);
1424                 } else if (rel == REL234_GT || rel == REL234_GE) {
1425                     mid = lo;
1426                     ret = (lo < arraylen ? array[lo] : NULL);
1427                 } else
1428                     ret = NULL;
1429             }
1430 
1431             realret = findrelpos234(tree, p, NULL, rel, &index);
1432             if (realret != ret) {
1433                 error("find(\"%s\",%s) gave %s should be %s",
1434                       p, relnames[j], realret, ret);
1435             }
1436             if (realret && index != mid) {
1437                 error("find(\"%s\",%s) gave %d should be %d",
1438                       p, relnames[j], index, mid);
1439             }
1440             if (realret && rel == REL234_EQ) {
1441                 realret2 = index234(tree, index);
1442                 if (realret2 != realret) {
1443                     error("find(\"%s\",%s) gave %s(%d) but %d -> %s",
1444                           p, relnames[j], realret, index, index, realret2);
1445                 }
1446             }
1447 #if 0
1448             printf("find(\"%s\",%s) gave %s(%d)\n", p, relnames[j],
1449                    realret, index);
1450 #endif
1451         }
1452     }
1453 
1454     realret = findrelpos234(tree, NULL, NULL, REL234_GT, &index);
1455     if (arraylen && (realret != array[0] || index != 0)) {
1456         error("find(NULL,GT) gave %s(%d) should be %s(0)",
1457               realret, index, array[0]);
1458     } else if (!arraylen && (realret != NULL)) {
1459         error("find(NULL,GT) gave %s(%d) should be NULL", realret, index);
1460     }
1461 
1462     realret = findrelpos234(tree, NULL, NULL, REL234_LT, &index);
1463     if (arraylen
1464         && (realret != array[arraylen - 1] || index != arraylen - 1)) {
1465         error("find(NULL,LT) gave %s(%d) should be %s(0)", realret, index,
1466               array[arraylen - 1]);
1467     } else if (!arraylen && (realret != NULL)) {
1468         error("find(NULL,LT) gave %s(%d) should be NULL", realret, index);
1469     }
1470 }
1471 
searchtest_recurse(search234_state ss,int lo,int hi,char ** expected,char * directionbuf,char * directionptr)1472 void searchtest_recurse(search234_state ss, int lo, int hi,
1473                         char **expected, char *directionbuf,
1474                         char *directionptr)
1475 {
1476     *directionptr = '\0';
1477 
1478     if (!ss.element) {
1479         if (lo != hi) {
1480             error("search234(%s) gave NULL for non-empty interval [%d,%d)",
1481                   directionbuf, lo, hi);
1482         } else if (ss.index != lo) {
1483             error("search234(%s) gave index %d should be %d",
1484                   directionbuf, ss.index, lo);
1485         } else {
1486             printf("%*ssearch234(%s) gave NULL,%d\n",
1487                    (int)(directionptr-directionbuf) * 2, "", directionbuf,
1488                    ss.index);
1489         }
1490     } else if (lo == hi) {
1491         error("search234(%s) gave %s for empty interval [%d,%d)",
1492               directionbuf, (char *)ss.element, lo, hi);
1493     } else if (ss.element != expected[ss.index]) {
1494         error("search234(%s) gave element %s should be %s",
1495               directionbuf, (char *)ss.element, expected[ss.index]);
1496     } else if (ss.index < lo || ss.index >= hi) {
1497         error("search234(%s) gave index %d should be in [%d,%d)",
1498               directionbuf, ss.index, lo, hi);
1499         return;
1500     } else {
1501         search234_state next;
1502 
1503         printf("%*ssearch234(%s) gave %s,%d\n",
1504                (int)(directionptr-directionbuf) * 2, "", directionbuf,
1505                (char *)ss.element, ss.index);
1506 
1507         next = ss;
1508         search234_step(&next, -1);
1509         *directionptr = '-';
1510         searchtest_recurse(next, lo, ss.index,
1511                            expected, directionbuf, directionptr+1);
1512 
1513         next = ss;
1514         search234_step(&next, +1);
1515         *directionptr = '+';
1516         searchtest_recurse(next, ss.index+1, hi,
1517                            expected, directionbuf, directionptr+1);
1518     }
1519 }
1520 
searchtest(void)1521 void searchtest(void)
1522 {
1523     char *expected[NSTR], *p;
1524     char directionbuf[NSTR * 10];
1525     int n;
1526     search234_state ss;
1527 
1528     printf("beginning searchtest:");
1529     for (n = 0; (p = index234(tree, n)) != NULL; n++) {
1530         expected[n] = p;
1531         printf(" %d=%s", n, p);
1532     }
1533     printf(" count=%d\n", n);
1534 
1535     search234_start(&ss, tree);
1536     searchtest_recurse(ss, 0, n, expected, directionbuf, directionbuf);
1537 }
1538 
main(void)1539 int main(void)
1540 {
1541     int in[NSTR];
1542     int i, j, k;
1543     unsigned seed = 0;
1544 
1545     for (i = 0; i < NSTR; i++)
1546         in[i] = 0;
1547     array = NULL;
1548     arraylen = arraysize = 0;
1549     tree = newtree234(mycmp);
1550     cmp = mycmp;
1551 
1552     verify();
1553     searchtest();
1554     for (i = 0; i < 10000; i++) {
1555         j = randomnumber(&seed);
1556         j %= NSTR;
1557         printf("trial: %d\n", i);
1558         if (in[j]) {
1559             printf("deleting %s (%d)\n", strings[j], j);
1560             deltest(strings[j]);
1561             in[j] = 0;
1562         } else {
1563             printf("adding %s (%d)\n", strings[j], j);
1564             addtest(strings[j]);
1565             in[j] = 1;
1566         }
1567         findtest();
1568         searchtest();
1569     }
1570 
1571     while (arraylen > 0) {
1572         j = randomnumber(&seed);
1573         j %= arraylen;
1574         deltest(array[j]);
1575     }
1576 
1577     freetree234(tree);
1578 
1579     /*
1580      * Now try an unsorted tree. We don't really need to test
1581      * delpos234 because we know del234 is based on it, so it's
1582      * already been tested in the above sorted-tree code; but for
1583      * completeness we'll use it to tear down our unsorted tree
1584      * once we've built it.
1585      */
1586     tree = newtree234(NULL);
1587     cmp = NULL;
1588     verify();
1589     for (i = 0; i < 1000; i++) {
1590         printf("trial: %d\n", i);
1591         j = randomnumber(&seed);
1592         j %= NSTR;
1593         k = randomnumber(&seed);
1594         k %= count234(tree) + 1;
1595         printf("adding string %s at index %d\n", strings[j], k);
1596         addpostest(strings[j], k);
1597     }
1598     while (count234(tree) > 0) {
1599         printf("cleanup: tree size %d\n", count234(tree));
1600         j = randomnumber(&seed);
1601         j %= count234(tree);
1602         printf("deleting string %s from index %d\n",
1603                (const char *)array[j], j);
1604         delpostest(j);
1605     }
1606 
1607     printf("%d errors found\n", n_errors);
1608     return (n_errors != 0);
1609 }
1610 
1611 #endif
1612