1 /*****************************************************************************
2 
3   Copyright (c) 2001, 2002 Zope Foundation and Contributors.
4   All Rights Reserved.
5 
6   This software is subject to the provisions of the Zope Public License,
7   Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
8   THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
9   WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
10   WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
11   FOR A PARTICULAR PURPOSE
12 
13 ****************************************************************************/
14 #include "_compat.h"
15 
16 #define BTREETEMPLATE_C "$Id$\n"
17 
18 static long
_get_max_size(BTree * self,PyObject * name,long default_max)19 _get_max_size(BTree *self, PyObject *name, long default_max)
20 {
21   PyObject *size;
22   long isize;
23   size = PyObject_GetAttr(OBJECT(OBJECT(self)->ob_type), name);
24   if (size == NULL) {
25       PyErr_Clear();
26       return default_max;
27   }
28 
29 #ifdef PY3K
30   isize = PyLong_AsLong(size);
31 #else
32   isize = PyInt_AsLong(size);
33 #endif
34 
35   Py_DECREF(size);
36   if (isize <= 0 && ! PyErr_Occurred()) {
37     PyErr_SetString(PyExc_ValueError,
38                     "non-positive max size in BTree subclass");
39     return -1;
40   }
41 
42   return isize;
43 }
44 
45 static int
_max_internal_size(BTree * self)46 _max_internal_size(BTree *self)
47 {
48   long isize;
49 
50   if (self->max_internal_size > 0) return self->max_internal_size;
51   isize = _get_max_size(self, max_internal_size_str, -1);
52   self->max_internal_size = isize;
53   return isize;
54 }
55 
56 static int
_max_leaf_size(BTree * self)57 _max_leaf_size(BTree *self)
58 {
59   long isize;
60 
61   if (self->max_leaf_size > 0) return self->max_leaf_size;
62   isize = _get_max_size(self, max_leaf_size_str, -1);
63   self->max_leaf_size = isize;
64   return isize;
65 }
66 
67 /* Sanity-check a BTree.  This is a private helper for BTree_check.  Return:
68  *      -1         Error.  If it's an internal inconsistency in the BTree,
69  *                 AssertionError is set.
70  *       0         No problem found.
71  *
72  * nextbucket is the bucket "one beyond the end" of the BTree; the last bucket
73  * directly reachable from following right child pointers *should* be linked
74  * to nextbucket (and this is checked).
75  */
76 static int
BTree_check_inner(BTree * self,Bucket * nextbucket)77 BTree_check_inner(BTree *self, Bucket *nextbucket)
78 {
79     int i;
80     Bucket *bucketafter;
81     Sized *child;
82     char *errormsg = "internal error";  /* someone should have overriden */
83     Sized *activated_child = NULL;
84     int result = -1;    /* until proved innocent */
85 
86 #define CHECK(CONDITION, ERRORMSG)              \
87   if (!(CONDITION)) {                           \
88     errormsg = (ERRORMSG);                      \
89     goto Error;                                 \
90   }
91 
92     PER_USE_OR_RETURN(self, -1);
93     CHECK(self->len >= 0, "BTree len < 0");
94     CHECK(self->len <= self->size, "BTree len > size");
95     if (self->len == 0) /* Empty BTree. */
96     {
97         CHECK(self->firstbucket == NULL,
98             "Empty BTree has non-NULL firstbucket");
99         result = 0;
100         goto Done;
101     }
102     /* Non-empty BTree. */
103     CHECK(self->firstbucket != NULL, "Non-empty BTree has NULL firstbucket");
104 
105     /* Obscure:  The first bucket is pointed to at least by self->firstbucket
106     * and data[0].child of whichever BTree node it's a child of.  However,
107     * if persistence is enabled then the latter BTree node may be a ghost
108     * at this point, and so its pointers "don't count":  we can only rely
109     * on self's pointers being intact.
110     */
111 #ifdef PERSISTENT
112     CHECK(Py_REFCNT(self->firstbucket) >= 1,
113             "Non-empty BTree firstbucket has refcount < 1");
114 #else
115     CHECK(Py_REFCNT(self->firstbucket) >= 2,
116             "Non-empty BTree firstbucket has refcount < 2");
117 #endif
118 
119     for (i = 0; i < self->len; ++i)
120     {
121         CHECK(self->data[i].child != NULL, "BTree has NULL child");
122     }
123 
124     if (SameType_Check(self, self->data[0].child))
125     {
126         /* Our children are also BTrees. */
127         child = self->data[0].child;
128         UNLESS (PER_USE(child))
129             goto Done;
130         activated_child = child;
131         CHECK(self->firstbucket == BTREE(child)->firstbucket,
132             "BTree has firstbucket different than "
133             "its first child's firstbucket");
134         PER_ALLOW_DEACTIVATION(child);
135         activated_child = NULL;
136         for (i = 0; i < self->len; ++i)
137         {
138             child = self->data[i].child;
139             CHECK(SameType_Check(self, child),
140                     "BTree children have different types");
141             if (i == self->len - 1)
142                 bucketafter = nextbucket;
143             else
144             {
145                 BTree *child2 = BTREE(self->data[i+1].child);
146                 UNLESS (PER_USE(child2))
147                     goto Done;
148                 bucketafter = child2->firstbucket;
149                 PER_ALLOW_DEACTIVATION(child2);
150             }
151             if (BTree_check_inner(BTREE(child), bucketafter) < 0)
152                 goto Done;
153         }
154     }
155     else /* Our children are buckets. */
156     {
157         CHECK(self->firstbucket == BUCKET(self->data[0].child),
158             "Bottom-level BTree node has inconsistent firstbucket belief");
159         for (i = 0; i < self->len; ++i)
160         {
161             child = self->data[i].child;
162             UNLESS (PER_USE(child))
163                 goto Done;
164             activated_child = child;
165             CHECK(!SameType_Check(self, child),
166                     "BTree children have different types");
167             CHECK(child->len >= 1, "Bucket length < 1");/* no empty buckets! */
168             CHECK(child->len <= child->size, "Bucket len > size");
169 #ifdef PERSISTENT
170             CHECK(Py_REFCNT(child) >= 1, "Bucket has refcount < 1");
171 #else
172             CHECK(Py_REFCNT(child) >= 2, "Bucket has refcount < 2");
173 #endif
174             if (i == self->len - 1)
175                 bucketafter = nextbucket;
176             else
177                 bucketafter = BUCKET(self->data[i+1].child);
178             CHECK(BUCKET(child)->next == bucketafter,
179                     "Bucket next pointer is damaged");
180             PER_ALLOW_DEACTIVATION(child);
181             activated_child = NULL;
182         }
183     }
184     result = 0;
185     goto Done;
186 
187 Error:
188     PyErr_SetString(PyExc_AssertionError, errormsg);
189     result = -1;
190 
191 Done:
192     /* No point updating access time -- this isn't a "real" use. */
193     PER_ALLOW_DEACTIVATION(self);
194     if (activated_child)
195     {
196         PER_ALLOW_DEACTIVATION(activated_child);
197     }
198     return result;
199 
200 #undef CHECK
201 }
202 
203 /* Sanity-check a BTree.  This is the ._check() method.  Return:
204  *      NULL       Error.  If it's an internal inconsistency in the BTree,
205  *                 AssertionError is set.
206  *      Py_None    No problem found.
207  */
208 static PyObject*
BTree_check(BTree * self)209 BTree_check(BTree *self)
210 {
211     PyObject *result = NULL;
212     int i = BTree_check_inner(self, NULL);
213 
214     if (i >= 0)
215     {
216         result = Py_None;
217         Py_INCREF(result);
218     }
219     return result;
220 }
221 
222 #define _BGET_REPLACE_TYPE_ERROR 1
223 #define _BGET_ALLOW_TYPE_ERROR 0
224 /*
225 ** _BTree_get
226 **
227 ** Search a BTree.
228 **
229 ** Arguments
230 **      self        a pointer to a BTree
231 **      keyarg      the key to search for, as a Python object
232 **      has_key     true/false; when false, try to return the associated
233 **                  value; when true, return a boolean
234 **      replace_type_err    true/false: When true, ignore the TypeError from
235 **                            a key conversion issue, instead
236 **                            transforming it into a KeyError set. If
237 **                            you are just reading/searching, set to
238 **                            true. If you will be adding/updating,
239 **                             however,  set to false. Or use
240 **                            _BGET_REPLACE_TYPE_ERROR
241 **                            and _BGET_ALLOW_TYPE_ERROR, respectively.
242 ** Return
243 **      When has_key false:
244 **          If key exists, its associated value.
245 **          If key doesn't exist, NULL and KeyError is set.
246 **      When has_key true:
247 **          A Python int is returned in any case.
248 **          If key exists, the depth of the bucket in which it was found.
249 **          If key doesn't exist, 0.
250 */
251 static PyObject *
_BTree_get(BTree * self,PyObject * keyarg,int has_key,int replace_type_err)252 _BTree_get(BTree *self, PyObject *keyarg, int has_key, int replace_type_err)
253 {
254     KEY_TYPE key;
255     PyObject *result = NULL;    /* guilty until proved innocent */
256     int copied = 1;
257 
258     COPY_KEY_FROM_ARG(key, keyarg, copied);
259     UNLESS (copied)
260     {
261         if (replace_type_err && PyErr_ExceptionMatches(PyExc_TypeError))
262         {
263             PyErr_Clear();
264             PyErr_SetObject(PyExc_KeyError, keyarg);
265         }
266         return NULL;
267     }
268 
269     PER_USE_OR_RETURN(self, NULL);
270     if (self->len == 0)
271     {
272         /* empty BTree */
273         if (has_key)
274             result = INT_FROM_LONG(0);
275         else
276             PyErr_SetObject(PyExc_KeyError, keyarg);
277     }
278     else
279     {
280         for (;;)
281         {
282             int i;
283             Sized *child;
284 
285             BTREE_SEARCH(i, self, key, goto Done);
286             child = self->data[i].child;
287             has_key += has_key != 0;    /* bump depth counter, maybe */
288             if (SameType_Check(self, child))
289             {
290                 PER_UNUSE(self);
291                 self = BTREE(child);
292                 PER_USE_OR_RETURN(self, NULL);
293             }
294             else
295             {
296                 result = _bucket_get(BUCKET(child), keyarg, has_key);
297                 break;
298             }
299         }
300     }
301 
302 Done:
303     PER_UNUSE(self);
304     return result;
305 }
306 
307 static PyObject *
BTree_get(BTree * self,PyObject * key)308 BTree_get(BTree *self, PyObject *key)
309 {
310     return _BTree_get(self, key, 0, _BGET_REPLACE_TYPE_ERROR);
311 }
312 
313 /* Create a new bucket for the BTree or TreeSet using the class attribute
314    _bucket_type, which is normally initialized to BucketType or SetType
315    as appropriate.
316 */
317 static Sized *
BTree_newBucket(BTree * self)318 BTree_newBucket(BTree *self)
319 {
320     PyObject *factory;
321     Sized *result;
322 
323     /* _bucket_type_str defined in BTreeModuleTemplate.c */
324     factory = PyObject_GetAttr((PyObject *)Py_TYPE(self), _bucket_type_str);
325     if (factory == NULL)
326         return NULL;
327     /* TODO: Should we check that the factory actually returns something
328         of the appropriate type? How?  The C code here is going to
329         depend on any custom bucket type having the same layout at the
330         C level.
331     */
332     result = SIZED(PyObject_CallObject(factory, NULL));
333     Py_DECREF(factory);
334     return result;
335 }
336 
337 /*
338  * Move data from the current BTree, from index onward, to the newly created
339  * BTree 'next'.  self and next must both be activated.  If index is OOB (< 0
340  * or >= self->len), use self->len / 2 as the index (i.e., split at the
341  * midpoint).  self must have at least 2 children on entry, and index must
342  * be such that self and next each have at least one child at exit.  self's
343  * accessed time is updated.
344  *
345  * Return:
346  *    -1    error
347  *     0    OK
348  */
349 static int
BTree_split(BTree * self,int index,BTree * next)350 BTree_split(BTree *self, int index, BTree *next)
351 {
352     int next_size;
353     Sized *child;
354 
355     if (index < 0 || index >= self->len)
356         index = self->len / 2;
357 
358     next_size = self->len - index;
359     ASSERT(index > 0, "split creates empty tree", -1);
360     ASSERT(next_size > 0, "split creates empty tree", -1);
361 
362     next->data = BTree_Malloc(sizeof(BTreeItem) * next_size);
363     if (!next->data)
364         return -1;
365     memcpy(next->data, self->data + index, sizeof(BTreeItem) * next_size);
366     next->size = next_size;  /* but don't set len until we succeed */
367 
368     /* Set next's firstbucket.  self->firstbucket is still correct. */
369     child = next->data[0].child;
370     if (SameType_Check(self, child))
371     {
372         PER_USE_OR_RETURN(child, -1);
373         next->firstbucket = BTREE(child)->firstbucket;
374         PER_UNUSE(child);
375     }
376     else
377         next->firstbucket = BUCKET(child);
378     Py_INCREF(next->firstbucket);
379 
380     next->len = next_size;
381     self->len = index;
382     return PER_CHANGED(self) >= 0 ? 0 : -1;
383 }
384 
385 
386 /* Fwd decl -- BTree_grow and BTree_split_root reference each other. */
387 static int BTree_grow(BTree *self, int index, int noval);
388 
389 /* Split the root.  This is a little special because the root isn't a child
390  * of anything else, and the root needs to retain its object identity.  So
391  * this routine moves the root's data into a new child, and splits the
392  * latter.  This leaves the root with two children.
393  *
394  * Return:
395  *      0   OK
396  *     -1   error
397  *
398  * CAUTION:  The caller must call PER_CHANGED on self.
399  */
400 static int
BTree_split_root(BTree * self,int noval)401 BTree_split_root(BTree *self, int noval)
402 {
403     BTree *child;
404     BTreeItem *d;
405 
406     /* Create a child BTree, and a new data vector for self. */
407     child = BTREE(PyObject_CallObject(OBJECT(Py_TYPE(self)), NULL));
408     if (!child)
409         return -1;
410 
411     d = BTree_Malloc(sizeof(BTreeItem) * 2);
412     if (!d) {
413         Py_DECREF(child);
414         return -1;
415     }
416 
417     /* Move our data to new BTree. */
418     child->size = self->size;
419     child->len = self->len;
420     child->data = self->data;
421     child->firstbucket = self->firstbucket;
422     Py_INCREF(child->firstbucket);
423 
424     /* Point self to child and split the child. */
425     self->data = d;
426     self->len = 1;
427     self->size = 2;
428     self->data[0].child = SIZED(child); /* transfers reference ownership */
429     return BTree_grow(self, 0, noval);
430 }
431 
432 /*
433 ** BTree_grow
434 **
435 ** Grow a BTree
436 **
437 ** Arguments:    self    The BTree
438 **        index    self->data[index].child needs to be split.  index
439 **                      must be 0 if self is empty (len == 0), and a new
440 **                      empty bucket is created then.
441 **              noval   Boolean; is this a set (true) or mapping (false)?
442 **
443 ** Returns:     0    on success
444 **        -1    on failure
445 **
446 ** CAUTION:  If self is empty on entry, this routine adds an empty bucket.
447 ** That isn't a legitimate BTree; if the caller doesn't put something in
448 ** in the bucket (say, because of a later error), the BTree must be cleared
449 ** to get rid of the empty bucket.
450 */
451 static int
BTree_grow(BTree * self,int index,int noval)452 BTree_grow(BTree *self, int index, int noval)
453 {
454     int i;
455     Sized *v, *e = 0;
456     BTreeItem *d;
457 
458     if (self->len == self->size)
459     {
460         if (self->size)
461         {
462             d = BTree_Realloc(self->data, sizeof(BTreeItem) * self->size * 2);
463             if (d == NULL)
464                 return -1;
465             self->data = d;
466             self->size *= 2;
467         }
468         else
469         {
470             d = BTree_Malloc(sizeof(BTreeItem) * 2);
471             if (d == NULL)
472                 return -1;
473             self->data = d;
474             self->size = 2;
475         }
476     }
477 
478     if (self->len)
479     {
480         long max_size = _max_internal_size(self);
481         if (max_size < 0) return -1;
482 
483         d = self->data + index;
484         v = d->child;
485         /* Create a new object of the same type as the target value */
486         e = (Sized *)PyObject_CallObject((PyObject *)Py_TYPE(v), NULL);
487         if (e == NULL)
488             return -1;
489 
490         UNLESS(PER_USE(v))
491         {
492             Py_DECREF(e);
493             return -1;
494         }
495 
496         /* Now split between the original (v) and the new (e) at the midpoint*/
497         if (SameType_Check(self, v))
498             i = BTree_split((BTree *)v, -1, (BTree *)e);
499         else
500             i = bucket_split((Bucket *)v, -1, (Bucket *)e);
501         PER_ALLOW_DEACTIVATION(v);
502 
503         if (i < 0)
504         {
505             Py_DECREF(e);
506             assert(PyErr_Occurred());
507             return -1;
508         }
509 
510         index++;
511         d++;
512         if (self->len > index)    /* Shift up the old values one array slot */
513             memmove(d+1, d, sizeof(BTreeItem)*(self->len-index));
514 
515         if (SameType_Check(self, v))
516         {
517             COPY_KEY(d->key, BTREE(e)->data->key);
518 
519             /* We take the unused reference from e, so there's no
520                 reason to INCREF!
521             */
522             /* INCREF_KEY(self->data[1].key); */
523         }
524         else
525         {
526             COPY_KEY(d->key, BUCKET(e)->keys[0]);
527             INCREF_KEY(d->key);
528         }
529         d->child = e;
530         self->len++;
531 
532         if (self->len >= max_size * 2)    /* the root is huge */
533             return BTree_split_root(self, noval);
534     }
535     else
536     {
537         /* The BTree is empty.  Create an empty bucket.  See CAUTION in
538         * the comments preceding.
539         */
540         assert(index == 0);
541         d = self->data;
542         d->child = BTree_newBucket(self);
543         if (d->child == NULL)
544             return -1;
545         self->len = 1;
546         Py_INCREF(d->child);
547         self->firstbucket = (Bucket *)d->child;
548     }
549 
550     return 0;
551 }
552 
553 /* Return the rightmost bucket reachable from following child pointers
554  * from self.  The caller gets a new reference to this bucket.  Note that
555  * bucket 'next' pointers are not followed:  if self is an interior node
556  * of a BTree, this returns the rightmost bucket in that node's subtree.
557  * In case of error, returns NULL.
558  *
559  * self must not be a ghost; this isn't checked.  The result may be a ghost.
560  *
561  * Pragmatics:  Note that the rightmost bucket's last key is the largest
562  * key in self's subtree.
563  */
564 static Bucket *
BTree_lastBucket(BTree * self)565 BTree_lastBucket(BTree *self)
566 {
567     Sized *pchild;
568     Bucket *result;
569 
570     UNLESS (self->data && self->len)
571     {
572         IndexError(-1); /* is this the best action to take? */
573         return NULL;
574     }
575 
576     pchild = self->data[self->len - 1].child;
577     if (SameType_Check(self, pchild))
578     {
579         self = BTREE(pchild);
580         PER_USE_OR_RETURN(self, NULL);
581         result = BTree_lastBucket(self);
582         PER_UNUSE(self);
583     }
584     else
585     {
586         Py_INCREF(pchild);
587         result = BUCKET(pchild);
588     }
589     return result;
590 }
591 
592 static int
BTree_deleteNextBucket(BTree * self)593 BTree_deleteNextBucket(BTree *self)
594 {
595     Bucket *b;
596 
597     UNLESS (PER_USE(self))
598         return -1;
599 
600     b = BTree_lastBucket(self);
601     if (b == NULL)
602         goto err;
603     if (Bucket_deleteNextBucket(b) < 0)
604         goto err;
605 
606     Py_DECREF(b);
607     PER_UNUSE(self);
608 
609     return 0;
610 
611 err:
612     Py_XDECREF(b);
613     PER_ALLOW_DEACTIVATION(self);
614     return -1;
615 }
616 
617 /*
618 ** _BTree_clear
619 **
620 ** Clears out all of the values in the BTree (firstbucket, keys, and children);
621 ** leaving self an empty BTree.
622 **
623 ** Arguments:    self    The BTree
624 **
625 ** Returns:     0    on success
626 **        -1    on failure
627 **
628 ** Internal:  Deallocation order is important.  The danger is that a long
629 ** list of buckets may get freed "at once" via decref'ing the first bucket,
630 ** in which case a chain of consequenct Py_DECREF calls may blow the stack.
631 ** Luckily, every bucket has a refcount of at least two, one due to being a
632 ** BTree node's child, and another either because it's not the first bucket in
633 ** the chain (so the preceding bucket points to it), or because firstbucket
634 ** points to it.  By clearing in the natural depth-first, left-to-right
635 ** order, the BTree->bucket child pointers prevent Py_DECREF(bucket->next)
636 ** calls from freeing bucket->next, and the maximum stack depth is equal
637 ** to the height of the tree.
638 **/
639 static int
_BTree_clear(BTree * self)640 _BTree_clear(BTree *self)
641 {
642     const int len = self->len;
643 
644     if (self->firstbucket)
645     {
646         /* Obscure:  The first bucket is pointed to at least by
647         * self->firstbucket and data[0].child of whichever BTree node it's
648         * a child of.  However, if persistence is enabled then the latter
649         * BTree node may be a ghost at this point, and so its pointers "don't
650         * count":  we can only rely on self's pointers being intact.
651         */
652 #ifdef PERSISTENT
653         ASSERT(Py_REFCNT(self->firstbucket) > 0,
654             "Invalid firstbucket pointer", -1);
655 #else
656         ASSERT(Py_REFCNT(self->firstbucket) > 1,
657             "Invalid firstbucket pointer", -1);
658 #endif
659         Py_DECREF(self->firstbucket);
660         self->firstbucket = NULL;
661     }
662 
663     if (self->data)
664     {
665         int i;
666         if (len > 0) /* 0 is special because key 0 is trash */
667         {
668             Py_DECREF(self->data[0].child);
669         }
670 
671         for (i = 1; i < len; i++)
672         {
673 #ifdef KEY_TYPE_IS_PYOBJECT
674             DECREF_KEY(self->data[i].key);
675 #endif
676             Py_DECREF(self->data[i].child);
677         }
678         free(self->data);
679         self->data = NULL;
680     }
681 
682     self->len = self->size = 0;
683     return 0;
684 }
685 
686 /*
687   Set (value != 0) or delete (value=0) a tree item.
688 
689   If unique is non-zero, then only change if the key is
690   new.
691 
692   If noval is non-zero, then don't set a value (the tree
693   is a set).
694 
695   Return:
696   -1  error
697   0  successful, and number of entries didn't change
698   >0  successful, and number of entries did change
699 
700   Internal
701   There are two distinct return values > 0:
702 
703   1  Successful, number of entries changed, but firstbucket did not go away.
704 
705   2  Successful, number of entries changed, firstbucket did go away.
706   This can only happen on a delete (value == NULL).  The caller may
707   need to change its own firstbucket pointer, and in any case *someone*
708   needs to adjust the 'next' pointer of the bucket immediately preceding
709   the bucket that went away (it needs to point to the bucket immediately
710   following the bucket that went away).
711 */
712 static int
_BTree_set(BTree * self,PyObject * keyarg,PyObject * value,int unique,int noval)713 _BTree_set(BTree *self, PyObject *keyarg, PyObject *value,
714            int unique, int noval)
715 {
716     int changed = 0;    /* did I mutate? */
717     int min;            /* index of child I searched */
718     BTreeItem *d;       /* self->data[min] */
719     int childlength;    /* len(self->data[min].child) */
720     int status;         /* our return value; and return value from callee */
721     int self_was_empty; /* was self empty at entry? */
722 
723     KEY_TYPE key;
724     int copied = 1;
725 
726     COPY_KEY_FROM_ARG(key, keyarg, copied);
727     if (!copied)
728         return -1;
729 #ifdef KEY_CHECK_ON_SET
730     if (value && !KEY_CHECK_ON_SET(keyarg))
731         return -1;
732 #endif
733 
734     PER_USE_OR_RETURN(self, -1);
735 
736     self_was_empty = self->len == 0;
737     if (self_was_empty)
738     {
739         /* We're empty.  Make room. */
740         if (value)
741         {
742             if (BTree_grow(self, 0, noval) < 0)
743                 goto Error;
744         }
745         else
746         {
747             /* Can't delete a key from an empty BTree. */
748             PyErr_SetObject(PyExc_KeyError, keyarg);
749             goto Error;
750         }
751     }
752 
753     /* Find the right child to search, and hand the work off to it. */
754     BTREE_SEARCH(min, self, key, goto Error);
755     d = self->data + min;
756 
757 #ifdef PERSISTENT
758     PER_READCURRENT(self, goto Error);
759 #endif
760 
761     if (SameType_Check(self, d->child))
762         status = _BTree_set(BTREE(d->child), keyarg, value, unique, noval);
763     else
764     {
765         int bucket_changed = 0;
766         status = _bucket_set(BUCKET(d->child), keyarg,
767                             value, unique, noval, &bucket_changed);
768 #ifdef PERSISTENT
769         /* If a BTree contains only a single bucket, BTree.__getstate__()
770         * includes the bucket's entire state, and the bucket doesn't get
771         * an oid of its own.  So if we have a single oid-less bucket that
772         * changed, it's *our* oid that should be marked as changed -- the
773         * bucket doesn't have one.
774         */
775         if (bucket_changed
776             && self->len == 1
777             && self->data[0].child->oid == NULL)
778         {
779             changed = 1;
780         }
781 #endif
782     }
783     if (status == 0)
784         goto Done;
785     if (status < 0)
786         goto Error;
787     assert(status == 1 || status == 2);
788 
789     /* The child changed size.  Get its new size.  Note that since the tree
790     * rooted at the child changed size, so did the tree rooted at self:
791     * our status must be >= 1 too.
792     */
793     UNLESS(PER_USE(d->child))
794         goto Error;
795     childlength = d->child->len;
796     PER_UNUSE(d->child);
797 
798     if (value)
799     {
800         /* A bucket got bigger -- if it's "too big", split it. */
801         int toobig;
802 
803         assert(status == 1);    /* can be 2 only on deletes */
804         if (SameType_Check(self, d->child)) {
805             long max_size = _max_internal_size(self);
806             if (max_size < 0) return -1;
807             toobig = childlength > max_size;
808         }
809         else {
810             long max_size = _max_leaf_size(self);
811             if (max_size < 0) return -1;
812             toobig = childlength > max_size;
813         }
814         if (toobig) {
815             if (BTree_grow(self, min, noval) < 0)
816                 goto Error;
817             changed = 1;        /* BTree_grow mutated self */
818         }
819         goto Done;      /* and status still == 1 */
820     }
821 
822     /* A bucket got smaller.  This is much harder, and despite that we
823     * don't try to rebalance the tree.
824     */
825 
826     if (min && childlength)
827     {  /* We removed a key. but the node child is non-empty.  If the
828         deleted key is the node key, then update the node key using
829         the smallest key of the node child.
830 
831         This doesn't apply to the 0th node, whos key is unused.
832         */
833         int _cmp = 1;
834         TEST_KEY_SET_OR(_cmp, key, d->key) goto Error;
835         if (_cmp == 0) /* Need to replace key with first key from child */
836         {
837             Bucket *bucket;
838 
839             if (SameType_Check(self, d->child))
840             {
841                 UNLESS(PER_USE(d->child))
842                     goto Error;
843                 bucket = BTREE(d->child)->firstbucket;
844                 PER_UNUSE(d->child);
845             }
846             else
847                 bucket = BUCKET(d->child);
848 
849             UNLESS(PER_USE(bucket))
850                 goto Error;
851             DECREF_KEY(d->key);
852             COPY_KEY(d->key, bucket->keys[0]);
853             INCREF_KEY(d->key);
854             PER_UNUSE(bucket);
855             if (PER_CHANGED(self) < 0)
856                     goto Error;
857         }
858     }
859 
860     if (status == 2)
861     {
862         /* The child must be a BTree because bucket.set never returns 2 */
863         /* Two problems to solve:  May have to adjust our own firstbucket,
864         * and the bucket that went away needs to get unlinked.
865         */
866         if (min)
867         {
868             /* This wasn't our firstbucket, so no need to adjust ours (note
869             * that it can't be the firstbucket of any node above us either).
870             * Tell "the tree to the left" to do the unlinking.
871             */
872             if (BTree_deleteNextBucket(BTREE(d[-1].child)) < 0)
873                 goto Error;
874             status = 1;     /* we solved the child's firstbucket problem */
875         }
876         else
877         {
878             /* This was our firstbucket.  Update to new firstbucket value. */
879             Bucket *nextbucket;
880             UNLESS(PER_USE(d->child))
881                 goto Error;
882             nextbucket = BTREE(d->child)->firstbucket;
883             PER_UNUSE(d->child);
884 
885             Py_XINCREF(nextbucket);
886             Py_DECREF(self->firstbucket);
887             self->firstbucket = nextbucket;
888             changed = 1;
889 
890             /* The caller has to do the unlinking -- we can't.  Also, since
891             * it was our firstbucket, it may also be theirs.
892             */
893             assert(status == 2);
894         }
895     }
896 
897     /* If the child isn't empty, we're done!  We did all that was possible for
898     * us to do with the firstbucket problems the child gave us, and since the
899     * child isn't empty don't create any new firstbucket problems of our own.
900     */
901     if (childlength)
902         goto Done;
903 
904     /* The child became empty:  we need to remove it from self->data.
905     * But first, if we're a bottom-level node, we've got more bucket-fiddling
906     * to set up.
907     */
908     if (! SameType_Check(self, d->child))
909     {
910         /* We're about to delete a bucket, so need to adjust bucket pointers. */
911         if (min)
912         {
913             /* It's not our first bucket, so we can tell the previous
914             * bucket to adjust its reference to it.  It can't be anyone
915             * else's first bucket either, so the caller needn't do anything.
916             */
917             if (Bucket_deleteNextBucket(BUCKET(d[-1].child)) < 0)
918                 goto Error;
919             /* status should be 1, and already is:  if it were 2, the
920             * block above would have set it to 1 in its min != 0 branch.
921             */
922             assert(status == 1);
923         }
924         else
925         {
926             Bucket *nextbucket;
927             /* It's our first bucket.  We can't unlink it directly. */
928             /* 'changed' will be set true by the deletion code following. */
929             UNLESS(PER_USE(d->child))
930                 goto Error;
931             nextbucket = BUCKET(d->child)->next;
932             PER_UNUSE(d->child);
933 
934             Py_XINCREF(nextbucket);
935             Py_DECREF(self->firstbucket);
936             self->firstbucket = nextbucket;
937 
938             status = 2; /* we're giving our caller a new firstbucket problem */
939         }
940     }
941 
942     /* Remove the child from self->data. */
943     Py_DECREF(d->child);
944 #ifdef KEY_TYPE_IS_PYOBJECT
945     if (min)
946     {
947         DECREF_KEY(d->key);
948     }
949     else if (self->len > 1)
950     {
951         /* We're deleting the first child of a BTree with more than one
952         * child.  The key at d+1 is about to be shifted into slot 0,
953         * and hence never to be referenced again (the key in slot 0 is
954         * trash).
955         */
956         DECREF_KEY((d+1)->key);
957     }
958     /* Else min==0 and len==1:  we're emptying the BTree entirely, and
959     * there is no key in need of decrefing.
960     */
961 #endif
962     --self->len;
963     if (min < self->len)
964         memmove(d, d+1, (self->len - min) * sizeof(BTreeItem));
965     changed = 1;
966 
967 Done:
968 #ifdef PERSISTENT
969     if (changed)
970     {
971         if (PER_CHANGED(self) < 0)
972             goto Error;
973     }
974 #endif
975     PER_UNUSE(self);
976     return status;
977 
978 Error:
979     assert(PyErr_Occurred());
980     if (self_was_empty)
981     {
982         /* BTree_grow may have left the BTree in an invalid state.  Make
983         * sure the tree is a legitimate empty tree.
984         */
985         _BTree_clear(self);
986     }
987     PER_UNUSE(self);
988     return -1;
989 }
990 
991 /*
992 ** BTree_setitem
993 **
994 ** wrapper for _BTree_set
995 **
996 ** Arguments:    self    The BTree
997 **        key    The key to insert
998 **        v    The value to insert
999 **
1000 ** Returns    -1    on failure
1001 **         0    on success
1002 */
1003 static int
BTree_setitem(BTree * self,PyObject * key,PyObject * v)1004 BTree_setitem(BTree *self, PyObject *key, PyObject *v)
1005 {
1006     if (_BTree_set(self, key, v, 0, 0) < 0)
1007         return -1;
1008     return 0;
1009 }
1010 
1011 #ifdef PERSISTENT
1012 static PyObject *
BTree__p_deactivate(BTree * self,PyObject * args,PyObject * keywords)1013 BTree__p_deactivate(BTree *self, PyObject *args, PyObject *keywords)
1014 {
1015     int ghostify = 1;
1016     PyObject *force = NULL;
1017 
1018     if (args && PyTuple_GET_SIZE(args) > 0)
1019     {
1020         PyErr_SetString(PyExc_TypeError,
1021                         "_p_deactivate takes not positional arguments");
1022         return NULL;
1023     }
1024     if (keywords)
1025     {
1026         int size = PyDict_Size(keywords);
1027         force = PyDict_GetItemString(keywords, "force");
1028         if (force)
1029             size--;
1030         if (size)
1031         {
1032             PyErr_SetString(PyExc_TypeError,
1033                             "_p_deactivate only accepts keyword arg force");
1034             return NULL;
1035         }
1036     }
1037 
1038     /*
1039       Always clear our node size cache, whether we're in a jar or not. It is
1040       only read from the type anyway, and we'll do so on the next write after
1041       we get activated.
1042     */
1043     self->max_internal_size = 0;
1044     self->max_leaf_size = 0;
1045 
1046     if (self->jar && self->oid)
1047     {
1048         ghostify = self->state == cPersistent_UPTODATE_STATE;
1049         if (!ghostify && force)
1050         {
1051             if (PyObject_IsTrue(force))
1052                 ghostify = 1;
1053             if (PyErr_Occurred())
1054                 return NULL;
1055         }
1056         if (ghostify)
1057         {
1058             if (_BTree_clear(self) < 0)
1059                 return NULL;
1060             PER_GHOSTIFY(self);
1061         }
1062     }
1063 
1064     Py_INCREF(Py_None);
1065     return Py_None;
1066 }
1067 #endif
1068 
1069 static PyObject *
BTree_clear(BTree * self)1070 BTree_clear(BTree *self)
1071 {
1072     UNLESS (PER_USE(self)) return NULL;
1073 
1074     if (self->len)
1075     {
1076         if (_BTree_clear(self) < 0)
1077             goto err;
1078         if (PER_CHANGED(self) < 0)
1079             goto err;
1080     }
1081 
1082     PER_UNUSE(self);
1083 
1084     Py_INCREF(Py_None);
1085     return Py_None;
1086 
1087 err:
1088     PER_UNUSE(self);
1089     return NULL;
1090 }
1091 
1092 /*
1093  * Return:
1094  *
1095  * For an empty BTree (self->len == 0), None.
1096  *
1097  * For a BTree with one child (self->len == 1), and that child is a bucket,
1098  * and that bucket has a NULL oid, a one-tuple containing a one-tuple
1099  * containing the bucket's state:
1100  *
1101  *     (
1102  *         (
1103  *              child[0].__getstate__(),
1104  *         ),
1105  *     )
1106  *
1107  * Else a two-tuple.  The first element is a tuple interleaving the BTree's
1108  * keys and direct children, of size 2*self->len - 1 (key[0] is unused and
1109  * is not saved).  The second element is the firstbucket:
1110  *
1111  *     (
1112  *          (child[0], key[1], child[1], key[2], child[2], ...,
1113  *                                       key[len-1], child[len-1]),
1114  *          self->firstbucket
1115  *     )
1116  *
1117  * In the above, key[i] means self->data[i].key, and similarly for child[i].
1118  */
1119 static PyObject *
BTree_getstate(BTree * self)1120 BTree_getstate(BTree *self)
1121 {
1122     PyObject *r = NULL;
1123     PyObject *o;
1124     int i, l;
1125 
1126     UNLESS (PER_USE(self))
1127         return NULL;
1128 
1129     if (self->len)
1130     {
1131         r = PyTuple_New(self->len * 2 - 1);
1132         if (r == NULL)
1133             goto err;
1134 
1135         if (self->len == 1
1136             && Py_TYPE(self->data->child) != Py_TYPE(self)
1137 #ifdef PERSISTENT
1138             && BUCKET(self->data->child)->oid == NULL
1139 #endif
1140             )
1141         {
1142             /* We have just one bucket. Save its data directly. */
1143             o = bucket_getstate((Bucket *)self->data->child);
1144             if (o == NULL)
1145                 goto err;
1146             PyTuple_SET_ITEM(r, 0, o);
1147             ASSIGN(r, Py_BuildValue("(O)", r));
1148         }
1149         else
1150         {
1151             for (i=0, l=0; i < self->len; i++)
1152             {
1153                 if (i)
1154                 {
1155                     COPY_KEY_TO_OBJECT(o, self->data[i].key);
1156                     PyTuple_SET_ITEM(r, l, o);
1157                     l++;
1158                 }
1159                 o = (PyObject *)self->data[i].child;
1160                 Py_INCREF(o);
1161                 PyTuple_SET_ITEM(r,l,o);
1162                 l++;
1163             }
1164             ASSIGN(r, Py_BuildValue("OO", r, self->firstbucket));
1165         }
1166 
1167     }
1168     else
1169     {
1170         r = Py_None;
1171         Py_INCREF(r);
1172     }
1173 
1174     PER_UNUSE(self);
1175 
1176     return r;
1177 
1178 err:
1179     PER_UNUSE(self);
1180     Py_XDECREF(r);
1181     return NULL;
1182 }
1183 
1184 static int
_BTree_setstate(BTree * self,PyObject * state,int noval)1185 _BTree_setstate(BTree *self, PyObject *state, int noval)
1186 {
1187     PyObject *items, *firstbucket = NULL;
1188     BTreeItem *d;
1189     int len, l, i, copied=1;
1190     PyTypeObject *leaftype = (noval ? &SetType : &BucketType);
1191 
1192     if (_BTree_clear(self) < 0)
1193         return -1;
1194 
1195     /* The state of a BTree can be one of the following:
1196         None -- an empty BTree
1197         A one-tuple -- a single bucket btree
1198         A two-tuple -- a BTree with more than one bucket
1199         See comments for BTree_getstate() for the details.
1200     */
1201 
1202     if (state == Py_None)
1203         return 0;
1204 
1205     if (!PyArg_ParseTuple(state, "O|O:__setstate__", &items, &firstbucket))
1206         return -1;
1207 
1208     if (!PyTuple_Check(items))
1209     {
1210         PyErr_SetString(PyExc_TypeError,
1211                         "tuple required for first state element");
1212         return -1;
1213     }
1214 
1215     len = PyTuple_Size(items);
1216     ASSERT(len >= 0, "_BTree_setstate: items tuple has negative size", -1);
1217     len = (len + 1) / 2;
1218 
1219     assert(len > 0); /* If the BTree is empty, it's state is None. */
1220     assert(self->size == 0); /* We called _BTree_clear(). */
1221 
1222     self->data = BTree_Malloc(sizeof(BTreeItem) * len);
1223     if (self->data == NULL)
1224         return -1;
1225     self->size = len;
1226 
1227     for (i = 0, d = self->data, l = 0; i < len; i++, d++)
1228     {
1229         PyObject *v;
1230         if (i)
1231         { /* skip the first key slot */
1232             COPY_KEY_FROM_ARG(d->key, PyTuple_GET_ITEM(items, l), copied);
1233             l++;
1234             if (!copied)
1235                 return -1;
1236             INCREF_KEY(d->key);
1237         }
1238         v = PyTuple_GET_ITEM(items, l);
1239         if (PyTuple_Check(v))
1240         {
1241             /* Handle the special case in __getstate__() for a BTree
1242                 with a single bucket. */
1243             d->child = BTree_newBucket(self);
1244             if (!d->child)
1245                 return -1;
1246             if (noval)
1247             {
1248                 if (_set_setstate(BUCKET(d->child), v) < 0)
1249                 return -1;
1250             }
1251             else
1252             {
1253                 if (_bucket_setstate(BUCKET(d->child), v) < 0)
1254                 return -1;
1255             }
1256         }
1257         else
1258         {
1259             if (!(SameType_Check(self, v) ||
1260                   PyObject_IsInstance(v, (PyObject *)leaftype)))
1261             {
1262                 PyErr_Format(PyExc_TypeError,
1263                              "tree child %s is neither %s nor %s",
1264                              Py_TYPE(v)->tp_name,
1265                              Py_TYPE(self)->tp_name,
1266                              leaftype->tp_name);
1267                 return -1;
1268             }
1269 
1270             d->child = (Sized *)v;
1271             Py_INCREF(v);
1272         }
1273         l++;
1274     }
1275 
1276     if (!firstbucket)
1277         firstbucket = (PyObject *)self->data->child;
1278 
1279     if (!PyObject_IsInstance(firstbucket, (PyObject *)leaftype))
1280     {
1281         PyErr_SetString(PyExc_TypeError,
1282                         "No firstbucket in non-empty BTree");
1283         return -1;
1284     }
1285     self->firstbucket = BUCKET(firstbucket);
1286     Py_INCREF(firstbucket);
1287 #ifndef PERSISTENT
1288     /* firstbucket is also the child of some BTree node, but that node may
1289     * be a ghost if persistence is enabled.
1290     */
1291     assert(Py_REFCNT(self->firstbucket) > 1);
1292 #endif
1293     self->len = len;
1294 
1295     return 0;
1296 }
1297 
1298 static PyObject *
BTree_setstate(BTree * self,PyObject * arg)1299 BTree_setstate(BTree *self, PyObject *arg)
1300 {
1301     int r;
1302 
1303     PER_PREVENT_DEACTIVATION(self);
1304     r = _BTree_setstate(self, arg, 0);
1305     PER_UNUSE(self);
1306 
1307     if (r < 0)
1308         return NULL;
1309     Py_INCREF(Py_None);
1310     return Py_None;
1311 }
1312 
1313 #ifdef PERSISTENT
1314 
1315 /* Recognize the special cases of a BTree that's empty or contains a single
1316  * bucket.  In the former case, return a borrowed reference to Py_None.
1317  * In this single-bucket case, the bucket state is embedded directly in the
1318  * BTree state, like so:
1319  *
1320  *     (
1321  *         (
1322  *              thebucket.__getstate__(),
1323  *         ),
1324  *     )
1325  *
1326  * When this obtains, return a borrowed reference to thebucket.__getstate__().
1327  * Else return NULL with an exception set.  The exception should always be
1328  * ConflictError then, but may be TypeError if the state makes no sense at all
1329  * for a BTree (corrupted or hostile state).
1330  */
1331 PyObject *
get_bucket_state(PyObject * t)1332 get_bucket_state(PyObject *t)
1333 {
1334     if (t == Py_None)
1335         return Py_None;        /* an empty BTree */
1336     if (! PyTuple_Check(t))
1337     {
1338         PyErr_SetString(PyExc_TypeError,
1339                         "_p_resolveConflict: expected tuple or None for state");
1340         return NULL;
1341     }
1342 
1343     if (PyTuple_GET_SIZE(t) == 2)
1344     {
1345         /* A non-degenerate BTree. */
1346         return merge_error(-1, -1, -1, 11);
1347     }
1348 
1349     /* We're in the one-bucket case. */
1350 
1351     if (PyTuple_GET_SIZE(t) != 1)
1352     {
1353         PyErr_SetString(PyExc_TypeError,
1354                         "_p_resolveConflict: expected 1- or 2-tuple for state");
1355         return NULL;
1356     }
1357 
1358     t = PyTuple_GET_ITEM(t, 0);
1359     if (! PyTuple_Check(t) || PyTuple_GET_SIZE(t) != 1)
1360     {
1361         PyErr_SetString(PyExc_TypeError,
1362                         "_p_resolveConflict: expected 1-tuple containing "
1363                         "bucket state");
1364         return NULL;
1365     }
1366 
1367     t = PyTuple_GET_ITEM(t, 0);
1368     if (! PyTuple_Check(t))
1369     {
1370         PyErr_SetString(PyExc_TypeError,
1371                         "_p_resolveConflict: expected tuple for bucket state");
1372         return NULL;
1373     }
1374 
1375     return t;
1376 }
1377 
1378 /* Tricky.  The only kind of BTree conflict we can actually potentially
1379  * resolve is the special case of a BTree containing a single bucket,
1380  * in which case this becomes a fancy way of calling the bucket conflict
1381  * resolution code.
1382  */
1383 static PyObject *
BTree__p_resolveConflict(BTree * self,PyObject * args)1384 BTree__p_resolveConflict(BTree *self, PyObject *args)
1385 {
1386     PyObject *s[3];
1387     PyObject *x, *y, *z;
1388 
1389     if (!PyArg_ParseTuple(args, "OOO", &x, &y, &z))
1390         return NULL;
1391 
1392     s[0] = get_bucket_state(x);
1393     if (s[0] == NULL)
1394         return NULL;
1395     s[1] = get_bucket_state(y);
1396     if (s[1] == NULL)
1397         return NULL;
1398     s[2] = get_bucket_state(z);
1399     if (s[2] == NULL)
1400         return NULL;
1401 
1402     if (PyObject_IsInstance((PyObject *)self, (PyObject *)&BTreeType))
1403         x = _bucket__p_resolveConflict(OBJECT(&BucketType), s);
1404     else
1405         x = _bucket__p_resolveConflict(OBJECT(&SetType), s);
1406 
1407     if (x == NULL)
1408         return NULL;
1409 
1410     return Py_BuildValue("((N))", x);
1411 }
1412 #endif
1413 
1414 /*
1415   BTree_findRangeEnd -- Find one end, expressed as a bucket and
1416   position, for a range search.
1417 
1418   If low, return bucket and index of the smallest item >= key,
1419   otherwise return bucket and index of the largest item <= key.
1420 
1421   If exclude_equal, exact matches aren't acceptable; if one is found,
1422   move right if low, or left if !low (this is for range searches exclusive
1423   of an endpoint).
1424 
1425   Return:
1426   -1      Error; offset and bucket unchanged
1427   0      Not found; offset and bucket unchanged
1428   1      Correct bucket and offset stored; the caller owns a new reference
1429   to the bucket.
1430 
1431   Internal:
1432   We do binary searches in BTree nodes downward, at each step following
1433   C(i) where K(i) <= key < K(i+1).  As always, K(i) <= C(i) < K(i+1) too.
1434   (See Maintainer.txt for the meaning of that notation.)  That eventually
1435   leads to a bucket where we do Bucket_findRangeEnd.  That usually works,
1436   but there are two cases where it can fail to find the correct answer:
1437 
1438   1. On a low search, we find a bucket with keys >= K(i), but that doesn't
1439   imply there are keys in the bucket >= key.  For example, suppose
1440   a bucket has keys in 1..100, its successor's keys are in 200..300, and
1441   we're doing a low search on 150.  We'll end up in the first bucket,
1442   but there are no keys >= 150 in it.  K(i+1) > key, though, and all
1443   the keys in C(i+1) >= K(i+1) > key, so the first key in the next
1444   bucket (if any) is the correct result.  This is easy to find by
1445   following the bucket 'next' pointer.
1446 
1447   2. On a high search, again that the keys in the bucket are >= K(i)
1448   doesn't imply that any key in the bucket is <= key, but it's harder
1449   for this to fail (and an earlier version of this routine didn't
1450   catch it):  if K(i) itself is in the bucket, it works (then
1451   K(i) <= key is *a* key in the bucket that's in the desired range).
1452   But when keys get deleted from buckets, they aren't also deleted from
1453   BTree nodes, so there's no guarantee that K(i) is in the bucket.
1454   For example, delete the smallest key S from some bucket, and S
1455   remains in the interior BTree nodes.  Do a high search for S, and
1456   the BTree nodes direct the search to the bucket S used to be in,
1457   but all keys remaining in that bucket are > S.  The largest key in
1458   the *preceding* bucket (if any) is < K(i), though, and K(i) <= key,
1459   so the largest key in the preceding bucket is < key and so is the
1460   proper result.
1461 
1462   This is harder to get at efficiently, as buckets are linked only in
1463   the increasing direction.  While we're searching downward,
1464   deepest_smaller is set to the  node deepest in the tree where
1465   we *could* have gone to the left of C(i).  The rightmost bucket in
1466   deepest_smaller's subtree is the bucket preceding the bucket we find
1467   at first.  This is clumsy to get at, but efficient.
1468 */
1469 static int
BTree_findRangeEnd(BTree * self,PyObject * keyarg,int low,int exclude_equal,Bucket ** bucket,int * offset)1470 BTree_findRangeEnd(BTree *self, PyObject *keyarg, int low, int exclude_equal,
1471                    Bucket **bucket, int *offset)
1472 {
1473     Sized *deepest_smaller = NULL;      /* last possibility to move left */
1474     int deepest_smaller_is_btree = 0;   /* Boolean; if false, it's a bucket */
1475     Bucket *pbucket;
1476     int self_got_rebound = 0;   /* Boolean; when true, deactivate self */
1477     int result = -1;            /* Until proven innocent */
1478     int i;
1479     KEY_TYPE key;
1480     int copied = 1;
1481 
1482     COPY_KEY_FROM_ARG(key, keyarg, copied);
1483     UNLESS (copied)
1484         return -1;
1485 
1486     /* We don't need to: PER_USE_OR_RETURN(self, -1);
1487         because the caller does. */
1488     UNLESS (self->data && self->len)
1489         return 0;
1490 
1491     /* Search downward until hitting a bucket, stored in pbucket. */
1492     for (;;)
1493     {
1494         Sized *pchild;
1495         int pchild_is_btree;
1496 
1497         BTREE_SEARCH(i, self, key, goto Done);
1498         pchild = self->data[i].child;
1499         pchild_is_btree = SameType_Check(self, pchild);
1500         if (i)
1501         {
1502             deepest_smaller = self->data[i-1].child;
1503             deepest_smaller_is_btree = pchild_is_btree;
1504         }
1505 
1506         if (pchild_is_btree)
1507         {
1508             if (self_got_rebound)
1509             {
1510                 PER_UNUSE(self);
1511             }
1512             self = BTREE(pchild);
1513             self_got_rebound = 1;
1514             PER_USE_OR_RETURN(self, -1);
1515         }
1516         else
1517         {
1518             pbucket = BUCKET(pchild);
1519             break;
1520         }
1521     }
1522 
1523     /* Search the bucket for a suitable key. */
1524     i = Bucket_findRangeEnd(pbucket, keyarg, low, exclude_equal, offset);
1525     if (i < 0)
1526         goto Done;
1527     if (i > 0)
1528     {
1529         Py_INCREF(pbucket);
1530         *bucket = pbucket;
1531         result = 1;
1532         goto Done;
1533     }
1534     /* This may be one of the two difficult cases detailed in the comments. */
1535     if (low)
1536     {
1537         Bucket *next;
1538 
1539         UNLESS(PER_USE(pbucket)) goto Done;
1540         next = pbucket->next;
1541         if (next) {
1542         result = 1;
1543         Py_INCREF(next);
1544         *bucket = next;
1545         *offset = 0;
1546         }
1547         else
1548         result = 0;
1549         PER_UNUSE(pbucket);
1550     }
1551     /* High-end search:  if it's possible to go left, do so. */
1552     else if (deepest_smaller)
1553     {
1554         if (deepest_smaller_is_btree)
1555         {
1556             UNLESS(PER_USE(deepest_smaller))
1557                 goto Done;
1558             /* We own the reference this returns. */
1559             pbucket = BTree_lastBucket(BTREE(deepest_smaller));
1560             PER_UNUSE(deepest_smaller);
1561             if (pbucket == NULL)
1562                     goto Done;   /* error */
1563         }
1564         else
1565         {
1566             pbucket = BUCKET(deepest_smaller);
1567             Py_INCREF(pbucket);
1568         }
1569         UNLESS(PER_USE(pbucket))
1570             goto Done;
1571         result = 1;
1572         *bucket = pbucket;  /* transfer ownership to caller */
1573         *offset = pbucket->len - 1;
1574         PER_UNUSE(pbucket);
1575     }
1576     else
1577         result = 0;     /* simply not found */
1578 
1579 Done:
1580     if (self_got_rebound)
1581     {
1582         PER_UNUSE(self);
1583     }
1584     return result;
1585 }
1586 
1587 static PyObject *
BTree_maxminKey(BTree * self,PyObject * args,int min)1588 BTree_maxminKey(BTree *self, PyObject *args, int min)
1589 {
1590     PyObject *key=0;
1591     Bucket *bucket = NULL;
1592     int offset, rc;
1593     int empty_tree = 1;
1594 
1595     UNLESS (PyArg_ParseTuple(args, "|O", &key))
1596         return NULL;
1597 
1598     UNLESS (PER_USE(self))
1599         return NULL;
1600 
1601     UNLESS (self->data && self->len)
1602         goto empty;
1603 
1604     /* Find the  range */
1605 
1606     if (key && key != Py_None)
1607     {
1608         if ((rc = BTree_findRangeEnd(self, key, min, 0, &bucket, &offset)) <= 0)
1609         {
1610             if (rc < 0)
1611                 goto err;
1612             empty_tree = 0;
1613             goto empty;
1614         }
1615         PER_UNUSE(self);
1616         UNLESS (PER_USE(bucket))
1617         {
1618             Py_DECREF(bucket);
1619             return NULL;
1620         }
1621     }
1622     else if (min)
1623     {
1624         bucket = self->firstbucket;
1625         PER_UNUSE(self);
1626         PER_USE_OR_RETURN(bucket, NULL);
1627         Py_INCREF(bucket);
1628         offset = 0;
1629     }
1630     else
1631     {
1632         bucket = BTree_lastBucket(self);
1633         PER_UNUSE(self);
1634         UNLESS (PER_USE(bucket))
1635         {
1636             Py_DECREF(bucket);
1637             return NULL;
1638         }
1639         assert(bucket->len);
1640         offset = bucket->len - 1;
1641     }
1642 
1643     COPY_KEY_TO_OBJECT(key, bucket->keys[offset]);
1644     PER_UNUSE(bucket);
1645     Py_DECREF(bucket);
1646 
1647     return key;
1648 
1649 empty:
1650     PyErr_SetString(PyExc_ValueError,
1651                     empty_tree ? "empty tree" :
1652                     "no key satisfies the conditions");
1653 err:
1654     PER_UNUSE(self);
1655     if (bucket)
1656     {
1657         PER_UNUSE(bucket);
1658         Py_DECREF(bucket);
1659     }
1660   return NULL;
1661 }
1662 
1663 static PyObject *
BTree_minKey(BTree * self,PyObject * args)1664 BTree_minKey(BTree *self, PyObject *args)
1665 {
1666     return BTree_maxminKey(self, args, 1);
1667 }
1668 
1669 static PyObject *
BTree_maxKey(BTree * self,PyObject * args)1670 BTree_maxKey(BTree *self, PyObject *args)
1671 {
1672     return BTree_maxminKey(self, args, 0);
1673 }
1674 
1675 /*
1676 ** BTree_rangeSearch
1677 **
1678 ** Generates a BTreeItems object based on the two indexes passed in,
1679 ** being the range between them.
1680 **
1681 */
1682 static PyObject *
BTree_rangeSearch(BTree * self,PyObject * args,PyObject * kw,char type)1683 BTree_rangeSearch(BTree *self, PyObject *args, PyObject *kw, char type)
1684 {
1685     PyObject *min = Py_None;
1686     PyObject *max = Py_None;
1687     int excludemin = 0;
1688     int excludemax = 0;
1689     int rc;
1690     Bucket *lowbucket = NULL;
1691     Bucket *highbucket = NULL;
1692     int lowoffset;
1693     int highoffset;
1694     PyObject *result;
1695 
1696     if (args)
1697     {
1698         if (! PyArg_ParseTupleAndKeywords(args, kw, "|OOii", search_keywords,
1699                                         &min,
1700                                         &max,
1701                                         &excludemin,
1702                                         &excludemax))
1703         return NULL;
1704     }
1705 
1706     UNLESS (PER_USE(self))
1707         return NULL;
1708 
1709     UNLESS (self->data && self->len)
1710         goto empty;
1711 
1712     /* Find the low range */
1713     if (min != Py_None)
1714     {
1715         if ((rc = BTree_findRangeEnd(self, min, 1, excludemin,
1716                                     &lowbucket, &lowoffset)) <= 0)
1717         {
1718             if (rc < 0)
1719                 goto err;
1720             goto empty;
1721         }
1722     }
1723     else
1724     {
1725         lowbucket = self->firstbucket;
1726         lowoffset = 0;
1727         if (excludemin)
1728         {
1729             int bucketlen;
1730             UNLESS (PER_USE(lowbucket))
1731                 goto err;
1732             bucketlen = lowbucket->len;
1733             PER_UNUSE(lowbucket);
1734             if (bucketlen > 1)
1735                 lowoffset = 1;
1736             else if (self->len < 2)
1737                 goto empty;
1738             else
1739             {    /* move to first item in next bucket */
1740                 Bucket *next;
1741                 UNLESS (PER_USE(lowbucket))
1742                     goto err;
1743                 next = lowbucket->next;
1744                 PER_UNUSE(lowbucket);
1745                 assert(next != NULL);
1746                 lowbucket = next;
1747                 /* and lowoffset is still 0 */
1748                 assert(lowoffset == 0);
1749             }
1750         }
1751         Py_INCREF(lowbucket);
1752     }
1753 
1754     /* Find the high range */
1755     if (max != Py_None)
1756     {
1757         if ((rc = BTree_findRangeEnd(self, max, 0, excludemax,
1758                                     &highbucket, &highoffset)) <= 0)
1759         {
1760             Py_DECREF(lowbucket);
1761             if (rc < 0)
1762                 goto err;
1763             goto empty;
1764         }
1765     }
1766     else
1767     {
1768         int bucketlen;
1769         highbucket = BTree_lastBucket(self);
1770         assert(highbucket != NULL);  /* we know self isn't empty */
1771         UNLESS (PER_USE(highbucket))
1772             goto err_and_decref_buckets;
1773         bucketlen = highbucket->len;
1774         PER_UNUSE(highbucket);
1775         highoffset = bucketlen - 1;
1776         if (excludemax)
1777         {
1778             if (highoffset > 0)
1779                 --highoffset;
1780             else if (self->len < 2)
1781                 goto empty_and_decref_buckets;
1782             else /* move to last item of preceding bucket */
1783             {
1784                 int status;
1785                 assert(highbucket != self->firstbucket);
1786                 Py_DECREF(highbucket);
1787                 status = PreviousBucket(&highbucket, self->firstbucket);
1788                 if (status < 0)
1789                 {
1790                     Py_DECREF(lowbucket);
1791                     goto err;
1792                 }
1793                 assert(status > 0);
1794                 Py_INCREF(highbucket);
1795                 UNLESS (PER_USE(highbucket))
1796                     goto err_and_decref_buckets;
1797                 highoffset = highbucket->len - 1;
1798                 PER_UNUSE(highbucket);
1799             }
1800         }
1801         assert(highoffset >= 0);
1802     }
1803 
1804     /* It's still possible that the range is empty, even if min < max.  For
1805     * example, if min=3 and max=4, and 3 and 4 aren't in the BTree, but 2 and
1806     * 5 are, then the low position points to the 5 now and the high position
1807     * points to the 2 now.  They're not necessarily even in the same bucket,
1808     * so there's no trick we can play with pointer compares to get out
1809     * cheap in general.
1810     */
1811     if (lowbucket == highbucket && lowoffset > highoffset)
1812         goto empty_and_decref_buckets;      /* definitely empty */
1813 
1814     /* The buckets differ, or they're the same and the offsets show a non-
1815     * empty range.
1816     */
1817     if (min != Py_None && max != Py_None && /* both args user-supplied */
1818         lowbucket != highbucket)   /* and different buckets */
1819     {
1820         KEY_TYPE first;
1821         KEY_TYPE last;
1822         int cmp;
1823 
1824         /* Have to check the hard way:  see how the endpoints compare. */
1825         UNLESS (PER_USE(lowbucket))
1826             goto err_and_decref_buckets;
1827         COPY_KEY(first, lowbucket->keys[lowoffset]);
1828         PER_UNUSE(lowbucket);
1829 
1830         UNLESS (PER_USE(highbucket))
1831             goto err_and_decref_buckets;
1832         COPY_KEY(last, highbucket->keys[highoffset]);
1833         PER_UNUSE(highbucket);
1834 
1835         TEST_KEY_SET_OR(cmp, first, last)
1836             goto err_and_decref_buckets;
1837         if (cmp > 0)
1838                 goto empty_and_decref_buckets;
1839     }
1840 
1841     PER_UNUSE(self);
1842 
1843     result = newBTreeItems(type, lowbucket, lowoffset, highbucket, highoffset);
1844     Py_DECREF(lowbucket);
1845     Py_DECREF(highbucket);
1846     return result;
1847 
1848 err_and_decref_buckets:
1849     Py_DECREF(lowbucket);
1850     Py_DECREF(highbucket);
1851 
1852 err:
1853     PER_UNUSE(self);
1854     return NULL;
1855 
1856 empty_and_decref_buckets:
1857     Py_DECREF(lowbucket);
1858     Py_DECREF(highbucket);
1859 
1860 empty:
1861     PER_UNUSE(self);
1862     return newBTreeItems(type, 0, 0, 0, 0);
1863 }
1864 
1865 /*
1866 ** BTree_keys
1867 */
1868 static PyObject *
BTree_keys(BTree * self,PyObject * args,PyObject * kw)1869 BTree_keys(BTree *self, PyObject *args, PyObject *kw)
1870 {
1871     return BTree_rangeSearch(self, args, kw, 'k');
1872 }
1873 
1874 /*
1875 ** BTree_values
1876 */
1877 static PyObject *
BTree_values(BTree * self,PyObject * args,PyObject * kw)1878 BTree_values(BTree *self, PyObject *args, PyObject *kw)
1879 {
1880     return BTree_rangeSearch(self, args, kw, 'v');
1881 }
1882 
1883 /*
1884 ** BTree_items
1885 */
1886 static PyObject *
BTree_items(BTree * self,PyObject * args,PyObject * kw)1887 BTree_items(BTree *self, PyObject *args, PyObject *kw)
1888 {
1889     return BTree_rangeSearch(self, args, kw, 'i');
1890 }
1891 
1892 static PyObject *
BTree_byValue(BTree * self,PyObject * omin)1893 BTree_byValue(BTree *self, PyObject *omin)
1894 {
1895     PyObject *r=0, *o=0, *item=0;
1896     VALUE_TYPE min;
1897     VALUE_TYPE v;
1898     int copied=1;
1899     SetIteration it = {0, 0, 1};
1900 
1901     UNLESS (PER_USE(self))
1902         return NULL;
1903 
1904     COPY_VALUE_FROM_ARG(min, omin, copied);
1905     UNLESS(copied)
1906         return NULL;
1907 
1908     UNLESS (r=PyList_New(0))
1909         goto err;
1910 
1911     it.set=BTree_rangeSearch(self, NULL, NULL, 'i');
1912     UNLESS(it.set)
1913         goto err;
1914 
1915     if (nextBTreeItems(&it) < 0)
1916         goto err;
1917 
1918     while (it.position >= 0)
1919     {
1920         if (TEST_VALUE(it.value, min) >= 0)
1921         {
1922             UNLESS (item = PyTuple_New(2))
1923                 goto err;
1924 
1925             COPY_KEY_TO_OBJECT(o, it.key);
1926             UNLESS (o)
1927                 goto err;
1928             PyTuple_SET_ITEM(item, 1, o);
1929 
1930             COPY_VALUE(v, it.value);
1931             NORMALIZE_VALUE(v, min);
1932             COPY_VALUE_TO_OBJECT(o, v);
1933             DECREF_VALUE(v);
1934             UNLESS (o)
1935                 goto err;
1936             PyTuple_SET_ITEM(item, 0, o);
1937 
1938             if (PyList_Append(r, item) < 0)
1939                 goto err;
1940             Py_DECREF(item);
1941             item = 0;
1942         }
1943         if (nextBTreeItems(&it) < 0)
1944             goto err;
1945     }
1946 
1947     item=PyObject_GetAttr(r,sort_str);
1948     UNLESS (item)
1949         goto err;
1950     ASSIGN(item, PyObject_CallObject(item, NULL));
1951     UNLESS (item)
1952         goto err;
1953     ASSIGN(item, PyObject_GetAttr(r, reverse_str));
1954     UNLESS (item)
1955         goto err;
1956     ASSIGN(item, PyObject_CallObject(item, NULL));
1957     UNLESS (item)
1958         goto err;
1959     Py_DECREF(item);
1960 
1961     finiSetIteration(&it);
1962     PER_UNUSE(self);
1963     return r;
1964 
1965 err:
1966     PER_UNUSE(self);
1967     Py_XDECREF(r);
1968     finiSetIteration(&it);
1969     Py_XDECREF(item);
1970     return NULL;
1971 }
1972 
1973 /*
1974 ** BTree_getm
1975 */
1976 static PyObject *
BTree_getm(BTree * self,PyObject * args)1977 BTree_getm(BTree *self, PyObject *args)
1978 {
1979     PyObject *key, *d=Py_None, *r;
1980 
1981     UNLESS (PyArg_ParseTuple(args, "O|O", &key, &d))
1982         return NULL;
1983     if ((r=_BTree_get(self, key, 0, _BGET_REPLACE_TYPE_ERROR)))
1984         return r;
1985     UNLESS (BTree_ShouldSuppressKeyError())
1986         return NULL;
1987     PyErr_Clear();
1988     Py_INCREF(d);
1989     return d;
1990 }
1991 
1992 static PyObject *
BTree_setdefault(BTree * self,PyObject * args)1993 BTree_setdefault(BTree *self, PyObject *args)
1994 {
1995     PyObject *key;
1996     PyObject *failobj; /* default */
1997     PyObject *value;   /* return value */
1998 
1999     if (! PyArg_UnpackTuple(args, "setdefault", 2, 2, &key, &failobj))
2000         return NULL;
2001 
2002     value = _BTree_get(self, key, 0, _BGET_ALLOW_TYPE_ERROR);
2003     if (value != NULL)
2004         return value;
2005 
2006     /* The key isn't in the tree.  If that's not due to a KeyError exception,
2007     * pass back the unexpected exception.
2008     */
2009     if (! BTree_ShouldSuppressKeyError())
2010         return NULL;
2011     PyErr_Clear();
2012 
2013     /* Associate `key` with `failobj` in the tree, and return `failobj`. */
2014     value = failobj;
2015     if (_BTree_set(self, key, failobj, 0, 0) < 0)
2016         value = NULL;
2017     Py_XINCREF(value);
2018     return value;
2019 }
2020 
2021 /* forward declaration */
2022 static Py_ssize_t
2023 BTree_length_or_nonzero(BTree *self, int nonzero);
2024 
2025 static PyObject *
BTree_pop(BTree * self,PyObject * args)2026 BTree_pop(BTree *self, PyObject *args)
2027 {
2028     PyObject *key;
2029     PyObject *failobj = NULL; /* default */
2030     PyObject *value;          /* return value */
2031 
2032     if (! PyArg_UnpackTuple(args, "pop", 1, 2, &key, &failobj))
2033         return NULL;
2034 
2035     value = _BTree_get(self, key, 0, _BGET_ALLOW_TYPE_ERROR);
2036     if (value != NULL)
2037     {
2038         /* Delete key and associated value. */
2039         if (_BTree_set(self, key, NULL, 0, 0) < 0)
2040         {
2041             Py_DECREF(value);
2042             return NULL;;
2043         }
2044         return value;
2045     }
2046 
2047     /* The key isn't in the tree.  If that's not due to a KeyError exception,
2048     * pass back the unexpected exception.
2049     */
2050     if (! BTree_ShouldSuppressKeyError())
2051         return NULL;
2052 
2053     if (failobj != NULL)
2054     {
2055         /* Clear the KeyError and return the explicit default. */
2056         PyErr_Clear();
2057         Py_INCREF(failobj);
2058         return failobj;
2059     }
2060 
2061     /* No default given.  The only difference in this case is the error
2062     * message, which depends on whether the tree is empty.
2063     */
2064     if (BTree_length_or_nonzero(self, 1) == 0) /* tree is empty */
2065         PyErr_SetString(PyExc_KeyError, "pop(): BTree is empty");
2066     return NULL;
2067 }
2068 
2069 
2070 static PyObject*
BTree_popitem(BTree * self,PyObject * args)2071 BTree_popitem(BTree* self, PyObject* args)
2072 {
2073     PyObject* key = NULL;
2074     PyObject* pop_args = NULL;
2075     PyObject* result_val = NULL;
2076     PyObject* result = NULL;
2077 
2078     if (PyTuple_Size(args) != 0) {
2079         PyErr_SetString(PyExc_TypeError, "popitem(): Takes no arguments.");
2080         return NULL;
2081     }
2082 
2083     key = BTree_minKey(self, args); /* reuse existing empty tuple. */
2084     if (!key) {
2085         PyErr_Clear();
2086         PyErr_SetString(PyExc_KeyError, "popitem(): empty BTree.");
2087         return NULL;
2088     }
2089 
2090     pop_args = PyTuple_Pack(1, key);
2091     if (pop_args) {
2092         result_val = BTree_pop(self, pop_args);
2093         Py_DECREF(pop_args);
2094         if (result_val) {
2095             result = PyTuple_Pack(2, key, result_val);
2096             Py_DECREF(result_val);
2097         }
2098     }
2099 
2100     Py_DECREF(key);
2101     return result;
2102 }
2103 
2104 
2105 
2106 /* Search BTree self for key.  This is the sq_contains slot of the
2107  * PySequenceMethods.
2108  *
2109  * Return:
2110  *     -1     error
2111  *      0     not found
2112  *      1     found
2113  */
2114 static int
BTree_contains(BTree * self,PyObject * key)2115 BTree_contains(BTree *self, PyObject *key)
2116 {
2117     PyObject *asobj = _BTree_get(self, key, 1, _BGET_REPLACE_TYPE_ERROR);
2118     int result = -1;
2119 
2120     if (asobj != NULL)
2121     {
2122         result = INT_AS_LONG(asobj) ? 1 : 0;
2123         Py_DECREF(asobj);
2124     }
2125     else if (BTree_ShouldSuppressKeyError())
2126     {
2127         PyErr_Clear();
2128         result = 0;
2129     }
2130     return result;
2131 }
2132 
2133 static PyObject *
BTree_has_key(BTree * self,PyObject * key)2134 BTree_has_key(BTree *self, PyObject *key)
2135 {
2136     int result = -1;
2137     result = BTree_contains(self, key);
2138     if (result == -1) {
2139         return NULL;
2140     }
2141 
2142     if (result)
2143         Py_RETURN_TRUE;
2144     Py_RETURN_FALSE;
2145 }
2146 
2147 
2148 static PyObject *
BTree_addUnique(BTree * self,PyObject * args)2149 BTree_addUnique(BTree *self, PyObject *args)
2150 {
2151     int grew;
2152     PyObject *key, *v;
2153 
2154     UNLESS (PyArg_ParseTuple(args, "OO", &key, &v))
2155         return NULL;
2156 
2157     if ((grew=_BTree_set(self, key, v, 1, 0)) < 0)
2158         return NULL;
2159     return INT_FROM_LONG(grew);
2160 }
2161 
2162 /**************************************************************************/
2163 /* Iterator support. */
2164 
2165 /* A helper to build all the iterators for BTrees and TreeSets.
2166  * If args is NULL, the iterator spans the entire structure.  Else it's an
2167  * argument tuple, with optional low and high arguments.
2168  * kind is 'k', 'v' or 'i'.
2169  * Returns a BTreeIter object, or NULL if error.
2170  */
2171 static PyObject *
buildBTreeIter(BTree * self,PyObject * args,PyObject * kw,char kind)2172 buildBTreeIter(BTree *self, PyObject *args, PyObject *kw, char kind)
2173 {
2174     BTreeIter *result = NULL;
2175     BTreeItems *items = (BTreeItems *)BTree_rangeSearch(self, args, kw, kind);
2176 
2177     if (items)
2178     {
2179         result = BTreeIter_new(items);
2180         Py_DECREF(items);
2181     }
2182     return (PyObject *)result;
2183 }
2184 
2185 /* The implementation of iter(BTree_or_TreeSet); the BTree tp_iter slot. */
2186 static PyObject *
BTree_getiter(BTree * self)2187 BTree_getiter(BTree *self)
2188 {
2189     return buildBTreeIter(self, NULL, NULL, 'k');
2190 }
2191 
2192 /* The implementation of BTree.iterkeys(). */
2193 static PyObject *
BTree_iterkeys(BTree * self,PyObject * args,PyObject * kw)2194 BTree_iterkeys(BTree *self, PyObject *args, PyObject *kw)
2195 {
2196     return buildBTreeIter(self, args, kw, 'k');
2197 }
2198 
2199 /* The implementation of BTree.itervalues(). */
2200 static PyObject *
BTree_itervalues(BTree * self,PyObject * args,PyObject * kw)2201 BTree_itervalues(BTree *self, PyObject *args, PyObject *kw)
2202 {
2203     return buildBTreeIter(self, args, kw, 'v');
2204 }
2205 
2206 /* The implementation of BTree.iteritems(). */
2207 static PyObject *
BTree_iteritems(BTree * self,PyObject * args,PyObject * kw)2208 BTree_iteritems(BTree *self, PyObject *args, PyObject *kw)
2209 {
2210     return buildBTreeIter(self, args, kw, 'i');
2211 }
2212 
2213 /* End of iterator support. */
2214 
2215 
2216 /* Caution:  Even though the _firstbucket attribute is read-only, a program
2217    could do arbitrary damage to the btree internals.  For example, it could
2218    call clear() on a bucket inside a BTree.
2219 
2220    We need to decide if the convenience for inspecting BTrees is worth
2221    the risk.
2222 */
2223 
2224 static struct PyMemberDef BTree_members[] = {
2225     {"_firstbucket", T_OBJECT, offsetof(BTree, firstbucket), READONLY},
2226     {NULL}
2227 };
2228 
2229 static struct PyMethodDef BTree_methods[] = {
2230     {"__getstate__", (PyCFunction) BTree_getstate, METH_NOARGS,
2231      "__getstate__() -> state\n\n"
2232      "Return the picklable state of the BTree."},
2233 
2234     {"__setstate__", (PyCFunction) BTree_setstate, METH_O,
2235      "__setstate__(state)\n\n"
2236      "Set the state of the BTree."},
2237 
2238     {"has_key", (PyCFunction) BTree_has_key, METH_O,
2239      "has_key(key)\n\n"
2240      "Return true if the BTree contains the given key."},
2241 
2242     {"keys", (PyCFunction) BTree_keys, METH_VARARGS | METH_KEYWORDS,
2243      "keys([min, max]) -> list of keys\n\n"
2244      "Returns the keys of the BTree.  If min and max are supplied, only\n"
2245      "keys greater than min and less than max are returned."},
2246 
2247     {"values", (PyCFunction) BTree_values, METH_VARARGS | METH_KEYWORDS,
2248      "values([min, max]) -> list of values\n\n"
2249      "Returns the values of the BTree.  If min and max are supplied, only\n"
2250      "values corresponding to keys greater than min and less than max are\n"
2251      "returned."},
2252 
2253     {"items", (PyCFunction) BTree_items, METH_VARARGS | METH_KEYWORDS,
2254      "items([min, max]) -> -- list of key, value pairs\n\n"
2255      "Returns the items of the BTree.  If min and max are supplied, only\n"
2256      "items with keys greater than min and less than max are returned."},
2257 
2258     {"byValue", (PyCFunction) BTree_byValue, METH_O,
2259      "byValue(min) ->  list of value, key pairs\n\n"
2260      "Returns list of value, key pairs where the value is >= min.  The\n"
2261      "list is sorted by value.  Note that items() returns keys in the\n"
2262      "opposite order."},
2263 
2264     {"get", (PyCFunction) BTree_getm, METH_VARARGS,
2265      "get(key[, default=None]) -> Value for key or default\n\n"
2266      "Return the value or the default if the key is not found."},
2267 
2268     {"setdefault", (PyCFunction) BTree_setdefault, METH_VARARGS,
2269      "D.setdefault(k, d) -> D.get(k, d), also set D[k]=d if k not in D.\n\n"
2270      "Return the value like get() except that if key is missing, d is both\n"
2271      "returned and inserted into the BTree as the value of k."},
2272 
2273     {"pop", (PyCFunction) BTree_pop, METH_VARARGS,
2274      "D.pop(k[, d]) -> v, remove key and return the corresponding value.\n\n"
2275      "If key is not found, d is returned if given, otherwise KeyError\n"
2276      "is raised."},
2277 
2278     {"popitem", (PyCFunction)BTree_popitem, METH_VARARGS,
2279      "D.popitem() -> (k, v), remove and return some (key, value) pair\n"
2280      "as a 2-tuple; but raise KeyError if D is empty."},
2281 
2282     {"maxKey", (PyCFunction) BTree_maxKey, METH_VARARGS,
2283      "maxKey([max]) -> key\n\n"
2284      "Return the largest key in the BTree.  If max is specified, return\n"
2285      "the largest key <= max."},
2286 
2287     {"minKey", (PyCFunction) BTree_minKey, METH_VARARGS,
2288      "minKey([mi]) -> key\n\n"
2289      "Return the smallest key in the BTree.  If min is specified, return\n"
2290      "the smallest key >= min."},
2291 
2292     {"clear", (PyCFunction) BTree_clear, METH_NOARGS,
2293      "clear()\n\nRemove all of the items from the BTree."},
2294 
2295     {"insert", (PyCFunction)BTree_addUnique, METH_VARARGS,
2296      "insert(key, value) -> 0 or 1\n\n"
2297      "Add an item if the key is not already used. Return 1 if the item was\n"
2298      "added, or 0 otherwise."},
2299 
2300     {"update", (PyCFunction) Mapping_update, METH_O,
2301      "update(collection)\n\n Add the items from the given collection."},
2302 
2303     {"iterkeys", (PyCFunction) BTree_iterkeys, METH_VARARGS | METH_KEYWORDS,
2304      "B.iterkeys([min[,max]]) -> an iterator over the keys of B"},
2305 
2306     {"itervalues", (PyCFunction) BTree_itervalues, METH_VARARGS | METH_KEYWORDS,
2307      "B.itervalues([min[,max]]) -> an iterator over the values of B"},
2308 
2309     {"iteritems", (PyCFunction) BTree_iteritems, METH_VARARGS | METH_KEYWORDS,
2310      "B.iteritems([min[,max]]) -> an iterator over the (key, value) "
2311      "items of B"},
2312 
2313     {"_check", (PyCFunction) BTree_check, METH_NOARGS,
2314      "Perform sanity check on BTree, and raise exception if flawed."},
2315 
2316 #ifdef PERSISTENT
2317     {"_p_resolveConflict",
2318      (PyCFunction) BTree__p_resolveConflict, METH_VARARGS,
2319      "_p_resolveConflict() -- Reinitialize from a newly created copy"},
2320 
2321     {"_p_deactivate",
2322      (PyCFunction) BTree__p_deactivate, METH_VARARGS | METH_KEYWORDS,
2323      "_p_deactivate()\n\nReinitialize from a newly created copy."},
2324 #endif
2325     {NULL, NULL}
2326 };
2327 
2328 static int
BTree_init(PyObject * self,PyObject * args,PyObject * kwds)2329 BTree_init(PyObject *self, PyObject *args, PyObject *kwds)
2330 {
2331     PyObject *v = NULL;
2332 
2333     BTREE(self)->max_leaf_size = 0;
2334     BTREE(self)->max_internal_size = 0;
2335 
2336     if (!PyArg_ParseTuple(args, "|O:" MOD_NAME_PREFIX "BTree", &v))
2337         return -1;
2338 
2339     if (v)
2340         return update_from_seq(self, v);
2341     else
2342         return 0;
2343 }
2344 
2345 static void
BTree_dealloc(BTree * self)2346 BTree_dealloc(BTree *self)
2347 {
2348     PyObject_GC_UnTrack((PyObject *)self);
2349     if (self->state != cPersistent_GHOST_STATE) {
2350         _BTree_clear(self);
2351     }
2352     cPersistenceCAPI->pertype->tp_dealloc((PyObject *)self);
2353 }
2354 
2355 static int
BTree_traverse(BTree * self,visitproc visit,void * arg)2356 BTree_traverse(BTree *self, visitproc visit, void *arg)
2357 {
2358     int err = 0;
2359     int i, len;
2360 
2361 #define VISIT(SLOT)                             \
2362   if (SLOT) {                                   \
2363     err = visit((PyObject *)(SLOT), arg);       \
2364     if (err)                                    \
2365       goto Done;                                \
2366   }
2367 
2368     if (Py_TYPE(self) == &BTreeType)
2369         assert(Py_TYPE(self)->tp_dictoffset == 0);
2370 
2371     /* Call our base type's traverse function.  Because BTrees are
2372     * subclasses of Peristent, there must be one.
2373     */
2374     err = cPersistenceCAPI->pertype->tp_traverse((PyObject *)self, visit, arg);
2375     if (err)
2376         goto Done;
2377 
2378     /* If this is registered with the persistence system, cleaning up cycles
2379     * is the database's problem.  It would be horrid to unghostify BTree
2380     * nodes here just to chase pointers every time gc runs.
2381     */
2382     if (self->state == cPersistent_GHOST_STATE)
2383         goto Done;
2384 
2385     len = self->len;
2386 #ifdef KEY_TYPE_IS_PYOBJECT
2387     /* Keys are Python objects so need to be traversed.  Note that the
2388     * key 0 slot is unused and should not be traversed.
2389     */
2390     for (i = 1; i < len; i++)
2391         VISIT(self->data[i].key);
2392 #endif
2393 
2394     /* Children are always pointers, and child 0 is legit. */
2395     for (i = 0; i < len; i++)
2396         VISIT(self->data[i].child);
2397 
2398     VISIT(self->firstbucket);
2399 
2400 Done:
2401     return err;
2402 
2403 #undef VISIT
2404 }
2405 
2406 static int
BTree_tp_clear(BTree * self)2407 BTree_tp_clear(BTree *self)
2408 {
2409     if (self->state != cPersistent_GHOST_STATE)
2410         _BTree_clear(self);
2411     return 0;
2412 }
2413 
2414 /*
2415  * Return the number of elements in a BTree.  nonzero is a Boolean, and
2416  * when true requests just a non-empty/empty result.  Testing for emptiness
2417  * is efficient (constant-time).  Getting the true length takes time
2418  * proportional to the number of leaves (buckets).
2419  *
2420  * Return:
2421  *     When nonzero true:
2422  *          -1  error
2423  *           0  empty
2424  *           1  not empty
2425  *     When nonzero false (possibly expensive!):
2426  *          -1  error
2427  *        >= 0  number of elements.
2428  */
2429 static Py_ssize_t
BTree_length_or_nonzero(BTree * self,int nonzero)2430 BTree_length_or_nonzero(BTree *self, int nonzero)
2431 {
2432     int result;
2433     Bucket *b;
2434     Bucket *next;
2435 
2436     PER_USE_OR_RETURN(self, -1);
2437     b = self->firstbucket;
2438     PER_UNUSE(self);
2439     if (nonzero)
2440         return b != NULL;
2441 
2442     result = 0;
2443     while (b)
2444     {
2445         PER_USE_OR_RETURN(b, -1);
2446         result += b->len;
2447         next = b->next;
2448         PER_UNUSE(b);
2449         b = next;
2450     }
2451     return result;
2452 }
2453 
2454 static Py_ssize_t
BTree_length(BTree * self)2455 BTree_length(BTree *self)
2456 {
2457     return BTree_length_or_nonzero(self, 0);
2458 }
2459 
2460 static PyMappingMethods BTree_as_mapping = {
2461     (lenfunc)BTree_length,                  /* mp_length */
2462     (binaryfunc)BTree_get,                  /* mp_subscript */
2463     (objobjargproc)BTree_setitem,           /* mp_ass_subscript */
2464 };
2465 
2466 static PySequenceMethods BTree_as_sequence = {
2467     (lenfunc)0,                             /* sq_length */
2468     (binaryfunc)0,                          /* sq_concat */
2469     (ssizeargfunc)0,                        /* sq_repeat */
2470     (ssizeargfunc)0,                        /* sq_item */
2471     (ssizessizeargfunc)0,                   /* sq_slice */
2472     (ssizeobjargproc)0,                     /* sq_ass_item */
2473     (ssizessizeobjargproc)0,                /* sq_ass_slice */
2474     (objobjproc)BTree_contains,             /* sq_contains */
2475     0,                                      /* sq_inplace_concat */
2476     0,                                      /* sq_inplace_repeat */
2477 };
2478 
2479 static Py_ssize_t
BTree_nonzero(BTree * self)2480 BTree_nonzero(BTree *self)
2481 {
2482   return BTree_length_or_nonzero(self, 1);
2483 }
2484 
2485 static PyNumberMethods BTree_as_number_for_nonzero = {
2486     0,                                      /* nb_add */
2487     bucket_sub,                             /* nb_subtract */
2488     0,                                      /* nb_multiply */
2489 #ifndef PY3K
2490     0,                                      /* nb_divide */
2491 #endif
2492     0,                                      /* nb_remainder */
2493     0,                                      /* nb_divmod */
2494     0,                                      /* nb_power */
2495     0,                                      /* nb_negative */
2496     0,                                      /* nb_positive */
2497     0,                                      /* nb_absolute */
2498     (inquiry)BTree_nonzero,                 /* nb_nonzero */
2499     (unaryfunc)0,                           /* nb_invert */
2500     (binaryfunc)0,                          /* nb_lshift */
2501     (binaryfunc)0,                          /* nb_rshift */
2502     bucket_and,                             /* nb_and */
2503     (binaryfunc)0,                          /* nb_xor */
2504     bucket_or,                              /* nb_or */
2505 };
2506 
2507 static PyObject* BTreeType_setattro_allowed_names; /* initialized in module */
2508 
2509 static int
BTreeType_setattro(PyTypeObject * type,PyObject * name,PyObject * value)2510 BTreeType_setattro(PyTypeObject* type, PyObject* name, PyObject* value)
2511 {
2512     /*
2513       type.tp_setattro prohibits setting any attributes on a built-in type,
2514       so we need to use our own (metaclass) type to handle it. The set of
2515       allowable values needs to be carefully controlled (e.g., setting methods
2516       would be bad).
2517 
2518       Alternately, we could use heap-allocated types when they are supported
2519       an all the versions we care about, because those do allow setting attributes.
2520     */
2521     int allowed;
2522     allowed = PySequence_Contains(BTreeType_setattro_allowed_names, name);
2523     if (allowed < 0) {
2524         return -1;
2525     }
2526 
2527     if (allowed) {
2528         PyDict_SetItem(type->tp_dict, name, value);
2529         PyType_Modified(type);
2530         if (PyErr_Occurred()) {
2531             return -1;
2532         }
2533         return 0;
2534     }
2535     return PyType_Type.tp_setattro((PyObject*)type, name, value);
2536 }
2537 
2538 static PyTypeObject BTreeTypeType = {
2539     PyVarObject_HEAD_INIT(NULL, 0)
2540     MODULE_NAME MOD_NAME_PREFIX "BTreeType",
2541     0, /* tp_basicsize */
2542     0, /* tp_itemsize */
2543     0, /* tp_dealloc */
2544     0, /* tp_print */
2545     0, /* tp_getattr */
2546     0, /* tp_setattr */
2547     0, /* tp_compare */
2548     0, /* tp_repr */
2549     0, /* tp_as_number */
2550     0, /* tp_as_sequence */
2551     0, /* tp_as_mapping */
2552     0, /* tp_hash */
2553     0, /* tp_call */
2554     0, /* tp_str */
2555     0, /* tp_getattro */
2556     (setattrofunc)BTreeType_setattro, /* tp_setattro */
2557     0, /* tp_as_buffer */
2558 #ifndef PY3K
2559     Py_TPFLAGS_CHECKTYPES |
2560 #endif
2561     Py_TPFLAGS_DEFAULT |
2562     Py_TPFLAGS_BASETYPE, /* tp_flags */
2563     0, /* tp_doc */
2564     0, /* tp_traverse */
2565     0, /* tp_clear */
2566     0, /* tp_richcompare */
2567     0, /* tp_weaklistoffset */
2568     0, /* tp_iter */
2569     0, /* tp_iternext */
2570     0, /* tp_methods */
2571     0, /* tp_members */
2572 };
2573 
2574 static PyTypeObject BTreeType = {
2575     PyVarObject_HEAD_INIT(&BTreeTypeType, 0)
2576     MODULE_NAME MOD_NAME_PREFIX "BTree",    /* tp_name */
2577     sizeof(BTree),                          /* tp_basicsize */
2578     0,                                      /* tp_itemsize */
2579     (destructor)BTree_dealloc,              /* tp_dealloc */
2580     0,                                      /* tp_print */
2581     0,                                      /* tp_getattr */
2582     0,                                      /* tp_setattr */
2583     0,                                      /* tp_compare */
2584     0,                                      /* tp_repr */
2585     &BTree_as_number_for_nonzero,           /* tp_as_number */
2586     &BTree_as_sequence,                     /* tp_as_sequence */
2587     &BTree_as_mapping,                      /* tp_as_mapping */
2588     0,                                      /* tp_hash */
2589     0,                                      /* tp_call */
2590     0,                                      /* tp_str */
2591     0,                                      /* tp_getattro */
2592     0,                                      /* tp_setattro */
2593     0,                                      /* tp_as_buffer */
2594 #ifndef PY3K
2595     Py_TPFLAGS_CHECKTYPES |
2596 #endif
2597     Py_TPFLAGS_DEFAULT |
2598     Py_TPFLAGS_HAVE_GC |
2599     Py_TPFLAGS_BASETYPE,                    /* tp_flags */
2600     0,                                      /* tp_doc */
2601     (traverseproc)BTree_traverse,           /* tp_traverse */
2602     (inquiry)BTree_tp_clear,                /* tp_clear */
2603     0,                                      /* tp_richcompare */
2604     0,                                      /* tp_weaklistoffset */
2605     (getiterfunc)BTree_getiter,             /* tp_iter */
2606     0,                                      /* tp_iternext */
2607     BTree_methods,                          /* tp_methods */
2608     BTree_members,                          /* tp_members */
2609     0,                                      /* tp_getset */
2610     0,                                      /* tp_base */
2611     0,                                      /* tp_dict */
2612     0,                                      /* tp_descr_get */
2613     0,                                      /* tp_descr_set */
2614     0,                                      /* tp_dictoffset */
2615     BTree_init,                             /* tp_init */
2616     0,                                      /* tp_alloc */
2617     0, /*PyType_GenericNew,*/               /* tp_new */
2618 };
2619