1*53b37aa9Sespie /* $OpenBSD: bt_delete.c,v 1.11 2005/08/05 13:02:59 espie Exp $ */
21b727fc6Smillert
3df930be7Sderaadt /*-
4df930be7Sderaadt * Copyright (c) 1990, 1993, 1994
5df930be7Sderaadt * The Regents of the University of California. All rights reserved.
6df930be7Sderaadt *
7df930be7Sderaadt * This code is derived from software contributed to Berkeley by
8df930be7Sderaadt * Mike Olson.
9df930be7Sderaadt *
10df930be7Sderaadt * Redistribution and use in source and binary forms, with or without
11df930be7Sderaadt * modification, are permitted provided that the following conditions
12df930be7Sderaadt * are met:
13df930be7Sderaadt * 1. Redistributions of source code must retain the above copyright
14df930be7Sderaadt * notice, this list of conditions and the following disclaimer.
15df930be7Sderaadt * 2. Redistributions in binary form must reproduce the above copyright
16df930be7Sderaadt * notice, this list of conditions and the following disclaimer in the
17df930be7Sderaadt * documentation and/or other materials provided with the distribution.
186580fee3Smillert * 3. Neither the name of the University nor the names of its contributors
19df930be7Sderaadt * may be used to endorse or promote products derived from this software
20df930be7Sderaadt * without specific prior written permission.
21df930be7Sderaadt *
22df930be7Sderaadt * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23df930be7Sderaadt * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24df930be7Sderaadt * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25df930be7Sderaadt * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26df930be7Sderaadt * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27df930be7Sderaadt * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28df930be7Sderaadt * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29df930be7Sderaadt * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30df930be7Sderaadt * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31df930be7Sderaadt * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32df930be7Sderaadt * SUCH DAMAGE.
33df930be7Sderaadt */
34df930be7Sderaadt
35df930be7Sderaadt #include <sys/types.h>
36df930be7Sderaadt
37df930be7Sderaadt #include <errno.h>
38df930be7Sderaadt #include <stdio.h>
39df930be7Sderaadt #include <string.h>
40df930be7Sderaadt
41df930be7Sderaadt #include <db.h>
42df930be7Sderaadt #include "btree.h"
43df930be7Sderaadt
44c72b5b24Smillert static int __bt_bdelete(BTREE *, const DBT *);
45c72b5b24Smillert static int __bt_curdel(BTREE *, const DBT *, PAGE *, u_int);
46c72b5b24Smillert static int __bt_pdelete(BTREE *, PAGE *);
47c72b5b24Smillert static int __bt_relink(BTREE *, PAGE *);
48c72b5b24Smillert static int __bt_stkacq(BTREE *, PAGE **, CURSOR *);
49df930be7Sderaadt
50df930be7Sderaadt /*
51bec2d00aSderaadt * __bt_delete
52bec2d00aSderaadt * Delete the item(s) referenced by a key.
53df930be7Sderaadt *
54bec2d00aSderaadt * Return RET_SPECIAL if the key is not found.
55df930be7Sderaadt */
56df930be7Sderaadt int
__bt_delete(const DB * dbp,const DBT * key,u_int flags)57e20a56a5Sotto __bt_delete(const DB *dbp, const DBT *key, u_int flags)
58df930be7Sderaadt {
59df930be7Sderaadt BTREE *t;
60bec2d00aSderaadt CURSOR *c;
61bec2d00aSderaadt PAGE *h;
62df930be7Sderaadt int status;
63df930be7Sderaadt
64df930be7Sderaadt t = dbp->internal;
65df930be7Sderaadt
66df930be7Sderaadt /* Toss any page pinned across calls. */
67df930be7Sderaadt if (t->bt_pinned != NULL) {
68df930be7Sderaadt mpool_put(t->bt_mp, t->bt_pinned, 0);
69df930be7Sderaadt t->bt_pinned = NULL;
70df930be7Sderaadt }
71df930be7Sderaadt
72bec2d00aSderaadt /* Check for change to a read-only tree. */
73bec2d00aSderaadt if (F_ISSET(t, B_RDONLY)) {
74df930be7Sderaadt errno = EPERM;
75df930be7Sderaadt return (RET_ERROR);
76df930be7Sderaadt }
77df930be7Sderaadt
78df930be7Sderaadt switch (flags) {
79df930be7Sderaadt case 0:
80bec2d00aSderaadt status = __bt_bdelete(t, key);
81df930be7Sderaadt break;
82df930be7Sderaadt case R_CURSOR:
83df930be7Sderaadt /*
84bec2d00aSderaadt * If flags is R_CURSOR, delete the cursor. Must already
85bec2d00aSderaadt * have started a scan and not have already deleted it.
86df930be7Sderaadt */
87bec2d00aSderaadt c = &t->bt_cursor;
88bec2d00aSderaadt if (F_ISSET(c, CURS_INIT)) {
89bec2d00aSderaadt if (F_ISSET(c, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE))
90bec2d00aSderaadt return (RET_SPECIAL);
91bec2d00aSderaadt if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL)
92bec2d00aSderaadt return (RET_ERROR);
93bec2d00aSderaadt
94bec2d00aSderaadt /*
95bec2d00aSderaadt * If the page is about to be emptied, we'll need to
96bec2d00aSderaadt * delete it, which means we have to acquire a stack.
97bec2d00aSderaadt */
98bec2d00aSderaadt if (NEXTINDEX(h) == 1)
99bec2d00aSderaadt if (__bt_stkacq(t, &h, &t->bt_cursor))
100bec2d00aSderaadt return (RET_ERROR);
101bec2d00aSderaadt
102bec2d00aSderaadt status = __bt_dleaf(t, NULL, h, c->pg.index);
103bec2d00aSderaadt
104bec2d00aSderaadt if (NEXTINDEX(h) == 0 && status == RET_SUCCESS) {
105bec2d00aSderaadt if (__bt_pdelete(t, h))
106bec2d00aSderaadt return (RET_ERROR);
107bec2d00aSderaadt } else
108bec2d00aSderaadt mpool_put(t->bt_mp,
109bec2d00aSderaadt h, status == RET_SUCCESS ? MPOOL_DIRTY : 0);
110df930be7Sderaadt break;
111bec2d00aSderaadt }
112bec2d00aSderaadt /* FALLTHROUGH */
113df930be7Sderaadt default:
114bec2d00aSderaadt errno = EINVAL;
115df930be7Sderaadt return (RET_ERROR);
116df930be7Sderaadt }
117df930be7Sderaadt if (status == RET_SUCCESS)
118bec2d00aSderaadt F_SET(t, B_MODIFIED);
119df930be7Sderaadt return (status);
120df930be7Sderaadt }
121df930be7Sderaadt
122df930be7Sderaadt /*
123bec2d00aSderaadt * __bt_stkacq --
124bec2d00aSderaadt * Acquire a stack so we can delete a cursor entry.
125df930be7Sderaadt *
126df930be7Sderaadt * Parameters:
127bec2d00aSderaadt * t: tree
128bec2d00aSderaadt * hp: pointer to current, pinned PAGE pointer
129bec2d00aSderaadt * c: pointer to the cursor
130bec2d00aSderaadt *
131bec2d00aSderaadt * Returns:
132bec2d00aSderaadt * 0 on success, 1 on failure
133bec2d00aSderaadt */
134bec2d00aSderaadt static int
__bt_stkacq(BTREE * t,PAGE ** hp,CURSOR * c)135e20a56a5Sotto __bt_stkacq(BTREE *t, PAGE **hp, CURSOR *c)
136bec2d00aSderaadt {
137bec2d00aSderaadt BINTERNAL *bi;
138bec2d00aSderaadt EPG *e;
139bec2d00aSderaadt EPGNO *parent;
140bec2d00aSderaadt PAGE *h;
1418871b767Smillert indx_t idx;
142bec2d00aSderaadt pgno_t pgno;
143bec2d00aSderaadt recno_t nextpg, prevpg;
144bec2d00aSderaadt int exact, level;
145bec2d00aSderaadt
146bec2d00aSderaadt /*
147bec2d00aSderaadt * Find the first occurrence of the key in the tree. Toss the
148bec2d00aSderaadt * currently locked page so we don't hit an already-locked page.
149bec2d00aSderaadt */
150bec2d00aSderaadt h = *hp;
151bec2d00aSderaadt mpool_put(t->bt_mp, h, 0);
152bec2d00aSderaadt if ((e = __bt_search(t, &c->key, &exact)) == NULL)
153bec2d00aSderaadt return (1);
154bec2d00aSderaadt h = e->page;
155bec2d00aSderaadt
156bec2d00aSderaadt /* See if we got it in one shot. */
157bec2d00aSderaadt if (h->pgno == c->pg.pgno)
158bec2d00aSderaadt goto ret;
159bec2d00aSderaadt
160bec2d00aSderaadt /*
161bec2d00aSderaadt * Move right, looking for the page. At each move we have to move
162bec2d00aSderaadt * up the stack until we don't have to move to the next page. If
163bec2d00aSderaadt * we have to change pages at an internal level, we have to fix the
164bec2d00aSderaadt * stack back up.
165bec2d00aSderaadt */
166bec2d00aSderaadt while (h->pgno != c->pg.pgno) {
167bec2d00aSderaadt if ((nextpg = h->nextpg) == P_INVALID)
168bec2d00aSderaadt break;
169bec2d00aSderaadt mpool_put(t->bt_mp, h, 0);
170bec2d00aSderaadt
171bec2d00aSderaadt /* Move up the stack. */
172bec2d00aSderaadt for (level = 0; (parent = BT_POP(t)) != NULL; ++level) {
173bec2d00aSderaadt /* Get the parent page. */
174bec2d00aSderaadt if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
175bec2d00aSderaadt return (1);
176bec2d00aSderaadt
177bec2d00aSderaadt /* Move to the next index. */
178bec2d00aSderaadt if (parent->index != NEXTINDEX(h) - 1) {
1798871b767Smillert idx = parent->index + 1;
1808871b767Smillert BT_PUSH(t, h->pgno, idx);
181bec2d00aSderaadt break;
182bec2d00aSderaadt }
183bec2d00aSderaadt mpool_put(t->bt_mp, h, 0);
184bec2d00aSderaadt }
185bec2d00aSderaadt
186bec2d00aSderaadt /* Restore the stack. */
187bec2d00aSderaadt while (level--) {
188bec2d00aSderaadt /* Push the next level down onto the stack. */
1898871b767Smillert bi = GETBINTERNAL(h, idx);
190bec2d00aSderaadt pgno = bi->pgno;
191bec2d00aSderaadt BT_PUSH(t, pgno, 0);
192bec2d00aSderaadt
193bec2d00aSderaadt /* Lose the currently pinned page. */
194bec2d00aSderaadt mpool_put(t->bt_mp, h, 0);
195bec2d00aSderaadt
196bec2d00aSderaadt /* Get the next level down. */
197bec2d00aSderaadt if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL)
198bec2d00aSderaadt return (1);
1998871b767Smillert idx = 0;
200bec2d00aSderaadt }
201bec2d00aSderaadt mpool_put(t->bt_mp, h, 0);
202bec2d00aSderaadt if ((h = mpool_get(t->bt_mp, nextpg, 0)) == NULL)
203bec2d00aSderaadt return (1);
204bec2d00aSderaadt }
205bec2d00aSderaadt
206bec2d00aSderaadt if (h->pgno == c->pg.pgno)
207bec2d00aSderaadt goto ret;
208bec2d00aSderaadt
209bec2d00aSderaadt /* Reacquire the original stack. */
210bec2d00aSderaadt mpool_put(t->bt_mp, h, 0);
211bec2d00aSderaadt if ((e = __bt_search(t, &c->key, &exact)) == NULL)
212bec2d00aSderaadt return (1);
213bec2d00aSderaadt h = e->page;
214bec2d00aSderaadt
215bec2d00aSderaadt /*
216bec2d00aSderaadt * Move left, looking for the page. At each move we have to move
217bec2d00aSderaadt * up the stack until we don't have to change pages to move to the
218bec2d00aSderaadt * next page. If we have to change pages at an internal level, we
219bec2d00aSderaadt * have to fix the stack back up.
220bec2d00aSderaadt */
221bec2d00aSderaadt while (h->pgno != c->pg.pgno) {
222bec2d00aSderaadt if ((prevpg = h->prevpg) == P_INVALID)
223bec2d00aSderaadt break;
224bec2d00aSderaadt mpool_put(t->bt_mp, h, 0);
225bec2d00aSderaadt
226bec2d00aSderaadt /* Move up the stack. */
227bec2d00aSderaadt for (level = 0; (parent = BT_POP(t)) != NULL; ++level) {
228bec2d00aSderaadt /* Get the parent page. */
229bec2d00aSderaadt if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
230bec2d00aSderaadt return (1);
231bec2d00aSderaadt
232bec2d00aSderaadt /* Move to the next index. */
233bec2d00aSderaadt if (parent->index != 0) {
2348871b767Smillert idx = parent->index - 1;
2358871b767Smillert BT_PUSH(t, h->pgno, idx);
236bec2d00aSderaadt break;
237bec2d00aSderaadt }
238bec2d00aSderaadt mpool_put(t->bt_mp, h, 0);
239bec2d00aSderaadt }
240bec2d00aSderaadt
241bec2d00aSderaadt /* Restore the stack. */
242bec2d00aSderaadt while (level--) {
243bec2d00aSderaadt /* Push the next level down onto the stack. */
2448871b767Smillert bi = GETBINTERNAL(h, idx);
245bec2d00aSderaadt pgno = bi->pgno;
246bec2d00aSderaadt
247bec2d00aSderaadt /* Lose the currently pinned page. */
248bec2d00aSderaadt mpool_put(t->bt_mp, h, 0);
249bec2d00aSderaadt
250bec2d00aSderaadt /* Get the next level down. */
251bec2d00aSderaadt if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL)
252bec2d00aSderaadt return (1);
253bec2d00aSderaadt
2548871b767Smillert idx = NEXTINDEX(h) - 1;
2558871b767Smillert BT_PUSH(t, pgno, idx);
256bec2d00aSderaadt }
257bec2d00aSderaadt mpool_put(t->bt_mp, h, 0);
258bec2d00aSderaadt if ((h = mpool_get(t->bt_mp, prevpg, 0)) == NULL)
259bec2d00aSderaadt return (1);
260bec2d00aSderaadt }
261bec2d00aSderaadt
262bec2d00aSderaadt
263bec2d00aSderaadt ret: mpool_put(t->bt_mp, h, 0);
264bec2d00aSderaadt return ((*hp = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL);
265bec2d00aSderaadt }
266bec2d00aSderaadt
267bec2d00aSderaadt /*
268bec2d00aSderaadt * __bt_bdelete --
269bec2d00aSderaadt * Delete all key/data pairs matching the specified key.
270bec2d00aSderaadt *
271bec2d00aSderaadt * Parameters:
272bec2d00aSderaadt * t: tree
273df930be7Sderaadt * key: key to delete
274df930be7Sderaadt *
275df930be7Sderaadt * Returns:
276df930be7Sderaadt * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found.
277df930be7Sderaadt */
278df930be7Sderaadt static int
__bt_bdelete(BTREE * t,const DBT * key)279e20a56a5Sotto __bt_bdelete(BTREE *t, const DBT *key)
280df930be7Sderaadt {
281bec2d00aSderaadt EPG *e;
282df930be7Sderaadt PAGE *h;
283bec2d00aSderaadt int deleted, exact, redo;
284bec2d00aSderaadt
285bec2d00aSderaadt deleted = 0;
286df930be7Sderaadt
287df930be7Sderaadt /* Find any matching record; __bt_search pins the page. */
288bec2d00aSderaadt loop: if ((e = __bt_search(t, key, &exact)) == NULL)
289bec2d00aSderaadt return (deleted ? RET_SUCCESS : RET_ERROR);
290df930be7Sderaadt if (!exact) {
291df930be7Sderaadt mpool_put(t->bt_mp, e->page, 0);
292df930be7Sderaadt return (deleted ? RET_SUCCESS : RET_SPECIAL);
293df930be7Sderaadt }
294df930be7Sderaadt
295df930be7Sderaadt /*
296bec2d00aSderaadt * Delete forward, then delete backward, from the found key. If
297bec2d00aSderaadt * there are duplicates and we reach either side of the page, do
298bec2d00aSderaadt * the key search again, so that we get them all.
299bec2d00aSderaadt */
300bec2d00aSderaadt redo = 0;
301bec2d00aSderaadt h = e->page;
302bec2d00aSderaadt do {
303bec2d00aSderaadt if (__bt_dleaf(t, key, h, e->index)) {
304bec2d00aSderaadt mpool_put(t->bt_mp, h, 0);
305bec2d00aSderaadt return (RET_ERROR);
306bec2d00aSderaadt }
307bec2d00aSderaadt if (F_ISSET(t, B_NODUPS)) {
308bec2d00aSderaadt if (NEXTINDEX(h) == 0) {
309bec2d00aSderaadt if (__bt_pdelete(t, h))
310bec2d00aSderaadt return (RET_ERROR);
311bec2d00aSderaadt } else
312bec2d00aSderaadt mpool_put(t->bt_mp, h, MPOOL_DIRTY);
313bec2d00aSderaadt return (RET_SUCCESS);
314bec2d00aSderaadt }
315bec2d00aSderaadt deleted = 1;
316bec2d00aSderaadt } while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0);
317bec2d00aSderaadt
318bec2d00aSderaadt /* Check for right-hand edge of the page. */
319bec2d00aSderaadt if (e->index == NEXTINDEX(h))
320bec2d00aSderaadt redo = 1;
321bec2d00aSderaadt
322bec2d00aSderaadt /* Delete from the key to the beginning of the page. */
323bec2d00aSderaadt while (e->index-- > 0) {
324bec2d00aSderaadt if (__bt_cmp(t, key, e) != 0)
325bec2d00aSderaadt break;
326bec2d00aSderaadt if (__bt_dleaf(t, key, h, e->index) == RET_ERROR) {
327bec2d00aSderaadt mpool_put(t->bt_mp, h, 0);
328bec2d00aSderaadt return (RET_ERROR);
329bec2d00aSderaadt }
330bec2d00aSderaadt if (e->index == 0)
331bec2d00aSderaadt redo = 1;
332bec2d00aSderaadt }
333bec2d00aSderaadt
334bec2d00aSderaadt /* Check for an empty page. */
335bec2d00aSderaadt if (NEXTINDEX(h) == 0) {
336bec2d00aSderaadt if (__bt_pdelete(t, h))
337bec2d00aSderaadt return (RET_ERROR);
338bec2d00aSderaadt goto loop;
339bec2d00aSderaadt }
340bec2d00aSderaadt
341bec2d00aSderaadt /* Put the page. */
342bec2d00aSderaadt mpool_put(t->bt_mp, h, MPOOL_DIRTY);
343bec2d00aSderaadt
344bec2d00aSderaadt if (redo)
345bec2d00aSderaadt goto loop;
346bec2d00aSderaadt return (RET_SUCCESS);
347bec2d00aSderaadt }
348bec2d00aSderaadt
349bec2d00aSderaadt /*
350bec2d00aSderaadt * __bt_pdelete --
351bec2d00aSderaadt * Delete a single page from the tree.
352df930be7Sderaadt *
353df930be7Sderaadt * Parameters:
354df930be7Sderaadt * t: tree
355bec2d00aSderaadt * h: leaf page
356bec2d00aSderaadt *
357bec2d00aSderaadt * Returns:
358bec2d00aSderaadt * RET_SUCCESS, RET_ERROR.
359bec2d00aSderaadt *
360bec2d00aSderaadt * Side-effects:
361bec2d00aSderaadt * mpool_put's the page
362bec2d00aSderaadt */
363bec2d00aSderaadt static int
__bt_pdelete(BTREE * t,PAGE * h)364e20a56a5Sotto __bt_pdelete(BTREE *t, PAGE *h)
365bec2d00aSderaadt {
366bec2d00aSderaadt BINTERNAL *bi;
367bec2d00aSderaadt PAGE *pg;
368bec2d00aSderaadt EPGNO *parent;
3698871b767Smillert indx_t cnt, idx, *ip, offset;
370bec2d00aSderaadt u_int32_t nksize;
371bec2d00aSderaadt char *from;
372bec2d00aSderaadt
373bec2d00aSderaadt /*
374bec2d00aSderaadt * Walk the parent page stack -- a LIFO stack of the pages that were
375bec2d00aSderaadt * traversed when we searched for the page where the delete occurred.
376bec2d00aSderaadt * Each stack entry is a page number and a page index offset. The
377bec2d00aSderaadt * offset is for the page traversed on the search. We've just deleted
378bec2d00aSderaadt * a page, so we have to delete the key from the parent page.
379bec2d00aSderaadt *
380bec2d00aSderaadt * If the delete from the parent page makes it empty, this process may
381bec2d00aSderaadt * continue all the way up the tree. We stop if we reach the root page
382bec2d00aSderaadt * (which is never deleted, it's just not worth the effort) or if the
383bec2d00aSderaadt * delete does not empty the page.
384bec2d00aSderaadt */
385bec2d00aSderaadt while ((parent = BT_POP(t)) != NULL) {
386bec2d00aSderaadt /* Get the parent page. */
387bec2d00aSderaadt if ((pg = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
388bec2d00aSderaadt return (RET_ERROR);
389bec2d00aSderaadt
3908871b767Smillert idx = parent->index;
3918871b767Smillert bi = GETBINTERNAL(pg, idx);
392bec2d00aSderaadt
393bec2d00aSderaadt /* Free any overflow pages. */
394bec2d00aSderaadt if (bi->flags & P_BIGKEY &&
395bec2d00aSderaadt __ovfl_delete(t, bi->bytes) == RET_ERROR) {
396bec2d00aSderaadt mpool_put(t->bt_mp, pg, 0);
397bec2d00aSderaadt return (RET_ERROR);
398bec2d00aSderaadt }
399bec2d00aSderaadt
400bec2d00aSderaadt /*
401bec2d00aSderaadt * Free the parent if it has only the one key and it's not the
402bec2d00aSderaadt * root page. If it's the rootpage, turn it back into an empty
403bec2d00aSderaadt * leaf page.
404bec2d00aSderaadt */
405dfa5a4f6Smillert if (NEXTINDEX(pg) == 1) {
406bec2d00aSderaadt if (pg->pgno == P_ROOT) {
407bec2d00aSderaadt pg->lower = BTDATAOFF;
408bec2d00aSderaadt pg->upper = t->bt_psize;
409bec2d00aSderaadt pg->flags = P_BLEAF;
410bec2d00aSderaadt } else {
411bec2d00aSderaadt if (__bt_relink(t, pg) || __bt_free(t, pg))
412bec2d00aSderaadt return (RET_ERROR);
413bec2d00aSderaadt continue;
414bec2d00aSderaadt }
415dfa5a4f6Smillert } else {
416bec2d00aSderaadt /* Pack remaining key items at the end of the page. */
417bec2d00aSderaadt nksize = NBINTERNAL(bi->ksize);
418bec2d00aSderaadt from = (char *)pg + pg->upper;
419bec2d00aSderaadt memmove(from + nksize, from, (char *)bi - from);
420bec2d00aSderaadt pg->upper += nksize;
421bec2d00aSderaadt
422bec2d00aSderaadt /* Adjust indices' offsets, shift the indices down. */
4238871b767Smillert offset = pg->linp[idx];
4248871b767Smillert for (cnt = idx, ip = &pg->linp[0]; cnt--; ++ip)
425bec2d00aSderaadt if (ip[0] < offset)
426bec2d00aSderaadt ip[0] += nksize;
4278871b767Smillert for (cnt = NEXTINDEX(pg) - idx; --cnt; ++ip)
428bec2d00aSderaadt ip[0] = ip[1] < offset ? ip[1] + nksize : ip[1];
429bec2d00aSderaadt pg->lower -= sizeof(indx_t);
430bec2d00aSderaadt }
431bec2d00aSderaadt
432bec2d00aSderaadt mpool_put(t->bt_mp, pg, MPOOL_DIRTY);
433bec2d00aSderaadt break;
434bec2d00aSderaadt }
435bec2d00aSderaadt
436bec2d00aSderaadt /* Free the leaf page, as long as it wasn't the root. */
437bec2d00aSderaadt if (h->pgno == P_ROOT) {
438bec2d00aSderaadt mpool_put(t->bt_mp, h, MPOOL_DIRTY);
439bec2d00aSderaadt return (RET_SUCCESS);
440bec2d00aSderaadt }
441bec2d00aSderaadt return (__bt_relink(t, h) || __bt_free(t, h));
442bec2d00aSderaadt }
443bec2d00aSderaadt
444bec2d00aSderaadt /*
445bec2d00aSderaadt * __bt_dleaf --
446bec2d00aSderaadt * Delete a single record from a leaf page.
447bec2d00aSderaadt *
448bec2d00aSderaadt * Parameters:
449bec2d00aSderaadt * t: tree
450bec2d00aSderaadt * key: referenced key
451bec2d00aSderaadt * h: page
4528871b767Smillert * idx: index on page to delete
453df930be7Sderaadt *
454df930be7Sderaadt * Returns:
455df930be7Sderaadt * RET_SUCCESS, RET_ERROR.
456df930be7Sderaadt */
457df930be7Sderaadt int
__bt_dleaf(BTREE * t,const DBT * key,PAGE * h,u_int idx)458e20a56a5Sotto __bt_dleaf(BTREE *t, const DBT *key, PAGE *h, u_int idx)
459df930be7Sderaadt {
460bec2d00aSderaadt BLEAF *bl;
461bec2d00aSderaadt indx_t cnt, *ip, offset;
462bec2d00aSderaadt u_int32_t nbytes;
463df930be7Sderaadt void *to;
464bec2d00aSderaadt char *from;
465df930be7Sderaadt
466bec2d00aSderaadt /* If this record is referenced by the cursor, delete the cursor. */
467bec2d00aSderaadt if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
468bec2d00aSderaadt !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) &&
4698871b767Smillert t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index == idx &&
4708871b767Smillert __bt_curdel(t, key, h, idx))
471bec2d00aSderaadt return (RET_ERROR);
472bec2d00aSderaadt
473bec2d00aSderaadt /* If the entry uses overflow pages, make them available for reuse. */
4748871b767Smillert to = bl = GETBLEAF(h, idx);
475df930be7Sderaadt if (bl->flags & P_BIGKEY && __ovfl_delete(t, bl->bytes) == RET_ERROR)
476df930be7Sderaadt return (RET_ERROR);
477df930be7Sderaadt if (bl->flags & P_BIGDATA &&
478df930be7Sderaadt __ovfl_delete(t, bl->bytes + bl->ksize) == RET_ERROR)
479df930be7Sderaadt return (RET_ERROR);
480df930be7Sderaadt
481bec2d00aSderaadt /* Pack the remaining key/data items at the end of the page. */
482bec2d00aSderaadt nbytes = NBLEAF(bl);
483df930be7Sderaadt from = (char *)h + h->upper;
484df930be7Sderaadt memmove(from + nbytes, from, (char *)to - from);
485df930be7Sderaadt h->upper += nbytes;
486df930be7Sderaadt
487bec2d00aSderaadt /* Adjust the indices' offsets, shift the indices down. */
4888871b767Smillert offset = h->linp[idx];
4898871b767Smillert for (cnt = idx, ip = &h->linp[0]; cnt--; ++ip)
490df930be7Sderaadt if (ip[0] < offset)
491df930be7Sderaadt ip[0] += nbytes;
4928871b767Smillert for (cnt = NEXTINDEX(h) - idx; --cnt; ++ip)
493df930be7Sderaadt ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1];
494df930be7Sderaadt h->lower -= sizeof(indx_t);
495bec2d00aSderaadt
496bec2d00aSderaadt /* If the cursor is on this page, adjust it as necessary. */
497bec2d00aSderaadt if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
498bec2d00aSderaadt !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) &&
4998871b767Smillert t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index > idx)
500bec2d00aSderaadt --t->bt_cursor.pg.index;
501bec2d00aSderaadt
502df930be7Sderaadt return (RET_SUCCESS);
503df930be7Sderaadt }
504bec2d00aSderaadt
505bec2d00aSderaadt /*
506bec2d00aSderaadt * __bt_curdel --
507bec2d00aSderaadt * Delete the cursor.
508bec2d00aSderaadt *
509bec2d00aSderaadt * Parameters:
510bec2d00aSderaadt * t: tree
511bec2d00aSderaadt * key: referenced key (or NULL)
512bec2d00aSderaadt * h: page
5138871b767Smillert * idx: index on page to delete
514bec2d00aSderaadt *
515bec2d00aSderaadt * Returns:
516bec2d00aSderaadt * RET_SUCCESS, RET_ERROR.
517bec2d00aSderaadt */
518bec2d00aSderaadt static int
__bt_curdel(BTREE * t,const DBT * key,PAGE * h,u_int idx)519e20a56a5Sotto __bt_curdel(BTREE *t, const DBT *key, PAGE *h, u_int idx)
520bec2d00aSderaadt {
521bec2d00aSderaadt CURSOR *c;
522bec2d00aSderaadt EPG e;
523bec2d00aSderaadt PAGE *pg;
524bec2d00aSderaadt int curcopy, status;
525bec2d00aSderaadt
526bec2d00aSderaadt /*
527bec2d00aSderaadt * If there are duplicates, move forward or backward to one.
528bec2d00aSderaadt * Otherwise, copy the key into the cursor area.
529bec2d00aSderaadt */
530bec2d00aSderaadt c = &t->bt_cursor;
531bec2d00aSderaadt F_CLR(c, CURS_AFTER | CURS_BEFORE | CURS_ACQUIRE);
532bec2d00aSderaadt
533bec2d00aSderaadt curcopy = 0;
534bec2d00aSderaadt if (!F_ISSET(t, B_NODUPS)) {
535bec2d00aSderaadt /*
536bec2d00aSderaadt * We're going to have to do comparisons. If we weren't
537bec2d00aSderaadt * provided a copy of the key, i.e. the user is deleting
538bec2d00aSderaadt * the current cursor position, get one.
539bec2d00aSderaadt */
540bec2d00aSderaadt if (key == NULL) {
541bec2d00aSderaadt e.page = h;
5428871b767Smillert e.index = idx;
543bec2d00aSderaadt if ((status = __bt_ret(t, &e,
544bec2d00aSderaadt &c->key, &c->key, NULL, NULL, 1)) != RET_SUCCESS)
545bec2d00aSderaadt return (status);
546bec2d00aSderaadt curcopy = 1;
547bec2d00aSderaadt key = &c->key;
548bec2d00aSderaadt }
549bec2d00aSderaadt /* Check previous key, if not at the beginning of the page. */
5508871b767Smillert if (idx > 0) {
551bec2d00aSderaadt e.page = h;
5528871b767Smillert e.index = idx - 1;
553bec2d00aSderaadt if (__bt_cmp(t, key, &e) == 0) {
554bec2d00aSderaadt F_SET(c, CURS_BEFORE);
555bec2d00aSderaadt goto dup2;
556bec2d00aSderaadt }
557bec2d00aSderaadt }
558bec2d00aSderaadt /* Check next key, if not at the end of the page. */
5598871b767Smillert if (idx < NEXTINDEX(h) - 1) {
560bec2d00aSderaadt e.page = h;
5618871b767Smillert e.index = idx + 1;
562bec2d00aSderaadt if (__bt_cmp(t, key, &e) == 0) {
563bec2d00aSderaadt F_SET(c, CURS_AFTER);
564bec2d00aSderaadt goto dup2;
565bec2d00aSderaadt }
566bec2d00aSderaadt }
567bec2d00aSderaadt /* Check previous key if at the beginning of the page. */
5688871b767Smillert if (idx == 0 && h->prevpg != P_INVALID) {
569bec2d00aSderaadt if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL)
570bec2d00aSderaadt return (RET_ERROR);
571bec2d00aSderaadt e.page = pg;
572bec2d00aSderaadt e.index = NEXTINDEX(pg) - 1;
573bec2d00aSderaadt if (__bt_cmp(t, key, &e) == 0) {
574bec2d00aSderaadt F_SET(c, CURS_BEFORE);
575bec2d00aSderaadt goto dup1;
576bec2d00aSderaadt }
577bec2d00aSderaadt mpool_put(t->bt_mp, pg, 0);
578bec2d00aSderaadt }
579bec2d00aSderaadt /* Check next key if at the end of the page. */
5808871b767Smillert if (idx == NEXTINDEX(h) - 1 && h->nextpg != P_INVALID) {
581bec2d00aSderaadt if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL)
582bec2d00aSderaadt return (RET_ERROR);
583bec2d00aSderaadt e.page = pg;
584bec2d00aSderaadt e.index = 0;
585bec2d00aSderaadt if (__bt_cmp(t, key, &e) == 0) {
586bec2d00aSderaadt F_SET(c, CURS_AFTER);
587bec2d00aSderaadt dup1: mpool_put(t->bt_mp, pg, 0);
588bec2d00aSderaadt dup2: c->pg.pgno = e.page->pgno;
589bec2d00aSderaadt c->pg.index = e.index;
590bec2d00aSderaadt return (RET_SUCCESS);
591bec2d00aSderaadt }
592bec2d00aSderaadt mpool_put(t->bt_mp, pg, 0);
593bec2d00aSderaadt }
594bec2d00aSderaadt }
595bec2d00aSderaadt e.page = h;
5968871b767Smillert e.index = idx;
597bec2d00aSderaadt if (curcopy || (status =
598bec2d00aSderaadt __bt_ret(t, &e, &c->key, &c->key, NULL, NULL, 1)) == RET_SUCCESS) {
599bec2d00aSderaadt F_SET(c, CURS_ACQUIRE);
600bec2d00aSderaadt return (RET_SUCCESS);
601bec2d00aSderaadt }
602bec2d00aSderaadt return (status);
603bec2d00aSderaadt }
604bec2d00aSderaadt
605bec2d00aSderaadt /*
606bec2d00aSderaadt * __bt_relink --
607bec2d00aSderaadt * Link around a deleted page.
608bec2d00aSderaadt *
609bec2d00aSderaadt * Parameters:
610bec2d00aSderaadt * t: tree
611bec2d00aSderaadt * h: page to be deleted
612bec2d00aSderaadt */
613bec2d00aSderaadt static int
__bt_relink(BTREE * t,PAGE * h)614e20a56a5Sotto __bt_relink(BTREE *t, PAGE *h)
615bec2d00aSderaadt {
616bec2d00aSderaadt PAGE *pg;
617bec2d00aSderaadt
618bec2d00aSderaadt if (h->nextpg != P_INVALID) {
619bec2d00aSderaadt if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL)
620bec2d00aSderaadt return (RET_ERROR);
621bec2d00aSderaadt pg->prevpg = h->prevpg;
622bec2d00aSderaadt mpool_put(t->bt_mp, pg, MPOOL_DIRTY);
623bec2d00aSderaadt }
624bec2d00aSderaadt if (h->prevpg != P_INVALID) {
625bec2d00aSderaadt if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL)
626bec2d00aSderaadt return (RET_ERROR);
627bec2d00aSderaadt pg->nextpg = h->nextpg;
628bec2d00aSderaadt mpool_put(t->bt_mp, pg, MPOOL_DIRTY);
629bec2d00aSderaadt }
630bec2d00aSderaadt return (0);
631bec2d00aSderaadt }
632