xref: /openbsd/lib/libc/db/btree/bt_delete.c (revision 53b37aa9)
1*53b37aa9Sespie /*	$OpenBSD: bt_delete.c,v 1.11 2005/08/05 13:02:59 espie Exp $	*/
21b727fc6Smillert 
3df930be7Sderaadt /*-
4df930be7Sderaadt  * Copyright (c) 1990, 1993, 1994
5df930be7Sderaadt  *	The Regents of the University of California.  All rights reserved.
6df930be7Sderaadt  *
7df930be7Sderaadt  * This code is derived from software contributed to Berkeley by
8df930be7Sderaadt  * Mike Olson.
9df930be7Sderaadt  *
10df930be7Sderaadt  * Redistribution and use in source and binary forms, with or without
11df930be7Sderaadt  * modification, are permitted provided that the following conditions
12df930be7Sderaadt  * are met:
13df930be7Sderaadt  * 1. Redistributions of source code must retain the above copyright
14df930be7Sderaadt  *    notice, this list of conditions and the following disclaimer.
15df930be7Sderaadt  * 2. Redistributions in binary form must reproduce the above copyright
16df930be7Sderaadt  *    notice, this list of conditions and the following disclaimer in the
17df930be7Sderaadt  *    documentation and/or other materials provided with the distribution.
186580fee3Smillert  * 3. Neither the name of the University nor the names of its contributors
19df930be7Sderaadt  *    may be used to endorse or promote products derived from this software
20df930be7Sderaadt  *    without specific prior written permission.
21df930be7Sderaadt  *
22df930be7Sderaadt  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23df930be7Sderaadt  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24df930be7Sderaadt  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25df930be7Sderaadt  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26df930be7Sderaadt  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27df930be7Sderaadt  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28df930be7Sderaadt  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29df930be7Sderaadt  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30df930be7Sderaadt  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31df930be7Sderaadt  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32df930be7Sderaadt  * SUCH DAMAGE.
33df930be7Sderaadt  */
34df930be7Sderaadt 
35df930be7Sderaadt #include <sys/types.h>
36df930be7Sderaadt 
37df930be7Sderaadt #include <errno.h>
38df930be7Sderaadt #include <stdio.h>
39df930be7Sderaadt #include <string.h>
40df930be7Sderaadt 
41df930be7Sderaadt #include <db.h>
42df930be7Sderaadt #include "btree.h"
43df930be7Sderaadt 
44c72b5b24Smillert static int __bt_bdelete(BTREE *, const DBT *);
45c72b5b24Smillert static int __bt_curdel(BTREE *, const DBT *, PAGE *, u_int);
46c72b5b24Smillert static int __bt_pdelete(BTREE *, PAGE *);
47c72b5b24Smillert static int __bt_relink(BTREE *, PAGE *);
48c72b5b24Smillert static int __bt_stkacq(BTREE *, PAGE **, CURSOR *);
49df930be7Sderaadt 
50df930be7Sderaadt /*
51bec2d00aSderaadt  * __bt_delete
52bec2d00aSderaadt  *	Delete the item(s) referenced by a key.
53df930be7Sderaadt  *
54bec2d00aSderaadt  * Return RET_SPECIAL if the key is not found.
55df930be7Sderaadt  */
56df930be7Sderaadt int
__bt_delete(const DB * dbp,const DBT * key,u_int flags)57e20a56a5Sotto __bt_delete(const DB *dbp, const DBT *key, u_int flags)
58df930be7Sderaadt {
59df930be7Sderaadt 	BTREE *t;
60bec2d00aSderaadt 	CURSOR *c;
61bec2d00aSderaadt 	PAGE *h;
62df930be7Sderaadt 	int status;
63df930be7Sderaadt 
64df930be7Sderaadt 	t = dbp->internal;
65df930be7Sderaadt 
66df930be7Sderaadt 	/* Toss any page pinned across calls. */
67df930be7Sderaadt 	if (t->bt_pinned != NULL) {
68df930be7Sderaadt 		mpool_put(t->bt_mp, t->bt_pinned, 0);
69df930be7Sderaadt 		t->bt_pinned = NULL;
70df930be7Sderaadt 	}
71df930be7Sderaadt 
72bec2d00aSderaadt 	/* Check for change to a read-only tree. */
73bec2d00aSderaadt 	if (F_ISSET(t, B_RDONLY)) {
74df930be7Sderaadt 		errno = EPERM;
75df930be7Sderaadt 		return (RET_ERROR);
76df930be7Sderaadt 	}
77df930be7Sderaadt 
78df930be7Sderaadt 	switch (flags) {
79df930be7Sderaadt 	case 0:
80bec2d00aSderaadt 		status = __bt_bdelete(t, key);
81df930be7Sderaadt 		break;
82df930be7Sderaadt 	case R_CURSOR:
83df930be7Sderaadt 		/*
84bec2d00aSderaadt 		 * If flags is R_CURSOR, delete the cursor.  Must already
85bec2d00aSderaadt 		 * have started a scan and not have already deleted it.
86df930be7Sderaadt 		 */
87bec2d00aSderaadt 		c = &t->bt_cursor;
88bec2d00aSderaadt 		if (F_ISSET(c, CURS_INIT)) {
89bec2d00aSderaadt 			if (F_ISSET(c, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE))
90bec2d00aSderaadt 				return (RET_SPECIAL);
91bec2d00aSderaadt 			if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL)
92bec2d00aSderaadt 				return (RET_ERROR);
93bec2d00aSderaadt 
94bec2d00aSderaadt 			/*
95bec2d00aSderaadt 			 * If the page is about to be emptied, we'll need to
96bec2d00aSderaadt 			 * delete it, which means we have to acquire a stack.
97bec2d00aSderaadt 			 */
98bec2d00aSderaadt 			if (NEXTINDEX(h) == 1)
99bec2d00aSderaadt 				if (__bt_stkacq(t, &h, &t->bt_cursor))
100bec2d00aSderaadt 					return (RET_ERROR);
101bec2d00aSderaadt 
102bec2d00aSderaadt 			status = __bt_dleaf(t, NULL, h, c->pg.index);
103bec2d00aSderaadt 
104bec2d00aSderaadt 			if (NEXTINDEX(h) == 0 && status == RET_SUCCESS) {
105bec2d00aSderaadt 				if (__bt_pdelete(t, h))
106bec2d00aSderaadt 					return (RET_ERROR);
107bec2d00aSderaadt 			} else
108bec2d00aSderaadt 				mpool_put(t->bt_mp,
109bec2d00aSderaadt 				    h, status == RET_SUCCESS ? MPOOL_DIRTY : 0);
110df930be7Sderaadt 			break;
111bec2d00aSderaadt 		}
112bec2d00aSderaadt 		/* FALLTHROUGH */
113df930be7Sderaadt 	default:
114bec2d00aSderaadt 		errno = EINVAL;
115df930be7Sderaadt 		return (RET_ERROR);
116df930be7Sderaadt 	}
117df930be7Sderaadt 	if (status == RET_SUCCESS)
118bec2d00aSderaadt 		F_SET(t, B_MODIFIED);
119df930be7Sderaadt 	return (status);
120df930be7Sderaadt }
121df930be7Sderaadt 
122df930be7Sderaadt /*
123bec2d00aSderaadt  * __bt_stkacq --
124bec2d00aSderaadt  *	Acquire a stack so we can delete a cursor entry.
125df930be7Sderaadt  *
126df930be7Sderaadt  * Parameters:
127bec2d00aSderaadt  *	  t:	tree
128bec2d00aSderaadt  *	 hp:	pointer to current, pinned PAGE pointer
129bec2d00aSderaadt  *	  c:	pointer to the cursor
130bec2d00aSderaadt  *
131bec2d00aSderaadt  * Returns:
132bec2d00aSderaadt  *	0 on success, 1 on failure
133bec2d00aSderaadt  */
134bec2d00aSderaadt static int
__bt_stkacq(BTREE * t,PAGE ** hp,CURSOR * c)135e20a56a5Sotto __bt_stkacq(BTREE *t, PAGE **hp, CURSOR *c)
136bec2d00aSderaadt {
137bec2d00aSderaadt 	BINTERNAL *bi;
138bec2d00aSderaadt 	EPG *e;
139bec2d00aSderaadt 	EPGNO *parent;
140bec2d00aSderaadt 	PAGE *h;
1418871b767Smillert 	indx_t idx;
142bec2d00aSderaadt 	pgno_t pgno;
143bec2d00aSderaadt 	recno_t nextpg, prevpg;
144bec2d00aSderaadt 	int exact, level;
145bec2d00aSderaadt 
146bec2d00aSderaadt 	/*
147bec2d00aSderaadt 	 * Find the first occurrence of the key in the tree.  Toss the
148bec2d00aSderaadt 	 * currently locked page so we don't hit an already-locked page.
149bec2d00aSderaadt 	 */
150bec2d00aSderaadt 	h = *hp;
151bec2d00aSderaadt 	mpool_put(t->bt_mp, h, 0);
152bec2d00aSderaadt 	if ((e = __bt_search(t, &c->key, &exact)) == NULL)
153bec2d00aSderaadt 		return (1);
154bec2d00aSderaadt 	h = e->page;
155bec2d00aSderaadt 
156bec2d00aSderaadt 	/* See if we got it in one shot. */
157bec2d00aSderaadt 	if (h->pgno == c->pg.pgno)
158bec2d00aSderaadt 		goto ret;
159bec2d00aSderaadt 
160bec2d00aSderaadt 	/*
161bec2d00aSderaadt 	 * Move right, looking for the page.  At each move we have to move
162bec2d00aSderaadt 	 * up the stack until we don't have to move to the next page.  If
163bec2d00aSderaadt 	 * we have to change pages at an internal level, we have to fix the
164bec2d00aSderaadt 	 * stack back up.
165bec2d00aSderaadt 	 */
166bec2d00aSderaadt 	while (h->pgno != c->pg.pgno) {
167bec2d00aSderaadt 		if ((nextpg = h->nextpg) == P_INVALID)
168bec2d00aSderaadt 			break;
169bec2d00aSderaadt 		mpool_put(t->bt_mp, h, 0);
170bec2d00aSderaadt 
171bec2d00aSderaadt 		/* Move up the stack. */
172bec2d00aSderaadt 		for (level = 0; (parent = BT_POP(t)) != NULL; ++level) {
173bec2d00aSderaadt 			/* Get the parent page. */
174bec2d00aSderaadt 			if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
175bec2d00aSderaadt 				return (1);
176bec2d00aSderaadt 
177bec2d00aSderaadt 			/* Move to the next index. */
178bec2d00aSderaadt 			if (parent->index != NEXTINDEX(h) - 1) {
1798871b767Smillert 				idx = parent->index + 1;
1808871b767Smillert 				BT_PUSH(t, h->pgno, idx);
181bec2d00aSderaadt 				break;
182bec2d00aSderaadt 			}
183bec2d00aSderaadt 			mpool_put(t->bt_mp, h, 0);
184bec2d00aSderaadt 		}
185bec2d00aSderaadt 
186bec2d00aSderaadt 		/* Restore the stack. */
187bec2d00aSderaadt 		while (level--) {
188bec2d00aSderaadt 			/* Push the next level down onto the stack. */
1898871b767Smillert 			bi = GETBINTERNAL(h, idx);
190bec2d00aSderaadt 			pgno = bi->pgno;
191bec2d00aSderaadt 			BT_PUSH(t, pgno, 0);
192bec2d00aSderaadt 
193bec2d00aSderaadt 			/* Lose the currently pinned page. */
194bec2d00aSderaadt 			mpool_put(t->bt_mp, h, 0);
195bec2d00aSderaadt 
196bec2d00aSderaadt 			/* Get the next level down. */
197bec2d00aSderaadt 			if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL)
198bec2d00aSderaadt 				return (1);
1998871b767Smillert 			idx = 0;
200bec2d00aSderaadt 		}
201bec2d00aSderaadt 		mpool_put(t->bt_mp, h, 0);
202bec2d00aSderaadt 		if ((h = mpool_get(t->bt_mp, nextpg, 0)) == NULL)
203bec2d00aSderaadt 			return (1);
204bec2d00aSderaadt 	}
205bec2d00aSderaadt 
206bec2d00aSderaadt 	if (h->pgno == c->pg.pgno)
207bec2d00aSderaadt 		goto ret;
208bec2d00aSderaadt 
209bec2d00aSderaadt 	/* Reacquire the original stack. */
210bec2d00aSderaadt 	mpool_put(t->bt_mp, h, 0);
211bec2d00aSderaadt 	if ((e = __bt_search(t, &c->key, &exact)) == NULL)
212bec2d00aSderaadt 		return (1);
213bec2d00aSderaadt 	h = e->page;
214bec2d00aSderaadt 
215bec2d00aSderaadt 	/*
216bec2d00aSderaadt 	 * Move left, looking for the page.  At each move we have to move
217bec2d00aSderaadt 	 * up the stack until we don't have to change pages to move to the
218bec2d00aSderaadt 	 * next page.  If we have to change pages at an internal level, we
219bec2d00aSderaadt 	 * have to fix the stack back up.
220bec2d00aSderaadt 	 */
221bec2d00aSderaadt 	while (h->pgno != c->pg.pgno) {
222bec2d00aSderaadt 		if ((prevpg = h->prevpg) == P_INVALID)
223bec2d00aSderaadt 			break;
224bec2d00aSderaadt 		mpool_put(t->bt_mp, h, 0);
225bec2d00aSderaadt 
226bec2d00aSderaadt 		/* Move up the stack. */
227bec2d00aSderaadt 		for (level = 0; (parent = BT_POP(t)) != NULL; ++level) {
228bec2d00aSderaadt 			/* Get the parent page. */
229bec2d00aSderaadt 			if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
230bec2d00aSderaadt 				return (1);
231bec2d00aSderaadt 
232bec2d00aSderaadt 			/* Move to the next index. */
233bec2d00aSderaadt 			if (parent->index != 0) {
2348871b767Smillert 				idx = parent->index - 1;
2358871b767Smillert 				BT_PUSH(t, h->pgno, idx);
236bec2d00aSderaadt 				break;
237bec2d00aSderaadt 			}
238bec2d00aSderaadt 			mpool_put(t->bt_mp, h, 0);
239bec2d00aSderaadt 		}
240bec2d00aSderaadt 
241bec2d00aSderaadt 		/* Restore the stack. */
242bec2d00aSderaadt 		while (level--) {
243bec2d00aSderaadt 			/* Push the next level down onto the stack. */
2448871b767Smillert 			bi = GETBINTERNAL(h, idx);
245bec2d00aSderaadt 			pgno = bi->pgno;
246bec2d00aSderaadt 
247bec2d00aSderaadt 			/* Lose the currently pinned page. */
248bec2d00aSderaadt 			mpool_put(t->bt_mp, h, 0);
249bec2d00aSderaadt 
250bec2d00aSderaadt 			/* Get the next level down. */
251bec2d00aSderaadt 			if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL)
252bec2d00aSderaadt 				return (1);
253bec2d00aSderaadt 
2548871b767Smillert 			idx = NEXTINDEX(h) - 1;
2558871b767Smillert 			BT_PUSH(t, pgno, idx);
256bec2d00aSderaadt 		}
257bec2d00aSderaadt 		mpool_put(t->bt_mp, h, 0);
258bec2d00aSderaadt 		if ((h = mpool_get(t->bt_mp, prevpg, 0)) == NULL)
259bec2d00aSderaadt 			return (1);
260bec2d00aSderaadt 	}
261bec2d00aSderaadt 
262bec2d00aSderaadt 
263bec2d00aSderaadt ret:	mpool_put(t->bt_mp, h, 0);
264bec2d00aSderaadt 	return ((*hp = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL);
265bec2d00aSderaadt }
266bec2d00aSderaadt 
267bec2d00aSderaadt /*
268bec2d00aSderaadt  * __bt_bdelete --
269bec2d00aSderaadt  *	Delete all key/data pairs matching the specified key.
270bec2d00aSderaadt  *
271bec2d00aSderaadt  * Parameters:
272bec2d00aSderaadt  *	  t:	tree
273df930be7Sderaadt  *	key:	key to delete
274df930be7Sderaadt  *
275df930be7Sderaadt  * Returns:
276df930be7Sderaadt  *	RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found.
277df930be7Sderaadt  */
278df930be7Sderaadt static int
__bt_bdelete(BTREE * t,const DBT * key)279e20a56a5Sotto __bt_bdelete(BTREE *t, const DBT *key)
280df930be7Sderaadt {
281bec2d00aSderaadt 	EPG *e;
282df930be7Sderaadt 	PAGE *h;
283bec2d00aSderaadt 	int deleted, exact, redo;
284bec2d00aSderaadt 
285bec2d00aSderaadt 	deleted = 0;
286df930be7Sderaadt 
287df930be7Sderaadt 	/* Find any matching record; __bt_search pins the page. */
288bec2d00aSderaadt loop:	if ((e = __bt_search(t, key, &exact)) == NULL)
289bec2d00aSderaadt 		return (deleted ? RET_SUCCESS : RET_ERROR);
290df930be7Sderaadt 	if (!exact) {
291df930be7Sderaadt 		mpool_put(t->bt_mp, e->page, 0);
292df930be7Sderaadt 		return (deleted ? RET_SUCCESS : RET_SPECIAL);
293df930be7Sderaadt 	}
294df930be7Sderaadt 
295df930be7Sderaadt 	/*
296bec2d00aSderaadt 	 * Delete forward, then delete backward, from the found key.  If
297bec2d00aSderaadt 	 * there are duplicates and we reach either side of the page, do
298bec2d00aSderaadt 	 * the key search again, so that we get them all.
299bec2d00aSderaadt 	 */
300bec2d00aSderaadt 	redo = 0;
301bec2d00aSderaadt 	h = e->page;
302bec2d00aSderaadt 	do {
303bec2d00aSderaadt 		if (__bt_dleaf(t, key, h, e->index)) {
304bec2d00aSderaadt 			mpool_put(t->bt_mp, h, 0);
305bec2d00aSderaadt 			return (RET_ERROR);
306bec2d00aSderaadt 		}
307bec2d00aSderaadt 		if (F_ISSET(t, B_NODUPS)) {
308bec2d00aSderaadt 			if (NEXTINDEX(h) == 0) {
309bec2d00aSderaadt 				if (__bt_pdelete(t, h))
310bec2d00aSderaadt 					return (RET_ERROR);
311bec2d00aSderaadt 			} else
312bec2d00aSderaadt 				mpool_put(t->bt_mp, h, MPOOL_DIRTY);
313bec2d00aSderaadt 			return (RET_SUCCESS);
314bec2d00aSderaadt 		}
315bec2d00aSderaadt 		deleted = 1;
316bec2d00aSderaadt 	} while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0);
317bec2d00aSderaadt 
318bec2d00aSderaadt 	/* Check for right-hand edge of the page. */
319bec2d00aSderaadt 	if (e->index == NEXTINDEX(h))
320bec2d00aSderaadt 		redo = 1;
321bec2d00aSderaadt 
322bec2d00aSderaadt 	/* Delete from the key to the beginning of the page. */
323bec2d00aSderaadt 	while (e->index-- > 0) {
324bec2d00aSderaadt 		if (__bt_cmp(t, key, e) != 0)
325bec2d00aSderaadt 			break;
326bec2d00aSderaadt 		if (__bt_dleaf(t, key, h, e->index) == RET_ERROR) {
327bec2d00aSderaadt 			mpool_put(t->bt_mp, h, 0);
328bec2d00aSderaadt 			return (RET_ERROR);
329bec2d00aSderaadt 		}
330bec2d00aSderaadt 		if (e->index == 0)
331bec2d00aSderaadt 			redo = 1;
332bec2d00aSderaadt 	}
333bec2d00aSderaadt 
334bec2d00aSderaadt 	/* Check for an empty page. */
335bec2d00aSderaadt 	if (NEXTINDEX(h) == 0) {
336bec2d00aSderaadt 		if (__bt_pdelete(t, h))
337bec2d00aSderaadt 			return (RET_ERROR);
338bec2d00aSderaadt 		goto loop;
339bec2d00aSderaadt 	}
340bec2d00aSderaadt 
341bec2d00aSderaadt 	/* Put the page. */
342bec2d00aSderaadt 	mpool_put(t->bt_mp, h, MPOOL_DIRTY);
343bec2d00aSderaadt 
344bec2d00aSderaadt 	if (redo)
345bec2d00aSderaadt 		goto loop;
346bec2d00aSderaadt 	return (RET_SUCCESS);
347bec2d00aSderaadt }
348bec2d00aSderaadt 
349bec2d00aSderaadt /*
350bec2d00aSderaadt  * __bt_pdelete --
351bec2d00aSderaadt  *	Delete a single page from the tree.
352df930be7Sderaadt  *
353df930be7Sderaadt  * Parameters:
354df930be7Sderaadt  *	t:	tree
355bec2d00aSderaadt  *	h:	leaf page
356bec2d00aSderaadt  *
357bec2d00aSderaadt  * Returns:
358bec2d00aSderaadt  *	RET_SUCCESS, RET_ERROR.
359bec2d00aSderaadt  *
360bec2d00aSderaadt  * Side-effects:
361bec2d00aSderaadt  *	mpool_put's the page
362bec2d00aSderaadt  */
363bec2d00aSderaadt static int
__bt_pdelete(BTREE * t,PAGE * h)364e20a56a5Sotto __bt_pdelete(BTREE *t, PAGE *h)
365bec2d00aSderaadt {
366bec2d00aSderaadt 	BINTERNAL *bi;
367bec2d00aSderaadt 	PAGE *pg;
368bec2d00aSderaadt 	EPGNO *parent;
3698871b767Smillert 	indx_t cnt, idx, *ip, offset;
370bec2d00aSderaadt 	u_int32_t nksize;
371bec2d00aSderaadt 	char *from;
372bec2d00aSderaadt 
373bec2d00aSderaadt 	/*
374bec2d00aSderaadt 	 * Walk the parent page stack -- a LIFO stack of the pages that were
375bec2d00aSderaadt 	 * traversed when we searched for the page where the delete occurred.
376bec2d00aSderaadt 	 * Each stack entry is a page number and a page index offset.  The
377bec2d00aSderaadt 	 * offset is for the page traversed on the search.  We've just deleted
378bec2d00aSderaadt 	 * a page, so we have to delete the key from the parent page.
379bec2d00aSderaadt 	 *
380bec2d00aSderaadt 	 * If the delete from the parent page makes it empty, this process may
381bec2d00aSderaadt 	 * continue all the way up the tree.  We stop if we reach the root page
382bec2d00aSderaadt 	 * (which is never deleted, it's just not worth the effort) or if the
383bec2d00aSderaadt 	 * delete does not empty the page.
384bec2d00aSderaadt 	 */
385bec2d00aSderaadt 	while ((parent = BT_POP(t)) != NULL) {
386bec2d00aSderaadt 		/* Get the parent page. */
387bec2d00aSderaadt 		if ((pg = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
388bec2d00aSderaadt 			return (RET_ERROR);
389bec2d00aSderaadt 
3908871b767Smillert 		idx = parent->index;
3918871b767Smillert 		bi = GETBINTERNAL(pg, idx);
392bec2d00aSderaadt 
393bec2d00aSderaadt 		/* Free any overflow pages. */
394bec2d00aSderaadt 		if (bi->flags & P_BIGKEY &&
395bec2d00aSderaadt 		    __ovfl_delete(t, bi->bytes) == RET_ERROR) {
396bec2d00aSderaadt 			mpool_put(t->bt_mp, pg, 0);
397bec2d00aSderaadt 			return (RET_ERROR);
398bec2d00aSderaadt 		}
399bec2d00aSderaadt 
400bec2d00aSderaadt 		/*
401bec2d00aSderaadt 		 * Free the parent if it has only the one key and it's not the
402bec2d00aSderaadt 		 * root page. If it's the rootpage, turn it back into an empty
403bec2d00aSderaadt 		 * leaf page.
404bec2d00aSderaadt 		 */
405dfa5a4f6Smillert 		if (NEXTINDEX(pg) == 1) {
406bec2d00aSderaadt 			if (pg->pgno == P_ROOT) {
407bec2d00aSderaadt 				pg->lower = BTDATAOFF;
408bec2d00aSderaadt 				pg->upper = t->bt_psize;
409bec2d00aSderaadt 				pg->flags = P_BLEAF;
410bec2d00aSderaadt 			} else {
411bec2d00aSderaadt 				if (__bt_relink(t, pg) || __bt_free(t, pg))
412bec2d00aSderaadt 					return (RET_ERROR);
413bec2d00aSderaadt 				continue;
414bec2d00aSderaadt 			}
415dfa5a4f6Smillert 		} else {
416bec2d00aSderaadt 			/* Pack remaining key items at the end of the page. */
417bec2d00aSderaadt 			nksize = NBINTERNAL(bi->ksize);
418bec2d00aSderaadt 			from = (char *)pg + pg->upper;
419bec2d00aSderaadt 			memmove(from + nksize, from, (char *)bi - from);
420bec2d00aSderaadt 			pg->upper += nksize;
421bec2d00aSderaadt 
422bec2d00aSderaadt 			/* Adjust indices' offsets, shift the indices down. */
4238871b767Smillert 			offset = pg->linp[idx];
4248871b767Smillert 			for (cnt = idx, ip = &pg->linp[0]; cnt--; ++ip)
425bec2d00aSderaadt 				if (ip[0] < offset)
426bec2d00aSderaadt 					ip[0] += nksize;
4278871b767Smillert 			for (cnt = NEXTINDEX(pg) - idx; --cnt; ++ip)
428bec2d00aSderaadt 				ip[0] = ip[1] < offset ? ip[1] + nksize : ip[1];
429bec2d00aSderaadt 			pg->lower -= sizeof(indx_t);
430bec2d00aSderaadt 		}
431bec2d00aSderaadt 
432bec2d00aSderaadt 		mpool_put(t->bt_mp, pg, MPOOL_DIRTY);
433bec2d00aSderaadt 		break;
434bec2d00aSderaadt 	}
435bec2d00aSderaadt 
436bec2d00aSderaadt 	/* Free the leaf page, as long as it wasn't the root. */
437bec2d00aSderaadt 	if (h->pgno == P_ROOT) {
438bec2d00aSderaadt 		mpool_put(t->bt_mp, h, MPOOL_DIRTY);
439bec2d00aSderaadt 		return (RET_SUCCESS);
440bec2d00aSderaadt 	}
441bec2d00aSderaadt 	return (__bt_relink(t, h) || __bt_free(t, h));
442bec2d00aSderaadt }
443bec2d00aSderaadt 
444bec2d00aSderaadt /*
445bec2d00aSderaadt  * __bt_dleaf --
446bec2d00aSderaadt  *	Delete a single record from a leaf page.
447bec2d00aSderaadt  *
448bec2d00aSderaadt  * Parameters:
449bec2d00aSderaadt  *	t:	tree
450bec2d00aSderaadt  *    key:	referenced key
451bec2d00aSderaadt  *	h:	page
4528871b767Smillert  *	idx:	index on page to delete
453df930be7Sderaadt  *
454df930be7Sderaadt  * Returns:
455df930be7Sderaadt  *	RET_SUCCESS, RET_ERROR.
456df930be7Sderaadt  */
457df930be7Sderaadt int
__bt_dleaf(BTREE * t,const DBT * key,PAGE * h,u_int idx)458e20a56a5Sotto __bt_dleaf(BTREE *t, const DBT *key, PAGE *h, u_int idx)
459df930be7Sderaadt {
460bec2d00aSderaadt 	BLEAF *bl;
461bec2d00aSderaadt 	indx_t cnt, *ip, offset;
462bec2d00aSderaadt 	u_int32_t nbytes;
463df930be7Sderaadt 	void *to;
464bec2d00aSderaadt 	char *from;
465df930be7Sderaadt 
466bec2d00aSderaadt 	/* If this record is referenced by the cursor, delete the cursor. */
467bec2d00aSderaadt 	if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
468bec2d00aSderaadt 	    !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) &&
4698871b767Smillert 	    t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index == idx &&
4708871b767Smillert 	    __bt_curdel(t, key, h, idx))
471bec2d00aSderaadt 		return (RET_ERROR);
472bec2d00aSderaadt 
473bec2d00aSderaadt 	/* If the entry uses overflow pages, make them available for reuse. */
4748871b767Smillert 	to = bl = GETBLEAF(h, idx);
475df930be7Sderaadt 	if (bl->flags & P_BIGKEY && __ovfl_delete(t, bl->bytes) == RET_ERROR)
476df930be7Sderaadt 		return (RET_ERROR);
477df930be7Sderaadt 	if (bl->flags & P_BIGDATA &&
478df930be7Sderaadt 	    __ovfl_delete(t, bl->bytes + bl->ksize) == RET_ERROR)
479df930be7Sderaadt 		return (RET_ERROR);
480df930be7Sderaadt 
481bec2d00aSderaadt 	/* Pack the remaining key/data items at the end of the page. */
482bec2d00aSderaadt 	nbytes = NBLEAF(bl);
483df930be7Sderaadt 	from = (char *)h + h->upper;
484df930be7Sderaadt 	memmove(from + nbytes, from, (char *)to - from);
485df930be7Sderaadt 	h->upper += nbytes;
486df930be7Sderaadt 
487bec2d00aSderaadt 	/* Adjust the indices' offsets, shift the indices down. */
4888871b767Smillert 	offset = h->linp[idx];
4898871b767Smillert 	for (cnt = idx, ip = &h->linp[0]; cnt--; ++ip)
490df930be7Sderaadt 		if (ip[0] < offset)
491df930be7Sderaadt 			ip[0] += nbytes;
4928871b767Smillert 	for (cnt = NEXTINDEX(h) - idx; --cnt; ++ip)
493df930be7Sderaadt 		ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1];
494df930be7Sderaadt 	h->lower -= sizeof(indx_t);
495bec2d00aSderaadt 
496bec2d00aSderaadt 	/* If the cursor is on this page, adjust it as necessary. */
497bec2d00aSderaadt 	if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
498bec2d00aSderaadt 	    !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) &&
4998871b767Smillert 	    t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index > idx)
500bec2d00aSderaadt 		--t->bt_cursor.pg.index;
501bec2d00aSderaadt 
502df930be7Sderaadt 	return (RET_SUCCESS);
503df930be7Sderaadt }
504bec2d00aSderaadt 
505bec2d00aSderaadt /*
506bec2d00aSderaadt  * __bt_curdel --
507bec2d00aSderaadt  *	Delete the cursor.
508bec2d00aSderaadt  *
509bec2d00aSderaadt  * Parameters:
510bec2d00aSderaadt  *	t:	tree
511bec2d00aSderaadt  *    key:	referenced key (or NULL)
512bec2d00aSderaadt  *	h:	page
5138871b767Smillert  *    idx:	index on page to delete
514bec2d00aSderaadt  *
515bec2d00aSderaadt  * Returns:
516bec2d00aSderaadt  *	RET_SUCCESS, RET_ERROR.
517bec2d00aSderaadt  */
518bec2d00aSderaadt static int
__bt_curdel(BTREE * t,const DBT * key,PAGE * h,u_int idx)519e20a56a5Sotto __bt_curdel(BTREE *t, const DBT *key, PAGE *h, u_int idx)
520bec2d00aSderaadt {
521bec2d00aSderaadt 	CURSOR *c;
522bec2d00aSderaadt 	EPG e;
523bec2d00aSderaadt 	PAGE *pg;
524bec2d00aSderaadt 	int curcopy, status;
525bec2d00aSderaadt 
526bec2d00aSderaadt 	/*
527bec2d00aSderaadt 	 * If there are duplicates, move forward or backward to one.
528bec2d00aSderaadt 	 * Otherwise, copy the key into the cursor area.
529bec2d00aSderaadt 	 */
530bec2d00aSderaadt 	c = &t->bt_cursor;
531bec2d00aSderaadt 	F_CLR(c, CURS_AFTER | CURS_BEFORE | CURS_ACQUIRE);
532bec2d00aSderaadt 
533bec2d00aSderaadt 	curcopy = 0;
534bec2d00aSderaadt 	if (!F_ISSET(t, B_NODUPS)) {
535bec2d00aSderaadt 		/*
536bec2d00aSderaadt 		 * We're going to have to do comparisons.  If we weren't
537bec2d00aSderaadt 		 * provided a copy of the key, i.e. the user is deleting
538bec2d00aSderaadt 		 * the current cursor position, get one.
539bec2d00aSderaadt 		 */
540bec2d00aSderaadt 		if (key == NULL) {
541bec2d00aSderaadt 			e.page = h;
5428871b767Smillert 			e.index = idx;
543bec2d00aSderaadt 			if ((status = __bt_ret(t, &e,
544bec2d00aSderaadt 			    &c->key, &c->key, NULL, NULL, 1)) != RET_SUCCESS)
545bec2d00aSderaadt 				return (status);
546bec2d00aSderaadt 			curcopy = 1;
547bec2d00aSderaadt 			key = &c->key;
548bec2d00aSderaadt 		}
549bec2d00aSderaadt 		/* Check previous key, if not at the beginning of the page. */
5508871b767Smillert 		if (idx > 0) {
551bec2d00aSderaadt 			e.page = h;
5528871b767Smillert 			e.index = idx - 1;
553bec2d00aSderaadt 			if (__bt_cmp(t, key, &e) == 0) {
554bec2d00aSderaadt 				F_SET(c, CURS_BEFORE);
555bec2d00aSderaadt 				goto dup2;
556bec2d00aSderaadt 			}
557bec2d00aSderaadt 		}
558bec2d00aSderaadt 		/* Check next key, if not at the end of the page. */
5598871b767Smillert 		if (idx < NEXTINDEX(h) - 1) {
560bec2d00aSderaadt 			e.page = h;
5618871b767Smillert 			e.index = idx + 1;
562bec2d00aSderaadt 			if (__bt_cmp(t, key, &e) == 0) {
563bec2d00aSderaadt 				F_SET(c, CURS_AFTER);
564bec2d00aSderaadt 				goto dup2;
565bec2d00aSderaadt 			}
566bec2d00aSderaadt 		}
567bec2d00aSderaadt 		/* Check previous key if at the beginning of the page. */
5688871b767Smillert 		if (idx == 0 && h->prevpg != P_INVALID) {
569bec2d00aSderaadt 			if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL)
570bec2d00aSderaadt 				return (RET_ERROR);
571bec2d00aSderaadt 			e.page = pg;
572bec2d00aSderaadt 			e.index = NEXTINDEX(pg) - 1;
573bec2d00aSderaadt 			if (__bt_cmp(t, key, &e) == 0) {
574bec2d00aSderaadt 				F_SET(c, CURS_BEFORE);
575bec2d00aSderaadt 				goto dup1;
576bec2d00aSderaadt 			}
577bec2d00aSderaadt 			mpool_put(t->bt_mp, pg, 0);
578bec2d00aSderaadt 		}
579bec2d00aSderaadt 		/* Check next key if at the end of the page. */
5808871b767Smillert 		if (idx == NEXTINDEX(h) - 1 && h->nextpg != P_INVALID) {
581bec2d00aSderaadt 			if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL)
582bec2d00aSderaadt 				return (RET_ERROR);
583bec2d00aSderaadt 			e.page = pg;
584bec2d00aSderaadt 			e.index = 0;
585bec2d00aSderaadt 			if (__bt_cmp(t, key, &e) == 0) {
586bec2d00aSderaadt 				F_SET(c, CURS_AFTER);
587bec2d00aSderaadt dup1:				mpool_put(t->bt_mp, pg, 0);
588bec2d00aSderaadt dup2:				c->pg.pgno = e.page->pgno;
589bec2d00aSderaadt 				c->pg.index = e.index;
590bec2d00aSderaadt 				return (RET_SUCCESS);
591bec2d00aSderaadt 			}
592bec2d00aSderaadt 			mpool_put(t->bt_mp, pg, 0);
593bec2d00aSderaadt 		}
594bec2d00aSderaadt 	}
595bec2d00aSderaadt 	e.page = h;
5968871b767Smillert 	e.index = idx;
597bec2d00aSderaadt 	if (curcopy || (status =
598bec2d00aSderaadt 	    __bt_ret(t, &e, &c->key, &c->key, NULL, NULL, 1)) == RET_SUCCESS) {
599bec2d00aSderaadt 		F_SET(c, CURS_ACQUIRE);
600bec2d00aSderaadt 		return (RET_SUCCESS);
601bec2d00aSderaadt 	}
602bec2d00aSderaadt 	return (status);
603bec2d00aSderaadt }
604bec2d00aSderaadt 
605bec2d00aSderaadt /*
606bec2d00aSderaadt  * __bt_relink --
607bec2d00aSderaadt  *	Link around a deleted page.
608bec2d00aSderaadt  *
609bec2d00aSderaadt  * Parameters:
610bec2d00aSderaadt  *	t:	tree
611bec2d00aSderaadt  *	h:	page to be deleted
612bec2d00aSderaadt  */
613bec2d00aSderaadt static int
__bt_relink(BTREE * t,PAGE * h)614e20a56a5Sotto __bt_relink(BTREE *t, PAGE *h)
615bec2d00aSderaadt {
616bec2d00aSderaadt 	PAGE *pg;
617bec2d00aSderaadt 
618bec2d00aSderaadt 	if (h->nextpg != P_INVALID) {
619bec2d00aSderaadt 		if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL)
620bec2d00aSderaadt 			return (RET_ERROR);
621bec2d00aSderaadt 		pg->prevpg = h->prevpg;
622bec2d00aSderaadt 		mpool_put(t->bt_mp, pg, MPOOL_DIRTY);
623bec2d00aSderaadt 	}
624bec2d00aSderaadt 	if (h->prevpg != P_INVALID) {
625bec2d00aSderaadt 		if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL)
626bec2d00aSderaadt 			return (RET_ERROR);
627bec2d00aSderaadt 		pg->nextpg = h->nextpg;
628bec2d00aSderaadt 		mpool_put(t->bt_mp, pg, MPOOL_DIRTY);
629bec2d00aSderaadt 	}
630bec2d00aSderaadt 	return (0);
631bec2d00aSderaadt }
632