1 /*-
2 * Copyright (c) 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Mike Olson.
7 *
8 * %sccs.include.redist.c%
9 */
10
11 #if defined(LIBC_SCCS) && !defined(lint)
12 static char sccsid[] = "@(#)bt_delete.c 8.3 (Berkeley) 02/21/94";
13 #endif /* LIBC_SCCS and not lint */
14
15 #include <sys/types.h>
16
17 #include <errno.h>
18 #include <stdio.h>
19 #include <string.h>
20
21 #include <db.h>
22 #include "btree.h"
23
24 static int bt_bdelete __P((BTREE *, const DBT *));
25
26 /*
27 * __BT_DELETE -- Delete the item(s) referenced by a key.
28 *
29 * Parameters:
30 * dbp: pointer to access method
31 * key: key to delete
32 * flags: R_CURSOR if deleting what the cursor references
33 *
34 * Returns:
35 * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found.
36 */
37 int
__bt_delete(dbp,key,flags)38 __bt_delete(dbp, key, flags)
39 const DB *dbp;
40 const DBT *key;
41 u_int flags;
42 {
43 BTREE *t;
44 int status;
45
46 t = dbp->internal;
47
48 /* Toss any page pinned across calls. */
49 if (t->bt_pinned != NULL) {
50 mpool_put(t->bt_mp, t->bt_pinned, 0);
51 t->bt_pinned = NULL;
52 }
53
54 if (ISSET(t, B_RDONLY)) {
55 errno = EPERM;
56 return (RET_ERROR);
57 }
58
59 switch(flags) {
60 case 0:
61 status = bt_bdelete(t, key);
62 break;
63 case R_CURSOR:
64 /*
65 * If flags is R_CURSOR, delete the cursor; must already have
66 * started a scan and not have already deleted the record. For
67 * the delete cursor bit to have been set requires that the
68 * scan be initialized, so no reason to check.
69 */
70 if (!ISSET(t, B_SEQINIT))
71 goto einval;
72 status = ISSET(t, B_DELCRSR) ?
73 RET_SPECIAL : __bt_crsrdel(t, &t->bt_bcursor);
74 break;
75 default:
76 einval: errno = EINVAL;
77 return (RET_ERROR);
78 }
79 if (status == RET_SUCCESS)
80 SET(t, B_MODIFIED);
81 return (status);
82 }
83
84 /*
85 * BT_BDELETE -- Delete all key/data pairs matching the specified key.
86 *
87 * Parameters:
88 * tree: tree
89 * key: key to delete
90 *
91 * Returns:
92 * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found.
93 */
94 static int
bt_bdelete(t,key)95 bt_bdelete(t, key)
96 BTREE *t;
97 const DBT *key;
98 {
99 EPG *e, save;
100 PAGE *h;
101 pgno_t cpgno, pg;
102 indx_t cindex;
103 int deleted, dirty1, dirty2, exact;
104
105 /* Find any matching record; __bt_search pins the page. */
106 if ((e = __bt_search(t, key, &exact)) == NULL)
107 return (RET_ERROR);
108 if (!exact) {
109 mpool_put(t->bt_mp, e->page, 0);
110 return (RET_SPECIAL);
111 }
112
113 /*
114 * Delete forward, then delete backward, from the found key. The
115 * ordering is so that the deletions don't mess up the page refs.
116 * The first loop deletes the key from the original page, the second
117 * unpins the original page. In the first loop, dirty1 is set if
118 * the original page is modified, and dirty2 is set if any subsequent
119 * pages are modified. In the second loop, dirty1 starts off set if
120 * the original page has been modified, and is set if any subsequent
121 * pages are modified.
122 *
123 * If find the key referenced by the cursor, don't delete it, just
124 * flag it for future deletion. The cursor page number is P_INVALID
125 * unless the sequential scan is initialized, so no reason to check.
126 * A special case is when the already deleted cursor record was the
127 * only record found. If so, then the delete opertion fails as no
128 * records were deleted.
129 *
130 * Cycle in place in the current page until the current record doesn't
131 * match the key or the page is empty. If the latter, walk forward,
132 * skipping empty pages and repeating until a record doesn't match
133 * the key or the end of the tree is reached.
134 */
135 cpgno = t->bt_bcursor.pgno;
136 cindex = t->bt_bcursor.index;
137 save = *e;
138 dirty1 = 0;
139 for (h = e->page, deleted = 0;;) {
140 dirty2 = 0;
141 do {
142 if (h->pgno == cpgno && e->index == cindex) {
143 if (!ISSET(t, B_DELCRSR)) {
144 SET(t, B_DELCRSR);
145 deleted = 1;
146 }
147 ++e->index;
148 } else {
149 if (__bt_dleaf(t, h, e->index)) {
150 if (h->pgno != save.page->pgno)
151 mpool_put(t->bt_mp, h, dirty2);
152 mpool_put(t->bt_mp, save.page, dirty1);
153 return (RET_ERROR);
154 }
155 if (h->pgno == save.page->pgno)
156 dirty1 = MPOOL_DIRTY;
157 else
158 dirty2 = MPOOL_DIRTY;
159 deleted = 1;
160 }
161 } while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0);
162
163 /*
164 * Quit if didn't find a match, no next page, or first key on
165 * the next page doesn't match. Don't unpin the original page
166 * unless an error occurs.
167 */
168 if (e->index < NEXTINDEX(h))
169 break;
170 for (;;) {
171 if ((pg = h->nextpg) == P_INVALID)
172 goto done1;
173 if (h->pgno != save.page->pgno)
174 mpool_put(t->bt_mp, h, dirty2);
175 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) {
176 mpool_put(t->bt_mp, save.page, dirty1);
177 return (RET_ERROR);
178 }
179 if (NEXTINDEX(h) != 0) {
180 e->page = h;
181 e->index = 0;
182 break;
183 }
184 }
185
186 if (__bt_cmp(t, key, e) != 0)
187 break;
188 }
189
190 /*
191 * Reach here with the original page and the last page referenced
192 * pinned (they may be the same). Release it if not the original.
193 */
194 done1: if (h->pgno != save.page->pgno)
195 mpool_put(t->bt_mp, h, dirty2);
196
197 /*
198 * Walk backwards from the record previous to the record returned by
199 * __bt_search, skipping empty pages, until a record doesn't match
200 * the key or reach the beginning of the tree.
201 */
202 *e = save;
203 for (;;) {
204 if (e->index)
205 --e->index;
206 for (h = e->page; e->index; --e->index) {
207 if (__bt_cmp(t, key, e) != 0)
208 goto done2;
209 if (h->pgno == cpgno && e->index == cindex) {
210 if (!ISSET(t, B_DELCRSR)) {
211 SET(t, B_DELCRSR);
212 deleted = 1;
213 }
214 } else {
215 if (__bt_dleaf(t, h, e->index) == RET_ERROR) {
216 mpool_put(t->bt_mp, h, dirty1);
217 return (RET_ERROR);
218 }
219 if (h->pgno == save.page->pgno)
220 dirty1 = MPOOL_DIRTY;
221 deleted = 1;
222 }
223 }
224
225 if ((pg = h->prevpg) == P_INVALID)
226 goto done2;
227 mpool_put(t->bt_mp, h, dirty1);
228 dirty1 = 0;
229 if ((e->page = mpool_get(t->bt_mp, pg, 0)) == NULL)
230 return (RET_ERROR);
231 e->index = NEXTINDEX(e->page);
232 }
233
234 /*
235 * Reach here with the last page that was looked at pinned. Release
236 * it.
237 */
238 done2: mpool_put(t->bt_mp, h, dirty1);
239 return (deleted ? RET_SUCCESS : RET_SPECIAL);
240 }
241
242 /*
243 * __BT_DLEAF -- Delete a single record from a leaf page.
244 *
245 * Parameters:
246 * t: tree
247 * index: index on current page to delete
248 *
249 * Returns:
250 * RET_SUCCESS, RET_ERROR.
251 */
252 int
__bt_dleaf(t,h,index)253 __bt_dleaf(t, h, index)
254 BTREE *t;
255 PAGE *h;
256 indx_t index;
257 {
258 register BLEAF *bl;
259 register indx_t cnt, *ip, offset;
260 register size_t nbytes;
261 char *from;
262 void *to;
263
264 /*
265 * Delete a record from a btree leaf page. Internal records are never
266 * deleted from internal pages, regardless of the records that caused
267 * them to be added being deleted. Pages made empty by deletion are
268 * not reclaimed. They are, however, made available for reuse.
269 *
270 * Pack the remaining entries at the end of the page, shift the indices
271 * down, overwriting the deleted record and its index. If the record
272 * uses overflow pages, make them available for reuse.
273 */
274 to = bl = GETBLEAF(h, index);
275 if (bl->flags & P_BIGKEY && __ovfl_delete(t, bl->bytes) == RET_ERROR)
276 return (RET_ERROR);
277 if (bl->flags & P_BIGDATA &&
278 __ovfl_delete(t, bl->bytes + bl->ksize) == RET_ERROR)
279 return (RET_ERROR);
280 nbytes = NBLEAF(bl);
281
282 /*
283 * Compress the key/data pairs. Compress and adjust the [BR]LEAF
284 * offsets. Reset the headers.
285 */
286 from = (char *)h + h->upper;
287 memmove(from + nbytes, from, (char *)to - from);
288 h->upper += nbytes;
289
290 offset = h->linp[index];
291 for (cnt = index, ip = &h->linp[0]; cnt--; ++ip)
292 if (ip[0] < offset)
293 ip[0] += nbytes;
294 for (cnt = NEXTINDEX(h) - index; --cnt; ++ip)
295 ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1];
296 h->lower -= sizeof(indx_t);
297 return (RET_SUCCESS);
298 }
299