1 /*- 2 * Copyright (c) 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Mike Olson. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #if defined(LIBC_SCCS) && !defined(lint) 12 static char sccsid[] = "@(#)bt_delete.c 8.3 (Berkeley) 02/21/94"; 13 #endif /* LIBC_SCCS and not lint */ 14 15 #include <sys/types.h> 16 17 #include <errno.h> 18 #include <stdio.h> 19 #include <string.h> 20 21 #include <db.h> 22 #include "btree.h" 23 24 static int bt_bdelete __P((BTREE *, const DBT *)); 25 26 /* 27 * __BT_DELETE -- Delete the item(s) referenced by a key. 28 * 29 * Parameters: 30 * dbp: pointer to access method 31 * key: key to delete 32 * flags: R_CURSOR if deleting what the cursor references 33 * 34 * Returns: 35 * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. 36 */ 37 int 38 __bt_delete(dbp, key, flags) 39 const DB *dbp; 40 const DBT *key; 41 u_int flags; 42 { 43 BTREE *t; 44 int status; 45 46 t = dbp->internal; 47 48 /* Toss any page pinned across calls. */ 49 if (t->bt_pinned != NULL) { 50 mpool_put(t->bt_mp, t->bt_pinned, 0); 51 t->bt_pinned = NULL; 52 } 53 54 if (ISSET(t, B_RDONLY)) { 55 errno = EPERM; 56 return (RET_ERROR); 57 } 58 59 switch(flags) { 60 case 0: 61 status = bt_bdelete(t, key); 62 break; 63 case R_CURSOR: 64 /* 65 * If flags is R_CURSOR, delete the cursor; must already have 66 * started a scan and not have already deleted the record. For 67 * the delete cursor bit to have been set requires that the 68 * scan be initialized, so no reason to check. 69 */ 70 if (!ISSET(t, B_SEQINIT)) 71 goto einval; 72 status = ISSET(t, B_DELCRSR) ? 73 RET_SPECIAL : __bt_crsrdel(t, &t->bt_bcursor); 74 break; 75 default: 76 einval: errno = EINVAL; 77 return (RET_ERROR); 78 } 79 if (status == RET_SUCCESS) 80 SET(t, B_MODIFIED); 81 return (status); 82 } 83 84 /* 85 * BT_BDELETE -- Delete all key/data pairs matching the specified key. 86 * 87 * Parameters: 88 * tree: tree 89 * key: key to delete 90 * 91 * Returns: 92 * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. 93 */ 94 static int 95 bt_bdelete(t, key) 96 BTREE *t; 97 const DBT *key; 98 { 99 EPG *e, save; 100 PAGE *h; 101 pgno_t cpgno, pg; 102 indx_t cindex; 103 int deleted, dirty1, dirty2, exact; 104 105 /* Find any matching record; __bt_search pins the page. */ 106 if ((e = __bt_search(t, key, &exact)) == NULL) 107 return (RET_ERROR); 108 if (!exact) { 109 mpool_put(t->bt_mp, e->page, 0); 110 return (RET_SPECIAL); 111 } 112 113 /* 114 * Delete forward, then delete backward, from the found key. The 115 * ordering is so that the deletions don't mess up the page refs. 116 * The first loop deletes the key from the original page, the second 117 * unpins the original page. In the first loop, dirty1 is set if 118 * the original page is modified, and dirty2 is set if any subsequent 119 * pages are modified. In the second loop, dirty1 starts off set if 120 * the original page has been modified, and is set if any subsequent 121 * pages are modified. 122 * 123 * If find the key referenced by the cursor, don't delete it, just 124 * flag it for future deletion. The cursor page number is P_INVALID 125 * unless the sequential scan is initialized, so no reason to check. 126 * A special case is when the already deleted cursor record was the 127 * only record found. If so, then the delete opertion fails as no 128 * records were deleted. 129 * 130 * Cycle in place in the current page until the current record doesn't 131 * match the key or the page is empty. If the latter, walk forward, 132 * skipping empty pages and repeating until a record doesn't match 133 * the key or the end of the tree is reached. 134 */ 135 cpgno = t->bt_bcursor.pgno; 136 cindex = t->bt_bcursor.index; 137 save = *e; 138 dirty1 = 0; 139 for (h = e->page, deleted = 0;;) { 140 dirty2 = 0; 141 do { 142 if (h->pgno == cpgno && e->index == cindex) { 143 if (!ISSET(t, B_DELCRSR)) { 144 SET(t, B_DELCRSR); 145 deleted = 1; 146 } 147 ++e->index; 148 } else { 149 if (__bt_dleaf(t, h, e->index)) { 150 if (h->pgno != save.page->pgno) 151 mpool_put(t->bt_mp, h, dirty2); 152 mpool_put(t->bt_mp, save.page, dirty1); 153 return (RET_ERROR); 154 } 155 if (h->pgno == save.page->pgno) 156 dirty1 = MPOOL_DIRTY; 157 else 158 dirty2 = MPOOL_DIRTY; 159 deleted = 1; 160 } 161 } while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0); 162 163 /* 164 * Quit if didn't find a match, no next page, or first key on 165 * the next page doesn't match. Don't unpin the original page 166 * unless an error occurs. 167 */ 168 if (e->index < NEXTINDEX(h)) 169 break; 170 for (;;) { 171 if ((pg = h->nextpg) == P_INVALID) 172 goto done1; 173 if (h->pgno != save.page->pgno) 174 mpool_put(t->bt_mp, h, dirty2); 175 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) { 176 mpool_put(t->bt_mp, save.page, dirty1); 177 return (RET_ERROR); 178 } 179 if (NEXTINDEX(h) != 0) { 180 e->page = h; 181 e->index = 0; 182 break; 183 } 184 } 185 186 if (__bt_cmp(t, key, e) != 0) 187 break; 188 } 189 190 /* 191 * Reach here with the original page and the last page referenced 192 * pinned (they may be the same). Release it if not the original. 193 */ 194 done1: if (h->pgno != save.page->pgno) 195 mpool_put(t->bt_mp, h, dirty2); 196 197 /* 198 * Walk backwards from the record previous to the record returned by 199 * __bt_search, skipping empty pages, until a record doesn't match 200 * the key or reach the beginning of the tree. 201 */ 202 *e = save; 203 for (;;) { 204 if (e->index) 205 --e->index; 206 for (h = e->page; e->index; --e->index) { 207 if (__bt_cmp(t, key, e) != 0) 208 goto done2; 209 if (h->pgno == cpgno && e->index == cindex) { 210 if (!ISSET(t, B_DELCRSR)) { 211 SET(t, B_DELCRSR); 212 deleted = 1; 213 } 214 } else { 215 if (__bt_dleaf(t, h, e->index) == RET_ERROR) { 216 mpool_put(t->bt_mp, h, dirty1); 217 return (RET_ERROR); 218 } 219 if (h->pgno == save.page->pgno) 220 dirty1 = MPOOL_DIRTY; 221 deleted = 1; 222 } 223 } 224 225 if ((pg = h->prevpg) == P_INVALID) 226 goto done2; 227 mpool_put(t->bt_mp, h, dirty1); 228 dirty1 = 0; 229 if ((e->page = mpool_get(t->bt_mp, pg, 0)) == NULL) 230 return (RET_ERROR); 231 e->index = NEXTINDEX(e->page); 232 } 233 234 /* 235 * Reach here with the last page that was looked at pinned. Release 236 * it. 237 */ 238 done2: mpool_put(t->bt_mp, h, dirty1); 239 return (deleted ? RET_SUCCESS : RET_SPECIAL); 240 } 241 242 /* 243 * __BT_DLEAF -- Delete a single record from a leaf page. 244 * 245 * Parameters: 246 * t: tree 247 * index: index on current page to delete 248 * 249 * Returns: 250 * RET_SUCCESS, RET_ERROR. 251 */ 252 int 253 __bt_dleaf(t, h, index) 254 BTREE *t; 255 PAGE *h; 256 indx_t index; 257 { 258 register BLEAF *bl; 259 register indx_t cnt, *ip, offset; 260 register size_t nbytes; 261 char *from; 262 void *to; 263 264 /* 265 * Delete a record from a btree leaf page. Internal records are never 266 * deleted from internal pages, regardless of the records that caused 267 * them to be added being deleted. Pages made empty by deletion are 268 * not reclaimed. They are, however, made available for reuse. 269 * 270 * Pack the remaining entries at the end of the page, shift the indices 271 * down, overwriting the deleted record and its index. If the record 272 * uses overflow pages, make them available for reuse. 273 */ 274 to = bl = GETBLEAF(h, index); 275 if (bl->flags & P_BIGKEY && __ovfl_delete(t, bl->bytes) == RET_ERROR) 276 return (RET_ERROR); 277 if (bl->flags & P_BIGDATA && 278 __ovfl_delete(t, bl->bytes + bl->ksize) == RET_ERROR) 279 return (RET_ERROR); 280 nbytes = NBLEAF(bl); 281 282 /* 283 * Compress the key/data pairs. Compress and adjust the [BR]LEAF 284 * offsets. Reset the headers. 285 */ 286 from = (char *)h + h->upper; 287 memmove(from + nbytes, from, (char *)to - from); 288 h->upper += nbytes; 289 290 offset = h->linp[index]; 291 for (cnt = index, ip = &h->linp[0]; cnt--; ++ip) 292 if (ip[0] < offset) 293 ip[0] += nbytes; 294 for (cnt = NEXTINDEX(h) - index; --cnt; ++ip) 295 ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1]; 296 h->lower -= sizeof(indx_t); 297 return (RET_SUCCESS); 298 } 299