1 /*- 2 * Copyright (c) 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Mike Olson. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #if defined(LIBC_SCCS) && !defined(lint) 12 static char sccsid[] = "@(#)bt_delete.c 8.1 (Berkeley) 06/04/93"; 13 #endif /* LIBC_SCCS and not lint */ 14 15 #include <sys/types.h> 16 17 #include <errno.h> 18 #include <stdio.h> 19 #include <string.h> 20 21 #include <db.h> 22 #include "btree.h" 23 24 static int bt_bdelete __P((BTREE *, const DBT *)); 25 26 /* 27 * __BT_DELETE -- Delete the item(s) referenced by a key. 28 * 29 * Parameters: 30 * dbp: pointer to access method 31 * key: key to delete 32 * flags: R_CURSOR if deleting what the cursor references 33 * 34 * Returns: 35 * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. 36 */ 37 int 38 __bt_delete(dbp, key, flags) 39 const DB *dbp; 40 const DBT *key; 41 u_int flags; 42 { 43 BTREE *t; 44 int status; 45 46 t = dbp->internal; 47 if (ISSET(t, B_RDONLY)) { 48 errno = EPERM; 49 return (RET_ERROR); 50 } 51 switch(flags) { 52 case 0: 53 status = bt_bdelete(t, key); 54 break; 55 case R_CURSOR: 56 /* 57 * If flags is R_CURSOR, delete the cursor; must already have 58 * started a scan and not have already deleted the record. For 59 * the delete cursor bit to have been set requires that the 60 * scan be initialized, so no reason to check. 61 */ 62 if (!ISSET(t, B_SEQINIT)) 63 goto einval; 64 status = ISSET(t, B_DELCRSR) ? 65 RET_SPECIAL : __bt_crsrdel(t, &t->bt_bcursor); 66 break; 67 default: 68 einval: errno = EINVAL; 69 return (RET_ERROR); 70 } 71 if (status == RET_SUCCESS) 72 SET(t, B_MODIFIED); 73 return (status); 74 } 75 76 /* 77 * BT_BDELETE -- Delete all key/data pairs matching the specified key. 78 * 79 * Parameters: 80 * tree: tree 81 * key: key to delete 82 * 83 * Returns: 84 * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. 85 */ 86 static int 87 bt_bdelete(t, key) 88 BTREE *t; 89 const DBT *key; 90 { 91 EPG *e, save; 92 PAGE *h; 93 pgno_t cpgno, pg; 94 indx_t cindex; 95 int deleted, dirty1, dirty2, exact; 96 97 /* Find any matching record; __bt_search pins the page. */ 98 if ((e = __bt_search(t, key, &exact)) == NULL) 99 return (RET_ERROR); 100 if (!exact) { 101 mpool_put(t->bt_mp, e->page, 0); 102 return (RET_SPECIAL); 103 } 104 105 /* 106 * Delete forward, then delete backward, from the found key. The 107 * ordering is so that the deletions don't mess up the page refs. 108 * The first loop deletes the key from the original page, the second 109 * unpins the original page. In the first loop, dirty1 is set if 110 * the original page is modified, and dirty2 is set if any subsequent 111 * pages are modified. In the second loop, dirty1 starts off set if 112 * the original page has been modified, and is set if any subsequent 113 * pages are modified. 114 * 115 * If find the key referenced by the cursor, don't delete it, just 116 * flag it for future deletion. The cursor page number is P_INVALID 117 * unless the sequential scan is initialized, so no reason to check. 118 * A special case is when the already deleted cursor record was the 119 * only record found. If so, then the delete opertion fails as no 120 * records were deleted. 121 * 122 * Cycle in place in the current page until the current record doesn't 123 * match the key or the page is empty. If the latter, walk forward, 124 * skipping empty pages and repeating until a record doesn't match 125 * the key or the end of the tree is reached. 126 */ 127 cpgno = t->bt_bcursor.pgno; 128 cindex = t->bt_bcursor.index; 129 save = *e; 130 dirty1 = 0; 131 for (h = e->page, deleted = 0;;) { 132 dirty2 = 0; 133 do { 134 if (h->pgno == cpgno && e->index == cindex) { 135 if (!ISSET(t, B_DELCRSR)) { 136 SET(t, B_DELCRSR); 137 deleted = 1; 138 } 139 ++e->index; 140 } else { 141 if (__bt_dleaf(t, h, e->index)) { 142 if (h->pgno != save.page->pgno) 143 mpool_put(t->bt_mp, h, dirty2); 144 mpool_put(t->bt_mp, save.page, dirty1); 145 return (RET_ERROR); 146 } 147 if (h->pgno == save.page->pgno) 148 dirty1 = MPOOL_DIRTY; 149 else 150 dirty2 = MPOOL_DIRTY; 151 deleted = 1; 152 } 153 } while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0); 154 155 /* 156 * Quit if didn't find a match, no next page, or first key on 157 * the next page doesn't match. Don't unpin the original page 158 * unless an error occurs. 159 */ 160 if (e->index < NEXTINDEX(h)) 161 break; 162 for (;;) { 163 if ((pg = h->nextpg) == P_INVALID) 164 goto done1; 165 if (h->pgno != save.page->pgno) 166 mpool_put(t->bt_mp, h, dirty2); 167 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) { 168 mpool_put(t->bt_mp, save.page, dirty1); 169 return (RET_ERROR); 170 } 171 if (NEXTINDEX(h) != 0) { 172 e->page = h; 173 e->index = 0; 174 break; 175 } 176 } 177 178 if (__bt_cmp(t, key, e) != 0) 179 break; 180 } 181 182 /* 183 * Reach here with the original page and the last page referenced 184 * pinned (they may be the same). Release it if not the original. 185 */ 186 done1: if (h->pgno != save.page->pgno) 187 mpool_put(t->bt_mp, h, dirty2); 188 189 /* 190 * Walk backwards from the record previous to the record returned by 191 * __bt_search, skipping empty pages, until a record doesn't match 192 * the key or reach the beginning of the tree. 193 */ 194 *e = save; 195 for (;;) { 196 if (e->index) 197 --e->index; 198 for (h = e->page; e->index; --e->index) { 199 if (__bt_cmp(t, key, e) != 0) 200 goto done2; 201 if (h->pgno == cpgno && e->index == cindex) { 202 if (!ISSET(t, B_DELCRSR)) { 203 SET(t, B_DELCRSR); 204 deleted = 1; 205 } 206 } else { 207 if (__bt_dleaf(t, h, e->index) == RET_ERROR) { 208 mpool_put(t->bt_mp, h, dirty1); 209 return (RET_ERROR); 210 } 211 if (h->pgno == save.page->pgno) 212 dirty1 = MPOOL_DIRTY; 213 deleted = 1; 214 } 215 } 216 217 if ((pg = h->prevpg) == P_INVALID) 218 goto done2; 219 mpool_put(t->bt_mp, h, dirty1); 220 dirty1 = 0; 221 if ((e->page = mpool_get(t->bt_mp, pg, 0)) == NULL) 222 return (RET_ERROR); 223 e->index = NEXTINDEX(e->page); 224 } 225 226 /* 227 * Reach here with the last page that was looked at pinned. Release 228 * it. 229 */ 230 done2: mpool_put(t->bt_mp, h, dirty1); 231 return (deleted ? RET_SUCCESS : RET_SPECIAL); 232 } 233 234 /* 235 * __BT_DLEAF -- Delete a single record from a leaf page. 236 * 237 * Parameters: 238 * t: tree 239 * index: index on current page to delete 240 * 241 * Returns: 242 * RET_SUCCESS, RET_ERROR. 243 */ 244 int 245 __bt_dleaf(t, h, index) 246 BTREE *t; 247 PAGE *h; 248 int index; 249 { 250 register BLEAF *bl; 251 register indx_t *ip, offset; 252 register size_t nbytes; 253 register int cnt; 254 char *from; 255 void *to; 256 257 /* 258 * Delete a record from a btree leaf page. Internal records are never 259 * deleted from internal pages, regardless of the records that caused 260 * them to be added being deleted. Pages made empty by deletion are 261 * not reclaimed. They are, however, made available for reuse. 262 * 263 * Pack the remaining entries at the end of the page, shift the indices 264 * down, overwriting the deleted record and its index. If the record 265 * uses overflow pages, make them available for reuse. 266 */ 267 to = bl = GETBLEAF(h, index); 268 if (bl->flags & P_BIGKEY && __ovfl_delete(t, bl->bytes) == RET_ERROR) 269 return (RET_ERROR); 270 if (bl->flags & P_BIGDATA && 271 __ovfl_delete(t, bl->bytes + bl->ksize) == RET_ERROR) 272 return (RET_ERROR); 273 nbytes = NBLEAF(bl); 274 275 /* 276 * Compress the key/data pairs. Compress and adjust the [BR]LEAF 277 * offsets. Reset the headers. 278 */ 279 from = (char *)h + h->upper; 280 memmove(from + nbytes, from, (char *)to - from); 281 h->upper += nbytes; 282 283 offset = h->linp[index]; 284 for (cnt = index, ip = &h->linp[0]; cnt--; ++ip) 285 if (ip[0] < offset) 286 ip[0] += nbytes; 287 for (cnt = NEXTINDEX(h) - index; --cnt; ++ip) 288 ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1]; 289 h->lower -= sizeof(indx_t); 290 return (RET_SUCCESS); 291 } 292