1 /*- 2 * Copyright (c) 1990 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Mike Olson. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #if defined(LIBC_SCCS) && !defined(lint) 12 static char sccsid[] = "@(#)bt_delete.c 5.4 (Berkeley) 10/03/92"; 13 #endif /* LIBC_SCCS and not lint */ 14 15 #include <sys/types.h> 16 #include <errno.h> 17 #include <db.h> 18 #include <stdio.h> 19 #include <string.h> 20 #include "btree.h" 21 22 static int bt_bdelete __P((BTREE *, const DBT *)); 23 24 /* 25 * __BT_DELETE -- Delete the item(s) referenced by a key. 26 * 27 * Parameters: 28 * dbp: pointer to access method 29 * key: key to delete 30 * flags: R_CURSOR if deleting what the cursor references 31 * 32 * Returns: 33 * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. 34 */ 35 int 36 __bt_delete(dbp, key, flags) 37 const DB *dbp; 38 const DBT *key; 39 u_int flags; 40 { 41 BTREE *t; 42 int status; 43 44 t = dbp->internal; 45 if (ISSET(t, BTF_RDONLY)) { 46 errno = EPERM; 47 return (RET_ERROR); 48 } 49 switch(flags) { 50 case 0: 51 status = bt_bdelete(t, key); 52 break; 53 case R_CURSOR: 54 /* 55 * If flags is R_CURSOR, delete the cursor; must already have 56 * started a scan and not have already deleted the record. For 57 * the delete cursor bit to have been set requires that the 58 * scan be initialized, so no reason to check. 59 */ 60 status = ISSET(t, BTF_DELCRSR) ? 61 RET_SPECIAL : __bt_crsrdel(t, &t->bt_bcursor); 62 break; 63 default: 64 errno = EINVAL; 65 return (RET_ERROR); 66 } 67 if (status == RET_SUCCESS) 68 SET(t, BTF_MODIFIED); 69 return (status); 70 } 71 72 /* 73 * BT_BDELETE -- Delete all key/data pairs matching the specified key. 74 * 75 * Parameters: 76 * tree: tree 77 * key: key to delete 78 * 79 * Returns: 80 * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. 81 */ 82 static int 83 bt_bdelete(t, key) 84 BTREE *t; 85 const DBT *key; 86 { 87 EPG *e, save; 88 PAGE *h; 89 pgno_t cpgno, pg; 90 index_t cindex; 91 int deleted, dirty1, dirty2, exact; 92 93 /* Find any matching record; __bt_search pins the page. */ 94 if ((e = __bt_search(t, key, &exact)) == NULL) 95 return (RET_ERROR); 96 if (!exact) { 97 mpool_put(t->bt_mp, e->page, 0); 98 return (RET_SPECIAL); 99 } 100 101 /* 102 * Delete forward, then delete backward, from the found key. The 103 * ordering is so that the deletions don't mess up the page refs. 104 * The first loop deletes the key from the original page, the second 105 * unpins the original page. In the first loop, dirty1 is set if 106 * the original page is modified, and dirty2 is set if any subsequent 107 * pages are modified. In the second loop, dirty1 starts off set if 108 * the original page has been modified, and is set if any subsequent 109 * pages are modified. 110 * 111 * If find the key referenced by the cursor, don't delete it, just 112 * flag it for future deletion. The cursor page number is P_INVALID 113 * unless the sequential scan is initialized, so no reason to check. 114 * A special case is when the already deleted cursor record was the 115 * only record found. If so, then the delete opertion fails as no 116 * records were deleted. 117 * 118 * Cycle in place in the current page until the current record doesn't 119 * match the key or the page is empty. If the latter, walk forward, 120 * skipping empty pages and repeating until a record doesn't match 121 * the key or the end of the tree is reached. 122 */ 123 cpgno = t->bt_bcursor.pgno; 124 cindex = t->bt_bcursor.index; 125 save = *e; 126 dirty1 = 0; 127 for (h = e->page, deleted = 0;;) { 128 dirty2 = 0; 129 do { 130 if (h->pgno == cpgno && e->index == cindex) { 131 if (NOTSET(t, BTF_DELCRSR)) { 132 SET(t, BTF_DELCRSR); 133 deleted = 1; 134 } 135 ++e->index; 136 } else { 137 if (__bt_dleaf(t, h, e->index)) { 138 if (h->pgno != save.page->pgno) 139 mpool_put(t->bt_mp, h, dirty2); 140 mpool_put(t->bt_mp, save.page, dirty1); 141 return (RET_ERROR); 142 } 143 if (h->pgno == save.page->pgno) 144 dirty1 = MPOOL_DIRTY; 145 else 146 dirty2 = MPOOL_DIRTY; 147 deleted = 1; 148 } 149 } while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0); 150 151 /* 152 * Quit if didn't find a match, no next page, or first key on 153 * the next page doesn't match. Don't unpin the original page 154 * unless an error occurs. 155 */ 156 if (e->index < NEXTINDEX(h)) 157 break; 158 for (;;) { 159 if ((pg = h->nextpg) == P_INVALID) 160 goto done1; 161 if (h->pgno != save.page->pgno) 162 mpool_put(t->bt_mp, h, dirty2); 163 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) { 164 mpool_put(t->bt_mp, save.page, dirty1); 165 return (RET_ERROR); 166 } 167 if (NEXTINDEX(h) != 0) { 168 e->page = h; 169 e->index = 0; 170 break; 171 } 172 } 173 174 if (__bt_cmp(t, key, e) != 0) 175 break; 176 } 177 178 /* 179 * Reach here with the original page and the last page referenced 180 * pinned (they may be the same). Release it if not the original. 181 */ 182 done1: if (h->pgno != save.page->pgno) 183 mpool_put(t->bt_mp, h, dirty2); 184 185 /* 186 * Walk backwards from the record previous to the record returned by 187 * __bt_search, skipping empty pages, until a record doesn't match 188 * the key or reach the beginning of the tree. 189 */ 190 *e = save; 191 for (;;) { 192 if (e->index) 193 --e->index; 194 for (h = e->page; e->index; --e->index) { 195 if (__bt_cmp(t, key, e) != 0) 196 goto done2; 197 if (h->pgno == cpgno && e->index == cindex) { 198 if (NOTSET(t, BTF_DELCRSR)) { 199 SET(t, BTF_DELCRSR); 200 deleted = 1; 201 } 202 } else { 203 if (__bt_dleaf(t, h, e->index) == RET_ERROR) { 204 mpool_put(t->bt_mp, h, dirty1); 205 return (RET_ERROR); 206 } 207 if (h->pgno == save.page->pgno) 208 dirty1 = MPOOL_DIRTY; 209 deleted = 1; 210 } 211 } 212 213 if ((pg = h->prevpg) == P_INVALID) 214 goto done2; 215 mpool_put(t->bt_mp, h, dirty1); 216 dirty1 = 0; 217 if ((e->page = mpool_get(t->bt_mp, pg, 0)) == NULL) 218 return (RET_ERROR); 219 e->index = NEXTINDEX(h); 220 } 221 222 /* 223 * Reach here with the last page that was looked at pinned. Release 224 * it. 225 */ 226 done2: mpool_put(t->bt_mp, h, dirty1); 227 return (deleted ? RET_SUCCESS : RET_SPECIAL); 228 } 229 230 /* 231 * __BT_DLEAF -- Delete a single record from a leaf page. 232 * 233 * Parameters: 234 * t: tree 235 * index: index on current page to delete 236 * 237 * Returns: 238 * RET_SUCCESS, RET_ERROR. 239 */ 240 int 241 __bt_dleaf(t, h, index) 242 BTREE *t; 243 PAGE *h; 244 int index; 245 { 246 register BLEAF *bl; 247 register index_t *ip, offset; 248 register size_t nbytes; 249 register int cnt; 250 char *from; 251 void *to; 252 253 /* 254 * Delete a record from a btree leaf page. Internal records are never 255 * deleted from internal pages, regardless of the records that caused 256 * them to be added being deleted. Pages made empty by deletion are 257 * not reclaimed. They are, however, made available for reuse. 258 * 259 * Pack the remaining entries at the end of the page, shift the indices 260 * down, overwriting the deleted record and its index. If the record 261 * uses overflow pages, make them available for reuse. 262 */ 263 to = bl = GETBLEAF(h, index); 264 if (bl->flags & P_BIGKEY && __ovfl_delete(t, bl->bytes) == RET_ERROR) 265 return (RET_ERROR); 266 if (bl->flags & P_BIGDATA && 267 __ovfl_delete(t, bl->bytes + bl->ksize) == RET_ERROR) 268 return (RET_ERROR); 269 nbytes = NBLEAF(bl); 270 271 /* 272 * Compress the key/data pairs. Compress and adjust the [BR]LEAF 273 * offsets. Reset the headers. 274 */ 275 from = (char *)h + h->upper; 276 bcopy(from, from + nbytes, (char *)to - from); 277 h->upper += nbytes; 278 279 offset = h->linp[index]; 280 for (cnt = index, ip = &h->linp[0]; cnt--; ++ip) 281 if (ip[0] < offset) 282 ip[0] += nbytes; 283 for (cnt = NEXTINDEX(h) - index; --cnt; ++ip) 284 ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1]; 285 h->lower -= sizeof(index_t); 286 return (RET_SUCCESS); 287 } 288