1 /*- 2 * Copyright (c) 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Mike Olson. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #if defined(LIBC_SCCS) && !defined(lint) 12 static char sccsid[] = "@(#)bt_seq.c 8.2 (Berkeley) 09/07/93"; 13 #endif /* LIBC_SCCS and not lint */ 14 15 #include <sys/types.h> 16 17 #include <errno.h> 18 #include <stddef.h> 19 #include <stdio.h> 20 #include <stdlib.h> 21 22 #include <db.h> 23 #include "btree.h" 24 25 static int bt_seqadv __P((BTREE *, EPG *, int)); 26 static int bt_seqset __P((BTREE *, EPG *, DBT *, int)); 27 28 /* 29 * Sequential scan support. 30 * 31 * The tree can be scanned sequentially, starting from either end of the tree 32 * or from any specific key. A scan request before any scanning is done is 33 * initialized as starting from the least node. 34 * 35 * Each tree has an EPGNO which has the current position of the cursor. The 36 * cursor has to survive deletions/insertions in the tree without losing its 37 * position. This is done by noting deletions without doing them, and then 38 * doing them when the cursor moves (or the tree is closed). 39 */ 40 41 /* 42 * __BT_SEQ -- Btree sequential scan interface. 43 * 44 * Parameters: 45 * dbp: pointer to access method 46 * key: key for positioning and return value 47 * data: data return value 48 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV. 49 * 50 * Returns: 51 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 52 */ 53 int 54 __bt_seq(dbp, key, data, flags) 55 const DB *dbp; 56 DBT *key, *data; 57 u_int flags; 58 { 59 BTREE *t; 60 EPG e; 61 int status; 62 63 t = dbp->internal; 64 65 /* Toss any page pinned across calls. */ 66 if (t->bt_pinned != NULL) { 67 mpool_put(t->bt_mp, t->bt_pinned, 0); 68 t->bt_pinned = NULL; 69 } 70 71 /* 72 * If scan unitialized as yet, or starting at a specific record, set 73 * the scan to a specific key. Both bt_seqset and bt_seqadv pin the 74 * page the cursor references if they're successful. 75 */ 76 switch(flags) { 77 case R_NEXT: 78 case R_PREV: 79 if (ISSET(t, B_SEQINIT)) { 80 status = bt_seqadv(t, &e, flags); 81 break; 82 } 83 /* FALLTHROUGH */ 84 case R_CURSOR: 85 case R_FIRST: 86 case R_LAST: 87 status = bt_seqset(t, &e, key, flags); 88 break; 89 default: 90 errno = EINVAL; 91 return (RET_ERROR); 92 } 93 94 if (status == RET_SUCCESS) { 95 status = __bt_ret(t, &e, key, data); 96 97 /* Update the actual cursor. */ 98 t->bt_bcursor.pgno = e.page->pgno; 99 t->bt_bcursor.index = e.index; 100 101 /* 102 * If the user is doing concurrent access, we copied the 103 * key/data, toss the page. 104 */ 105 if (ISSET(t, B_DB_LOCK)) 106 mpool_put(t->bt_mp, e.page, 0); 107 else 108 t->bt_pinned = e.page; 109 SET(t, B_SEQINIT); 110 } 111 return (status); 112 } 113 114 /* 115 * BT_SEQSET -- Set the sequential scan to a specific key. 116 * 117 * Parameters: 118 * t: tree 119 * ep: storage for returned key 120 * key: key for initial scan position 121 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV 122 * 123 * Side effects: 124 * Pins the page the cursor references. 125 * 126 * Returns: 127 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 128 */ 129 static int 130 bt_seqset(t, ep, key, flags) 131 BTREE *t; 132 EPG *ep; 133 DBT *key; 134 int flags; 135 { 136 EPG *e; 137 PAGE *h; 138 pgno_t pg; 139 int exact; 140 141 /* 142 * Delete any already deleted record that we've been saving because 143 * the cursor pointed to it. Since going to a specific key, should 144 * delete any logically deleted records so they aren't found. 145 */ 146 if (ISSET(t, B_DELCRSR) && __bt_crsrdel(t, &t->bt_bcursor)) 147 return (RET_ERROR); 148 149 /* 150 * Find the first, last or specific key in the tree and point the cursor 151 * at it. The cursor may not be moved until a new key has been found. 152 */ 153 switch(flags) { 154 case R_CURSOR: /* Keyed scan. */ 155 /* 156 * Find the first instance of the key or the smallest key which 157 * is greater than or equal to the specified key. If run out 158 * of keys, return RET_SPECIAL. 159 */ 160 if (key->data == NULL || key->size == 0) { 161 errno = EINVAL; 162 return (RET_ERROR); 163 } 164 e = __bt_first(t, key, &exact); /* Returns pinned page. */ 165 if (e == NULL) 166 return (RET_ERROR); 167 /* 168 * If at the end of a page, skip any empty pages and find the 169 * next entry. 170 */ 171 if (e->index == NEXTINDEX(e->page)) { 172 h = e->page; 173 do { 174 pg = h->nextpg; 175 mpool_put(t->bt_mp, h, 0); 176 if (pg == P_INVALID) 177 return (RET_SPECIAL); 178 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 179 return (RET_ERROR); 180 } while (NEXTINDEX(h) == 0); 181 e->index = 0; 182 e->page = h; 183 } 184 *ep = *e; 185 break; 186 case R_FIRST: /* First record. */ 187 case R_NEXT: 188 /* Walk down the left-hand side of the tree. */ 189 for (pg = P_ROOT;;) { 190 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 191 return (RET_ERROR); 192 if (h->flags & (P_BLEAF | P_RLEAF)) 193 break; 194 pg = GETBINTERNAL(h, 0)->pgno; 195 mpool_put(t->bt_mp, h, 0); 196 } 197 198 /* Skip any empty pages. */ 199 while (NEXTINDEX(h) == 0 && h->nextpg != P_INVALID) { 200 pg = h->nextpg; 201 mpool_put(t->bt_mp, h, 0); 202 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 203 return (RET_ERROR); 204 } 205 206 if (NEXTINDEX(h) == 0) { 207 mpool_put(t->bt_mp, h, 0); 208 return (RET_SPECIAL); 209 } 210 211 ep->page = h; 212 ep->index = 0; 213 break; 214 case R_LAST: /* Last record. */ 215 case R_PREV: 216 /* Walk down the right-hand side of the tree. */ 217 for (pg = P_ROOT;;) { 218 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 219 return (RET_ERROR); 220 if (h->flags & (P_BLEAF | P_RLEAF)) 221 break; 222 pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno; 223 mpool_put(t->bt_mp, h, 0); 224 } 225 226 /* Skip any empty pages. */ 227 while (NEXTINDEX(h) == 0 && h->prevpg != P_INVALID) { 228 pg = h->prevpg; 229 mpool_put(t->bt_mp, h, 0); 230 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 231 return (RET_ERROR); 232 } 233 234 if (NEXTINDEX(h) == 0) { 235 mpool_put(t->bt_mp, h, 0); 236 return (RET_SPECIAL); 237 } 238 239 ep->page = h; 240 ep->index = NEXTINDEX(h) - 1; 241 break; 242 } 243 return (RET_SUCCESS); 244 } 245 246 /* 247 * BT_SEQADVANCE -- Advance the sequential scan. 248 * 249 * Parameters: 250 * t: tree 251 * flags: R_NEXT, R_PREV 252 * 253 * Side effects: 254 * Pins the page the new key/data record is on. 255 * 256 * Returns: 257 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 258 */ 259 static int 260 bt_seqadv(t, e, flags) 261 BTREE *t; 262 EPG *e; 263 int flags; 264 { 265 EPGNO *c, delc; 266 PAGE *h; 267 indx_t index; 268 pgno_t pg; 269 270 /* Save the current cursor if going to delete it. */ 271 c = &t->bt_bcursor; 272 if (ISSET(t, B_DELCRSR)) 273 delc = *c; 274 275 if ((h = mpool_get(t->bt_mp, c->pgno, 0)) == NULL) 276 return (RET_ERROR); 277 278 /* 279 * Find the next/previous record in the tree and point the cursor at it. 280 * The cursor may not be moved until a new key has been found. 281 */ 282 index = c->index; 283 switch(flags) { 284 case R_NEXT: /* Next record. */ 285 if (++index == NEXTINDEX(h)) { 286 do { 287 pg = h->nextpg; 288 mpool_put(t->bt_mp, h, 0); 289 if (pg == P_INVALID) 290 return (RET_SPECIAL); 291 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 292 return (RET_ERROR); 293 } while (NEXTINDEX(h) == 0); 294 index = 0; 295 } 296 break; 297 case R_PREV: /* Previous record. */ 298 if (index-- == 0) { 299 do { 300 pg = h->prevpg; 301 mpool_put(t->bt_mp, h, 0); 302 if (pg == P_INVALID) 303 return (RET_SPECIAL); 304 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 305 return (RET_ERROR); 306 } while (NEXTINDEX(h) == 0); 307 index = NEXTINDEX(h) - 1; 308 } 309 break; 310 } 311 312 e->page = h; 313 e->index = index; 314 315 /* 316 * Delete any already deleted record that we've been saving because the 317 * cursor pointed to it. This could cause the new index to be shifted 318 * down by one if the record we're deleting is on the same page and has 319 * a larger index. 320 */ 321 if (ISSET(t, B_DELCRSR)) { 322 CLR(t, B_DELCRSR); /* Don't try twice. */ 323 if (c->pgno == delc.pgno && c->index > delc.index) 324 --c->index; 325 if (__bt_crsrdel(t, &delc)) 326 return (RET_ERROR); 327 } 328 return (RET_SUCCESS); 329 } 330 331 /* 332 * __BT_CRSRDEL -- Delete the record referenced by the cursor. 333 * 334 * Parameters: 335 * t: tree 336 * 337 * Returns: 338 * RET_ERROR, RET_SUCCESS 339 */ 340 int 341 __bt_crsrdel(t, c) 342 BTREE *t; 343 EPGNO *c; 344 { 345 PAGE *h; 346 int status; 347 348 CLR(t, B_DELCRSR); /* Don't try twice. */ 349 if ((h = mpool_get(t->bt_mp, c->pgno, 0)) == NULL) 350 return (RET_ERROR); 351 status = __bt_dleaf(t, h, c->index); 352 mpool_put(t->bt_mp, h, MPOOL_DIRTY); 353 return (status); 354 } 355