1 /*- 2 * Copyright (c) 1990, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Mike Olson. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)bt_seq.c 8.7 (Berkeley) 7/20/94 33 * $DragonFly: src/lib/libc/db/btree/bt_seq.c,v 1.6 2005/09/19 09:20:37 asmodai Exp $ 34 */ 35 36 #include <sys/types.h> 37 38 #include <errno.h> 39 #include <stddef.h> 40 #include <stdio.h> 41 #include <stdlib.h> 42 43 #include <db.h> 44 #include "btree.h" 45 46 static int __bt_first (BTREE *, const DBT *, EPG *, int *); 47 static int __bt_seqadv (BTREE *, EPG *, int); 48 static int __bt_seqset (BTREE *, EPG *, DBT *, int); 49 50 /* 51 * Sequential scan support. 52 * 53 * The tree can be scanned sequentially, starting from either end of the 54 * tree or from any specific key. A scan request before any scanning is 55 * done is initialized as starting from the least node. 56 */ 57 58 /* 59 * __bt_seq -- 60 * Btree sequential scan interface. 61 * 62 * Parameters: 63 * dbp: pointer to access method 64 * key: key for positioning and return value 65 * data: data return value 66 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV. 67 * 68 * Returns: 69 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 70 */ 71 int 72 __bt_seq(dbp, key, data, flags) 73 const DB *dbp; 74 DBT *key, *data; 75 u_int flags; 76 { 77 BTREE *t; 78 EPG e; 79 int status; 80 81 t = dbp->internal; 82 83 /* Toss any page pinned across calls. */ 84 if (t->bt_pinned != NULL) { 85 mpool_put(t->bt_mp, t->bt_pinned, 0); 86 t->bt_pinned = NULL; 87 } 88 89 /* 90 * If scan unitialized as yet, or starting at a specific record, set 91 * the scan to a specific key. Both __bt_seqset and __bt_seqadv pin 92 * the page the cursor references if they're successful. 93 */ 94 switch (flags) { 95 case R_NEXT: 96 case R_PREV: 97 if (F_ISSET(&t->bt_cursor, CURS_INIT)) { 98 status = __bt_seqadv(t, &e, flags); 99 break; 100 } 101 /* FALLTHROUGH */ 102 case R_FIRST: 103 case R_LAST: 104 case R_CURSOR: 105 status = __bt_seqset(t, &e, key, flags); 106 break; 107 default: 108 errno = EINVAL; 109 return (RET_ERROR); 110 } 111 112 if (status == RET_SUCCESS) { 113 __bt_setcur(t, e.page->pgno, e.index); 114 115 status = 116 __bt_ret(t, &e, key, &t->bt_rkey, data, &t->bt_rdata, 0); 117 118 /* 119 * If the user is doing concurrent access, we copied the 120 * key/data, toss the page. 121 */ 122 if (F_ISSET(t, B_DB_LOCK)) 123 mpool_put(t->bt_mp, e.page, 0); 124 else 125 t->bt_pinned = e.page; 126 } 127 return (status); 128 } 129 130 /* 131 * __bt_seqset -- 132 * Set the sequential scan to a specific key. 133 * 134 * Parameters: 135 * t: tree 136 * ep: storage for returned key 137 * key: key for initial scan position 138 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV 139 * 140 * Side effects: 141 * Pins the page the cursor references. 142 * 143 * Returns: 144 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 145 */ 146 static int 147 __bt_seqset(t, ep, key, flags) 148 BTREE *t; 149 EPG *ep; 150 DBT *key; 151 int flags; 152 { 153 PAGE *h; 154 pgno_t pg; 155 int exact; 156 157 /* 158 * Find the first, last or specific key in the tree and point the 159 * cursor at it. The cursor may not be moved until a new key has 160 * been found. 161 */ 162 switch (flags) { 163 case R_CURSOR: /* Keyed scan. */ 164 /* 165 * Find the first instance of the key or the smallest key 166 * which is greater than or equal to the specified key. 167 */ 168 if (key->data == NULL || key->size == 0) { 169 errno = EINVAL; 170 return (RET_ERROR); 171 } 172 return (__bt_first(t, key, ep, &exact)); 173 case R_FIRST: /* First record. */ 174 case R_NEXT: 175 /* Walk down the left-hand side of the tree. */ 176 for (pg = P_ROOT;;) { 177 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 178 return (RET_ERROR); 179 180 /* Check for an empty tree. */ 181 if (NEXTINDEX(h) == 0) { 182 mpool_put(t->bt_mp, h, 0); 183 return (RET_SPECIAL); 184 } 185 186 if (h->flags & (P_BLEAF | P_RLEAF)) 187 break; 188 pg = GETBINTERNAL(h, 0)->pgno; 189 mpool_put(t->bt_mp, h, 0); 190 } 191 ep->page = h; 192 ep->index = 0; 193 break; 194 case R_LAST: /* Last record. */ 195 case R_PREV: 196 /* Walk down the right-hand side of the tree. */ 197 for (pg = P_ROOT;;) { 198 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 199 return (RET_ERROR); 200 201 /* Check for an empty tree. */ 202 if (NEXTINDEX(h) == 0) { 203 mpool_put(t->bt_mp, h, 0); 204 return (RET_SPECIAL); 205 } 206 207 if (h->flags & (P_BLEAF | P_RLEAF)) 208 break; 209 pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno; 210 mpool_put(t->bt_mp, h, 0); 211 } 212 213 ep->page = h; 214 ep->index = NEXTINDEX(h) - 1; 215 break; 216 } 217 return (RET_SUCCESS); 218 } 219 220 /* 221 * __bt_seqadvance -- 222 * Advance the sequential scan. 223 * 224 * Parameters: 225 * t: tree 226 * flags: R_NEXT, R_PREV 227 * 228 * Side effects: 229 * Pins the page the new key/data record is on. 230 * 231 * Returns: 232 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 233 */ 234 static int 235 __bt_seqadv(t, ep, flags) 236 BTREE *t; 237 EPG *ep; 238 int flags; 239 { 240 CURSOR *c; 241 PAGE *h; 242 indx_t idx; 243 pgno_t pg; 244 int exact; 245 246 /* 247 * There are a couple of states that we can be in. The cursor has 248 * been initialized by the time we get here, but that's all we know. 249 */ 250 c = &t->bt_cursor; 251 252 /* 253 * The cursor was deleted where there weren't any duplicate records, 254 * so the key was saved. Find out where that key would go in the 255 * current tree. It doesn't matter if the returned key is an exact 256 * match or not -- if it's an exact match, the record was added after 257 * the delete so we can just return it. If not, as long as there's 258 * a record there, return it. 259 */ 260 if (F_ISSET(c, CURS_ACQUIRE)) 261 return (__bt_first(t, &c->key, ep, &exact)); 262 263 /* Get the page referenced by the cursor. */ 264 if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL) 265 return (RET_ERROR); 266 267 /* 268 * Find the next/previous record in the tree and point the cursor at 269 * it. The cursor may not be moved until a new key has been found. 270 */ 271 switch (flags) { 272 case R_NEXT: /* Next record. */ 273 /* 274 * The cursor was deleted in duplicate records, and moved 275 * forward to a record that has yet to be returned. Clear 276 * that flag, and return the record. 277 */ 278 if (F_ISSET(c, CURS_AFTER)) 279 goto usecurrent; 280 idx = c->pg.index; 281 if (++idx == NEXTINDEX(h)) { 282 pg = h->nextpg; 283 mpool_put(t->bt_mp, h, 0); 284 if (pg == P_INVALID) 285 return (RET_SPECIAL); 286 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 287 return (RET_ERROR); 288 idx = 0; 289 } 290 break; 291 case R_PREV: /* Previous record. */ 292 /* 293 * The cursor was deleted in duplicate records, and moved 294 * backward to a record that has yet to be returned. Clear 295 * that flag, and return the record. 296 */ 297 if (F_ISSET(c, CURS_BEFORE)) { 298 usecurrent: F_CLR(c, CURS_AFTER | CURS_BEFORE); 299 ep->page = h; 300 ep->index = c->pg.index; 301 return (RET_SUCCESS); 302 } 303 idx = c->pg.index; 304 if (idx == 0) { 305 pg = h->prevpg; 306 mpool_put(t->bt_mp, h, 0); 307 if (pg == P_INVALID) 308 return (RET_SPECIAL); 309 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 310 return (RET_ERROR); 311 idx = NEXTINDEX(h) - 1; 312 } else 313 --idx; 314 break; 315 default: 316 return (RET_ERROR); 317 } 318 319 ep->page = h; 320 ep->index = idx; 321 return (RET_SUCCESS); 322 } 323 324 /* 325 * __bt_first -- 326 * Find the first entry. 327 * 328 * Parameters: 329 * t: the tree 330 * key: the key 331 * erval: return EPG 332 * exactp: pointer to exact match flag 333 * 334 * Returns: 335 * The first entry in the tree greater than or equal to key, 336 * or RET_SPECIAL if no such key exists. 337 */ 338 static int 339 __bt_first(t, key, erval, exactp) 340 BTREE *t; 341 const DBT *key; 342 EPG *erval; 343 int *exactp; 344 { 345 PAGE *h; 346 EPG *ep, save; 347 pgno_t pg; 348 349 /* 350 * Find any matching record; __bt_search pins the page. 351 * 352 * If it's an exact match and duplicates are possible, walk backwards 353 * in the tree until we find the first one. Otherwise, make sure it's 354 * a valid key (__bt_search may return an index just past the end of a 355 * page) and return it. 356 */ 357 if ((ep = __bt_search(t, key, exactp)) == NULL) 358 return (0); 359 if (*exactp) { 360 if (F_ISSET(t, B_NODUPS)) { 361 *erval = *ep; 362 return (RET_SUCCESS); 363 } 364 365 /* 366 * Walk backwards, as long as the entry matches and there are 367 * keys left in the tree. Save a copy of each match in case 368 * we go too far. 369 */ 370 save = *ep; 371 h = ep->page; 372 do { 373 if (save.page->pgno != ep->page->pgno) { 374 mpool_put(t->bt_mp, save.page, 0); 375 save = *ep; 376 } else 377 save.index = ep->index; 378 379 /* 380 * Don't unpin the page the last (or original) match 381 * was on, but make sure it's unpinned if an error 382 * occurs. 383 */ 384 if (ep->index == 0) { 385 if (h->prevpg == P_INVALID) 386 break; 387 if (h->pgno != save.page->pgno) 388 mpool_put(t->bt_mp, h, 0); 389 if ((h = mpool_get(t->bt_mp, 390 h->prevpg, 0)) == NULL) { 391 if (h->pgno == save.page->pgno) 392 mpool_put(t->bt_mp, 393 save.page, 0); 394 return (RET_ERROR); 395 } 396 ep->page = h; 397 ep->index = NEXTINDEX(h); 398 } 399 --ep->index; 400 } while (__bt_cmp(t, key, ep) == 0); 401 402 /* 403 * Reach here with the last page that was looked at pinned, 404 * which may or may not be the same as the last (or original) 405 * match page. If it's not useful, release it. 406 */ 407 if (h->pgno != save.page->pgno) 408 mpool_put(t->bt_mp, h, 0); 409 410 *erval = save; 411 return (RET_SUCCESS); 412 } 413 414 /* If at the end of a page, find the next entry. */ 415 if (ep->index == NEXTINDEX(ep->page)) { 416 h = ep->page; 417 pg = h->nextpg; 418 mpool_put(t->bt_mp, h, 0); 419 if (pg == P_INVALID) 420 return (RET_SPECIAL); 421 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 422 return (RET_ERROR); 423 ep->index = 0; 424 ep->page = h; 425 } 426 *erval = *ep; 427 return (RET_SUCCESS); 428 } 429 430 /* 431 * __bt_setcur -- 432 * Set the cursor to an entry in the tree. 433 * 434 * Parameters: 435 * t: the tree 436 * pgno: page number 437 * index: page index 438 */ 439 void 440 __bt_setcur(t, pgno, index) 441 BTREE *t; 442 pgno_t pgno; 443 u_int index; 444 { 445 /* Lose any already deleted key. */ 446 if (t->bt_cursor.key.data != NULL) { 447 free(t->bt_cursor.key.data); 448 t->bt_cursor.key.size = 0; 449 t->bt_cursor.key.data = NULL; 450 } 451 F_CLR(&t->bt_cursor, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE); 452 453 /* Update the cursor. */ 454 t->bt_cursor.pg.pgno = pgno; 455 t->bt_cursor.pg.index = index; 456 F_SET(&t->bt_cursor, CURS_INIT); 457 } 458