1 /*- 2 * Copyright (c) 1990, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Mike Olson. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)bt_seq.c 8.7 (Berkeley) 7/20/94 37 * $DragonFly: src/lib/libc/db/btree/bt_seq.c,v 1.5 2005/03/16 06:27:17 joerg Exp $ 38 */ 39 40 #include <sys/types.h> 41 42 #include <errno.h> 43 #include <stddef.h> 44 #include <stdio.h> 45 #include <stdlib.h> 46 47 #include <db.h> 48 #include "btree.h" 49 50 static int __bt_first (BTREE *, const DBT *, EPG *, int *); 51 static int __bt_seqadv (BTREE *, EPG *, int); 52 static int __bt_seqset (BTREE *, EPG *, DBT *, int); 53 54 /* 55 * Sequential scan support. 56 * 57 * The tree can be scanned sequentially, starting from either end of the 58 * tree or from any specific key. A scan request before any scanning is 59 * done is initialized as starting from the least node. 60 */ 61 62 /* 63 * __bt_seq -- 64 * Btree sequential scan interface. 65 * 66 * Parameters: 67 * dbp: pointer to access method 68 * key: key for positioning and return value 69 * data: data return value 70 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV. 71 * 72 * Returns: 73 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 74 */ 75 int 76 __bt_seq(dbp, key, data, flags) 77 const DB *dbp; 78 DBT *key, *data; 79 u_int flags; 80 { 81 BTREE *t; 82 EPG e; 83 int status; 84 85 t = dbp->internal; 86 87 /* Toss any page pinned across calls. */ 88 if (t->bt_pinned != NULL) { 89 mpool_put(t->bt_mp, t->bt_pinned, 0); 90 t->bt_pinned = NULL; 91 } 92 93 /* 94 * If scan unitialized as yet, or starting at a specific record, set 95 * the scan to a specific key. Both __bt_seqset and __bt_seqadv pin 96 * the page the cursor references if they're successful. 97 */ 98 switch (flags) { 99 case R_NEXT: 100 case R_PREV: 101 if (F_ISSET(&t->bt_cursor, CURS_INIT)) { 102 status = __bt_seqadv(t, &e, flags); 103 break; 104 } 105 /* FALLTHROUGH */ 106 case R_FIRST: 107 case R_LAST: 108 case R_CURSOR: 109 status = __bt_seqset(t, &e, key, flags); 110 break; 111 default: 112 errno = EINVAL; 113 return (RET_ERROR); 114 } 115 116 if (status == RET_SUCCESS) { 117 __bt_setcur(t, e.page->pgno, e.index); 118 119 status = 120 __bt_ret(t, &e, key, &t->bt_rkey, data, &t->bt_rdata, 0); 121 122 /* 123 * If the user is doing concurrent access, we copied the 124 * key/data, toss the page. 125 */ 126 if (F_ISSET(t, B_DB_LOCK)) 127 mpool_put(t->bt_mp, e.page, 0); 128 else 129 t->bt_pinned = e.page; 130 } 131 return (status); 132 } 133 134 /* 135 * __bt_seqset -- 136 * Set the sequential scan to a specific key. 137 * 138 * Parameters: 139 * t: tree 140 * ep: storage for returned key 141 * key: key for initial scan position 142 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV 143 * 144 * Side effects: 145 * Pins the page the cursor references. 146 * 147 * Returns: 148 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 149 */ 150 static int 151 __bt_seqset(t, ep, key, flags) 152 BTREE *t; 153 EPG *ep; 154 DBT *key; 155 int flags; 156 { 157 PAGE *h; 158 pgno_t pg; 159 int exact; 160 161 /* 162 * Find the first, last or specific key in the tree and point the 163 * cursor at it. The cursor may not be moved until a new key has 164 * been found. 165 */ 166 switch (flags) { 167 case R_CURSOR: /* Keyed scan. */ 168 /* 169 * Find the first instance of the key or the smallest key 170 * which is greater than or equal to the specified key. 171 */ 172 if (key->data == NULL || key->size == 0) { 173 errno = EINVAL; 174 return (RET_ERROR); 175 } 176 return (__bt_first(t, key, ep, &exact)); 177 case R_FIRST: /* First record. */ 178 case R_NEXT: 179 /* Walk down the left-hand side of the tree. */ 180 for (pg = P_ROOT;;) { 181 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 182 return (RET_ERROR); 183 184 /* Check for an empty tree. */ 185 if (NEXTINDEX(h) == 0) { 186 mpool_put(t->bt_mp, h, 0); 187 return (RET_SPECIAL); 188 } 189 190 if (h->flags & (P_BLEAF | P_RLEAF)) 191 break; 192 pg = GETBINTERNAL(h, 0)->pgno; 193 mpool_put(t->bt_mp, h, 0); 194 } 195 ep->page = h; 196 ep->index = 0; 197 break; 198 case R_LAST: /* Last record. */ 199 case R_PREV: 200 /* Walk down the right-hand side of the tree. */ 201 for (pg = P_ROOT;;) { 202 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 203 return (RET_ERROR); 204 205 /* Check for an empty tree. */ 206 if (NEXTINDEX(h) == 0) { 207 mpool_put(t->bt_mp, h, 0); 208 return (RET_SPECIAL); 209 } 210 211 if (h->flags & (P_BLEAF | P_RLEAF)) 212 break; 213 pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno; 214 mpool_put(t->bt_mp, h, 0); 215 } 216 217 ep->page = h; 218 ep->index = NEXTINDEX(h) - 1; 219 break; 220 } 221 return (RET_SUCCESS); 222 } 223 224 /* 225 * __bt_seqadvance -- 226 * Advance the sequential scan. 227 * 228 * Parameters: 229 * t: tree 230 * flags: R_NEXT, R_PREV 231 * 232 * Side effects: 233 * Pins the page the new key/data record is on. 234 * 235 * Returns: 236 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 237 */ 238 static int 239 __bt_seqadv(t, ep, flags) 240 BTREE *t; 241 EPG *ep; 242 int flags; 243 { 244 CURSOR *c; 245 PAGE *h; 246 indx_t idx; 247 pgno_t pg; 248 int exact; 249 250 /* 251 * There are a couple of states that we can be in. The cursor has 252 * been initialized by the time we get here, but that's all we know. 253 */ 254 c = &t->bt_cursor; 255 256 /* 257 * The cursor was deleted where there weren't any duplicate records, 258 * so the key was saved. Find out where that key would go in the 259 * current tree. It doesn't matter if the returned key is an exact 260 * match or not -- if it's an exact match, the record was added after 261 * the delete so we can just return it. If not, as long as there's 262 * a record there, return it. 263 */ 264 if (F_ISSET(c, CURS_ACQUIRE)) 265 return (__bt_first(t, &c->key, ep, &exact)); 266 267 /* Get the page referenced by the cursor. */ 268 if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL) 269 return (RET_ERROR); 270 271 /* 272 * Find the next/previous record in the tree and point the cursor at 273 * it. The cursor may not be moved until a new key has been found. 274 */ 275 switch (flags) { 276 case R_NEXT: /* Next record. */ 277 /* 278 * The cursor was deleted in duplicate records, and moved 279 * forward to a record that has yet to be returned. Clear 280 * that flag, and return the record. 281 */ 282 if (F_ISSET(c, CURS_AFTER)) 283 goto usecurrent; 284 idx = c->pg.index; 285 if (++idx == NEXTINDEX(h)) { 286 pg = h->nextpg; 287 mpool_put(t->bt_mp, h, 0); 288 if (pg == P_INVALID) 289 return (RET_SPECIAL); 290 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 291 return (RET_ERROR); 292 idx = 0; 293 } 294 break; 295 case R_PREV: /* Previous record. */ 296 /* 297 * The cursor was deleted in duplicate records, and moved 298 * backward to a record that has yet to be returned. Clear 299 * that flag, and return the record. 300 */ 301 if (F_ISSET(c, CURS_BEFORE)) { 302 usecurrent: F_CLR(c, CURS_AFTER | CURS_BEFORE); 303 ep->page = h; 304 ep->index = c->pg.index; 305 return (RET_SUCCESS); 306 } 307 idx = c->pg.index; 308 if (idx == 0) { 309 pg = h->prevpg; 310 mpool_put(t->bt_mp, h, 0); 311 if (pg == P_INVALID) 312 return (RET_SPECIAL); 313 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 314 return (RET_ERROR); 315 idx = NEXTINDEX(h) - 1; 316 } else 317 --idx; 318 break; 319 default: 320 return (RET_ERROR); 321 } 322 323 ep->page = h; 324 ep->index = idx; 325 return (RET_SUCCESS); 326 } 327 328 /* 329 * __bt_first -- 330 * Find the first entry. 331 * 332 * Parameters: 333 * t: the tree 334 * key: the key 335 * erval: return EPG 336 * exactp: pointer to exact match flag 337 * 338 * Returns: 339 * The first entry in the tree greater than or equal to key, 340 * or RET_SPECIAL if no such key exists. 341 */ 342 static int 343 __bt_first(t, key, erval, exactp) 344 BTREE *t; 345 const DBT *key; 346 EPG *erval; 347 int *exactp; 348 { 349 PAGE *h; 350 EPG *ep, save; 351 pgno_t pg; 352 353 /* 354 * Find any matching record; __bt_search pins the page. 355 * 356 * If it's an exact match and duplicates are possible, walk backwards 357 * in the tree until we find the first one. Otherwise, make sure it's 358 * a valid key (__bt_search may return an index just past the end of a 359 * page) and return it. 360 */ 361 if ((ep = __bt_search(t, key, exactp)) == NULL) 362 return (0); 363 if (*exactp) { 364 if (F_ISSET(t, B_NODUPS)) { 365 *erval = *ep; 366 return (RET_SUCCESS); 367 } 368 369 /* 370 * Walk backwards, as long as the entry matches and there are 371 * keys left in the tree. Save a copy of each match in case 372 * we go too far. 373 */ 374 save = *ep; 375 h = ep->page; 376 do { 377 if (save.page->pgno != ep->page->pgno) { 378 mpool_put(t->bt_mp, save.page, 0); 379 save = *ep; 380 } else 381 save.index = ep->index; 382 383 /* 384 * Don't unpin the page the last (or original) match 385 * was on, but make sure it's unpinned if an error 386 * occurs. 387 */ 388 if (ep->index == 0) { 389 if (h->prevpg == P_INVALID) 390 break; 391 if (h->pgno != save.page->pgno) 392 mpool_put(t->bt_mp, h, 0); 393 if ((h = mpool_get(t->bt_mp, 394 h->prevpg, 0)) == NULL) { 395 if (h->pgno == save.page->pgno) 396 mpool_put(t->bt_mp, 397 save.page, 0); 398 return (RET_ERROR); 399 } 400 ep->page = h; 401 ep->index = NEXTINDEX(h); 402 } 403 --ep->index; 404 } while (__bt_cmp(t, key, ep) == 0); 405 406 /* 407 * Reach here with the last page that was looked at pinned, 408 * which may or may not be the same as the last (or original) 409 * match page. If it's not useful, release it. 410 */ 411 if (h->pgno != save.page->pgno) 412 mpool_put(t->bt_mp, h, 0); 413 414 *erval = save; 415 return (RET_SUCCESS); 416 } 417 418 /* If at the end of a page, find the next entry. */ 419 if (ep->index == NEXTINDEX(ep->page)) { 420 h = ep->page; 421 pg = h->nextpg; 422 mpool_put(t->bt_mp, h, 0); 423 if (pg == P_INVALID) 424 return (RET_SPECIAL); 425 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 426 return (RET_ERROR); 427 ep->index = 0; 428 ep->page = h; 429 } 430 *erval = *ep; 431 return (RET_SUCCESS); 432 } 433 434 /* 435 * __bt_setcur -- 436 * Set the cursor to an entry in the tree. 437 * 438 * Parameters: 439 * t: the tree 440 * pgno: page number 441 * index: page index 442 */ 443 void 444 __bt_setcur(t, pgno, index) 445 BTREE *t; 446 pgno_t pgno; 447 u_int index; 448 { 449 /* Lose any already deleted key. */ 450 if (t->bt_cursor.key.data != NULL) { 451 free(t->bt_cursor.key.data); 452 t->bt_cursor.key.size = 0; 453 t->bt_cursor.key.data = NULL; 454 } 455 F_CLR(&t->bt_cursor, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE); 456 457 /* Update the cursor. */ 458 t->bt_cursor.pg.pgno = pgno; 459 t->bt_cursor.pg.index = index; 460 F_SET(&t->bt_cursor, CURS_INIT); 461 } 462