1 /*- 2 * Copyright (c) 1990, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Mike Olson. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)bt_seq.c 8.7 (Berkeley) 7/20/94 33 * $FreeBSD: head/lib/libc/db/btree/bt_seq.c 189327 2009-03-04 00:58:04Z delphij $ 34 */ 35 36 #include <sys/types.h> 37 38 #include <errno.h> 39 #include <stddef.h> 40 #include <stdio.h> 41 #include <stdlib.h> 42 43 #include <db.h> 44 #include "btree.h" 45 46 static int __bt_first(BTREE *, const DBT *, EPG *, int *); 47 static int __bt_seqadv(BTREE *, EPG *, int); 48 static int __bt_seqset(BTREE *, EPG *, DBT *, int); 49 50 /* 51 * Sequential scan support. 52 * 53 * The tree can be scanned sequentially, starting from either end of the 54 * tree or from any specific key. A scan request before any scanning is 55 * done is initialized as starting from the least node. 56 */ 57 58 /* 59 * __bt_seq -- 60 * Btree sequential scan interface. 61 * 62 * Parameters: 63 * dbp: pointer to access method 64 * key: key for positioning and return value 65 * data: data return value 66 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV. 67 * 68 * Returns: 69 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 70 */ 71 int 72 __bt_seq(const DB *dbp, DBT *key, DBT *data, unsigned int flags) 73 { 74 BTREE *t; 75 EPG e; 76 int status; 77 78 t = dbp->internal; 79 80 /* Toss any page pinned across calls. */ 81 if (t->bt_pinned != NULL) { 82 mpool_put(t->bt_mp, t->bt_pinned, 0); 83 t->bt_pinned = NULL; 84 } 85 86 /* 87 * If scan uninitialized as yet, or starting at a specific record, set 88 * the scan to a specific key. Both __bt_seqset and __bt_seqadv pin 89 * the page the cursor references if they're successful. 90 */ 91 switch (flags) { 92 case R_NEXT: 93 case R_PREV: 94 if (F_ISSET(&t->bt_cursor, CURS_INIT)) { 95 status = __bt_seqadv(t, &e, flags); 96 break; 97 } 98 /* FALLTHROUGH */ 99 case R_FIRST: 100 case R_LAST: 101 case R_CURSOR: 102 status = __bt_seqset(t, &e, key, flags); 103 break; 104 default: 105 errno = EINVAL; 106 return (RET_ERROR); 107 } 108 109 if (status == RET_SUCCESS) { 110 __bt_setcur(t, e.page->pgno, e.index); 111 112 status = 113 __bt_ret(t, &e, key, &t->bt_rkey, data, &t->bt_rdata, 0); 114 115 /* 116 * If the user is doing concurrent access, we copied the 117 * key/data, toss the page. 118 */ 119 if (F_ISSET(t, B_DB_LOCK)) 120 mpool_put(t->bt_mp, e.page, 0); 121 else 122 t->bt_pinned = e.page; 123 } 124 return (status); 125 } 126 127 /* 128 * __bt_seqset -- 129 * Set the sequential scan to a specific key. 130 * 131 * Parameters: 132 * t: tree 133 * ep: storage for returned key 134 * key: key for initial scan position 135 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV 136 * 137 * Side effects: 138 * Pins the page the cursor references. 139 * 140 * Returns: 141 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 142 */ 143 static int 144 __bt_seqset(BTREE *t, EPG *ep, DBT *key, int flags) 145 { 146 PAGE *h; 147 pgno_t pg; 148 int exact; 149 150 /* 151 * Find the first, last or specific key in the tree and point the 152 * cursor at it. The cursor may not be moved until a new key has 153 * been found. 154 */ 155 switch (flags) { 156 case R_CURSOR: /* Keyed scan. */ 157 /* 158 * Find the first instance of the key or the smallest key 159 * which is greater than or equal to the specified key. 160 */ 161 if (key->data == NULL || key->size == 0) { 162 errno = EINVAL; 163 return (RET_ERROR); 164 } 165 return (__bt_first(t, key, ep, &exact)); 166 case R_FIRST: /* First record. */ 167 case R_NEXT: 168 /* Walk down the left-hand side of the tree. */ 169 for (pg = P_ROOT;;) { 170 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 171 return (RET_ERROR); 172 173 /* Check for an empty tree. */ 174 if (NEXTINDEX(h) == 0) { 175 mpool_put(t->bt_mp, h, 0); 176 return (RET_SPECIAL); 177 } 178 179 if (h->flags & (P_BLEAF | P_RLEAF)) 180 break; 181 pg = GETBINTERNAL(h, 0)->pgno; 182 mpool_put(t->bt_mp, h, 0); 183 } 184 ep->page = h; 185 ep->index = 0; 186 break; 187 case R_LAST: /* Last record. */ 188 case R_PREV: 189 /* Walk down the right-hand side of the tree. */ 190 for (pg = P_ROOT;;) { 191 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 192 return (RET_ERROR); 193 194 /* Check for an empty tree. */ 195 if (NEXTINDEX(h) == 0) { 196 mpool_put(t->bt_mp, h, 0); 197 return (RET_SPECIAL); 198 } 199 200 if (h->flags & (P_BLEAF | P_RLEAF)) 201 break; 202 pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno; 203 mpool_put(t->bt_mp, h, 0); 204 } 205 206 ep->page = h; 207 ep->index = NEXTINDEX(h) - 1; 208 break; 209 } 210 return (RET_SUCCESS); 211 } 212 213 /* 214 * __bt_seqadvance -- 215 * Advance the sequential scan. 216 * 217 * Parameters: 218 * t: tree 219 * flags: R_NEXT, R_PREV 220 * 221 * Side effects: 222 * Pins the page the new key/data record is on. 223 * 224 * Returns: 225 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 226 */ 227 static int 228 __bt_seqadv(BTREE *t, EPG *ep, int flags) 229 { 230 CURSOR *c; 231 PAGE *h; 232 indx_t idx; 233 pgno_t pg; 234 int exact; 235 236 /* 237 * There are a couple of states that we can be in. The cursor has 238 * been initialized by the time we get here, but that's all we know. 239 */ 240 c = &t->bt_cursor; 241 242 /* 243 * The cursor was deleted where there weren't any duplicate records, 244 * so the key was saved. Find out where that key would go in the 245 * current tree. It doesn't matter if the returned key is an exact 246 * match or not -- if it's an exact match, the record was added after 247 * the delete so we can just return it. If not, as long as there's 248 * a record there, return it. 249 */ 250 if (F_ISSET(c, CURS_ACQUIRE)) 251 return (__bt_first(t, &c->key, ep, &exact)); 252 253 /* Get the page referenced by the cursor. */ 254 if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL) 255 return (RET_ERROR); 256 257 /* 258 * Find the next/previous record in the tree and point the cursor at 259 * it. The cursor may not be moved until a new key has been found. 260 */ 261 switch (flags) { 262 case R_NEXT: /* Next record. */ 263 /* 264 * The cursor was deleted in duplicate records, and moved 265 * forward to a record that has yet to be returned. Clear 266 * that flag, and return the record. 267 */ 268 if (F_ISSET(c, CURS_AFTER)) 269 goto usecurrent; 270 idx = c->pg.index; 271 if (++idx == NEXTINDEX(h)) { 272 pg = h->nextpg; 273 mpool_put(t->bt_mp, h, 0); 274 if (pg == P_INVALID) 275 return (RET_SPECIAL); 276 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 277 return (RET_ERROR); 278 idx = 0; 279 } 280 break; 281 case R_PREV: /* Previous record. */ 282 /* 283 * The cursor was deleted in duplicate records, and moved 284 * backward to a record that has yet to be returned. Clear 285 * that flag, and return the record. 286 */ 287 if (F_ISSET(c, CURS_BEFORE)) { 288 usecurrent: F_CLR(c, CURS_AFTER | CURS_BEFORE); 289 ep->page = h; 290 ep->index = c->pg.index; 291 return (RET_SUCCESS); 292 } 293 idx = c->pg.index; 294 if (idx == 0) { 295 pg = h->prevpg; 296 mpool_put(t->bt_mp, h, 0); 297 if (pg == P_INVALID) 298 return (RET_SPECIAL); 299 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 300 return (RET_ERROR); 301 idx = NEXTINDEX(h) - 1; 302 } else 303 --idx; 304 break; 305 } 306 307 ep->page = h; 308 ep->index = idx; 309 return (RET_SUCCESS); 310 } 311 312 /* 313 * __bt_first -- 314 * Find the first entry. 315 * 316 * Parameters: 317 * t: the tree 318 * key: the key 319 * erval: return EPG 320 * exactp: pointer to exact match flag 321 * 322 * Returns: 323 * The first entry in the tree greater than or equal to key, 324 * or RET_SPECIAL if no such key exists. 325 */ 326 static int 327 __bt_first(BTREE *t, const DBT *key, EPG *erval, int *exactp) 328 { 329 PAGE *h; 330 EPG *ep, save; 331 pgno_t pg; 332 333 /* 334 * Find any matching record; __bt_search pins the page. 335 * 336 * If it's an exact match and duplicates are possible, walk backwards 337 * in the tree until we find the first one. Otherwise, make sure it's 338 * a valid key (__bt_search may return an index just past the end of a 339 * page) and return it. 340 */ 341 if ((ep = __bt_search(t, key, exactp)) == NULL) 342 return (0); 343 if (*exactp) { 344 if (F_ISSET(t, B_NODUPS)) { 345 *erval = *ep; 346 return (RET_SUCCESS); 347 } 348 349 /* 350 * Walk backwards, as long as the entry matches and there are 351 * keys left in the tree. Save a copy of each match in case 352 * we go too far. 353 */ 354 save = *ep; 355 h = ep->page; 356 do { 357 if (save.page->pgno != ep->page->pgno) { 358 mpool_put(t->bt_mp, save.page, 0); 359 save = *ep; 360 } else 361 save.index = ep->index; 362 363 /* 364 * Don't unpin the page the last (or original) match 365 * was on, but make sure it's unpinned if an error 366 * occurs. 367 */ 368 if (ep->index == 0) { 369 if (h->prevpg == P_INVALID) 370 break; 371 if (h->pgno != save.page->pgno) 372 mpool_put(t->bt_mp, h, 0); 373 if ((h = mpool_get(t->bt_mp, 374 h->prevpg, 0)) == NULL) { 375 if (h->pgno == save.page->pgno) 376 mpool_put(t->bt_mp, 377 save.page, 0); 378 return (RET_ERROR); 379 } 380 ep->page = h; 381 ep->index = NEXTINDEX(h); 382 } 383 --ep->index; 384 } while (__bt_cmp(t, key, ep) == 0); 385 386 /* 387 * Reach here with the last page that was looked at pinned, 388 * which may or may not be the same as the last (or original) 389 * match page. If it's not useful, release it. 390 */ 391 if (h->pgno != save.page->pgno) 392 mpool_put(t->bt_mp, h, 0); 393 394 *erval = save; 395 return (RET_SUCCESS); 396 } 397 398 /* If at the end of a page, find the next entry. */ 399 if (ep->index == NEXTINDEX(ep->page)) { 400 h = ep->page; 401 pg = h->nextpg; 402 mpool_put(t->bt_mp, h, 0); 403 if (pg == P_INVALID) 404 return (RET_SPECIAL); 405 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 406 return (RET_ERROR); 407 ep->index = 0; 408 ep->page = h; 409 } 410 *erval = *ep; 411 return (RET_SUCCESS); 412 } 413 414 /* 415 * __bt_setcur -- 416 * Set the cursor to an entry in the tree. 417 * 418 * Parameters: 419 * t: the tree 420 * pgno: page number 421 * idx: page index 422 */ 423 void 424 __bt_setcur(BTREE *t, pgno_t pgno, unsigned int idx) 425 { 426 /* Lose any already deleted key. */ 427 if (t->bt_cursor.key.data != NULL) { 428 free(t->bt_cursor.key.data); 429 t->bt_cursor.key.size = 0; 430 t->bt_cursor.key.data = NULL; 431 } 432 F_CLR(&t->bt_cursor, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE); 433 434 /* Update the cursor. */ 435 t->bt_cursor.pg.pgno = pgno; 436 t->bt_cursor.pg.index = idx; 437 F_SET(&t->bt_cursor, CURS_INIT); 438 } 439