1 /* $NetBSD: bt_seq.c,v 1.17 2008/09/11 12:58:00 joerg Exp $ */ 2 3 /*- 4 * Copyright (c) 1990, 1993, 1994 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Mike Olson. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #if HAVE_NBTOOL_CONFIG_H 36 #include "nbtool_config.h" 37 #endif 38 39 #include <sys/cdefs.h> 40 __RCSID("$NetBSD: bt_seq.c,v 1.17 2008/09/11 12:58:00 joerg Exp $"); 41 42 #include "namespace.h" 43 #include <sys/types.h> 44 45 #include <assert.h> 46 #include <errno.h> 47 #include <stddef.h> 48 #include <stdio.h> 49 #include <stdlib.h> 50 51 #include <db.h> 52 #include "btree.h" 53 54 static int __bt_first(BTREE *, const DBT *, EPG *, int *); 55 static int __bt_seqadv(BTREE *, EPG *, int); 56 static int __bt_seqset(BTREE *, EPG *, DBT *, int); 57 58 /* 59 * Sequential scan support. 60 * 61 * The tree can be scanned sequentially, starting from either end of the 62 * tree or from any specific key. A scan request before any scanning is 63 * done is initialized as starting from the least node. 64 */ 65 66 /* 67 * __bt_seq -- 68 * Btree sequential scan interface. 69 * 70 * Parameters: 71 * dbp: pointer to access method 72 * key: key for positioning and return value 73 * data: data return value 74 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV. 75 * 76 * Returns: 77 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 78 */ 79 int 80 __bt_seq(const DB *dbp, DBT *key, DBT *data, u_int flags) 81 { 82 BTREE *t; 83 EPG e; 84 int status; 85 86 t = dbp->internal; 87 88 /* Toss any page pinned across calls. */ 89 if (t->bt_pinned != NULL) { 90 mpool_put(t->bt_mp, t->bt_pinned, 0); 91 t->bt_pinned = NULL; 92 } 93 94 /* 95 * If scan unitialized as yet, or starting at a specific record, set 96 * the scan to a specific key. Both __bt_seqset and __bt_seqadv pin 97 * the page the cursor references if they're successful. 98 */ 99 switch (flags) { 100 case R_NEXT: 101 case R_PREV: 102 if (F_ISSET(&t->bt_cursor, CURS_INIT)) { 103 status = __bt_seqadv(t, &e, (int)flags); 104 break; 105 } 106 /* FALLTHROUGH */ 107 case R_FIRST: 108 case R_LAST: 109 case R_CURSOR: 110 status = __bt_seqset(t, &e, key, (int)flags); 111 break; 112 default: 113 errno = EINVAL; 114 return (RET_ERROR); 115 } 116 117 if (status == RET_SUCCESS) { 118 __bt_setcur(t, e.page->pgno, (u_int)e.index); 119 120 status = 121 __bt_ret(t, &e, key, &t->bt_rkey, data, &t->bt_rdata, 0); 122 123 /* 124 * If the user is doing concurrent access, we copied the 125 * key/data, toss the page. 126 */ 127 if (F_ISSET(t, B_DB_LOCK)) 128 mpool_put(t->bt_mp, e.page, 0); 129 else 130 t->bt_pinned = e.page; 131 } 132 return (status); 133 } 134 135 /* 136 * __bt_seqset -- 137 * Set the sequential scan to a specific key. 138 * 139 * Parameters: 140 * t: tree 141 * ep: storage for returned key 142 * key: key for initial scan position 143 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV 144 * 145 * Side effects: 146 * Pins the page the cursor references. 147 * 148 * Returns: 149 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 150 */ 151 static int 152 __bt_seqset(BTREE *t, EPG *ep, DBT *key, int flags) 153 { 154 PAGE *h; 155 pgno_t pg; 156 int exact; 157 158 /* 159 * Find the first, last or specific key in the tree and point the 160 * cursor at it. The cursor may not be moved until a new key has 161 * been found. 162 */ 163 switch (flags) { 164 case R_CURSOR: /* Keyed scan. */ 165 /* 166 * Find the first instance of the key or the smallest key 167 * which is greater than or equal to the specified key. 168 */ 169 if (key->data == NULL || key->size == 0) { 170 errno = EINVAL; 171 return (RET_ERROR); 172 } 173 return (__bt_first(t, key, ep, &exact)); 174 case R_FIRST: /* First record. */ 175 case R_NEXT: 176 /* Walk down the left-hand side of the tree. */ 177 for (pg = P_ROOT;;) { 178 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 179 return (RET_ERROR); 180 181 /* Check for an empty tree. */ 182 if (NEXTINDEX(h) == 0) { 183 mpool_put(t->bt_mp, h, 0); 184 return (RET_SPECIAL); 185 } 186 187 if (h->flags & (P_BLEAF | P_RLEAF)) 188 break; 189 pg = GETBINTERNAL(h, 0)->pgno; 190 mpool_put(t->bt_mp, h, 0); 191 } 192 ep->page = h; 193 ep->index = 0; 194 break; 195 case R_LAST: /* Last record. */ 196 case R_PREV: 197 /* Walk down the right-hand side of the tree. */ 198 for (pg = P_ROOT;;) { 199 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 200 return (RET_ERROR); 201 202 /* Check for an empty tree. */ 203 if (NEXTINDEX(h) == 0) { 204 mpool_put(t->bt_mp, h, 0); 205 return (RET_SPECIAL); 206 } 207 208 if (h->flags & (P_BLEAF | P_RLEAF)) 209 break; 210 pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno; 211 mpool_put(t->bt_mp, h, 0); 212 } 213 214 ep->page = h; 215 ep->index = NEXTINDEX(h) - 1; 216 break; 217 } 218 return (RET_SUCCESS); 219 } 220 221 /* 222 * __bt_seqadvance -- 223 * Advance the sequential scan. 224 * 225 * Parameters: 226 * t: tree 227 * flags: R_NEXT, R_PREV 228 * 229 * Side effects: 230 * Pins the page the new key/data record is on. 231 * 232 * Returns: 233 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 234 */ 235 static int 236 __bt_seqadv(BTREE *t, EPG *ep, int flags) 237 { 238 CURSOR *c; 239 PAGE *h; 240 indx_t idx = 0; /* pacify gcc */ 241 pgno_t pg; 242 int exact; 243 244 /* 245 * There are a couple of states that we can be in. The cursor has 246 * been initialized by the time we get here, but that's all we know. 247 */ 248 c = &t->bt_cursor; 249 250 /* 251 * The cursor was deleted where there weren't any duplicate records, 252 * so the key was saved. Find out where that key would go in the 253 * current tree. It doesn't matter if the returned key is an exact 254 * match or not -- if it's an exact match, the record was added after 255 * the delete so we can just return it. If not, as long as there's 256 * a record there, return it. 257 */ 258 if (F_ISSET(c, CURS_ACQUIRE)) 259 return (__bt_first(t, &c->key, ep, &exact)); 260 261 /* Get the page referenced by the cursor. */ 262 if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL) 263 return (RET_ERROR); 264 265 /* 266 * Find the next/previous record in the tree and point the cursor at 267 * it. The cursor may not be moved until a new key has been found. 268 */ 269 switch (flags) { 270 case R_NEXT: /* Next record. */ 271 /* 272 * The cursor was deleted in duplicate records, and moved 273 * forward to a record that has yet to be returned. Clear 274 * that flag, and return the record. 275 */ 276 if (F_ISSET(c, CURS_AFTER)) 277 goto usecurrent; 278 idx = c->pg.index; 279 if (++idx == NEXTINDEX(h)) { 280 pg = h->nextpg; 281 mpool_put(t->bt_mp, h, 0); 282 if (pg == P_INVALID) 283 return (RET_SPECIAL); 284 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 285 return (RET_ERROR); 286 idx = 0; 287 } 288 break; 289 case R_PREV: /* Previous record. */ 290 /* 291 * The cursor was deleted in duplicate records, and moved 292 * backward to a record that has yet to be returned. Clear 293 * that flag, and return the record. 294 */ 295 if (F_ISSET(c, CURS_BEFORE)) { 296 usecurrent: F_CLR(c, CURS_AFTER | CURS_BEFORE); 297 ep->page = h; 298 ep->index = c->pg.index; 299 return (RET_SUCCESS); 300 } 301 idx = c->pg.index; 302 if (idx == 0) { 303 pg = h->prevpg; 304 mpool_put(t->bt_mp, h, 0); 305 if (pg == P_INVALID) 306 return (RET_SPECIAL); 307 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 308 return (RET_ERROR); 309 idx = NEXTINDEX(h) - 1; 310 } else 311 --idx; 312 break; 313 } 314 315 ep->page = h; 316 ep->index = idx; 317 return (RET_SUCCESS); 318 } 319 320 /* 321 * __bt_first -- 322 * Find the first entry. 323 * 324 * Parameters: 325 * t: the tree 326 * key: the key 327 * erval: return EPG 328 * exactp: pointer to exact match flag 329 * 330 * Returns: 331 * The first entry in the tree greater than or equal to key, 332 * or RET_SPECIAL if no such key exists. 333 */ 334 static int 335 __bt_first(BTREE *t, const DBT *key, EPG *erval, int *exactp) 336 { 337 PAGE *h; 338 EPG *ep, save; 339 pgno_t pg; 340 341 /* 342 * Find any matching record; __bt_search pins the page. 343 * 344 * If it's an exact match and duplicates are possible, walk backwards 345 * in the tree until we find the first one. Otherwise, make sure it's 346 * a valid key (__bt_search may return an index just past the end of a 347 * page) and return it. 348 */ 349 if ((ep = __bt_search(t, key, exactp)) == NULL) 350 return (0); 351 if (*exactp) { 352 if (F_ISSET(t, B_NODUPS)) { 353 *erval = *ep; 354 return (RET_SUCCESS); 355 } 356 357 /* 358 * Walk backwards, as long as the entry matches and there are 359 * keys left in the tree. Save a copy of each match in case 360 * we go too far. 361 */ 362 save = *ep; 363 h = ep->page; 364 do { 365 if (save.page->pgno != ep->page->pgno) { 366 mpool_put(t->bt_mp, save.page, 0); 367 save = *ep; 368 } else 369 save.index = ep->index; 370 371 /* 372 * Don't unpin the page the last (or original) match 373 * was on, but make sure it's unpinned if an error 374 * occurs. 375 */ 376 if (ep->index == 0) { 377 if (h->prevpg == P_INVALID) 378 break; 379 if (h->pgno != save.page->pgno) 380 mpool_put(t->bt_mp, h, 0); 381 if ((h = mpool_get(t->bt_mp, 382 h->prevpg, 0)) == NULL) 383 return (RET_ERROR); 384 ep->page = h; 385 ep->index = NEXTINDEX(h); 386 } 387 --ep->index; 388 } while (__bt_cmp(t, key, ep) == 0); 389 390 /* 391 * Reach here with the last page that was looked at pinned, 392 * which may or may not be the same as the last (or original) 393 * match page. If it's not useful, release it. 394 */ 395 if (h->pgno != save.page->pgno) 396 mpool_put(t->bt_mp, h, 0); 397 398 *erval = save; 399 return (RET_SUCCESS); 400 } 401 402 /* If at the end of a page, find the next entry. */ 403 if (ep->index == NEXTINDEX(ep->page)) { 404 h = ep->page; 405 pg = h->nextpg; 406 mpool_put(t->bt_mp, h, 0); 407 if (pg == P_INVALID) 408 return (RET_SPECIAL); 409 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 410 return (RET_ERROR); 411 ep->index = 0; 412 ep->page = h; 413 } 414 *erval = *ep; 415 return (RET_SUCCESS); 416 } 417 418 /* 419 * __bt_setcur -- 420 * Set the cursor to an entry in the tree. 421 * 422 * Parameters: 423 * t: the tree 424 * pgno: page number 425 * idx: page index 426 */ 427 void 428 __bt_setcur(BTREE *t, pgno_t pgno, u_int idx) 429 { 430 /* Lose any already deleted key. */ 431 if (t->bt_cursor.key.data != NULL) { 432 free(t->bt_cursor.key.data); 433 t->bt_cursor.key.size = 0; 434 t->bt_cursor.key.data = NULL; 435 } 436 F_CLR(&t->bt_cursor, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE); 437 438 /* Update the cursor. */ 439 t->bt_cursor.pg.pgno = pgno; 440 t->bt_cursor.pg.index = idx; 441 F_SET(&t->bt_cursor, CURS_INIT); 442 } 443