xref: /dragonfly/lib/libc/db/btree/bt_seq.c (revision 2cd2d2b5)
1 /*-
2  * Copyright (c) 1990, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Mike Olson.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  * @(#)bt_seq.c	8.7 (Berkeley) 7/20/94
37  * $DragonFly: src/lib/libc/db/btree/bt_seq.c,v 1.4 2003/11/12 20:21:22 eirikn Exp $
38  */
39 
40 #include <sys/types.h>
41 
42 #include <errno.h>
43 #include <stddef.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 
47 #include <db.h>
48 #include "btree.h"
49 
50 static int __bt_first (BTREE *, const DBT *, EPG *, int *);
51 static int __bt_seqadv (BTREE *, EPG *, int);
52 static int __bt_seqset (BTREE *, EPG *, DBT *, int);
53 
54 /*
55  * Sequential scan support.
56  *
57  * The tree can be scanned sequentially, starting from either end of the
58  * tree or from any specific key.  A scan request before any scanning is
59  * done is initialized as starting from the least node.
60  */
61 
62 /*
63  * __bt_seq --
64  *	Btree sequential scan interface.
65  *
66  * Parameters:
67  *	dbp:	pointer to access method
68  *	key:	key for positioning and return value
69  *	data:	data return value
70  *	flags:	R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV.
71  *
72  * Returns:
73  *	RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
74  */
75 int
76 __bt_seq(dbp, key, data, flags)
77 	const DB *dbp;
78 	DBT *key, *data;
79 	u_int flags;
80 {
81 	BTREE *t;
82 	EPG e;
83 	int status;
84 
85 	t = dbp->internal;
86 
87 	/* Toss any page pinned across calls. */
88 	if (t->bt_pinned != NULL) {
89 		mpool_put(t->bt_mp, t->bt_pinned, 0);
90 		t->bt_pinned = NULL;
91 	}
92 
93 	/*
94 	 * If scan unitialized as yet, or starting at a specific record, set
95 	 * the scan to a specific key.  Both __bt_seqset and __bt_seqadv pin
96 	 * the page the cursor references if they're successful.
97 	 */
98 	switch (flags) {
99 	case R_NEXT:
100 	case R_PREV:
101 		if (F_ISSET(&t->bt_cursor, CURS_INIT)) {
102 			status = __bt_seqadv(t, &e, flags);
103 			break;
104 		}
105 		/* FALLTHROUGH */
106 	case R_FIRST:
107 	case R_LAST:
108 	case R_CURSOR:
109 		status = __bt_seqset(t, &e, key, flags);
110 		break;
111 	default:
112 		errno = EINVAL;
113 		return (RET_ERROR);
114 	}
115 
116 	if (status == RET_SUCCESS) {
117 		__bt_setcur(t, e.page->pgno, e.index);
118 
119 		status =
120 		    __bt_ret(t, &e, key, &t->bt_rkey, data, &t->bt_rdata, 0);
121 
122 		/*
123 		 * If the user is doing concurrent access, we copied the
124 		 * key/data, toss the page.
125 		 */
126 		if (F_ISSET(t, B_DB_LOCK))
127 			mpool_put(t->bt_mp, e.page, 0);
128 		else
129 			t->bt_pinned = e.page;
130 	}
131 	return (status);
132 }
133 
134 /*
135  * __bt_seqset --
136  *	Set the sequential scan to a specific key.
137  *
138  * Parameters:
139  *	t:	tree
140  *	ep:	storage for returned key
141  *	key:	key for initial scan position
142  *	flags:	R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV
143  *
144  * Side effects:
145  *	Pins the page the cursor references.
146  *
147  * Returns:
148  *	RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
149  */
150 static int
151 __bt_seqset(t, ep, key, flags)
152 	BTREE *t;
153 	EPG *ep;
154 	DBT *key;
155 	int flags;
156 {
157 	PAGE *h;
158 	pgno_t pg;
159 	int exact;
160 
161 	/*
162 	 * Find the first, last or specific key in the tree and point the
163 	 * cursor at it.  The cursor may not be moved until a new key has
164 	 * been found.
165 	 */
166 	switch (flags) {
167 	case R_CURSOR:				/* Keyed scan. */
168 		/*
169 		 * Find the first instance of the key or the smallest key
170 		 * which is greater than or equal to the specified key.
171 		 */
172 		if (key->data == NULL || key->size == 0) {
173 			errno = EINVAL;
174 			return (RET_ERROR);
175 		}
176 		return (__bt_first(t, key, ep, &exact));
177 	case R_FIRST:				/* First record. */
178 	case R_NEXT:
179 		/* Walk down the left-hand side of the tree. */
180 		for (pg = P_ROOT;;) {
181 			if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
182 				return (RET_ERROR);
183 
184 			/* Check for an empty tree. */
185 			if (NEXTINDEX(h) == 0) {
186 				mpool_put(t->bt_mp, h, 0);
187 				return (RET_SPECIAL);
188 			}
189 
190 			if (h->flags & (P_BLEAF | P_RLEAF))
191 				break;
192 			pg = GETBINTERNAL(h, 0)->pgno;
193 			mpool_put(t->bt_mp, h, 0);
194 		}
195 		ep->page = h;
196 		ep->index = 0;
197 		break;
198 	case R_LAST:				/* Last record. */
199 	case R_PREV:
200 		/* Walk down the right-hand side of the tree. */
201 		for (pg = P_ROOT;;) {
202 			if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
203 				return (RET_ERROR);
204 
205 			/* Check for an empty tree. */
206 			if (NEXTINDEX(h) == 0) {
207 				mpool_put(t->bt_mp, h, 0);
208 				return (RET_SPECIAL);
209 			}
210 
211 			if (h->flags & (P_BLEAF | P_RLEAF))
212 				break;
213 			pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno;
214 			mpool_put(t->bt_mp, h, 0);
215 		}
216 
217 		ep->page = h;
218 		ep->index = NEXTINDEX(h) - 1;
219 		break;
220 	}
221 	return (RET_SUCCESS);
222 }
223 
224 /*
225  * __bt_seqadvance --
226  *	Advance the sequential scan.
227  *
228  * Parameters:
229  *	t:	tree
230  *	flags:	R_NEXT, R_PREV
231  *
232  * Side effects:
233  *	Pins the page the new key/data record is on.
234  *
235  * Returns:
236  *	RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
237  */
238 static int
239 __bt_seqadv(t, ep, flags)
240 	BTREE *t;
241 	EPG *ep;
242 	int flags;
243 {
244 	CURSOR *c;
245 	PAGE *h;
246 	indx_t index;
247 	pgno_t pg;
248 	int exact;
249 
250 	/*
251 	 * There are a couple of states that we can be in.  The cursor has
252 	 * been initialized by the time we get here, but that's all we know.
253 	 */
254 	c = &t->bt_cursor;
255 
256 	/*
257 	 * The cursor was deleted where there weren't any duplicate records,
258 	 * so the key was saved.  Find out where that key would go in the
259 	 * current tree.  It doesn't matter if the returned key is an exact
260 	 * match or not -- if it's an exact match, the record was added after
261 	 * the delete so we can just return it.  If not, as long as there's
262 	 * a record there, return it.
263 	 */
264 	if (F_ISSET(c, CURS_ACQUIRE))
265 		return (__bt_first(t, &c->key, ep, &exact));
266 
267 	/* Get the page referenced by the cursor. */
268 	if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL)
269 		return (RET_ERROR);
270 
271 	/*
272  	 * Find the next/previous record in the tree and point the cursor at
273 	 * it.  The cursor may not be moved until a new key has been found.
274 	 */
275 	switch (flags) {
276 	case R_NEXT:			/* Next record. */
277 		/*
278 		 * The cursor was deleted in duplicate records, and moved
279 		 * forward to a record that has yet to be returned.  Clear
280 		 * that flag, and return the record.
281 		 */
282 		if (F_ISSET(c, CURS_AFTER))
283 			goto usecurrent;
284 		index = c->pg.index;
285 		if (++index == NEXTINDEX(h)) {
286 			pg = h->nextpg;
287 			mpool_put(t->bt_mp, h, 0);
288 			if (pg == P_INVALID)
289 				return (RET_SPECIAL);
290 			if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
291 				return (RET_ERROR);
292 			index = 0;
293 		}
294 		break;
295 	case R_PREV:			/* Previous record. */
296 		/*
297 		 * The cursor was deleted in duplicate records, and moved
298 		 * backward to a record that has yet to be returned.  Clear
299 		 * that flag, and return the record.
300 		 */
301 		if (F_ISSET(c, CURS_BEFORE)) {
302 usecurrent:		F_CLR(c, CURS_AFTER | CURS_BEFORE);
303 			ep->page = h;
304 			ep->index = c->pg.index;
305 			return (RET_SUCCESS);
306 		}
307 		index = c->pg.index;
308 		if (index == 0) {
309 			pg = h->prevpg;
310 			mpool_put(t->bt_mp, h, 0);
311 			if (pg == P_INVALID)
312 				return (RET_SPECIAL);
313 			if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
314 				return (RET_ERROR);
315 			index = NEXTINDEX(h) - 1;
316 		} else
317 			--index;
318 		break;
319 	}
320 
321 	ep->page = h;
322 	ep->index = index;
323 	return (RET_SUCCESS);
324 }
325 
326 /*
327  * __bt_first --
328  *	Find the first entry.
329  *
330  * Parameters:
331  *	t:	the tree
332  *    key:	the key
333  *  erval:	return EPG
334  * exactp:	pointer to exact match flag
335  *
336  * Returns:
337  *	The first entry in the tree greater than or equal to key,
338  *	or RET_SPECIAL if no such key exists.
339  */
340 static int
341 __bt_first(t, key, erval, exactp)
342 	BTREE *t;
343 	const DBT *key;
344 	EPG *erval;
345 	int *exactp;
346 {
347 	PAGE *h;
348 	EPG *ep, save;
349 	pgno_t pg;
350 
351 	/*
352 	 * Find any matching record; __bt_search pins the page.
353 	 *
354 	 * If it's an exact match and duplicates are possible, walk backwards
355 	 * in the tree until we find the first one.  Otherwise, make sure it's
356 	 * a valid key (__bt_search may return an index just past the end of a
357 	 * page) and return it.
358 	 */
359 	if ((ep = __bt_search(t, key, exactp)) == NULL)
360 		return (0);
361 	if (*exactp) {
362 		if (F_ISSET(t, B_NODUPS)) {
363 			*erval = *ep;
364 			return (RET_SUCCESS);
365 		}
366 
367 		/*
368 		 * Walk backwards, as long as the entry matches and there are
369 		 * keys left in the tree.  Save a copy of each match in case
370 		 * we go too far.
371 		 */
372 		save = *ep;
373 		h = ep->page;
374 		do {
375 			if (save.page->pgno != ep->page->pgno) {
376 				mpool_put(t->bt_mp, save.page, 0);
377 				save = *ep;
378 			} else
379 				save.index = ep->index;
380 
381 			/*
382 			 * Don't unpin the page the last (or original) match
383 			 * was on, but make sure it's unpinned if an error
384 			 * occurs.
385 			 */
386 			if (ep->index == 0) {
387 				if (h->prevpg == P_INVALID)
388 					break;
389 				if (h->pgno != save.page->pgno)
390 					mpool_put(t->bt_mp, h, 0);
391 				if ((h = mpool_get(t->bt_mp,
392 				    h->prevpg, 0)) == NULL) {
393 					if (h->pgno == save.page->pgno)
394 						mpool_put(t->bt_mp,
395 						    save.page, 0);
396 					return (RET_ERROR);
397 				}
398 				ep->page = h;
399 				ep->index = NEXTINDEX(h);
400 			}
401 			--ep->index;
402 		} while (__bt_cmp(t, key, ep) == 0);
403 
404 		/*
405 		 * Reach here with the last page that was looked at pinned,
406 		 * which may or may not be the same as the last (or original)
407 		 * match page.  If it's not useful, release it.
408 		 */
409 		if (h->pgno != save.page->pgno)
410 			mpool_put(t->bt_mp, h, 0);
411 
412 		*erval = save;
413 		return (RET_SUCCESS);
414 	}
415 
416 	/* If at the end of a page, find the next entry. */
417 	if (ep->index == NEXTINDEX(ep->page)) {
418 		h = ep->page;
419 		pg = h->nextpg;
420 		mpool_put(t->bt_mp, h, 0);
421 		if (pg == P_INVALID)
422 			return (RET_SPECIAL);
423 		if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
424 			return (RET_ERROR);
425 		ep->index = 0;
426 		ep->page = h;
427 	}
428 	*erval = *ep;
429 	return (RET_SUCCESS);
430 }
431 
432 /*
433  * __bt_setcur --
434  *	Set the cursor to an entry in the tree.
435  *
436  * Parameters:
437  *	t:	the tree
438  *   pgno:	page number
439  *  index:	page index
440  */
441 void
442 __bt_setcur(t, pgno, index)
443 	BTREE *t;
444 	pgno_t pgno;
445 	u_int index;
446 {
447 	/* Lose any already deleted key. */
448 	if (t->bt_cursor.key.data != NULL) {
449 		free(t->bt_cursor.key.data);
450 		t->bt_cursor.key.size = 0;
451 		t->bt_cursor.key.data = NULL;
452 	}
453 	F_CLR(&t->bt_cursor, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE);
454 
455 	/* Update the cursor. */
456 	t->bt_cursor.pg.pgno = pgno;
457 	t->bt_cursor.pg.index = index;
458 	F_SET(&t->bt_cursor, CURS_INIT);
459 }
460