1 /*-
2 * Copyright (c) 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Mike Olson.
7 *
8 * %sccs.include.redist.c%
9 */
10
11 #if defined(LIBC_SCCS) && !defined(lint)
12 static char sccsid[] = "@(#)bt_seq.c 8.2 (Berkeley) 09/07/93";
13 #endif /* LIBC_SCCS and not lint */
14
15 #include <sys/types.h>
16
17 #include <errno.h>
18 #include <stddef.h>
19 #include <stdio.h>
20 #include <stdlib.h>
21
22 #include <db.h>
23 #include "btree.h"
24
25 static int bt_seqadv __P((BTREE *, EPG *, int));
26 static int bt_seqset __P((BTREE *, EPG *, DBT *, int));
27
28 /*
29 * Sequential scan support.
30 *
31 * The tree can be scanned sequentially, starting from either end of the tree
32 * or from any specific key. A scan request before any scanning is done is
33 * initialized as starting from the least node.
34 *
35 * Each tree has an EPGNO which has the current position of the cursor. The
36 * cursor has to survive deletions/insertions in the tree without losing its
37 * position. This is done by noting deletions without doing them, and then
38 * doing them when the cursor moves (or the tree is closed).
39 */
40
41 /*
42 * __BT_SEQ -- Btree sequential scan interface.
43 *
44 * Parameters:
45 * dbp: pointer to access method
46 * key: key for positioning and return value
47 * data: data return value
48 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV.
49 *
50 * Returns:
51 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
52 */
53 int
__bt_seq(dbp,key,data,flags)54 __bt_seq(dbp, key, data, flags)
55 const DB *dbp;
56 DBT *key, *data;
57 u_int flags;
58 {
59 BTREE *t;
60 EPG e;
61 int status;
62
63 t = dbp->internal;
64
65 /* Toss any page pinned across calls. */
66 if (t->bt_pinned != NULL) {
67 mpool_put(t->bt_mp, t->bt_pinned, 0);
68 t->bt_pinned = NULL;
69 }
70
71 /*
72 * If scan unitialized as yet, or starting at a specific record, set
73 * the scan to a specific key. Both bt_seqset and bt_seqadv pin the
74 * page the cursor references if they're successful.
75 */
76 switch(flags) {
77 case R_NEXT:
78 case R_PREV:
79 if (ISSET(t, B_SEQINIT)) {
80 status = bt_seqadv(t, &e, flags);
81 break;
82 }
83 /* FALLTHROUGH */
84 case R_CURSOR:
85 case R_FIRST:
86 case R_LAST:
87 status = bt_seqset(t, &e, key, flags);
88 break;
89 default:
90 errno = EINVAL;
91 return (RET_ERROR);
92 }
93
94 if (status == RET_SUCCESS) {
95 status = __bt_ret(t, &e, key, data);
96
97 /* Update the actual cursor. */
98 t->bt_bcursor.pgno = e.page->pgno;
99 t->bt_bcursor.index = e.index;
100
101 /*
102 * If the user is doing concurrent access, we copied the
103 * key/data, toss the page.
104 */
105 if (ISSET(t, B_DB_LOCK))
106 mpool_put(t->bt_mp, e.page, 0);
107 else
108 t->bt_pinned = e.page;
109 SET(t, B_SEQINIT);
110 }
111 return (status);
112 }
113
114 /*
115 * BT_SEQSET -- Set the sequential scan to a specific key.
116 *
117 * Parameters:
118 * t: tree
119 * ep: storage for returned key
120 * key: key for initial scan position
121 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV
122 *
123 * Side effects:
124 * Pins the page the cursor references.
125 *
126 * Returns:
127 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
128 */
129 static int
bt_seqset(t,ep,key,flags)130 bt_seqset(t, ep, key, flags)
131 BTREE *t;
132 EPG *ep;
133 DBT *key;
134 int flags;
135 {
136 EPG *e;
137 PAGE *h;
138 pgno_t pg;
139 int exact;
140
141 /*
142 * Delete any already deleted record that we've been saving because
143 * the cursor pointed to it. Since going to a specific key, should
144 * delete any logically deleted records so they aren't found.
145 */
146 if (ISSET(t, B_DELCRSR) && __bt_crsrdel(t, &t->bt_bcursor))
147 return (RET_ERROR);
148
149 /*
150 * Find the first, last or specific key in the tree and point the cursor
151 * at it. The cursor may not be moved until a new key has been found.
152 */
153 switch(flags) {
154 case R_CURSOR: /* Keyed scan. */
155 /*
156 * Find the first instance of the key or the smallest key which
157 * is greater than or equal to the specified key. If run out
158 * of keys, return RET_SPECIAL.
159 */
160 if (key->data == NULL || key->size == 0) {
161 errno = EINVAL;
162 return (RET_ERROR);
163 }
164 e = __bt_first(t, key, &exact); /* Returns pinned page. */
165 if (e == NULL)
166 return (RET_ERROR);
167 /*
168 * If at the end of a page, skip any empty pages and find the
169 * next entry.
170 */
171 if (e->index == NEXTINDEX(e->page)) {
172 h = e->page;
173 do {
174 pg = h->nextpg;
175 mpool_put(t->bt_mp, h, 0);
176 if (pg == P_INVALID)
177 return (RET_SPECIAL);
178 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
179 return (RET_ERROR);
180 } while (NEXTINDEX(h) == 0);
181 e->index = 0;
182 e->page = h;
183 }
184 *ep = *e;
185 break;
186 case R_FIRST: /* First record. */
187 case R_NEXT:
188 /* Walk down the left-hand side of the tree. */
189 for (pg = P_ROOT;;) {
190 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
191 return (RET_ERROR);
192 if (h->flags & (P_BLEAF | P_RLEAF))
193 break;
194 pg = GETBINTERNAL(h, 0)->pgno;
195 mpool_put(t->bt_mp, h, 0);
196 }
197
198 /* Skip any empty pages. */
199 while (NEXTINDEX(h) == 0 && h->nextpg != P_INVALID) {
200 pg = h->nextpg;
201 mpool_put(t->bt_mp, h, 0);
202 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
203 return (RET_ERROR);
204 }
205
206 if (NEXTINDEX(h) == 0) {
207 mpool_put(t->bt_mp, h, 0);
208 return (RET_SPECIAL);
209 }
210
211 ep->page = h;
212 ep->index = 0;
213 break;
214 case R_LAST: /* Last record. */
215 case R_PREV:
216 /* Walk down the right-hand side of the tree. */
217 for (pg = P_ROOT;;) {
218 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
219 return (RET_ERROR);
220 if (h->flags & (P_BLEAF | P_RLEAF))
221 break;
222 pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno;
223 mpool_put(t->bt_mp, h, 0);
224 }
225
226 /* Skip any empty pages. */
227 while (NEXTINDEX(h) == 0 && h->prevpg != P_INVALID) {
228 pg = h->prevpg;
229 mpool_put(t->bt_mp, h, 0);
230 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
231 return (RET_ERROR);
232 }
233
234 if (NEXTINDEX(h) == 0) {
235 mpool_put(t->bt_mp, h, 0);
236 return (RET_SPECIAL);
237 }
238
239 ep->page = h;
240 ep->index = NEXTINDEX(h) - 1;
241 break;
242 }
243 return (RET_SUCCESS);
244 }
245
246 /*
247 * BT_SEQADVANCE -- Advance the sequential scan.
248 *
249 * Parameters:
250 * t: tree
251 * flags: R_NEXT, R_PREV
252 *
253 * Side effects:
254 * Pins the page the new key/data record is on.
255 *
256 * Returns:
257 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
258 */
259 static int
bt_seqadv(t,e,flags)260 bt_seqadv(t, e, flags)
261 BTREE *t;
262 EPG *e;
263 int flags;
264 {
265 EPGNO *c, delc;
266 PAGE *h;
267 indx_t index;
268 pgno_t pg;
269
270 /* Save the current cursor if going to delete it. */
271 c = &t->bt_bcursor;
272 if (ISSET(t, B_DELCRSR))
273 delc = *c;
274
275 if ((h = mpool_get(t->bt_mp, c->pgno, 0)) == NULL)
276 return (RET_ERROR);
277
278 /*
279 * Find the next/previous record in the tree and point the cursor at it.
280 * The cursor may not be moved until a new key has been found.
281 */
282 index = c->index;
283 switch(flags) {
284 case R_NEXT: /* Next record. */
285 if (++index == NEXTINDEX(h)) {
286 do {
287 pg = h->nextpg;
288 mpool_put(t->bt_mp, h, 0);
289 if (pg == P_INVALID)
290 return (RET_SPECIAL);
291 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
292 return (RET_ERROR);
293 } while (NEXTINDEX(h) == 0);
294 index = 0;
295 }
296 break;
297 case R_PREV: /* Previous record. */
298 if (index-- == 0) {
299 do {
300 pg = h->prevpg;
301 mpool_put(t->bt_mp, h, 0);
302 if (pg == P_INVALID)
303 return (RET_SPECIAL);
304 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
305 return (RET_ERROR);
306 } while (NEXTINDEX(h) == 0);
307 index = NEXTINDEX(h) - 1;
308 }
309 break;
310 }
311
312 e->page = h;
313 e->index = index;
314
315 /*
316 * Delete any already deleted record that we've been saving because the
317 * cursor pointed to it. This could cause the new index to be shifted
318 * down by one if the record we're deleting is on the same page and has
319 * a larger index.
320 */
321 if (ISSET(t, B_DELCRSR)) {
322 CLR(t, B_DELCRSR); /* Don't try twice. */
323 if (c->pgno == delc.pgno && c->index > delc.index)
324 --c->index;
325 if (__bt_crsrdel(t, &delc))
326 return (RET_ERROR);
327 }
328 return (RET_SUCCESS);
329 }
330
331 /*
332 * __BT_CRSRDEL -- Delete the record referenced by the cursor.
333 *
334 * Parameters:
335 * t: tree
336 *
337 * Returns:
338 * RET_ERROR, RET_SUCCESS
339 */
340 int
__bt_crsrdel(t,c)341 __bt_crsrdel(t, c)
342 BTREE *t;
343 EPGNO *c;
344 {
345 PAGE *h;
346 int status;
347
348 CLR(t, B_DELCRSR); /* Don't try twice. */
349 if ((h = mpool_get(t->bt_mp, c->pgno, 0)) == NULL)
350 return (RET_ERROR);
351 status = __bt_dleaf(t, h, c->index);
352 mpool_put(t->bt_mp, h, MPOOL_DIRTY);
353 return (status);
354 }
355