1*54925bf6Swillf /*-
2*54925bf6Swillf  * Copyright (c) 1990, 1993, 1994
3*54925bf6Swillf  *	The Regents of the University of California.  All rights reserved.
4*54925bf6Swillf  *
5*54925bf6Swillf  * This code is derived from software contributed to Berkeley by
6*54925bf6Swillf  * Mike Olson.
7*54925bf6Swillf  *
8*54925bf6Swillf  * Redistribution and use in source and binary forms, with or without
9*54925bf6Swillf  * modification, are permitted provided that the following conditions
10*54925bf6Swillf  * are met:
11*54925bf6Swillf  * 1. Redistributions of source code must retain the above copyright
12*54925bf6Swillf  *    notice, this list of conditions and the following disclaimer.
13*54925bf6Swillf  * 2. Redistributions in binary form must reproduce the above copyright
14*54925bf6Swillf  *    notice, this list of conditions and the following disclaimer in the
15*54925bf6Swillf  *    documentation and/or other materials provided with the distribution.
16*54925bf6Swillf  * 3. All advertising materials mentioning features or use of this software
17*54925bf6Swillf  *    must display the following acknowledgement:
18*54925bf6Swillf  *	This product includes software developed by the University of
19*54925bf6Swillf  *	California, Berkeley and its contributors.
20*54925bf6Swillf  * 4. Neither the name of the University nor the names of its contributors
21*54925bf6Swillf  *    may be used to endorse or promote products derived from this software
22*54925bf6Swillf  *    without specific prior written permission.
23*54925bf6Swillf  *
24*54925bf6Swillf  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25*54925bf6Swillf  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26*54925bf6Swillf  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27*54925bf6Swillf  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28*54925bf6Swillf  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29*54925bf6Swillf  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30*54925bf6Swillf  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31*54925bf6Swillf  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32*54925bf6Swillf  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33*54925bf6Swillf  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34*54925bf6Swillf  * SUCH DAMAGE.
35*54925bf6Swillf  */
36*54925bf6Swillf 
37*54925bf6Swillf #if defined(LIBC_SCCS) && !defined(lint)
38*54925bf6Swillf static char sccsid[] = "@(#)bt_put.c	8.8 (Berkeley) 7/26/94";
39*54925bf6Swillf #endif /* LIBC_SCCS and not lint */
40*54925bf6Swillf 
41*54925bf6Swillf #include <sys/types.h>
42*54925bf6Swillf 
43*54925bf6Swillf #include <errno.h>
44*54925bf6Swillf #include <stdio.h>
45*54925bf6Swillf #include <stdlib.h>
46*54925bf6Swillf #include <string.h>
47*54925bf6Swillf 
48*54925bf6Swillf #include "db-int.h"
49*54925bf6Swillf #include "btree.h"
50*54925bf6Swillf 
51*54925bf6Swillf static EPG *bt_fast __P((BTREE *, const DBT *, const DBT *, int *));
52*54925bf6Swillf 
53*54925bf6Swillf /*
54*54925bf6Swillf  * __BT_PUT -- Add a btree item to the tree.
55*54925bf6Swillf  *
56*54925bf6Swillf  * Parameters:
57*54925bf6Swillf  *	dbp:	pointer to access method
58*54925bf6Swillf  *	key:	key
59*54925bf6Swillf  *	data:	data
60*54925bf6Swillf  *	flag:	R_NOOVERWRITE
61*54925bf6Swillf  *
62*54925bf6Swillf  * Returns:
63*54925bf6Swillf  *	RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key is already in the
64*54925bf6Swillf  *	tree and R_NOOVERWRITE specified.
65*54925bf6Swillf  */
66*54925bf6Swillf int
__bt_put(dbp,key,data,flags)67*54925bf6Swillf __bt_put(dbp, key, data, flags)
68*54925bf6Swillf 	const DB *dbp;
69*54925bf6Swillf 	DBT *key;
70*54925bf6Swillf 	const DBT *data;
71*54925bf6Swillf 	u_int flags;
72*54925bf6Swillf {
73*54925bf6Swillf 	BTREE *t;
74*54925bf6Swillf 	DBT tkey, tdata;
75*54925bf6Swillf 	EPG *e = 0;
76*54925bf6Swillf 	PAGE *h;
77*54925bf6Swillf 	indx_t idx, nxtindex;
78*54925bf6Swillf 	db_pgno_t pg;
79*54925bf6Swillf 	u_int32_t nbytes;
80*54925bf6Swillf 	int dflags, exact, status;
81*54925bf6Swillf 	char *dest, db[NOVFLSIZE], kb[NOVFLSIZE];
82*54925bf6Swillf 
83*54925bf6Swillf 	t = dbp->internal;
84*54925bf6Swillf 
85*54925bf6Swillf 	/* Toss any page pinned across calls. */
86*54925bf6Swillf 	if (t->bt_pinned != NULL) {
87*54925bf6Swillf 		mpool_put(t->bt_mp, t->bt_pinned, 0);
88*54925bf6Swillf 		t->bt_pinned = NULL;
89*54925bf6Swillf 	}
90*54925bf6Swillf 
91*54925bf6Swillf 	/* Check for change to a read-only tree. */
92*54925bf6Swillf 	if (F_ISSET(t, B_RDONLY)) {
93*54925bf6Swillf 		errno = EPERM;
94*54925bf6Swillf 		return (RET_ERROR);
95*54925bf6Swillf 	}
96*54925bf6Swillf 
97*54925bf6Swillf 	switch (flags) {
98*54925bf6Swillf 	case 0:
99*54925bf6Swillf 	case R_NOOVERWRITE:
100*54925bf6Swillf 		break;
101*54925bf6Swillf 	case R_CURSOR:
102*54925bf6Swillf 		/*
103*54925bf6Swillf 		 * If flags is R_CURSOR, put the cursor.  Must already
104*54925bf6Swillf 		 * have started a scan and not have already deleted it.
105*54925bf6Swillf 		 */
106*54925bf6Swillf 		if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
107*54925bf6Swillf 		    !F_ISSET(&t->bt_cursor,
108*54925bf6Swillf 		        CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE))
109*54925bf6Swillf 			break;
110*54925bf6Swillf 		/* FALLTHROUGH */
111*54925bf6Swillf 	default:
112*54925bf6Swillf 		errno = EINVAL;
113*54925bf6Swillf 		return (RET_ERROR);
114*54925bf6Swillf 	}
115*54925bf6Swillf 
116*54925bf6Swillf 	/*
117*54925bf6Swillf 	 * If the key/data pair won't fit on a page, store it on overflow
118*54925bf6Swillf 	 * pages.  Only put the key on the overflow page if the pair are
119*54925bf6Swillf 	 * still too big after moving the data to an overflow page.
120*54925bf6Swillf 	 *
121*54925bf6Swillf 	 * XXX
122*54925bf6Swillf 	 * If the insert fails later on, the overflow pages aren't recovered.
123*54925bf6Swillf 	 */
124*54925bf6Swillf 	dflags = 0;
125*54925bf6Swillf 	if (key->size + data->size > t->bt_ovflsize) {
126*54925bf6Swillf 		if (key->size > t->bt_ovflsize) {
127*54925bf6Swillf 			u_int32_t yuck_this_is_gross_code;
128*54925bf6Swillf storekey:		if (__ovfl_put(t, key, &pg) == RET_ERROR)
129*54925bf6Swillf 				return (RET_ERROR);
130*54925bf6Swillf 			tkey.data = kb;
131*54925bf6Swillf 			tkey.size = NOVFLSIZE;
132*54925bf6Swillf 			memmove(kb, &pg, sizeof(db_pgno_t));
133*54925bf6Swillf 			yuck_this_is_gross_code = key->size;
134*54925bf6Swillf 			if (yuck_this_is_gross_code != key->size)
135*54925bf6Swillf 				abort ();
136*54925bf6Swillf 			memmove(kb + sizeof(db_pgno_t),
137*54925bf6Swillf 				&yuck_this_is_gross_code, sizeof(u_int32_t));
138*54925bf6Swillf 			dflags |= P_BIGKEY;
139*54925bf6Swillf 			key = &tkey;
140*54925bf6Swillf 		}
141*54925bf6Swillf 		if (key->size + data->size > t->bt_ovflsize) {
142*54925bf6Swillf 			u_int32_t yuck_this_is_gross_code = data->size;
143*54925bf6Swillf 			if (__ovfl_put(t, data, &pg) == RET_ERROR)
144*54925bf6Swillf 				return (RET_ERROR);
145*54925bf6Swillf 			tdata.data = db;
146*54925bf6Swillf 			tdata.size = NOVFLSIZE;
147*54925bf6Swillf 			memmove(db, &pg, sizeof(db_pgno_t));
148*54925bf6Swillf 			if (yuck_this_is_gross_code != data->size)
149*54925bf6Swillf 				abort ();
150*54925bf6Swillf 			memmove(db + sizeof(db_pgno_t),
151*54925bf6Swillf 				&yuck_this_is_gross_code, sizeof(u_int32_t));
152*54925bf6Swillf 			dflags |= P_BIGDATA;
153*54925bf6Swillf 			data = &tdata;
154*54925bf6Swillf 		}
155*54925bf6Swillf 		if (key->size + data->size > t->bt_ovflsize)
156*54925bf6Swillf 			goto storekey;
157*54925bf6Swillf 	}
158*54925bf6Swillf 
159*54925bf6Swillf 	/* Replace the cursor. */
160*54925bf6Swillf 	if (flags == R_CURSOR) {
161*54925bf6Swillf 		if ((h = mpool_get(t->bt_mp, t->bt_cursor.pg.pgno, 0)) == NULL)
162*54925bf6Swillf 			return (RET_ERROR);
163*54925bf6Swillf 		idx = t->bt_cursor.pg.index;
164*54925bf6Swillf 		goto delete;
165*54925bf6Swillf 	}
166*54925bf6Swillf 
167*54925bf6Swillf 	/*
168*54925bf6Swillf 	 * Find the key to delete, or, the location at which to insert.
169*54925bf6Swillf 	 * Bt_fast and __bt_search both pin the returned page.
170*54925bf6Swillf 	 */
171*54925bf6Swillf 	if (t->bt_order == NOT || (e = bt_fast(t, key, data, &exact)) == NULL)
172*54925bf6Swillf 		if ((e = __bt_search(t, key, &exact)) == NULL)
173*54925bf6Swillf 			return (RET_ERROR);
174*54925bf6Swillf 	h = e->page;
175*54925bf6Swillf 	idx = e->index;
176*54925bf6Swillf 
177*54925bf6Swillf 	/*
178*54925bf6Swillf 	 * Add the key/data pair to the tree.  If an identical key is already
179*54925bf6Swillf 	 * in the tree, and R_NOOVERWRITE is set, an error is returned.  If
180*54925bf6Swillf 	 * R_NOOVERWRITE is not set, the key is either added (if duplicates are
181*54925bf6Swillf 	 * permitted) or an error is returned.
182*54925bf6Swillf 	 */
183*54925bf6Swillf 	switch (flags) {
184*54925bf6Swillf 	case R_NOOVERWRITE:
185*54925bf6Swillf 		if (!exact)
186*54925bf6Swillf 			break;
187*54925bf6Swillf 		mpool_put(t->bt_mp, h, 0);
188*54925bf6Swillf 		return (RET_SPECIAL);
189*54925bf6Swillf 	default:
190*54925bf6Swillf 		if (!exact || !F_ISSET(t, B_NODUPS))
191*54925bf6Swillf 			break;
192*54925bf6Swillf 		/*
193*54925bf6Swillf 		 * !!!
194*54925bf6Swillf 		 * Note, the delete may empty the page, so we need to put a
195*54925bf6Swillf 		 * new entry into the page immediately.
196*54925bf6Swillf 		 */
197*54925bf6Swillf delete:		if (__bt_dleaf(t, key, h, idx) == RET_ERROR) {
198*54925bf6Swillf 			mpool_put(t->bt_mp, h, 0);
199*54925bf6Swillf 			return (RET_ERROR);
200*54925bf6Swillf 		}
201*54925bf6Swillf 		break;
202*54925bf6Swillf 	}
203*54925bf6Swillf 
204*54925bf6Swillf 	/*
205*54925bf6Swillf 	 * If not enough room, or the user has put a ceiling on the number of
206*54925bf6Swillf 	 * keys permitted in the page, split the page.  The split code will
207*54925bf6Swillf 	 * insert the key and data and unpin the current page.  If inserting
208*54925bf6Swillf 	 * into the offset array, shift the pointers up.
209*54925bf6Swillf 	 */
210*54925bf6Swillf 	nbytes = NBLEAFDBT(key->size, data->size);
211*54925bf6Swillf 	if (h->upper - h->lower < nbytes + sizeof(indx_t)) {
212*54925bf6Swillf 		if ((status = __bt_split(t, h, key,
213*54925bf6Swillf 		    data, dflags, nbytes, idx)) != RET_SUCCESS)
214*54925bf6Swillf 			return (status);
215*54925bf6Swillf 		goto success;
216*54925bf6Swillf 	}
217*54925bf6Swillf 
218*54925bf6Swillf 	if (idx < (nxtindex = NEXTINDEX(h)))
219*54925bf6Swillf 		memmove(h->linp + idx + 1, h->linp + idx,
220*54925bf6Swillf 		    (nxtindex - idx) * sizeof(indx_t));
221*54925bf6Swillf 	h->lower += sizeof(indx_t);
222*54925bf6Swillf 
223*54925bf6Swillf 	h->linp[idx] = h->upper -= nbytes;
224*54925bf6Swillf 	dest = (char *)h + h->upper;
225*54925bf6Swillf 	WR_BLEAF(dest, key, data, dflags);
226*54925bf6Swillf 
227*54925bf6Swillf 	/* If the cursor is on this page, adjust it as necessary. */
228*54925bf6Swillf 	if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
229*54925bf6Swillf 	    !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) &&
230*54925bf6Swillf 	    t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index >= idx)
231*54925bf6Swillf 		++t->bt_cursor.pg.index;
232*54925bf6Swillf 
233*54925bf6Swillf 	if (t->bt_order == NOT) {
234*54925bf6Swillf 		if (h->nextpg == P_INVALID) {
235*54925bf6Swillf 			if (idx == NEXTINDEX(h) - 1) {
236*54925bf6Swillf 				t->bt_order = FORWARD;
237*54925bf6Swillf 				t->bt_last.index = idx;
238*54925bf6Swillf 				t->bt_last.pgno = h->pgno;
239*54925bf6Swillf 			}
240*54925bf6Swillf 		} else if (h->prevpg == P_INVALID) {
241*54925bf6Swillf 			if (idx == 0) {
242*54925bf6Swillf 				t->bt_order = BACK;
243*54925bf6Swillf 				t->bt_last.index = 0;
244*54925bf6Swillf 				t->bt_last.pgno = h->pgno;
245*54925bf6Swillf 			}
246*54925bf6Swillf 		}
247*54925bf6Swillf 	}
248*54925bf6Swillf 
249*54925bf6Swillf 	mpool_put(t->bt_mp, h, MPOOL_DIRTY);
250*54925bf6Swillf 
251*54925bf6Swillf success:
252*54925bf6Swillf 	if (flags == R_SETCURSOR)
253*54925bf6Swillf 		__bt_setcur(t, e->page->pgno, e->index);
254*54925bf6Swillf 
255*54925bf6Swillf 	F_SET(t, B_MODIFIED);
256*54925bf6Swillf 	return (RET_SUCCESS);
257*54925bf6Swillf }
258*54925bf6Swillf 
259*54925bf6Swillf #ifdef STATISTICS
260*54925bf6Swillf u_long bt_cache_hit, bt_cache_miss;
261*54925bf6Swillf #endif
262*54925bf6Swillf 
263*54925bf6Swillf /*
264*54925bf6Swillf  * BT_FAST -- Do a quick check for sorted data.
265*54925bf6Swillf  *
266*54925bf6Swillf  * Parameters:
267*54925bf6Swillf  *	t:	tree
268*54925bf6Swillf  *	key:	key to insert
269*54925bf6Swillf  *
270*54925bf6Swillf  * Returns:
271*54925bf6Swillf  * 	EPG for new record or NULL if not found.
272*54925bf6Swillf  */
273*54925bf6Swillf static EPG *
bt_fast(t,key,data,exactp)274*54925bf6Swillf bt_fast(t, key, data, exactp)
275*54925bf6Swillf 	BTREE *t;
276*54925bf6Swillf 	const DBT *key, *data;
277*54925bf6Swillf 	int *exactp;
278*54925bf6Swillf {
279*54925bf6Swillf 	PAGE *h;
280*54925bf6Swillf 	u_int32_t nbytes;
281*54925bf6Swillf 	int cmp;
282*54925bf6Swillf 
283*54925bf6Swillf 	if ((h = mpool_get(t->bt_mp, t->bt_last.pgno, 0)) == NULL) {
284*54925bf6Swillf 		t->bt_order = NOT;
285*54925bf6Swillf 		return (NULL);
286*54925bf6Swillf 	}
287*54925bf6Swillf 	t->bt_cur.page = h;
288*54925bf6Swillf 	t->bt_cur.index = t->bt_last.index;
289*54925bf6Swillf 
290*54925bf6Swillf 	/*
291*54925bf6Swillf 	 * If won't fit in this page or have too many keys in this page,
292*54925bf6Swillf 	 * have to search to get split stack.
293*54925bf6Swillf 	 */
294*54925bf6Swillf 	nbytes = NBLEAFDBT(key->size, data->size);
295*54925bf6Swillf 	if (h->upper - h->lower < nbytes + sizeof(indx_t))
296*54925bf6Swillf 		goto miss;
297*54925bf6Swillf 
298*54925bf6Swillf 	if (t->bt_order == FORWARD) {
299*54925bf6Swillf 		if (t->bt_cur.page->nextpg != P_INVALID)
300*54925bf6Swillf 			goto miss;
301*54925bf6Swillf 		if (t->bt_cur.index != NEXTINDEX(h) - 1)
302*54925bf6Swillf 			goto miss;
303*54925bf6Swillf 		if ((cmp = __bt_cmp(t, key, &t->bt_cur)) < 0)
304*54925bf6Swillf 			goto miss;
305*54925bf6Swillf 		t->bt_last.index = cmp ? ++t->bt_cur.index : t->bt_cur.index;
306*54925bf6Swillf 	} else {
307*54925bf6Swillf 		if (t->bt_cur.page->prevpg != P_INVALID)
308*54925bf6Swillf 			goto miss;
309*54925bf6Swillf 		if (t->bt_cur.index != 0)
310*54925bf6Swillf 			goto miss;
311*54925bf6Swillf 		if ((cmp = __bt_cmp(t, key, &t->bt_cur)) > 0)
312*54925bf6Swillf 			goto miss;
313*54925bf6Swillf 		t->bt_last.index = 0;
314*54925bf6Swillf 	}
315*54925bf6Swillf 	*exactp = cmp == 0;
316*54925bf6Swillf #ifdef STATISTICS
317*54925bf6Swillf 	++bt_cache_hit;
318*54925bf6Swillf #endif
319*54925bf6Swillf 	return (&t->bt_cur);
320*54925bf6Swillf 
321*54925bf6Swillf miss:
322*54925bf6Swillf #ifdef STATISTICS
323*54925bf6Swillf 	++bt_cache_miss;
324*54925bf6Swillf #endif
325*54925bf6Swillf 	t->bt_order = NOT;
326*54925bf6Swillf 	mpool_put(t->bt_mp, h, 0);
327*54925bf6Swillf 	return (NULL);
328*54925bf6Swillf }
329