1 /*-
2  * See the file LICENSE for redistribution information.
3  *
4  * Copyright (c) 1996, 2013 Oracle and/or its affiliates.  All rights reserved.
5  *
6  * $Id$
7  */
8 
9 #include "db_config.h"
10 
11 #include "db_int.h"
12 #include "dbinc/db_page.h"
13 #include "dbinc/btree.h"
14 #include "dbinc/mp.h"
15 
16 /*
17  * __db_traverse_big
18  *	Traverse a chain of overflow pages and call the callback routine
19  * on each one.  The calling convention for the callback is:
20  *	callback(dbc, page, cookie, did_put),
21  * where did_put is a return value indicating if the page in question has
22  * already been returned to the mpool.
23  *
24  * PUBLIC: int __db_traverse_big __P((DBC *, db_pgno_t,
25  * PUBLIC:	int (*)(DBC *, PAGE *, void *, int *), void *));
26  */
27 int
__db_traverse_big(dbc,pgno,callback,cookie)28 __db_traverse_big(dbc, pgno, callback, cookie)
29 	DBC *dbc;
30 	db_pgno_t pgno;
31 	int (*callback) __P((DBC *, PAGE *, void *, int *));
32 	void *cookie;
33 {
34 	DB_MPOOLFILE *mpf;
35 	PAGE *p;
36 	int did_put, ret;
37 
38 	mpf = dbc->dbp->mpf;
39 
40 	do {
41 		did_put = 0;
42 		if ((ret = __memp_fget(mpf,
43 		     &pgno, dbc->thread_info, dbc->txn, 0, &p)) != 0)
44 			return (ret);
45 		/*
46 		 * If we are freeing pages only process the overflow
47 		 * chain if the head of the chain has a refcount of 1.
48 		 */
49 		pgno = NEXT_PGNO(p);
50 		if (callback == __db_truncate_callback && OV_REF(p) != 1)
51 			pgno = PGNO_INVALID;
52 		if ((ret = callback(dbc, p, cookie, &did_put)) == 0 &&
53 		    !did_put)
54 			ret = __memp_fput(mpf,
55 			     dbc->thread_info, p, dbc->priority);
56 	} while (ret == 0 && pgno != PGNO_INVALID);
57 
58 	return (ret);
59 }
60 
61 /*
62  * __db_reclaim_callback
63  * This is the callback routine used during a delete of a subdatabase.
64  * we are traversing a btree or hash table and trying to free all the
65  * pages.  Since they share common code for duplicates and overflow
66  * items, we traverse them identically and use this routine to do the
67  * actual free.  The reason that this is callback is because hash uses
68  * the same traversal code for statistics gathering.
69  *
70  * PUBLIC: int __db_reclaim_callback __P((DBC *, PAGE *, void *, int *));
71  */
72 int
__db_reclaim_callback(dbc,p,cookie,putp)73 __db_reclaim_callback(dbc, p, cookie, putp)
74 	DBC *dbc;
75 	PAGE *p;
76 	void *cookie;
77 	int *putp;
78 {
79 	DB *dbp;
80 	int ret;
81 
82 	dbp = dbc->dbp;
83 
84 	/*
85 	 * We don't want to log the free of the root with the subdb.
86 	 * If we abort then the subdb may not be openable to undo
87 	 * the free.
88 	 */
89 	if ((dbp->type == DB_BTREE || dbp->type == DB_RECNO) &&
90 	    PGNO(p) == ((BTREE *)dbp->bt_internal)->bt_root)
91 		return (0);
92 	if ((ret = __db_free(dbc, p, *(u_int32_t *)cookie)) != 0)
93 		return (ret);
94 	*putp = 1;
95 
96 	return (0);
97 }
98 
99 /*
100  * __db_truncate_callback
101  * This is the callback routine used during a truncate.
102  * we are traversing a btree or hash table and trying to free all the
103  * pages.
104  *
105  * PUBLIC: int __db_truncate_callback __P((DBC *, PAGE *, void *, int *));
106  */
107 int
__db_truncate_callback(dbc,p,cookie,putp)108 __db_truncate_callback(dbc, p, cookie, putp)
109 	DBC *dbc;
110 	PAGE *p;
111 	void *cookie;
112 	int *putp;
113 {
114 	DB *dbp;
115 	DBT ddbt, ldbt;
116 	DB_MPOOLFILE *mpf;
117 	db_indx_t indx, len, off, tlen, top;
118 	u_int8_t *hk, type;
119 	u_int32_t *countp;
120 	int ret;
121 
122 	top = NUM_ENT(p);
123 	dbp = dbc->dbp;
124 	mpf = dbp->mpf;
125 	countp = cookie;
126 	*putp = 1;
127 
128 	switch (TYPE(p)) {
129 	case P_LBTREE:
130 		/* Skip for off-page duplicates and deleted items. */
131 		for (indx = 0; indx < top; indx += P_INDX) {
132 			type = GET_BKEYDATA(dbp, p, indx + O_INDX)->type;
133 			if (!B_DISSET(type) && B_TYPE(type) != B_DUPLICATE)
134 				++*countp;
135 		}
136 		/* FALLTHROUGH */
137 	case P_IBTREE:
138 	case P_IRECNO:
139 	case P_INVALID:
140 		if (dbp->type != DB_HASH &&
141 		    ((BTREE *)dbp->bt_internal)->bt_root == PGNO(p)) {
142 			type = dbp->type == DB_RECNO ? P_LRECNO : P_LBTREE;
143 			goto reinit;
144 		}
145 		break;
146 	case P_OVERFLOW:
147 		if ((ret = __memp_dirty(mpf,
148 		    &p, dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0)
149 			return (ret);
150 		if (DBC_LOGGING(dbc)) {
151 			if ((ret = __db_ovref_log(dbp, dbc->txn,
152 			    &LSN(p), 0, p->pgno, -1, &LSN(p))) != 0)
153 				return (ret);
154 		} else
155 			LSN_NOT_LOGGED(LSN(p));
156 		if (--OV_REF(p) != 0)
157 			*putp = 0;
158 		break;
159 	case P_LRECNO:
160 		for (indx = 0; indx < top; indx += O_INDX) {
161 			type = GET_BKEYDATA(dbp, p, indx)->type;
162 			if (!B_DISSET(type))
163 				++*countp;
164 		}
165 
166 		if (((BTREE *)dbp->bt_internal)->bt_root == PGNO(p)) {
167 			type = P_LRECNO;
168 			goto reinit;
169 		}
170 		break;
171 	case P_LDUP:
172 		/* Correct for deleted items. */
173 		for (indx = 0; indx < top; indx += O_INDX)
174 			if (!B_DISSET(GET_BKEYDATA(dbp, p, indx)->type))
175 				++*countp;
176 
177 		break;
178 	case P_HASH:
179 		/* Correct for on-page duplicates and deleted items. */
180 		for (indx = 0; indx < top; indx += P_INDX) {
181 			switch (*H_PAIRDATA(dbp, p, indx)) {
182 			case H_OFFDUP:
183 				break;
184 			case H_OFFPAGE:
185 			case H_KEYDATA:
186 				++*countp;
187 				break;
188 			case H_DUPLICATE:
189 				tlen = LEN_HDATA(dbp, p, 0, indx);
190 				hk = H_PAIRDATA(dbp, p, indx);
191 				for (off = 0; off < tlen;
192 				    off += len + 2 * sizeof(db_indx_t)) {
193 					++*countp;
194 					memcpy(&len,
195 					    HKEYDATA_DATA(hk)
196 					    + off, sizeof(db_indx_t));
197 				}
198 				break;
199 			default:
200 				return (__db_pgfmt(dbp->env, p->pgno));
201 			}
202 		}
203 		/* Don't free the head of the bucket. */
204 		if (PREV_PGNO(p) == PGNO_INVALID) {
205 			type = P_HASH;
206 
207 reinit:			if ((ret = __memp_dirty(mpf, &p,
208 			    dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0)
209 				return (ret);
210 			*putp = 0;
211 			if (DBC_LOGGING(dbc)) {
212 				memset(&ldbt, 0, sizeof(ldbt));
213 				memset(&ddbt, 0, sizeof(ddbt));
214 				ldbt.data = p;
215 				ldbt.size = P_OVERHEAD(dbp);
216 				ldbt.size += p->entries * sizeof(db_indx_t);
217 				ddbt.data = (u_int8_t *)p + HOFFSET(p);
218 				ddbt.size = dbp->pgsize - HOFFSET(p);
219 				if ((ret = __db_pg_init_log(dbp,
220 				    dbc->txn, &LSN(p), 0,
221 				    p->pgno, &ldbt, &ddbt)) != 0)
222 					return (ret);
223 			} else
224 				LSN_NOT_LOGGED(LSN(p));
225 
226 			P_INIT(p, dbp->pgsize, PGNO(p), PGNO_INVALID,
227 			    PGNO_INVALID, type == P_HASH ? 0 : 1, type);
228 		}
229 		break;
230 	default:
231 		return (__db_pgfmt(dbp->env, p->pgno));
232 	}
233 
234 	if (*putp == 1) {
235 		if ((ret = __db_free(dbc, p, 0)) != 0)
236 			return (ret);
237 	} else {
238 		if ((ret = __memp_fput(mpf, dbc->thread_info, p,
239 		    dbc->priority)) != 0)
240 			return (ret);
241 		*putp = 1;
242 	}
243 
244 	return (0);
245 }
246