1 /*-
2  * See the file LICENSE for redistribution information.
3  *
4  * Copyright (c) 1996, 1997, 1998, 1999, 2000
5  *	Sleepycat Software.  All rights reserved.
6  */
7 /*
8  * Copyright (c) 1990, 1993, 1994, 1995, 1996
9  *	Keith Bostic.  All rights reserved.
10  */
11 /*
12  * Copyright (c) 1990, 1993, 1994, 1995
13  *	The Regents of the University of California.  All rights reserved.
14  *
15  * This code is derived from software contributed to Berkeley by
16  * Mike Olson.
17  *
18  * Redistribution and use in source and binary forms, with or without
19  * modification, are permitted provided that the following conditions
20  * are met:
21  * 1. Redistributions of source code must retain the above copyright
22  *    notice, this list of conditions and the following disclaimer.
23  * 2. Redistributions in binary form must reproduce the above copyright
24  *    notice, this list of conditions and the following disclaimer in the
25  *    documentation and/or other materials provided with the distribution.
26  * 3. Neither the name of the University nor the names of its contributors
27  *    may be used to endorse or promote products derived from this software
28  *    without specific prior written permission.
29  *
30  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
31  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
34  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
36  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40  * SUCH DAMAGE.
41  */
42 
43 #include "config.h"
44 
45 #ifndef lint
46 static const char revid[] = "$Id: db_overflow.c,v 1.5 2000/06/28 16:47:01 loic Exp $";
47 #endif /* not lint */
48 
49 #ifndef NO_SYSTEM_INCLUDES
50 #include <sys/types.h>
51 
52 #include <errno.h>
53 #include <string.h>
54 #endif
55 
56 #include "db_int.h"
57 #include "db_page.h"
58 #include "db_am.h"
59 #include "db_verify.h"
60 
61 /*
62  * Big key/data code.
63  *
64  * Big key and data entries are stored on linked lists of pages.  The initial
65  * reference is a structure with the total length of the item and the page
66  * number where it begins.  Each entry in the linked list contains a pointer
67  * to the next page of data, and so on.
68  */
69 
70 /*
71  * CDB___db_goff --
72  *	Get an offpage item.
73  *
74  * PUBLIC: int CDB___db_goff __P((DB *, DBT *,
75  * PUBLIC:     u_int32_t, db_pgno_t, void **, u_int32_t *));
76  */
77 int
CDB___db_goff(dbp,dbt,tlen,pgno,bpp,bpsz)78 CDB___db_goff(dbp, dbt, tlen, pgno, bpp, bpsz)
79 	DB *dbp;
80 	DBT *dbt;
81 	u_int32_t tlen;
82 	db_pgno_t pgno;
83 	void **bpp;
84 	u_int32_t *bpsz;
85 {
86 	DB_ENV *dbenv;
87 	PAGE *h;
88 	db_indx_t bytes;
89 	u_int32_t curoff, needed, start;
90 	u_int8_t *p, *src;
91 	int ret;
92 
93 	dbenv = dbp->dbenv;
94 
95 	/*
96 	 * Check if the buffer is big enough; if it is not and we are
97 	 * allowed to malloc space, then we'll malloc it.  If we are
98 	 * not (DB_DBT_USERMEM), then we'll set the dbt and return
99 	 * appropriately.
100 	 */
101 	if (F_ISSET(dbt, DB_DBT_PARTIAL)) {
102 		start = dbt->doff;
103 		needed = dbt->dlen;
104 	} else {
105 		start = 0;
106 		needed = tlen;
107 	}
108 
109 	/* Allocate any necessary memory. */
110 	if (F_ISSET(dbt, DB_DBT_USERMEM)) {
111 		if (needed > dbt->ulen) {
112 			dbt->size = needed;
113 			return (ENOMEM);
114 		}
115 	} else if (F_ISSET(dbt, DB_DBT_MALLOC)) {
116 		if ((ret = CDB___os_malloc(dbenv,
117 		    needed, dbp->db_malloc, &dbt->data)) != 0)
118 			return (ret);
119 	} else if (F_ISSET(dbt, DB_DBT_REALLOC)) {
120 		if ((ret = CDB___os_realloc(dbenv,
121 		    needed, dbp->db_realloc, &dbt->data)) != 0)
122 			return (ret);
123 	} else if (*bpsz == 0 || *bpsz < needed) {
124 		if ((ret = CDB___os_realloc(dbenv, needed, NULL, bpp)) != 0)
125 			return (ret);
126 		*bpsz = needed;
127 		dbt->data = *bpp;
128 	} else
129 		dbt->data = *bpp;
130 
131 	/*
132 	 * Step through the linked list of pages, copying the data on each
133 	 * one into the buffer.  Never copy more than the total data length.
134 	 */
135 	dbt->size = needed;
136 	for (curoff = 0, p = dbt->data; pgno != PGNO_INVALID && needed > 0;) {
137 		if ((ret = CDB_memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) {
138 			(void)CDB___db_pgerr(dbp, pgno);
139 			return (ret);
140 		}
141 		/* Check if we need any bytes from this page. */
142 		if (curoff + OV_LEN(h) >= start) {
143 			src = (u_int8_t *)h + P_OVERHEAD;
144 			bytes = OV_LEN(h);
145 			if (start > curoff) {
146 				src += start - curoff;
147 				bytes -= start - curoff;
148 			}
149 			if (bytes > needed)
150 				bytes = needed;
151 			memcpy(p, src, bytes);
152 			p += bytes;
153 			needed -= bytes;
154 		}
155 		curoff += OV_LEN(h);
156 		pgno = h->next_pgno;
157 		CDB_memp_fput(dbp->mpf, h, 0);
158 	}
159 	return (0);
160 }
161 
162 /*
163  * CDB___db_poff --
164  *	Put an offpage item.
165  *
166  * PUBLIC: int CDB___db_poff __P((DBC *, const DBT *, db_pgno_t *));
167  */
168 int
CDB___db_poff(dbc,dbt,pgnop)169 CDB___db_poff(dbc, dbt, pgnop)
170 	DBC *dbc;
171 	const DBT *dbt;
172 	db_pgno_t *pgnop;
173 {
174 	DB *dbp;
175 	PAGE *pagep, *lastp;
176 	DB_LSN new_lsn, null_lsn;
177 	DBT tmp_dbt;
178 	db_indx_t pagespace;
179 	u_int32_t sz;
180 	u_int8_t *p;
181 	int ret;
182 
183 	/*
184 	 * Allocate pages and copy the key/data item into them.  Calculate the
185 	 * number of bytes we get for pages we fill completely with a single
186 	 * item.
187 	 */
188 	dbp = dbc->dbp;
189 	pagespace = P_MAXSPACE(dbp->pgsize);
190 
191 	lastp = NULL;
192 	for (p = dbt->data,
193 	    sz = dbt->size; sz > 0; p += pagespace, sz -= pagespace) {
194 		/*
195 		 * Reduce pagespace so we terminate the loop correctly and
196 		 * don't copy too much data.
197 		 */
198 		if (sz < pagespace)
199 			pagespace = sz;
200 
201 		/*
202 		 * Allocate and initialize a new page and copy all or part of
203 		 * the item onto the page.  If sz is less than pagespace, we
204 		 * have a partial record.
205 		 */
206 		if ((ret = CDB___db_new(dbc, (P_OVERFLOW | dbp->tags), &pagep)) != 0)
207 			return (ret);
208 		if (DB_LOGGING(dbc)) {
209 			tmp_dbt.data = p;
210 			tmp_dbt.size = pagespace;
211 			ZERO_LSN(null_lsn);
212 			if ((ret = CDB___db_big_log(dbp->dbenv, dbc->txn,
213 			    &new_lsn, 0, DB_ADD_BIG, dbp->log_fileid,
214 			    PGNO(pagep), lastp ? PGNO(lastp) : PGNO_INVALID,
215 			    PGNO_INVALID, &tmp_dbt, &LSN(pagep),
216 			    lastp == NULL ? &null_lsn : &LSN(lastp),
217 			    &null_lsn)) != 0)
218 				return (ret);
219 
220 			/* Move lsn onto page. */
221 			if (lastp)
222 				LSN(lastp) = new_lsn;
223 			LSN(pagep) = new_lsn;
224 		}
225 
226 		P_INIT(pagep, dbp->pgsize,
227 		    PGNO(pagep), PGNO_INVALID, PGNO_INVALID, 0, P_OVERFLOW, dbp->tags);
228 		OV_LEN(pagep) = pagespace;
229 		OV_REF(pagep) = 1;
230 		memcpy((u_int8_t *)pagep + P_OVERHEAD, p, pagespace);
231 
232 		/*
233 		 * If this is the first entry, update the user's info.
234 		 * Otherwise, update the entry on the last page filled
235 		 * in and release that page.
236 		 */
237 		if (lastp == NULL)
238 			*pgnop = PGNO(pagep);
239 		else {
240 			lastp->next_pgno = PGNO(pagep);
241 			pagep->prev_pgno = PGNO(lastp);
242 			(void)CDB_memp_fput(dbp->mpf, lastp, DB_MPOOL_DIRTY);
243 		}
244 		lastp = pagep;
245 	}
246 	(void)CDB_memp_fput(dbp->mpf, lastp, DB_MPOOL_DIRTY);
247 	return (0);
248 }
249 
250 /*
251  * CDB___db_ovref --
252  *	Increment/decrement the reference count on an overflow page.
253  *
254  * PUBLIC: int CDB___db_ovref __P((DBC *, db_pgno_t, int32_t));
255  */
256 int
CDB___db_ovref(dbc,pgno,adjust)257 CDB___db_ovref(dbc, pgno, adjust)
258 	DBC *dbc;
259 	db_pgno_t pgno;
260 	int32_t adjust;
261 {
262 	DB *dbp;
263 	PAGE *h;
264 	int ret;
265 
266 	dbp = dbc->dbp;
267 	if ((ret = CDB_memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) {
268 		(void)CDB___db_pgerr(dbp, pgno);
269 		return (ret);
270 	}
271 
272 	if (DB_LOGGING(dbc))
273 		if ((ret = CDB___db_ovref_log(dbp->dbenv, dbc->txn,
274 		    &LSN(h), 0, dbp->log_fileid, h->pgno, adjust,
275 		    &LSN(h))) != 0)
276 			return (ret);
277 	OV_REF(h) += adjust;
278 
279 	(void)CDB_memp_fput(dbp->mpf, h, DB_MPOOL_DIRTY);
280 	return (0);
281 }
282 
283 /*
284  * CDB___db_doff --
285  *	Delete an offpage chain of overflow pages.
286  *
287  * PUBLIC: int CDB___db_doff __P((DBC *, db_pgno_t));
288  */
289 int
CDB___db_doff(dbc,pgno)290 CDB___db_doff(dbc, pgno)
291 	DBC *dbc;
292 	db_pgno_t pgno;
293 {
294 	DB *dbp;
295 	PAGE *pagep;
296 	DB_LSN null_lsn;
297 	DBT tmp_dbt;
298 	int ret;
299 
300 	dbp = dbc->dbp;
301 	do {
302 		if ((ret = CDB_memp_fget(dbp->mpf, &pgno, 0, &pagep)) != 0) {
303 			(void)CDB___db_pgerr(dbp, pgno);
304 			return (ret);
305 		}
306 
307 		/*
308 		 * If it's an overflow page and it's referenced by more than
309 		 * one key/data item, decrement the reference count and return.
310 		 */
311 		if (TYPE(pagep) == P_OVERFLOW && OV_REF(pagep) > 1) {
312 			(void)CDB_memp_fput(dbp->mpf, pagep, 0);
313 			return (CDB___db_ovref(dbc, pgno, -1));
314 		}
315 
316 		if (DB_LOGGING(dbc)) {
317 			tmp_dbt.data = (u_int8_t *)pagep + P_OVERHEAD;
318 			tmp_dbt.size = OV_LEN(pagep);
319 			ZERO_LSN(null_lsn);
320 			if ((ret = CDB___db_big_log(dbp->dbenv, dbc->txn,
321 			    &LSN(pagep), 0, DB_REM_BIG, dbp->log_fileid,
322 			    PGNO(pagep), PREV_PGNO(pagep), NEXT_PGNO(pagep),
323 			    &tmp_dbt, &LSN(pagep), &null_lsn, &null_lsn)) != 0)
324 				return (ret);
325 		}
326 		pgno = pagep->next_pgno;
327 		if ((ret = CDB___db_free(dbc, pagep)) != 0)
328 			return (ret);
329 	} while (pgno != PGNO_INVALID);
330 
331 	return (0);
332 }
333 
334 /*
335  * CDB___db_moff --
336  *	Match on overflow pages.
337  *
338  * Given a starting page number and a key, return <0, 0, >0 to indicate if the
339  * key on the page is less than, equal to or greater than the key specified.
340  * We optimize this by doing chunk at a time comparison unless the user has
341  * specified a comparison function.  In this case, we need to materialize
342  * the entire object and call their comparison routine.
343  *
344  * PUBLIC: int CDB___db_moff __P((DB *, const DBT *, db_pgno_t, u_int32_t,
345  * PUBLIC:     int (*)(const DBT *, const DBT *), int *));
346  */
347 int
CDB___db_moff(dbp,dbt,pgno,tlen,cmpfunc,cmpp)348 CDB___db_moff(dbp, dbt, pgno, tlen, cmpfunc, cmpp)
349 	DB *dbp;
350 	const DBT *dbt;
351 	db_pgno_t pgno;
352 	u_int32_t tlen;
353 	int (*cmpfunc) __P((const DBT *, const DBT *));
354 	int *cmpp;
355 {
356 	PAGE *pagep;
357 	DBT local_dbt;
358 	void *buf;
359 	u_int32_t bufsize, cmp_bytes, key_left;
360 	u_int8_t *p1, *p2;
361 	int ret;
362 
363 	/*
364 	 * If there is a user-specified comparison function, build a
365 	 * contiguous copy of the key, and call it.
366 	 */
367 	if (cmpfunc != NULL) {
368 		memset(&local_dbt, 0, sizeof(local_dbt));
369 		buf = NULL;
370 		bufsize = 0;
371 
372 		if ((ret = CDB___db_goff(dbp,
373 		    &local_dbt, tlen, pgno, &buf, &bufsize)) != 0)
374 			return (ret);
375 		/* Pass the key as the first argument */
376 		*cmpp = cmpfunc(dbt, &local_dbt);
377 		CDB___os_free(buf, bufsize);
378 		return (0);
379 	}
380 
381 	/* While there are both keys to compare. */
382 	for (*cmpp = 0, p1 = dbt->data,
383 	    key_left = dbt->size; key_left > 0 && pgno != PGNO_INVALID;) {
384 		if ((ret = CDB_memp_fget(dbp->mpf, &pgno, 0, &pagep)) != 0)
385 			return (ret);
386 
387 		cmp_bytes = OV_LEN(pagep) < key_left ? OV_LEN(pagep) : key_left;
388 		tlen -= cmp_bytes;
389 		key_left -= cmp_bytes;
390 		for (p2 =
391 		    (u_int8_t *)pagep + P_OVERHEAD; cmp_bytes-- > 0; ++p1, ++p2)
392 			if (*p1 != *p2) {
393 				*cmpp = (long)*p1 - (long)*p2;
394 				break;
395 			}
396 		pgno = NEXT_PGNO(pagep);
397 		if ((ret = CDB_memp_fput(dbp->mpf, pagep, 0)) != 0)
398 			return (ret);
399 		if (*cmpp != 0)
400 			return (0);
401 	}
402 	if (key_left > 0)		/* DBT is longer than the page key. */
403 		*cmpp = 1;
404 	else if (tlen > 0)		/* DBT is shorter than the page key. */
405 		*cmpp = -1;
406 	else
407 		*cmpp = 0;
408 
409 	return (0);
410 }
411 
412 /*
413  * CDB___db_vrfy_overflow --
414  *	Verify overflow page.
415  *
416  * PUBLIC: int CDB___db_vrfy_overflow __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t,
417  * PUBLIC:     u_int32_t));
418  */
419 int
CDB___db_vrfy_overflow(dbp,vdp,h,pgno,flags)420 CDB___db_vrfy_overflow(dbp, vdp, h, pgno, flags)
421 	DB *dbp;
422 	VRFY_DBINFO *vdp;
423 	PAGE *h;
424 	db_pgno_t pgno;
425 	u_int32_t flags;
426 {
427 	VRFY_PAGEINFO *pip;
428 	int isbad, ret, t_ret;
429 
430 	isbad = 0;
431 	if ((ret = CDB___db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
432 		return (ret);
433 
434 	if ((ret = CDB___db_vrfy_datapage(dbp, vdp, h, pgno, flags)) != 0) {
435 		if (ret == DB_VERIFY_BAD)
436 			isbad = 1;
437 		else
438 			goto err;
439 	}
440 
441 	pip->refcount = OV_REF(h);
442 	if (pip->refcount < 1) {
443 		EPRINT((dbp->dbenv, "Overflow page %lu has zero reference count",
444 		    pgno));
445 		isbad = 1;
446 	}
447 
448 	/* Just store for now. */
449 	pip->olen = HOFFSET(h);
450 
451 err:	if ((t_ret = CDB___db_vrfy_putpageinfo(vdp, pip)) != 0)
452 		ret = t_ret;
453 	return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
454 }
455 
456 /*
457  * CDB___db_vrfy_ovfl_structure --
458  *	Walk a list of overflow pages, avoiding cycles and marking
459  *	pages seen.
460  *
461  * PUBLIC: int CDB___db_vrfy_ovfl_structure
462  * PUBLIC:     __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t, u_int32_t));
463  */
464 int
CDB___db_vrfy_ovfl_structure(dbp,vdp,pgno,tlen,flags)465 CDB___db_vrfy_ovfl_structure(dbp, vdp, pgno, tlen, flags)
466 	DB *dbp;
467 	VRFY_DBINFO *vdp;
468 	db_pgno_t pgno;
469 	u_int32_t tlen;
470 	u_int32_t flags;
471 {
472 	DB *pgset;
473 	VRFY_PAGEINFO *pip;
474 	db_pgno_t next, prev;
475 	int isbad, p, ret, t_ret;
476 	u_int32_t refcount;
477 
478 	pgset = vdp->pgset;
479 	DB_ASSERT(pgset != NULL);
480 	isbad = 0;
481 
482 	/* This shouldn't happen, but just to be sure. */
483 	if (!IS_VALID_PGNO(pgno))
484 		return (DB_VERIFY_BAD);
485 
486 	/*
487 	 * Check the first prev_pgno;  it ought to be PGNO_INVALID,
488 	 * since there's no prev page.
489 	 */
490 	if ((ret = CDB___db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
491 		return (ret);
492 
493 	/* The refcount is stored on the first overflow page. */
494 	refcount = pip->refcount;
495 
496 	if (pip->type != P_OVERFLOW) {
497 		EPRINT((dbp->dbenv,
498 		    "Overflow page %lu of invalid type", pgno, pip->type));
499 		ret = DB_VERIFY_BAD;
500 		goto err;		/* Unsafe to continue. */
501 	}
502 
503 	prev = pip->prev_pgno;
504 	if (prev != PGNO_INVALID) {
505 		EPRINT((dbp->dbenv,
506 		    "First overflow page %lu has a prev_pgno", pgno));
507 		isbad = 1;
508 	}
509 
510 	for (;;) {
511 		/*
512 		 * This is slightly gross.  Btree leaf pages reference
513 		 * individual overflow trees multiple times if the overflow page
514 		 * is the key to a duplicate set.  The reference count does not
515 		 * reflect this multiple referencing.  Thus, if this is called
516 		 * during the structure verification of a btree leaf page, we
517 		 * check to see whether we've seen it from a leaf page before
518 		 * and, if we have, adjust our count of how often we've seen it
519 		 * accordingly.
520 		 *
521 		 * (This will screw up if it's actually referenced--and
522 		 * correctly refcounted--from two different leaf pages, but
523 		 * that's a very unlikely brokenness that we're not checking for
524 		 * anyway.)
525 		 */
526 
527 		if (LF_ISSET(ST_OVFL_LEAF)) {
528 			if (F_ISSET(pip, VRFY_OVFL_LEAFSEEN)) {
529 				if ((ret =
530 				    CDB___db_vrfy_pgset_dec(pgset, pgno)) != 0)
531 					goto err;
532 			} else
533 				F_SET(pip, VRFY_OVFL_LEAFSEEN);
534 		}
535 
536 		if ((ret = CDB___db_vrfy_pgset_get(pgset, pgno, &p)) != 0)
537 			goto err;
538 
539 		/*
540 		 * We may have seen this elsewhere, if the overflow entry
541 		 * has been promoted to an internal page.
542 		 */
543 		if ((u_int32_t)p > refcount) {
544 			EPRINT((dbp->dbenv,
545 			    "Page %lu encountered twice in overflow traversal",
546 			    pgno));
547 			ret = DB_VERIFY_BAD;
548 			goto err;
549 		}
550 		if ((ret = CDB___db_vrfy_pgset_inc(pgset, pgno)) != 0)
551 			goto err;
552 
553 		/* Keep a running tab on how much of the item we've seen. */
554 		tlen -= pip->olen;
555 
556 		next = pip->next_pgno;
557 
558 		/* Are we there yet? */
559 		if (next == PGNO_INVALID)
560 			break;
561 
562 		/*
563 		 * We've already checked this when we saved it, but just
564 		 * to be sure...
565 		 */
566 		if (!IS_VALID_PGNO(next)) {
567 			DB_ASSERT(0);
568 			EPRINT((dbp->dbenv,
569 			    "Overflow page %lu has bad next_pgno",
570 			    pgno));
571 			ret = DB_VERIFY_BAD;
572 			goto err;
573 		}
574 
575 		if ((ret = CDB___db_vrfy_putpageinfo(vdp, pip)) != 0 ||
576 		    (ret = CDB___db_vrfy_getpageinfo(vdp, next, &pip)) != 0)
577 			return (ret);
578 		if (pip->prev_pgno != pgno) {
579 			EPRINT((dbp->dbenv,
580 			    "Overflow page %lu has bogus prev_pgno value",
581 			    next));
582 			isbad = 1;
583 			/*
584 			 * It's safe to continue because we have separate
585 			 * cycle detection.
586 			 */
587 		}
588 
589 		pgno = next;
590 	}
591 
592 	if (tlen > 0) {
593 		isbad = 1;
594 		EPRINT((dbp->dbenv,
595 		    "Overflow item incomplete on page %lu", pgno));
596 	}
597 
598 err:	if ((t_ret = CDB___db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0)
599 		ret = t_ret;
600 	return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
601 }
602 
603 /*
604  * CDB___db_safe_goff --
605  *	Get an overflow item, very carefully, from an untrusted database,
606  *	in the context of the salvager.
607  *
608  * PUBLIC: int CDB___db_safe_goff __P((DB *, VRFY_DBINFO *, db_pgno_t,
609  * PUBLIC:     DBT *, void **, u_int32_t));
610  */
611 int
CDB___db_safe_goff(dbp,vdp,pgno,dbt,buf,flags)612 CDB___db_safe_goff(dbp, vdp, pgno, dbt, buf, flags)
613 	DB *dbp;
614 	VRFY_DBINFO *vdp;
615 	db_pgno_t pgno;
616 	DBT *dbt;
617 	void **buf;
618 	u_int32_t flags;
619 {
620 	PAGE *h;
621 	int ret, err_ret;
622 	u_int32_t bytesgot, bytes;
623 	u_int8_t *src, *dest;
624 
625 	ret = DB_VERIFY_BAD;
626 	err_ret = 0;
627 	bytesgot = bytes = 0;
628 
629 	while ((pgno != PGNO_INVALID) && (IS_VALID_PGNO(pgno))) {
630 		/*
631 		 * Mark that we're looking at this page;  if we've seen it
632 		 * already, quit.
633 		 */
634 		if ((ret = CDB___db_salvage_markdone(vdp, pgno)) != 0)
635 			break;
636 
637 		if ((ret = CDB_memp_fget(dbp->mpf, &pgno, 0, &h)) != 0)
638 			break;
639 
640 		/*
641 		 * Make sure it's really an overflow page, unless we're
642 		 * being aggressive, in which case we pretend it is.
643 		 */
644 		if (!LF_ISSET(DB_AGGRESSIVE) && TYPE(h) != P_OVERFLOW) {
645 			ret = DB_VERIFY_BAD;
646 			break;
647 		}
648 
649 		src = (u_int8_t *)h + P_OVERHEAD;
650 		bytes = OV_LEN(h);
651 
652 		if (bytes + P_OVERHEAD > dbp->pgsize)
653 			bytes = dbp->pgsize - P_OVERHEAD;
654 
655 		if ((ret = CDB___os_realloc(dbp->dbenv,
656 		    bytesgot + bytes, 0, buf)) != 0)
657 			break;
658 
659 		dest = (u_int8_t *)*buf + bytesgot;
660 		bytesgot += bytes;
661 
662 		memcpy(dest, src, bytes);
663 
664 		pgno = NEXT_PGNO(h);
665 		/* Not much we can do here--we don't want to quit. */
666 		if ((ret = CDB_memp_fput(dbp->mpf, h, 0)) != 0)
667 			err_ret = ret;
668 	}
669 
670 	if (ret == 0) {
671 		dbt->size = bytesgot;
672 		dbt->data = *buf;
673 	}
674 
675 	return ((err_ret != 0 && ret == 0) ? err_ret : ret);
676 }
677