1 /*-
2  * See the file LICENSE for redistribution information.
3  *
4  * Copyright (c) 2000, 2013 Oracle and/or its affiliates.  All rights reserved.
5  *
6  * $Id$
7  */
8 
9 #include "db_config.h"
10 
11 #include "db_int.h"
12 #include "dbinc/db_page.h"
13 #include "dbinc/db_swap.h"
14 #include "dbinc/db_verify.h"
15 #include "dbinc/btree.h"
16 #include "dbinc/fop.h"
17 #include "dbinc/hash.h"
18 #include "dbinc/heap.h"
19 #include "dbinc/lock.h"
20 #include "dbinc/mp.h"
21 #include "dbinc/qam.h"
22 #include "dbinc/txn.h"
23 
24 /*
25  * This is the code for DB->verify, the DB database consistency checker.
26  * For now, it checks all subdatabases in a database, and verifies
27  * everything it knows how to (i.e. it's all-or-nothing, and one can't
28  * check only for a subset of possible problems).
29  */
30 
/*
 * Forward declarations for the helpers that are private to this file.
 * Each parameter list is wrapped in the __P() macro, as is done for every
 * prototype in this file.
 */
static u_int __db_guesspgsize __P((ENV *, DB_FH *));
static int   __db_is_valid_magicno __P((u_int32_t, DBTYPE *));
static int   __db_meta2pgset
		__P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t, DB *));
static int   __db_salvage __P((DB *, VRFY_DBINFO *,
		db_pgno_t, void *, int (*)(void *, const void *), u_int32_t));
static int   __db_salvage_subdbpg __P((DB *, VRFY_DBINFO *,
		PAGE *, void *, int (*)(void *, const void *), u_int32_t));
static int   __db_salvage_all __P((DB *, VRFY_DBINFO *, void *,
		int(*)(void *, const void *), u_int32_t, int *));
static int   __db_salvage_unknowns __P((DB *, VRFY_DBINFO *, void *,
		int (*)(void *, const void *), u_int32_t));
static int   __db_verify_arg __P((DB *, const char *, void *, u_int32_t));
static int   __db_vrfy_freelist
		__P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t));
static int   __db_vrfy_getpagezero
		__P((DB *, DB_FH *, const char *, u_int8_t *, u_int32_t));
static int   __db_vrfy_invalid
		__P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t));
static int   __db_vrfy_orderchkonly __P((DB *,
		VRFY_DBINFO *, const char *, const char *, u_int32_t));
static int   __db_vrfy_pagezero __P((DB *,
		VRFY_DBINFO *, DB_FH *, const char *, u_int32_t));
static int   __db_vrfy_subdbs
		__P((DB *, VRFY_DBINFO *, const char *, u_int32_t));
static int   __db_vrfy_structure __P((DB *, VRFY_DBINFO *,
		const char *, db_pgno_t, void *, void *, u_int32_t));
static int   __db_vrfy_walkpages __P((DB *, VRFY_DBINFO *,
		void *, int (*)(void *, const void *), u_int32_t));

/*
 * VERIFY_FLAGS --
 *	The set of flag bits that DB->verify recognizes.  __db_verify_arg
 *	hands this mask to __db_fchk when checking the caller's flags.
 */
#define	VERIFY_FLAGS							\
    (DB_AGGRESSIVE |							\
     DB_NOORDERCHK | DB_ORDERCHKONLY | DB_PRINTABLE | DB_SALVAGE | DB_UNREF)
64 
65 /*
66  * __db_verify_pp --
67  *	DB->verify public interface.
68  *
69  * PUBLIC: int __db_verify_pp
70  * PUBLIC:     __P((DB *, const char *, const char *, FILE *, u_int32_t));
71  */
72 int
__db_verify_pp(dbp,file,database,outfile,flags)73 __db_verify_pp(dbp, file, database, outfile, flags)
74 	DB *dbp;
75 	const char *file, *database;
76 	FILE *outfile;
77 	u_int32_t flags;
78 {
79 	/*
80 	 * __db_verify_pp is a wrapper to __db_verify_internal, which lets
81 	 * us pass appropriate equivalents to FILE * in from the non-C APIs.
82 	 * That's why the usual ENV_ENTER macros are in __db_verify_internal,
83 	 * not here.
84 	 */
85 	return (__db_verify_internal(dbp,
86 	    file, database, outfile, __db_pr_callback, flags));
87 }
88 
89 /*
90  * __db_verify_internal --
91  *
92  * PUBLIC: int __db_verify_internal __P((DB *, const char *,
93  * PUBLIC:     const char *, void *, int (*)(void *, const void *), u_int32_t));
94  */
95 int
__db_verify_internal(dbp,fname,dname,handle,callback,flags)96 __db_verify_internal(dbp, fname, dname, handle, callback, flags)
97 	DB *dbp;
98 	const char *fname, *dname;
99 	void *handle;
100 	int (*callback) __P((void *, const void *));
101 	u_int32_t flags;
102 {
103 	DB_THREAD_INFO *ip;
104 	ENV *env;
105 	int ret, t_ret;
106 
107 	env = dbp->env;
108 
109 	DB_ILLEGAL_AFTER_OPEN(dbp, "DB->verify");
110 
111 	if (!LF_ISSET(DB_SALVAGE))
112 		LF_SET(DB_UNREF);
113 
114 	ENV_ENTER(env, ip);
115 
116 	if ((ret = __db_verify_arg(dbp, dname, handle, flags)) == 0)
117 		ret = __db_verify(dbp, ip,
118 		     fname, dname, handle, callback, NULL, NULL, flags);
119 
120 	/* Db.verify is a DB handle destructor. */
121 	if ((t_ret = __db_close(dbp, NULL, 0)) != 0 && ret == 0)
122 		ret = t_ret;
123 
124 	ENV_LEAVE(env, ip);
125 	return (ret);
126 }
127 
128 /*
129  * __db_verify_arg --
130  *	Check DB->verify arguments.
131  */
132 static int
__db_verify_arg(dbp,dname,handle,flags)133 __db_verify_arg(dbp, dname, handle, flags)
134 	DB *dbp;
135 	const char *dname;
136 	void *handle;
137 	u_int32_t flags;
138 {
139 	ENV *env;
140 	int ret;
141 
142 	env = dbp->env;
143 
144 	if ((ret = __db_fchk(env, "DB->verify", flags, VERIFY_FLAGS)) != 0)
145 		return (ret);
146 
147 	/*
148 	 * DB_SALVAGE is mutually exclusive with the other flags except
149 	 * DB_AGGRESSIVE, DB_PRINTABLE.
150 	 *
151 	 * DB_AGGRESSIVE and DB_PRINTABLE are only meaningful when salvaging.
152 	 *
153 	 * DB_SALVAGE requires an output stream.
154 	 */
155 	if (LF_ISSET(DB_SALVAGE)) {
156 		if (LF_ISSET(~(DB_AGGRESSIVE | DB_PRINTABLE | DB_SALVAGE)))
157 			return (__db_ferr(env, "DB->verify", 1));
158 		if (handle == NULL) {
159 			__db_errx(env, DB_STR("0518",
160 			    "DB_SALVAGE requires a an output handle"));
161 			return (EINVAL);
162 		}
163 	} else
164 		if (LF_ISSET(DB_AGGRESSIVE | DB_PRINTABLE))
165 			return (__db_ferr(env, "DB->verify", 1));
166 
167 	/*
168 	 * DB_ORDERCHKONLY is mutually exclusive with DB_SALVAGE and
169 	 * DB_NOORDERCHK, and requires a database name.
170 	 */
171 	if ((ret = __db_fcchk(env, "DB->verify", flags,
172 	    DB_ORDERCHKONLY, DB_SALVAGE | DB_NOORDERCHK)) != 0)
173 		return (ret);
174 	if (LF_ISSET(DB_ORDERCHKONLY) && dname == NULL) {
175 		__db_errx(env, DB_STR("0519",
176 		    "DB_ORDERCHKONLY requires a database name"));
177 		return (EINVAL);
178 	}
179 	return (0);
180 }
181 
182 /*
183  * __db_verify --
184  *	Walk the entire file page-by-page, either verifying with or without
185  *	dumping in db_dump -d format, or DB_SALVAGE-ing whatever key/data
186  *	pairs can be found and dumping them in standard (db_load-ready)
187  *	dump format.
188  *
189  *	(Salvaging isn't really a verification operation, but we put it
190  *	here anyway because it requires essentially identical top-level
191  *	code.)
192  *
193  *	flags may be 0, DB_NOORDERCHK, DB_ORDERCHKONLY, or DB_SALVAGE
194  *	(and optionally DB_AGGRESSIVE).
195  * PUBLIC: int   __db_verify __P((DB *, DB_THREAD_INFO *, const char *,
196  * PUBLIC:		const char *, void *, int (*)(void *, const void *),
197  * PUBLIC:		void *, void *, u_int32_t));
198  */
199 int
__db_verify(dbp,ip,name,subdb,handle,callback,lp,rp,flags)200 __db_verify(dbp, ip, name, subdb, handle, callback, lp, rp, flags)
201 	DB *dbp;
202 	DB_THREAD_INFO *ip;
203 	const char *name, *subdb;
204 	void *handle;
205 	int (*callback) __P((void *, const void *));
206 	void *lp, *rp;
207 	u_int32_t flags;
208 {
209 	DB_FH *fhp;
210 	ENV *env;
211 	VRFY_DBINFO *vdp;
212 	u_int32_t sflags;
213 	int has_subdbs, isbad, ret, t_ret;
214 	char *real_name;
215 
216 	env = dbp->env;
217 	fhp = NULL;
218 	vdp = NULL;
219 	real_name = NULL;
220 	has_subdbs = isbad = ret = t_ret = 0;
221 
222 	F_SET(dbp, DB_AM_VERIFYING);
223 
224 	/* Initialize any feedback function. */
225 	if (!LF_ISSET(DB_SALVAGE) && dbp->db_feedback != NULL)
226 		dbp->db_feedback(dbp, DB_VERIFY, 0);
227 
228 	/*
229 	 * We don't know how large the cache is, and if the database
230 	 * in question uses a small page size--which we don't know
231 	 * yet!--it may be uncomfortably small for the default page
232 	 * size [#2143].  However, the things we need temporary
233 	 * databases for in dbinfo are largely tiny, so using a
234 	 * 1024-byte pagesize is probably not going to be a big hit,
235 	 * and will make us fit better into small spaces.
236 	 */
237 	if ((ret = __db_vrfy_dbinfo_create(env, ip,  1024, &vdp)) != 0)
238 		goto err;
239 
240 	/*
241 	 * Note whether the user has requested that we use printable
242 	 * chars where possible.  We won't get here with this flag if
243 	 * we're not salvaging.
244 	 */
245 	if (LF_ISSET(DB_PRINTABLE))
246 		F_SET(vdp, SALVAGE_PRINTABLE);
247 
248 	if (name != NULL) {
249 		/* Find the real name of the file. */
250 		if ((ret = __db_appname(env,
251 		    DB_APP_DATA, name, &dbp->dirname, &real_name)) != 0)
252 			goto err;
253 
254 		/*
255 		 * Our first order of business is to verify page 0, which is the
256 		 * metadata page for the master database of subdatabases or of
257 		 * the only database in the file.  We want to do this by hand
258 		 * rather than just calling __db_open in case it's
259 		 * corrupt--various things in __db_open might act funny.
260 		 *
261 		 * Once we know the metadata page is healthy, I believe that
262 		 * it's safe to open the database normally and then use the page
263 		 * swapping code, which makes life easier.
264 		 */
265 		if ((ret = __os_open(env,
266 		    real_name, 0, DB_OSO_RDONLY, 0, &fhp)) != 0)
267 			goto err;
268 	} else {
269 		MAKE_INMEM(dbp);
270 	}
271 
272 	/* Verify the metadata page 0; set pagesize and type. */
273 	if ((ret = __db_vrfy_pagezero(dbp, vdp, fhp, subdb, flags)) != 0) {
274 		if (ret == DB_VERIFY_BAD)
275 			isbad = 1;
276 		else
277 			goto err;
278 	}
279 
280 	/*
281 	 * We can assume at this point that dbp->pagesize and dbp->type are
282 	 * set correctly, or at least as well as they can be, and that
283 	 * locking, logging, and txns are not in use.  Thus we can trust
284 	 * the memp code not to look at the page, and thus to be safe
285 	 * enough to use.
286 	 *
287 	 * The dbp is not open, but the file is open in the fhp, and we
288 	 * cannot assume that __db_open is safe.  Call __env_setup,
289 	 * the [safe] part of __db_open that initializes the environment--
290 	 * and the mpool--manually.
291 	 */
292 	if ((ret = __env_setup(dbp, NULL,
293 	    name, subdb, TXN_INVALID, DB_ODDFILESIZE | DB_RDONLY)) != 0)
294 		goto err;
295 
296 	/*
297 	 * Set our name in the Queue subsystem;  we may need it later
298 	 * to deal with extents.  In-memory databases are not allowed to have
299 	 * extents.
300 	 */
301 	if (dbp->type == DB_QUEUE && name != NULL &&
302 	    (ret = __qam_set_ext_data(dbp, name)) != 0)
303 		goto err;
304 
305 	/* Mark the dbp as opened, so that we correctly handle its close. */
306 	F_SET(dbp, DB_AM_OPEN_CALLED);
307 
308 	/*
309 	 * Find out the page number of the last page in the database.  We'll
310 	 * use this later to verify the metadata page.  We don't verify now
311 	 * because the data from __db_vrfy_pagezero could be stale.
312 	 */
313 	if ((ret = __memp_get_last_pgno(dbp->mpf, &vdp->last_pgno)) != 0)
314 		goto err;
315 	/*
316 	 * DB_ORDERCHKONLY is a special case;  our file consists of
317 	 * several subdatabases, which use different hash, bt_compare,
318 	 * and/or dup_compare functions.  Consequently, we couldn't verify
319 	 * sorting and hashing simply by calling DB->verify() on the file.
320 	 * DB_ORDERCHKONLY allows us to come back and check those things;  it
321 	 * requires a subdatabase, and assumes that everything but that
322 	 * database's sorting/hashing is correct.
323 	 */
324 	if (LF_ISSET(DB_ORDERCHKONLY)) {
325 		ret = __db_vrfy_orderchkonly(dbp, vdp, name, subdb, flags);
326 		goto done;
327 	}
328 
329 	sflags = flags;
330 	if (dbp->p_internal != NULL)
331 		LF_CLR(DB_SALVAGE);
332 
333 	/*
334 	 * When salvaging, we use a db to keep track of whether we've seen a
335 	 * given overflow or dup page in the course of traversing normal data.
336 	 * If in the end we have not, we assume its key got lost and print it
337 	 * with key "UNKNOWN".
338 	 */
339 	if (LF_ISSET(DB_SALVAGE)) {
340 		if ((ret = __db_salvage_init(vdp)) != 0)
341 			goto err;
342 
343 		/*
344 		 * If we're not being aggressive, salvage by walking the tree
345 		 * and only printing the leaves we find.  "has_subdbs" will
346 		 * indicate whether we found subdatabases.
347 		 */
348 		if (!LF_ISSET(DB_AGGRESSIVE) && __db_salvage_all(
349 		    dbp, vdp, handle, callback, flags, &has_subdbs) != 0)
350 			isbad = 1;
351 
352 		/*
353 		 * If we have subdatabases, flag if any keys are found that
354 		 * don't belong to a subdatabase -- they'll need to have an
355 		 * "__OTHER__" subdatabase header printed first.
356 		 */
357 		if (has_subdbs) {
358 			F_SET(vdp, SALVAGE_PRINTHEADER);
359 			F_SET(vdp, SALVAGE_HASSUBDBS);
360 		}
361 	}
362 
363 	/* Walk all the pages, if a page cannot be read, verify structure. */
364 	if ((ret =
365 	    __db_vrfy_walkpages(dbp, vdp, handle, callback, flags)) != 0) {
366 		if (ret == DB_VERIFY_BAD)
367 			isbad = 1;
368 		else if (ret != DB_PAGE_NOTFOUND)
369 			goto err;
370 	}
371 
372 	/* If we're verifying, verify inter-page structure. */
373 	if (!LF_ISSET(DB_SALVAGE) && isbad == 0)
374 		if ((t_ret = __db_vrfy_structure(dbp,
375 		    vdp, name, 0, lp, rp, flags)) != 0) {
376 			if (t_ret == DB_VERIFY_BAD)
377 				isbad = 1;
378 			else
379 				goto err;
380 		}
381 
382 	/*
383 	 * If we're salvaging, output with key UNKNOWN any overflow or dup pages
384 	 * we haven't been able to put in context.  Then destroy the salvager's
385 	 * state-saving database.
386 	 */
387 	if (LF_ISSET(DB_SALVAGE)) {
388 		if ((ret = __db_salvage_unknowns(dbp,
389 		    vdp, handle, callback, flags)) != 0)
390 			isbad = 1;
391 	}
392 
393 	flags = sflags;
394 
395 #ifdef HAVE_PARTITION
396 	if (t_ret == 0 && dbp->p_internal != NULL)
397 		t_ret = __part_verify(dbp, vdp, name, handle, callback, flags);
398 #endif
399 
400 	if (ret == 0)
401 		ret = t_ret;
402 
403 	/* Don't display a footer for a database holding other databases. */
404 	if (LF_ISSET(DB_SALVAGE | DB_VERIFY_PARTITION) == DB_SALVAGE &&
405 	    (!has_subdbs || F_ISSET(vdp, SALVAGE_PRINTFOOTER)))
406 		(void)__db_prfooter(handle, callback);
407 
408 done: err:
409 	/* Send feedback that we're done. */
410 	if (!LF_ISSET(DB_SALVAGE) && dbp->db_feedback != NULL)
411 		dbp->db_feedback(dbp, DB_VERIFY, 100);
412 
413 	if (LF_ISSET(DB_SALVAGE) &&
414 	    (t_ret = __db_salvage_destroy(vdp)) != 0 && ret == 0)
415 		ret = t_ret;
416 	if (fhp != NULL &&
417 	    (t_ret = __os_closehandle(env, fhp)) != 0 && ret == 0)
418 		ret = t_ret;
419 	if (vdp != NULL &&
420 	    (t_ret = __db_vrfy_dbinfo_destroy(env, vdp)) != 0 && ret == 0)
421 		ret = t_ret;
422 	if (real_name != NULL)
423 		__os_free(env, real_name);
424 
425 	/*
426 	 * DB_VERIFY_FATAL is a private error, translate to a public one.
427 	 *
428 	 * If we didn't find a page, it's probably a page number was corrupted.
429 	 * Return the standard corruption error.
430 	 *
431 	 * Otherwise, if we found corruption along the way, set the return.
432 	 */
433 	if (ret == DB_VERIFY_FATAL ||
434 	    ret == DB_PAGE_NOTFOUND || (ret == 0 && isbad == 1))
435 		ret = DB_VERIFY_BAD;
436 
437 	/* Make sure there's a public complaint if we found corruption. */
438 	if (ret != 0)
439 		__db_err(env, ret, "%s", name);
440 
441 	return (ret);
442 }
443 
444 /*
445  * __db_vrfy_getpagezero --
446  *      Store the master metadata page into a local buffer.  For safety, skip
447  *      the DB paging code and read the page directly from disk (via seek and
448  *      read) or the mpool.
449  */
450 static int
__db_vrfy_getpagezero(dbp,fhp,name,mbuf,flags)451 __db_vrfy_getpagezero(dbp, fhp, name, mbuf, flags)
452 	DB *dbp;
453 	DB_FH *fhp;
454 	const char *name;
455 	u_int8_t *mbuf;
456 	u_int32_t flags;
457 {
458 	DB_MPOOLFILE *mpf;
459 	ENV *env;
460 	PAGE *h;
461 	db_pgno_t pgno;
462 	int ret, t_ret;
463 	size_t nr;
464 
465 	env = dbp->env;
466 
467 	if (F_ISSET(dbp, DB_AM_INMEM)) {
468 		/*
469 		 * Now get the metadata page from the cache, if possible.  If
470 		 * we're verifying an in-memory db, this is the only metadata
471 		 * page we have.
472 		 *
473 		 *
474 		 * Open the in-memory db file and get the metadata page.
475 		 */
476 		if ((ret = __memp_fcreate_pp(env->dbenv, &mpf, DB_VERIFY)) != 0)
477 			return (ret);
478 		if ((ret = __memp_set_flags(mpf, DB_MPOOL_NOFILE, 1)) != 0)
479 			goto mpf_err;
480 		if ((ret = __memp_fopen_pp(mpf,
481 		    name, DB_ODDFILESIZE | DB_RDONLY, 0, 0)) != 0)
482 			goto mpf_err;
483 		pgno = PGNO_BASE_MD;
484 		if ((ret = __memp_fget_pp(mpf, &pgno, NULL, 0, &h)) != 0) {
485 			__db_err(env, ret, DB_STR_A("0747",
486 			    "Metadata page %lu cannot be read from mpool",
487 			    "%lu"), (u_long)pgno);
488 			goto mpf_err;
489 		}
490 		memcpy(mbuf, (u_int8_t *)h, DBMETASIZE);
491 		ret = __memp_fput_pp(mpf, h, DB_PRIORITY_UNCHANGED, 0);
492 mpf_err:	if ((t_ret = __memp_fclose_pp(mpf, 0)) != 0 || ret != 0) {
493 			return (ret == 0 ? t_ret : ret);
494 		}
495 	} else {
496 		/*
497 		 * Seek to the metadata page.
498 		 *
499 		 * Note that if we're just starting a verification, dbp->pgsize
500 		 * may be zero;  this is okay, as we want page zero anyway and
501 		 * 0*0 == 0.
502 		 */
503 		if ((ret = __os_seek(env, fhp, 0, 0, 0)) != 0 ||
504 		    (ret = __os_read(env, fhp, mbuf, DBMETASIZE, &nr)) != 0) {
505 			__db_err(env, ret, DB_STR_A("0520",
506 			    "Metadata page %lu cannot be read", "%lu"),
507 			    (u_long)PGNO_BASE_MD);
508 			return (ret);
509 		}
510 
511 		if (nr != DBMETASIZE) {
512 			EPRINT((env, DB_STR_A("0521",
513 			    "Page %lu: Incomplete metadata page", "%lu"),
514 			    (u_long)PGNO_BASE_MD));
515 			return (DB_VERIFY_FATAL);
516 		}
517 	}
518 
519 	return (ret);
520 }
521 
522 /*
523  * __db_vrfy_pagezero --
524  *	Verify the master metadata page.  Use seek, read, and a local buffer
525  *	rather than the DB paging code, for safety.
526  *
527  *	Must correctly (or best-guess) set dbp->type and dbp->pagesize.
528  */
529 static int
__db_vrfy_pagezero(dbp,vdp,fhp,name,flags)530 __db_vrfy_pagezero(dbp, vdp, fhp, name, flags)
531 	DB *dbp;
532 	VRFY_DBINFO *vdp;
533 	DB_FH *fhp;
534 	const char *name;
535 	u_int32_t flags;
536 {
537 	DBMETA *meta;
538 	ENV *env;
539 	VRFY_PAGEINFO *pip;
540 	db_pgno_t freelist;
541 	int isbad, ret, swapped;
542 	u_int8_t mbuf[DBMETASIZE];
543 
544 	isbad = ret = swapped = 0;
545 	freelist = 0;
546 	env = dbp->env;
547 	meta = (DBMETA *)mbuf;
548 	dbp->type = DB_UNKNOWN;
549 
550 	if ((ret = __db_vrfy_getpagezero(dbp, fhp, name, mbuf, flags)) != 0)
551 		return (ret);
552 
553 	if ((ret = __db_vrfy_getpageinfo(vdp, PGNO_BASE_MD, &pip)) != 0)
554 		return (ret);
555 
556 	if ((ret = __db_chk_meta(env, dbp, meta, 1)) != 0) {
557 		EPRINT((env, DB_STR_A("0522",
558 		    "Page %lu: metadata page corrupted", "%lu"),
559 		    (u_long)PGNO_BASE_MD));
560 		isbad = 1;
561 		if (ret != DB_CHKSUM_FAIL) {
562 			EPRINT((env, DB_STR_A("0523",
563 			    "Page %lu: could not check metadata page", "%lu"),
564 			    (u_long)PGNO_BASE_MD));
565 			return (DB_VERIFY_FATAL);
566 		}
567 	}
568 
569 	/*
570 	 * Check all of the fields that we can.
571 	 *
572 	 * 08-11: Current page number.  Must == pgno.
573 	 * Note that endianness doesn't matter--it's zero.
574 	 */
575 	if (meta->pgno != PGNO_BASE_MD) {
576 		isbad = 1;
577 		EPRINT((env, DB_STR_A("0524",
578 		    "Page %lu: pgno incorrectly set to %lu", "%lu %lu"),
579 		    (u_long)PGNO_BASE_MD, (u_long)meta->pgno));
580 	}
581 
582 	/* 12-15: Magic number.  Must be one of valid set. */
583 	if (__db_is_valid_magicno(meta->magic, &dbp->type))
584 		swapped = 0;
585 	else {
586 		M_32_SWAP(meta->magic);
587 		if (__db_is_valid_magicno(meta->magic,
588 		    &dbp->type))
589 			swapped = 1;
590 		else {
591 			isbad = 1;
592 			EPRINT((env, DB_STR_A("0525",
593 			    "Page %lu: bad magic number %lu", "%lu %lu"),
594 			    (u_long)PGNO_BASE_MD, (u_long)meta->magic));
595 		}
596 	}
597 
598 	/*
599 	 * 16-19: Version.  Must be current;  for now, we
600 	 * don't support verification of old versions.
601 	 */
602 	if (swapped)
603 		M_32_SWAP(meta->version);
604 	if ((dbp->type == DB_BTREE &&
605 	    (meta->version > DB_BTREEVERSION ||
606 	    meta->version < DB_BTREEOLDVER)) ||
607 	    (dbp->type == DB_HASH &&
608 	    (meta->version > DB_HASHVERSION ||
609 	    meta->version < DB_HASHOLDVER)) ||
610 	    (dbp->type == DB_HEAP &&
611 	    (meta->version > DB_HEAPVERSION ||
612 	    meta->version < DB_HEAPOLDVER)) ||
613 	    (dbp->type == DB_QUEUE &&
614 	    (meta->version > DB_QAMVERSION ||
615 	    meta->version < DB_QAMOLDVER))) {
616 		isbad = 1;
617 		EPRINT((env, DB_STR_A("0526",
618     "Page %lu: unsupported DB version %lu; extraneous errors may result",
619 		    "%lu %lu"), (u_long)PGNO_BASE_MD, (u_long)meta->version));
620 	}
621 
622 	/*
623 	 * 20-23: Pagesize.  Must be power of two,
624 	 * greater than 512, and less than 64K.
625 	 */
626 	if (swapped)
627 		M_32_SWAP(meta->pagesize);
628 	if (IS_VALID_PAGESIZE(meta->pagesize))
629 		dbp->pgsize = meta->pagesize;
630 	else {
631 		isbad = 1;
632 		EPRINT((env, DB_STR_A("0527", "Page %lu: bad page size %lu",
633 		    "%lu %lu"), (u_long)PGNO_BASE_MD, (u_long)meta->pagesize));
634 
635 		/*
636 		 * Now try to settle on a pagesize to use.
637 		 * If the user-supplied one is reasonable,
638 		 * use it;  else, guess.
639 		 */
640 		if (!IS_VALID_PAGESIZE(dbp->pgsize))
641 			dbp->pgsize = __db_guesspgsize(env, fhp);
642 	}
643 
644 	/*
645 	 * 25: Page type.  Must be correct for dbp->type,
646 	 * which is by now set as well as it can be.
647 	 */
648 	/* Needs no swapping--only one byte! */
649 	if ((dbp->type == DB_BTREE && meta->type != P_BTREEMETA) ||
650 	    (dbp->type == DB_HASH && meta->type != P_HASHMETA) ||
651 	    (dbp->type == DB_HEAP && meta->type != P_HEAPMETA) ||
652 	    (dbp->type == DB_QUEUE && meta->type != P_QAMMETA)) {
653 		isbad = 1;
654 		EPRINT((env, DB_STR_A("0528", "Page %lu: bad page type %lu",
655 		    "%lu %lu"), (u_long)PGNO_BASE_MD, (u_long)meta->type));
656 	}
657 
658 	/*
659 	 * 26: Meta-flags.
660 	 */
661 	if (meta->metaflags != 0) {
662 		if (FLD_ISSET(meta->metaflags,
663 		    ~(DBMETA_CHKSUM|DBMETA_PART_RANGE|DBMETA_PART_CALLBACK))) {
664 			isbad = 1;
665 			EPRINT((env, DB_STR_A("0529",
666 			    "Page %lu: bad meta-data flags value %#lx",
667 			    "%lu %#lx"), (u_long)PGNO_BASE_MD,
668 			    (u_long)meta->metaflags));
669 		}
670 		if (FLD_ISSET(meta->metaflags, DBMETA_CHKSUM))
671 			F_SET(pip, VRFY_HAS_CHKSUM);
672 		if (FLD_ISSET(meta->metaflags, DBMETA_PART_RANGE))
673 			F_SET(pip, VRFY_HAS_PART_RANGE);
674 		if (FLD_ISSET(meta->metaflags, DBMETA_PART_CALLBACK))
675 			F_SET(pip, VRFY_HAS_PART_CALLBACK);
676 
677 		if (FLD_ISSET(meta->metaflags,
678 		    DBMETA_PART_RANGE | DBMETA_PART_CALLBACK) &&
679 		    (ret = __partition_init(dbp, meta->metaflags)) != 0)
680 			return (ret);
681 	}
682 
683 	/*
684 	 * 28-31: Free list page number.
685 	 * 32-35: Last page in database file.
686 	 * We'll verify last_pgno once we open the db in the mpool;
687 	 * for now, just store it.
688 	 */
689 	if (swapped)
690 	    M_32_SWAP(meta->free);
691 	freelist = meta->free;
692 	if (swapped)
693 	    M_32_SWAP(meta->last_pgno);
694 	vdp->meta_last_pgno = meta->last_pgno;
695 
696 	/*
697 	 * Initialize vdp->pages to fit a single pageinfo structure for
698 	 * this one page.  We'll realloc later when we know how many
699 	 * pages there are.
700 	 */
701 	pip->pgno = PGNO_BASE_MD;
702 	pip->type = meta->type;
703 
704 	/*
705 	 * Signal that we still have to check the info specific to
706 	 * a given type of meta page.
707 	 */
708 	F_SET(pip, VRFY_INCOMPLETE);
709 
710 	pip->free = freelist;
711 
712 	if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0)
713 		return (ret);
714 
715 	/* Set up the dbp's fileid.  We don't use the regular open path. */
716 	memcpy(dbp->fileid, meta->uid, DB_FILE_ID_LEN);
717 	dbp->preserve_fid = 1;
718 
719 	if (swapped == 1)
720 		F_SET(dbp, DB_AM_SWAP);
721 
722 	return (isbad ? DB_VERIFY_BAD : 0);
723 }
724 
725 /*
726  * __db_vrfy_walkpages --
727  *	Main loop of the verifier/salvager.  Walks through,
728  *	page by page, and verifies all pages and/or prints all data pages.
729  */
730 static int
__db_vrfy_walkpages(dbp,vdp,handle,callback,flags)731 __db_vrfy_walkpages(dbp, vdp, handle, callback, flags)
732 	DB *dbp;
733 	VRFY_DBINFO *vdp;
734 	void *handle;
735 	int (*callback) __P((void *, const void *));
736 	u_int32_t flags;
737 {
738 	DB_MPOOLFILE *mpf;
739 	ENV *env;
740 	PAGE *h;
741 	VRFY_PAGEINFO *pip;
742 	db_pgno_t i;
743 	int ret, t_ret, isbad;
744 
745 	env = dbp->env;
746 	mpf = dbp->mpf;
747 	h = NULL;
748 	ret = isbad = t_ret = 0;
749 
750 	for (i = 0; i <= vdp->last_pgno; i++) {
751 		/*
752 		 * If DB_SALVAGE is set, we inspect our database of completed
753 		 * pages, and skip any we've already printed in the subdb pass.
754 		 */
755 		if (LF_ISSET(DB_SALVAGE) && (__db_salvage_isdone(vdp, i) != 0))
756 			continue;
757 
758 		/*
759 		 * An individual page get can fail if:
760 		 *  * This is a hash database, it is expected to find
761 		 *    empty buckets, which don't have allocated pages. Create
762 		 *    a dummy page so the verification can proceed.
763 		 *  * We are salvaging, flag the error and continue.
764 		 */
765 		if ((t_ret = __memp_fget(mpf, &i,
766 		    vdp->thread_info, NULL, 0, &h)) != 0) {
767 			if (dbp->type == DB_HASH ||
768 			    (dbp->type == DB_QUEUE &&
769 			    F_ISSET(dbp, DB_AM_INMEM))) {
770 				if ((t_ret =
771 				    __db_vrfy_getpageinfo(vdp, i, &pip)) != 0)
772 					goto err1;
773 				pip->type = P_INVALID;
774 				pip->pgno = i;
775 				F_CLR(pip, VRFY_IS_ALLZEROES);
776 				F_SET(pip, VRFY_NONEXISTENT);
777 				if ((t_ret = __db_vrfy_putpageinfo(
778 				    env, vdp, pip)) != 0)
779 					goto err1;
780 				continue;
781 			}
782 			if (t_ret == DB_PAGE_NOTFOUND) {
783 				EPRINT((env, DB_STR_A("0530",
784     "Page %lu: beyond the end of the file, metadata page has last page as %lu",
785 				    "%lu %lu"), (u_long)i,
786 				    (u_long)vdp->last_pgno));
787 				if (ret == 0)
788 					return (t_ret);
789 			}
790 
791 err1:			if (ret == 0)
792 				ret = t_ret;
793 			if (LF_ISSET(DB_SALVAGE))
794 				continue;
795 			return (ret);
796 		}
797 
798 		if (LF_ISSET(DB_SALVAGE)) {
799 			/*
800 			 * We pretty much don't want to quit unless a
801 			 * bomb hits.  May as well return that something
802 			 * was screwy, however.
803 			 */
804 			if ((t_ret = __db_salvage_pg(dbp,
805 			    vdp, i, h, handle, callback, flags)) != 0) {
806 				if (ret == 0)
807 					ret = t_ret;
808 				isbad = 1;
809 			}
810 		} else {
811 			/*
812 			 * If we are not salvaging, and we get any error
813 			 * other than DB_VERIFY_BAD, return immediately;
814 			 * it may not be safe to proceed.  If we get
815 			 * DB_VERIFY_BAD, keep going;  listing more errors
816 			 * may make it easier to diagnose problems and
817 			 * determine the magnitude of the corruption.
818 			 *
819 			 * Verify info common to all page types.
820 			 */
821 			if (i != PGNO_BASE_MD) {
822 				ret = __db_vrfy_common(dbp, vdp, h, i, flags);
823 				if (ret == DB_VERIFY_BAD)
824 					isbad = 1;
825 				else if (ret != 0)
826 					goto err;
827 			}
828 
829 			switch (TYPE(h)) {
830 			case P_INVALID:
831 				ret = __db_vrfy_invalid(dbp, vdp, h, i, flags);
832 				break;
833 			case __P_DUPLICATE:
834 				isbad = 1;
835 				EPRINT((env, DB_STR_A("0531",
836 				    "Page %lu: old-style duplicate page",
837 				    "%lu"), (u_long)i));
838 				break;
839 			case P_HASH_UNSORTED:
840 			case P_HASH:
841 				ret = __ham_vrfy(dbp, vdp, h, i, flags);
842 				break;
843 			case P_HEAP:
844 			case P_IHEAP:
845 				ret = __heap_vrfy(dbp, vdp, h, i, flags);
846 				break;
847 			case P_IBTREE:
848 			case P_IRECNO:
849 			case P_LBTREE:
850 			case P_LDUP:
851 				ret = __bam_vrfy(dbp, vdp, h, i, flags);
852 				break;
853 			case P_LRECNO:
854 				ret = __ram_vrfy_leaf(dbp, vdp, h, i, flags);
855 				break;
856 			case P_OVERFLOW:
857 				ret = __db_vrfy_overflow(dbp, vdp, h, i, flags);
858 				break;
859 			case P_HASHMETA:
860 				ret = __ham_vrfy_meta(dbp,
861 				    vdp, (HMETA *)h, i, flags);
862 				break;
863 			case P_HEAPMETA:
864 				ret = __heap_vrfy_meta(dbp,
865 				    vdp, (HEAPMETA *)h, i, flags);
866 				break;
867 			case P_BTREEMETA:
868 				ret = __bam_vrfy_meta(dbp,
869 				    vdp, (BTMETA *)h, i, flags);
870 				break;
871 			case P_QAMMETA:
872 				ret = __qam_vrfy_meta(dbp,
873 				    vdp, (QMETA *)h, i, flags);
874 				break;
875 			case P_QAMDATA:
876 				ret = __qam_vrfy_data(dbp,
877 				    vdp, (QPAGE *)h, i, flags);
878 				break;
879 			default:
880 				EPRINT((env, DB_STR_A("0532",
881 				    "Page %lu: unknown page type %lu",
882 				    "%lu %lu"), (u_long)i, (u_long)TYPE(h)));
883 				isbad = 1;
884 				break;
885 			}
886 
887 			/*
888 			 * Set up error return.
889 			 */
890 			if (ret == DB_VERIFY_BAD)
891 				isbad = 1;
892 			else if (ret != 0)
893 				goto err;
894 
895 			/*
896 			 * Provide feedback to the application about our
897 			 * progress.  The range 0-50% comes from the fact
898 			 * that this is the first of two passes through the
899 			 * database (front-to-back, then top-to-bottom).
900 			 */
901 			if (dbp->db_feedback != NULL)
902 				dbp->db_feedback(dbp, DB_VERIFY,
903 				    (int)((i + 1) * 50 / (vdp->last_pgno + 1)));
904 		}
905 
906 		/*
907 		 * Just as with the page get, bail if and only if we're
908 		 * not salvaging.
909 		 */
910 		if ((t_ret = __memp_fput(mpf,
911 		    vdp->thread_info, h, dbp->priority)) != 0) {
912 			if (ret == 0)
913 				ret = t_ret;
914 			if (!LF_ISSET(DB_SALVAGE))
915 				return (ret);
916 		}
917 	}
918 
919 	/*
920 	 * If we've seen a Queue metadata page, we may need to walk Queue
921 	 * extent pages that won't show up between 0 and vdp->last_pgno.
922 	 */
923 	if (F_ISSET(vdp, VRFY_QMETA_SET) && (t_ret =
924 	    __qam_vrfy_walkqueue(dbp, vdp, handle, callback, flags)) != 0) {
925 		if (ret == 0)
926 			ret = t_ret;
927 		if (t_ret == DB_VERIFY_BAD)
928 			isbad = 1;
929 		else if (!LF_ISSET(DB_SALVAGE))
930 			return (ret);
931 	}
932 
933 	if (0) {
934 err:		if (h != NULL && (t_ret = __memp_fput(mpf,
935 		    vdp->thread_info, h, dbp->priority)) != 0)
936 			return (ret == 0 ? t_ret : ret);
937 	}
938 
939 	return ((isbad == 1 && ret == 0) ? DB_VERIFY_BAD : ret);
940 }
941 
942 /*
943  * __db_vrfy_structure--
944  *	After a beginning-to-end walk through the database has been
945  *	completed, put together the information that has been collected
946  *	to verify the overall database structure.
947  *
948  *	Should only be called if we want to do a database verification,
949  *	i.e. if DB_SALVAGE is not set.
950  */
951 static int
__db_vrfy_structure(dbp,vdp,dbname,meta_pgno,lp,rp,flags)952 __db_vrfy_structure(dbp, vdp, dbname, meta_pgno, lp, rp, flags)
953 	DB *dbp;
954 	VRFY_DBINFO *vdp;
955 	const char *dbname;
956 	db_pgno_t meta_pgno;
957 	void *lp, *rp;
958 	u_int32_t flags;
959 {
960 	DB *pgset;
961 	ENV *env;
962 	VRFY_PAGEINFO *pip;
963 	db_pgno_t i;
964 	int ret, isbad, hassubs, p;
965 
966 	isbad = 0;
967 	pip = NULL;
968 	env = dbp->env;
969 	pgset = vdp->pgset;
970 
971 	/*
972 	 * Providing feedback here is tricky;  in most situations,
973 	 * we fetch each page one more time, but we do so in a top-down
974 	 * order that depends on the access method.  Worse, we do this
975 	 * recursively in btree, such that on any call where we're traversing
976 	 * a subtree we don't know where that subtree is in the whole database;
977 	 * worse still, any given database may be one of several subdbs.
978 	 *
979 	 * The solution is to decrement a counter vdp->pgs_remaining each time
980 	 * we verify (and call feedback on) a page.  We may over- or
981 	 * under-count, but the structure feedback function will ensure that we
982 	 * never give a percentage under 50 or over 100.  (The first pass
983 	 * covered the range 0-50%.)
984 	 */
985 	if (dbp->db_feedback != NULL)
986 		vdp->pgs_remaining = vdp->last_pgno + 1;
987 
988 	/*
989 	 * Call the appropriate function to downwards-traverse the db type.
990 	 */
991 	switch (dbp->type) {
992 	case DB_BTREE:
993 	case DB_RECNO:
994 		if ((ret =
995 		    __bam_vrfy_structure(dbp, vdp, 0, lp, rp, flags)) != 0) {
996 			if (ret == DB_VERIFY_BAD)
997 				isbad = 1;
998 			else
999 				goto err;
1000 		}
1001 
1002 		/*
1003 		 * If we have subdatabases and we know that the database is,
1004 		 * thus far, sound, it's safe to walk the tree of subdatabases.
1005 		 * Do so, and verify the structure of the databases within.
1006 		 */
1007 		if ((ret = __db_vrfy_getpageinfo(vdp, 0, &pip)) != 0)
1008 			goto err;
1009 		hassubs = F_ISSET(pip, VRFY_HAS_SUBDBS) ? 1 : 0;
1010 		if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0)
1011 			goto err;
1012 		pip = NULL;
1013 
1014 		if (isbad == 0 && hassubs)
1015 			if ((ret =
1016 			    __db_vrfy_subdbs(dbp, vdp, dbname, flags)) != 0) {
1017 				if (ret == DB_VERIFY_BAD)
1018 					isbad = 1;
1019 				else
1020 					goto err;
1021 			}
1022 		break;
1023 	case DB_HASH:
1024 		if ((ret = __ham_vrfy_structure(dbp, vdp, 0, flags)) != 0) {
1025 			if (ret == DB_VERIFY_BAD)
1026 				isbad = 1;
1027 			else
1028 				goto err;
1029 		}
1030 		break;
1031 	case DB_HEAP:
1032 		if ((ret = __heap_vrfy_structure(dbp, vdp, flags)) != 0) {
1033 			if (ret == DB_VERIFY_BAD)
1034 				isbad = 1;
1035 		}
1036 		/* Skip the freelist check for heap, it doesn't apply. */
1037 		goto err;
1038 	case DB_QUEUE:
1039 		if ((ret = __qam_vrfy_structure(dbp, vdp, flags)) != 0) {
1040 			if (ret == DB_VERIFY_BAD)
1041 				isbad = 1;
1042 		}
1043 
1044 		/*
1045 		 * Queue pages may be unreferenced and totally zeroed, if
1046 		 * they're empty;  queue doesn't have much structure, so
1047 		 * this is unlikely to be wrong in any troublesome sense.
1048 		 * Skip to "err".
1049 		 */
1050 		goto err;
1051 	case DB_UNKNOWN:
1052 	default:
1053 		ret = __db_unknown_path(env, "__db_vrfy_structure");
1054 		goto err;
1055 	}
1056 
1057 	/* Walk free list. */
1058 	if ((ret =
1059 	    __db_vrfy_freelist(dbp, vdp, meta_pgno, flags)) == DB_VERIFY_BAD)
1060 		isbad = 1;
1061 
1062 	/*
1063 	 * If structure checks up until now have failed, it's likely that
1064 	 * checking what pages have been missed will result in oodles of
1065 	 * extraneous error messages being EPRINTed.  Skip to the end
1066 	 * if this is the case;  we're going to be printing at least one
1067 	 * error anyway, and probably all the more salient ones.
1068 	 */
1069 	if (ret != 0 || isbad == 1)
1070 		goto err;
1071 
1072 	/*
1073 	 * Make sure no page has been missed and that no page is still marked
1074 	 * "all zeroes" unless we are looking at unused hash bucket pages or
1075 	 * pagesoff the end of database.
1076 	 */
1077 	for (i = 0; i < vdp->last_pgno + 1; i++) {
1078 		if ((ret = __db_vrfy_getpageinfo(vdp, i, &pip)) != 0)
1079 			goto err;
1080 		if ((ret = __db_vrfy_pgset_get(pgset,
1081 		    vdp->thread_info, vdp->txn, i, &p)) != 0)
1082 			goto err;
1083 		if (pip->type == P_OVERFLOW) {
1084 			if ((u_int32_t)p != pip->refcount) {
1085 				EPRINT((env, DB_STR_A("0533",
1086 		    "Page %lu: overflow refcount %lu, referenced %lu times",
1087 				    "%lu %lu %lu"), (u_long)i,
1088 				    (u_long)pip->refcount, (u_long)p));
1089 				isbad = 1;
1090 			}
1091 		} else if (p == 0 &&
1092 #ifndef HAVE_FTRUNCATE
1093 		    !(i > vdp->meta_last_pgno &&
1094 		    (F_ISSET(pip, VRFY_IS_ALLZEROES) || pip->type == P_HASH)) &&
1095 #endif
1096 		    !(dbp->type == DB_HASH &&
1097 		    (pip->type == P_HASH || pip->type == P_INVALID))) {
1098 			/*
1099 			 * It is OK for unreferenced hash buckets to be
1100 			 * marked invalid and unreferenced.
1101 			 */
1102 			EPRINT((env, DB_STR_A("0534",
1103 			    "Page %lu: unreferenced page", "%lu"), (u_long)i));
1104 			isbad = 1;
1105 		}
1106 
1107 		if (F_ISSET(pip, VRFY_IS_ALLZEROES)
1108 #ifndef HAVE_FTRUNCATE
1109 		    && i <= vdp->meta_last_pgno
1110 #endif
1111 		    ) {
1112 			EPRINT((env, DB_STR_A("0535",
1113 			    "Page %lu: totally zeroed page", "%lu"),
1114 			    (u_long)i));
1115 			isbad = 1;
1116 		}
1117 		if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0)
1118 			goto err;
1119 		pip = NULL;
1120 	}
1121 
1122 err:	if (pip != NULL)
1123 		(void)__db_vrfy_putpageinfo(env, vdp, pip);
1124 
1125 	return ((isbad == 1 && ret == 0) ? DB_VERIFY_BAD : ret);
1126 }
1127 
1128 /*
1129  * __db_is_valid_magicno
1130  */
1131 static int
__db_is_valid_magicno(magic,typep)1132 __db_is_valid_magicno(magic, typep)
1133 	u_int32_t magic;
1134 	DBTYPE *typep;
1135 {
1136 	switch (magic) {
1137 	case DB_BTREEMAGIC:
1138 		*typep = DB_BTREE;
1139 		return (1);
1140 	case DB_HASHMAGIC:
1141 		*typep = DB_HASH;
1142 		return (1);
1143 	case DB_HEAPMAGIC:
1144 		*typep = DB_HEAP;
1145 		return (1);
1146 	case DB_QAMMAGIC:
1147 		*typep = DB_QUEUE;
1148 		return (1);
1149 	default:
1150 		break;
1151 	}
1152 	*typep = DB_UNKNOWN;
1153 	return (0);
1154 }
1155 
1156 /*
1157  * __db_vrfy_common --
1158  *	Verify info common to all page types.
1159  *
1160  * PUBLIC: int  __db_vrfy_common
1161  * PUBLIC:     __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t));
1162  */
1163 int
__db_vrfy_common(dbp,vdp,h,pgno,flags)1164 __db_vrfy_common(dbp, vdp, h, pgno, flags)
1165 	DB *dbp;
1166 	VRFY_DBINFO *vdp;
1167 	PAGE *h;
1168 	db_pgno_t pgno;
1169 	u_int32_t flags;
1170 {
1171 	ENV *env;
1172 	VRFY_PAGEINFO *pip;
1173 	int ret, t_ret;
1174 	u_int8_t *p;
1175 
1176 	env = dbp->env;
1177 
1178 	if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
1179 		return (ret);
1180 
1181 	pip->pgno = pgno;
1182 	F_CLR(pip, VRFY_IS_ALLZEROES);
1183 
1184 	/*
1185 	 * Hash expands the table by leaving some pages between the
1186 	 * old last and the new last totally zeroed.  These pages may
1187 	 * not be all zero if they were used, freed and then reallocated.
1188 	 *
1189 	 * Queue will create sparse files if sparse record numbers are used.
1190 	 */
1191 	if (pgno != 0 && PGNO(h) == 0) {
1192 		F_SET(pip, VRFY_IS_ALLZEROES);
1193 		for (p = (u_int8_t *)h; p < (u_int8_t *)h + dbp->pgsize; p++)
1194 			if (*p != 0) {
1195 				F_CLR(pip, VRFY_IS_ALLZEROES);
1196 				break;
1197 			}
1198 		/*
1199 		 * Mark it as a hash, and we'll
1200 		 * check that that makes sense structurally later.
1201 		 * (The queue verification doesn't care, since queues
1202 		 * don't really have much in the way of structure.)
1203 		 */
1204 		if (dbp->type != DB_HEAP)
1205 			pip->type = P_HASH;
1206 		ret = 0;
1207 		goto err;	/* well, not really an err. */
1208 	}
1209 
1210 	if (PGNO(h) != pgno) {
1211 		EPRINT((env, DB_STR_A("0536", "Page %lu: bad page number %lu",
1212 		    "%lu %lu"), (u_long)pgno, (u_long)h->pgno));
1213 		ret = DB_VERIFY_BAD;
1214 	}
1215 
1216 	switch (h->type) {
1217 	case P_INVALID:			/* Order matches ordinal value. */
1218 	case P_HASH_UNSORTED:
1219 	case P_IBTREE:
1220 	case P_IRECNO:
1221 	case P_LBTREE:
1222 	case P_LRECNO:
1223 	case P_OVERFLOW:
1224 	case P_HASHMETA:
1225 	case P_BTREEMETA:
1226 	case P_QAMMETA:
1227 	case P_QAMDATA:
1228 	case P_LDUP:
1229 	case P_HASH:
1230 	case P_HEAP:
1231 	case P_IHEAP:
1232 	case P_HEAPMETA:
1233 		break;
1234 	default:
1235 		EPRINT((env, DB_STR_A("0537", "Page %lu: bad page type %lu",
1236 		    "%lu %lu"), (u_long)pgno, (u_long)h->type));
1237 		ret = DB_VERIFY_BAD;
1238 	}
1239 	pip->type = h->type;
1240 
1241 err:	if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
1242 		ret = t_ret;
1243 
1244 	return (ret);
1245 }
1246 
1247 /*
1248  * __db_vrfy_invalid --
1249  *	Verify P_INVALID page.
1250  *	(Yes, there's not much to do here.)
1251  */
1252 static int
__db_vrfy_invalid(dbp,vdp,h,pgno,flags)1253 __db_vrfy_invalid(dbp, vdp, h, pgno, flags)
1254 	DB *dbp;
1255 	VRFY_DBINFO *vdp;
1256 	PAGE *h;
1257 	db_pgno_t pgno;
1258 	u_int32_t flags;
1259 {
1260 	ENV *env;
1261 	VRFY_PAGEINFO *pip;
1262 	int ret, t_ret;
1263 
1264 	env = dbp->env;
1265 
1266 	if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
1267 		return (ret);
1268 	pip->next_pgno = pip->prev_pgno = 0;
1269 
1270 	if (!IS_VALID_PGNO(NEXT_PGNO(h))) {
1271 		EPRINT((env, DB_STR_A("0538", "Page %lu: invalid next_pgno %lu",
1272 		    "%lu %lu"), (u_long)pgno, (u_long)NEXT_PGNO(h)));
1273 		ret = DB_VERIFY_BAD;
1274 	} else
1275 		pip->next_pgno = NEXT_PGNO(h);
1276 
1277 	if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
1278 		ret = t_ret;
1279 	return (ret);
1280 }
1281 
1282 /*
1283  * __db_vrfy_datapage --
1284  *	Verify elements common to data pages (P_HASH, P_LBTREE,
1285  *	P_IBTREE, P_IRECNO, P_LRECNO, P_OVERFLOW, P_DUPLICATE)--i.e.,
1286  *	those defined in the PAGE structure.
1287  *
1288  *	Called from each of the per-page routines, after the
1289  *	all-page-type-common elements of pip have been verified and filled
1290  *	in.
1291  *
1292  * PUBLIC: int __db_vrfy_datapage
1293  * PUBLIC:     __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t));
1294  */
1295 int
__db_vrfy_datapage(dbp,vdp,h,pgno,flags)1296 __db_vrfy_datapage(dbp, vdp, h, pgno, flags)
1297 	DB *dbp;
1298 	VRFY_DBINFO *vdp;
1299 	PAGE *h;
1300 	db_pgno_t pgno;
1301 	u_int32_t flags;
1302 {
1303 	ENV *env;
1304 	VRFY_PAGEINFO *pip;
1305 	u_int32_t smallest_entry;
1306 	int isbad, ret, t_ret;
1307 
1308 	env = dbp->env;
1309 
1310 	if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
1311 		return (ret);
1312 	isbad = 0;
1313 
1314 	/*
1315 	 * prev_pgno and next_pgno:  store for inter-page checks,
1316 	 * verify that they point to actual pages and not to self.
1317 	 *
1318 	 * !!!
1319 	 * Internal btree pages, as well as heap pages, do not maintain these
1320 	 * fields (indeed, they overload them).  Skip.
1321 	 */
1322 	if (TYPE(h) != P_IBTREE &&
1323 	    TYPE(h) != P_IRECNO && TYPE(h) != P_HEAP && TYPE(h) != P_IHEAP) {
1324 		if (!IS_VALID_PGNO(PREV_PGNO(h)) || PREV_PGNO(h) == pip->pgno) {
1325 			isbad = 1;
1326 			EPRINT((env, DB_STR_A("0539",
1327 			    "Page %lu: invalid prev_pgno %lu", "%lu %lu"),
1328 			    (u_long)pip->pgno, (u_long)PREV_PGNO(h)));
1329 		}
1330 		if (!IS_VALID_PGNO(NEXT_PGNO(h)) || NEXT_PGNO(h) == pip->pgno) {
1331 			isbad = 1;
1332 			EPRINT((env, DB_STR_A("0540",
1333 			    "Page %lu: invalid next_pgno %lu", "%lu %lu"),
1334 			    (u_long)pip->pgno, (u_long)NEXT_PGNO(h)));
1335 		}
1336 		pip->prev_pgno = PREV_PGNO(h);
1337 		pip->next_pgno = NEXT_PGNO(h);
1338 	}
1339 
1340 	/*
1341 	 * Verify the number of entries on the page: there's no good way to
1342 	 * determine if this is accurate.  The best we can do is verify that
1343 	 * it's not more than can, in theory, fit on the page.  Then, we make
1344 	 * sure there are at least this many valid elements in inp[], and
1345 	 * hope the test catches most cases.
1346 	 */
1347 	switch (TYPE(h)) {
1348 	case P_HASH_UNSORTED:
1349 	case P_HASH:
1350 		smallest_entry = HKEYDATA_PSIZE(0);
1351 		break;
1352 	case P_HEAP:
1353 		smallest_entry = sizeof(HEAPHDR) + sizeof(db_indx_t);
1354 		break;
1355 	case P_IHEAP:
1356 		/* Really high_pgno. */
1357 		pip->prev_pgno = PREV_PGNO(h);
1358 		smallest_entry = 0;
1359 		break;
1360 	case P_IBTREE:
1361 		smallest_entry = BINTERNAL_PSIZE(0);
1362 		break;
1363 	case P_IRECNO:
1364 		smallest_entry = RINTERNAL_PSIZE;
1365 		break;
1366 	case P_LBTREE:
1367 	case P_LDUP:
1368 	case P_LRECNO:
1369 		smallest_entry = BKEYDATA_PSIZE(0);
1370 		break;
1371 	default:
1372 		smallest_entry = 0;
1373 		break;
1374 	}
1375 	if (smallest_entry * NUM_ENT(h) / 2 > dbp->pgsize) {
1376 		isbad = 1;
1377 		EPRINT((env, DB_STR_A("0541",
1378 		    "Page %lu: too many entries: %lu",
1379 		    "%lu %lu"), (u_long)pgno, (u_long)NUM_ENT(h)));
1380 	}
1381 
1382 	if (TYPE(h) != P_OVERFLOW)
1383 		pip->entries = NUM_ENT(h);
1384 
1385 	/*
1386 	 * btree level.  Should be zero unless we're a btree;
1387 	 * if we are a btree, should be between LEAFLEVEL and MAXBTREELEVEL,
1388 	 * and we need to save it off.
1389 	 */
1390 	switch (TYPE(h)) {
1391 	case P_IBTREE:
1392 	case P_IRECNO:
1393 		if (LEVEL(h) < LEAFLEVEL + 1) {
1394 			isbad = 1;
1395 			EPRINT((env, DB_STR_A("0542",
1396 			    "Page %lu: bad btree level %lu", "%lu %lu"),
1397 			    (u_long)pgno, (u_long)LEVEL(h)));
1398 		}
1399 		pip->bt_level = LEVEL(h);
1400 		break;
1401 	case P_LBTREE:
1402 	case P_LDUP:
1403 	case P_LRECNO:
1404 		if (LEVEL(h) != LEAFLEVEL) {
1405 			isbad = 1;
1406 			EPRINT((env, DB_STR_A("0543",
1407 			    "Page %lu: btree leaf page has incorrect level %lu",
1408 			    "%lu %lu"), (u_long)pgno, (u_long)LEVEL(h)));
1409 		}
1410 		break;
1411 	default:
1412 		if (LEVEL(h) != 0) {
1413 			isbad = 1;
1414 			EPRINT((env, DB_STR_A("0544",
1415 			    "Page %lu: nonzero level %lu in non-btree database",
1416 			    "%lu %lu"), (u_long)pgno, (u_long)LEVEL(h)));
1417 		}
1418 		break;
1419 	}
1420 
1421 	/*
1422 	 * Even though inp[] occurs in all PAGEs, we look at it in the
1423 	 * access-method-specific code, since btree and hash treat
1424 	 * item lengths very differently, and one of the most important
1425 	 * things we want to verify is that the data--as specified
1426 	 * by offset and length--cover the right part of the page
1427 	 * without overlaps, gaps, or violations of the page boundary.
1428 	 */
1429 	if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
1430 		ret = t_ret;
1431 
1432 	return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
1433 }
1434 
1435 /*
1436  * __db_vrfy_meta --
1437  *	Verify the access-method common parts of a meta page, using
1438  *	normal mpool routines.
1439  *
1440  * PUBLIC: int __db_vrfy_meta
1441  * PUBLIC:     __P((DB *, VRFY_DBINFO *, DBMETA *, db_pgno_t, u_int32_t));
1442  */
1443 int
__db_vrfy_meta(dbp,vdp,meta,pgno,flags)1444 __db_vrfy_meta(dbp, vdp, meta, pgno, flags)
1445 	DB *dbp;
1446 	VRFY_DBINFO *vdp;
1447 	DBMETA *meta;
1448 	db_pgno_t pgno;
1449 	u_int32_t flags;
1450 {
1451 	DBTYPE dbtype, magtype;
1452 	ENV *env;
1453 	VRFY_PAGEINFO *pip;
1454 	int isbad, ret, t_ret;
1455 
1456 	isbad = 0;
1457 	env = dbp->env;
1458 
1459 	if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
1460 		return (ret);
1461 
1462 	/* type plausible for a meta page */
1463 	switch (meta->type) {
1464 	case P_BTREEMETA:
1465 		dbtype = DB_BTREE;
1466 		break;
1467 	case P_HASHMETA:
1468 		dbtype = DB_HASH;
1469 		break;
1470 	case P_HEAPMETA:
1471 		dbtype = DB_HEAP;
1472 		break;
1473 	case P_QAMMETA:
1474 		dbtype = DB_QUEUE;
1475 		break;
1476 	default:
1477 		ret = __db_unknown_path(env, "__db_vrfy_meta");
1478 		goto err;
1479 	}
1480 
1481 	/* magic number valid */
1482 	if (!__db_is_valid_magicno(meta->magic, &magtype)) {
1483 		isbad = 1;
1484 		EPRINT((env, DB_STR_A("0545", "Page %lu: invalid magic number",
1485 		    "%lu"), (u_long)pgno));
1486 	}
1487 	if (magtype != dbtype) {
1488 		isbad = 1;
1489 		EPRINT((env, DB_STR_A("0546",
1490 		    "Page %lu: magic number does not match database type",
1491 		    "%lu"), (u_long)pgno));
1492 	}
1493 
1494 	/* version */
1495 	if ((dbtype == DB_BTREE &&
1496 	    (meta->version > DB_BTREEVERSION ||
1497 	    meta->version < DB_BTREEOLDVER)) ||
1498 	    (dbtype == DB_HASH &&
1499 	    (meta->version > DB_HASHVERSION ||
1500 	    meta->version < DB_HASHOLDVER)) ||
1501 	    (dbtype == DB_HEAP &&
1502 	    (meta->version > DB_HEAPVERSION ||
1503 	    meta->version < DB_HEAPOLDVER)) ||
1504 	    (dbtype == DB_QUEUE &&
1505 	    (meta->version > DB_QAMVERSION ||
1506 	    meta->version < DB_QAMOLDVER))) {
1507 		isbad = 1;
1508 		EPRINT((env, DB_STR_A("0547",
1509     "Page %lu: unsupported database version %lu; extraneous errors may result",
1510 		    "%lu %lu"), (u_long)pgno, (u_long)meta->version));
1511 	}
1512 
1513 	/* pagesize */
1514 	if (meta->pagesize != dbp->pgsize) {
1515 		isbad = 1;
1516 		EPRINT((env, DB_STR_A("0548", "Page %lu: invalid pagesize %lu",
1517 		    "%lu %lu"), (u_long)pgno, (u_long)meta->pagesize));
1518 	}
1519 
1520 	/* Flags */
1521 	if (meta->metaflags != 0) {
1522 		if (FLD_ISSET(meta->metaflags,
1523 		    ~(DBMETA_CHKSUM|DBMETA_PART_RANGE|DBMETA_PART_CALLBACK))) {
1524 			isbad = 1;
1525 			EPRINT((env, DB_STR_A("0549",
1526 			    "Page %lu: bad meta-data flags value %#lx",
1527 			    "%lu %#lx"), (u_long)PGNO_BASE_MD,
1528 			    (u_long)meta->metaflags));
1529 		}
1530 		if (FLD_ISSET(meta->metaflags, DBMETA_CHKSUM))
1531 			F_SET(pip, VRFY_HAS_CHKSUM);
1532 		if (FLD_ISSET(meta->metaflags, DBMETA_PART_RANGE))
1533 			F_SET(pip, VRFY_HAS_PART_RANGE);
1534 		if (FLD_ISSET(meta->metaflags, DBMETA_PART_CALLBACK))
1535 			F_SET(pip, VRFY_HAS_PART_CALLBACK);
1536 	}
1537 
1538 	/*
1539 	 * Free list.
1540 	 *
1541 	 * If this is not the main, master-database meta page, it
1542 	 * should not have a free list.
1543 	 */
1544 	if (pgno != PGNO_BASE_MD && meta->free != PGNO_INVALID) {
1545 		isbad = 1;
1546 		EPRINT((env, DB_STR_A("0550",
1547 		    "Page %lu: nonempty free list on subdatabase metadata page",
1548 		    "%lu"), (u_long)pgno));
1549 	}
1550 
1551 	/* Can correctly be PGNO_INVALID--that's just the end of the list. */
1552 	if (IS_VALID_PGNO(meta->free))
1553 		pip->free = meta->free;
1554 	else {
1555 		isbad = 1;
1556 		EPRINT((env, DB_STR_A("0551",
1557 		    "Page %lu: nonsensical free list pgno %lu", "%lu %lu"),
1558 		    (u_long)pgno, (u_long)meta->free));
1559 	}
1560 
1561 	/*
1562 	 * Check that the meta page agrees with what we got from mpool.
1563 	 * If we don't have FTRUNCATE then mpool could include some
1564 	 * zeroed pages at the end of the file, we assume the meta page
1565 	 * is correct.  Queue does not update the meta page's last_pgno.
1566 	 */
1567 	if (pgno == PGNO_BASE_MD &&
1568 	    dbtype != DB_QUEUE && meta->last_pgno != vdp->last_pgno) {
1569 #ifdef HAVE_FTRUNCATE
1570 		isbad = 1;
1571 		EPRINT((env, DB_STR_A("0552",
1572 		    "Page %lu: last_pgno is not correct: %lu != %lu",
1573 		    "%lu %lu %lu"), (u_long)pgno,
1574 		    (u_long)meta->last_pgno, (u_long)vdp->last_pgno));
1575 #endif
1576 		vdp->meta_last_pgno = meta->last_pgno;
1577 	}
1578 
1579 	/*
1580 	 * We have now verified the common fields of the metadata page.
1581 	 * Clear the flag that told us they had been incompletely checked.
1582 	 */
1583 	F_CLR(pip, VRFY_INCOMPLETE);
1584 
1585 err:	if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
1586 		ret = t_ret;
1587 
1588 	return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
1589 }
1590 
1591 /*
1592  * __db_vrfy_freelist --
1593  *	Walk free list, checking off pages and verifying absence of
1594  *	loops.
1595  */
1596 static int
__db_vrfy_freelist(dbp,vdp,meta,flags)1597 __db_vrfy_freelist(dbp, vdp, meta, flags)
1598 	DB *dbp;
1599 	VRFY_DBINFO *vdp;
1600 	db_pgno_t meta;
1601 	u_int32_t flags;
1602 {
1603 	DB *pgset;
1604 	ENV *env;
1605 	VRFY_PAGEINFO *pip;
1606 	db_pgno_t cur_pgno, next_pgno;
1607 	int p, ret, t_ret;
1608 
1609 	env = dbp->env;
1610 	pgset = vdp->pgset;
1611 	DB_ASSERT(env, pgset != NULL);
1612 
1613 	if ((ret = __db_vrfy_getpageinfo(vdp, meta, &pip)) != 0)
1614 		return (ret);
1615 	for (next_pgno = pip->free;
1616 	    next_pgno != PGNO_INVALID; next_pgno = pip->next_pgno) {
1617 		cur_pgno = pip->pgno;
1618 		if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0)
1619 			return (t_ret);
1620 
1621 		/* This shouldn't happen, but just in case. */
1622 		if (!IS_VALID_PGNO(next_pgno)) {
1623 			EPRINT((env, DB_STR_A("0553",
1624 			    "Page %lu: invalid next_pgno %lu on free list page",
1625 			    "%lu %lu"), (u_long)cur_pgno, (u_long)next_pgno));
1626 			return (DB_VERIFY_BAD);
1627 		}
1628 
1629 		if (next_pgno > vdp->last_pgno) {
1630 			EPRINT((env, DB_STR_A("0713",
1631 			 "Page %lu: page %lu on free list beyond last_pgno %lu",
1632 			    "%lu %lu %lu"), (u_long)cur_pgno,
1633 			    (u_long)next_pgno, (u_long)vdp->last_pgno));
1634 			ret = DB_VERIFY_BAD;
1635 		}
1636 		/* Detect cycles. */
1637 		if ((t_ret = __db_vrfy_pgset_get(pgset,
1638 		    vdp->thread_info, vdp->txn, next_pgno, &p)) != 0)
1639 			return (t_ret);
1640 		if (p != 0) {
1641 			EPRINT((env, DB_STR_A("0554",
1642 		    "Page %lu: page %lu encountered a second time on free list",
1643 			    "%lu %lu"), (u_long)cur_pgno, (u_long)next_pgno));
1644 			return (DB_VERIFY_BAD);
1645 		}
1646 		if ((t_ret = __db_vrfy_pgset_inc(pgset,
1647 		    vdp->thread_info, vdp->txn, next_pgno)) != 0)
1648 			return (t_ret);
1649 
1650 		if ((t_ret = __db_vrfy_getpageinfo(vdp, next_pgno, &pip)) != 0)
1651 			return (t_ret);
1652 
1653 		if (pip->type != P_INVALID) {
1654 			EPRINT((env, DB_STR_A("0555",
1655 			    "Page %lu: non-invalid page %lu on free list",
1656 			    "%lu %lu"), (u_long)cur_pgno, (u_long)next_pgno));
1657 			ret = DB_VERIFY_BAD;	  /* unsafe to continue */
1658 			break;
1659 		}
1660 	}
1661 
1662 	if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
1663 		ret = t_ret;
1664 	return (ret);
1665 }
1666 
1667 /*
1668  * __db_vrfy_subdbs --
1669  *	Walk the known-safe master database of subdbs with a cursor,
1670  *	verifying the structure of each subdatabase we encounter.
1671  */
1672 static int
__db_vrfy_subdbs(dbp,vdp,dbname,flags)1673 __db_vrfy_subdbs(dbp, vdp, dbname, flags)
1674 	DB *dbp;
1675 	VRFY_DBINFO *vdp;
1676 	const char *dbname;
1677 	u_int32_t flags;
1678 {
1679 	DB *mdbp;
1680 	DBC *dbc;
1681 	DBT key, data;
1682 	ENV *env;
1683 	VRFY_PAGEINFO *pip;
1684 	db_pgno_t meta_pgno;
1685 	int ret, t_ret, isbad;
1686 	u_int8_t type;
1687 
1688 	isbad = 0;
1689 	dbc = NULL;
1690 	env = dbp->env;
1691 
1692 	if ((ret = __db_master_open(dbp,
1693 	    vdp->thread_info, NULL, dbname, DB_RDONLY, 0, &mdbp)) != 0)
1694 		return (ret);
1695 
1696 	if ((ret = __db_cursor_int(mdbp, NULL,
1697 	    vdp->txn, DB_BTREE, PGNO_INVALID, 0, DB_LOCK_INVALIDID, &dbc)) != 0)
1698 		goto err;
1699 
1700 	memset(&key, 0, sizeof(key));
1701 	memset(&data, 0, sizeof(data));
1702 	while ((ret = __dbc_get(dbc, &key, &data, DB_NEXT)) == 0) {
1703 		if (data.size != sizeof(db_pgno_t)) {
1704 			EPRINT((env, DB_STR("0556",
1705 			    "Subdatabase entry not page-number size")));
1706 			isbad = 1;
1707 			goto err;
1708 		}
1709 		memcpy(&meta_pgno, data.data, data.size);
1710 		/*
1711 		 * Subdatabase meta pgnos are stored in network byte
1712 		 * order for cross-endian compatibility.  Swap if appropriate.
1713 		 */
1714 		DB_NTOHL_SWAP(env, &meta_pgno);
1715 		if (meta_pgno == PGNO_INVALID || meta_pgno > vdp->last_pgno) {
1716 			EPRINT((env, DB_STR_A("0557",
1717 			    "Subdatabase entry references invalid page %lu",
1718 			    "%lu"), (u_long)meta_pgno));
1719 			isbad = 1;
1720 			goto err;
1721 		}
1722 		if ((ret = __db_vrfy_getpageinfo(vdp, meta_pgno, &pip)) != 0)
1723 			goto err;
1724 		type = pip->type;
1725 		if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0)
1726 			goto err;
1727 		switch (type) {
1728 		case P_BTREEMETA:
1729 			if ((ret = __bam_vrfy_structure(
1730 			    dbp, vdp, meta_pgno, NULL, NULL, flags)) != 0) {
1731 				if (ret == DB_VERIFY_BAD)
1732 					isbad = 1;
1733 				else
1734 					goto err;
1735 			}
1736 			break;
1737 		case P_HASHMETA:
1738 			if ((ret = __ham_vrfy_structure(
1739 			    dbp, vdp, meta_pgno, flags)) != 0) {
1740 				if (ret == DB_VERIFY_BAD)
1741 					isbad = 1;
1742 				else
1743 					goto err;
1744 			}
1745 			break;
1746 		case P_QAMMETA:
1747 		default:
1748 			EPRINT((env, DB_STR_A("0558",
1749 		    "Subdatabase entry references page %lu of invalid type %lu",
1750 			    "%lu %lu"), (u_long)meta_pgno, (u_long)type));
1751 			ret = DB_VERIFY_BAD;
1752 			goto err;
1753 		}
1754 	}
1755 
1756 	if (ret == DB_NOTFOUND)
1757 		ret = 0;
1758 
1759 err:	if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0 && ret == 0)
1760 		ret = t_ret;
1761 
1762 	if ((t_ret = __db_close(mdbp, NULL, 0)) != 0 && ret == 0)
1763 		ret = t_ret;
1764 
1765 	return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
1766 }
1767 
1768 /*
1769  * __db_vrfy_struct_feedback --
1770  *	Provide feedback during top-down database structure traversal.
1771  *	(See comment at the beginning of __db_vrfy_structure.)
1772  *
1773  * PUBLIC: void __db_vrfy_struct_feedback __P((DB *, VRFY_DBINFO *));
1774  */
1775 void
__db_vrfy_struct_feedback(dbp,vdp)1776 __db_vrfy_struct_feedback(dbp, vdp)
1777 	DB *dbp;
1778 	VRFY_DBINFO *vdp;
1779 {
1780 	int progress;
1781 
1782 	if (dbp->db_feedback == NULL)
1783 		return;
1784 
1785 	if (vdp->pgs_remaining > 0)
1786 		vdp->pgs_remaining--;
1787 
1788 	/* Don't allow a feedback call of 100 until we're really done. */
1789 	progress = 100 - (int)(vdp->pgs_remaining * 50 / (vdp->last_pgno + 1));
1790 	dbp->db_feedback(dbp, DB_VERIFY, progress == 100 ? 99 : progress);
1791 }
1792 
1793 /*
1794  * __db_vrfy_orderchkonly --
1795  *	Do an sort-order/hashing check on a known-otherwise-good subdb.
1796  */
1797 static int
__db_vrfy_orderchkonly(dbp,vdp,name,subdb,flags)1798 __db_vrfy_orderchkonly(dbp, vdp, name, subdb, flags)
1799 	DB *dbp;
1800 	VRFY_DBINFO *vdp;
1801 	const char *name, *subdb;
1802 	u_int32_t flags;
1803 {
1804 	BTMETA *btmeta;
1805 	DB *mdbp, *pgset;
1806 	DBC *pgsc;
1807 	DBT key, data;
1808 	DB_MPOOLFILE *mpf;
1809 	ENV *env;
1810 	HASH *h_internal;
1811 	HMETA *hmeta;
1812 	PAGE *h, *currpg;
1813 	db_pgno_t meta_pgno, p, pgno;
1814 	u_int32_t bucket;
1815 	int t_ret, ret;
1816 
1817 	pgset = NULL;
1818 	pgsc = NULL;
1819 	env = dbp->env;
1820 	mpf = dbp->mpf;
1821 	currpg = h = NULL;
1822 
1823 	LF_CLR(DB_NOORDERCHK);
1824 
1825 	/* Open the master database and get the meta_pgno for the subdb. */
1826 	if ((ret = __db_master_open(dbp,
1827 	    vdp->thread_info, NULL, name, DB_RDONLY, 0, &mdbp)) != 0)
1828 		goto err;
1829 
1830 	DB_INIT_DBT(key, subdb, strlen(subdb));
1831 	memset(&data, 0, sizeof(data));
1832 	if ((ret = __db_get(mdbp,
1833 	    vdp->thread_info, NULL, &key, &data, 0)) != 0) {
1834 		if (ret == DB_NOTFOUND)
1835 			ret = ENOENT;
1836 		goto err;
1837 	}
1838 
1839 	if (data.size != sizeof(db_pgno_t)) {
1840 		EPRINT((env, DB_STR("0559",
1841 		    "Subdatabase entry of invalid size")));
1842 		ret = DB_VERIFY_BAD;
1843 		goto err;
1844 	}
1845 
1846 	memcpy(&meta_pgno, data.data, data.size);
1847 
1848 	/*
1849 	 * Subdatabase meta pgnos are stored in network byte
1850 	 * order for cross-endian compatibility.  Swap if appropriate.
1851 	 */
1852 	DB_NTOHL_SWAP(env, &meta_pgno);
1853 
1854 	if ((ret = __memp_fget(mpf,
1855 	     &meta_pgno, vdp->thread_info, NULL, 0, &h)) != 0)
1856 		goto err;
1857 
1858 	if ((ret = __db_vrfy_pgset(env,
1859 	    vdp->thread_info, dbp->pgsize, &pgset)) != 0)
1860 		goto err;
1861 
1862 	switch (TYPE(h)) {
1863 	case P_BTREEMETA:
1864 		btmeta = (BTMETA *)h;
1865 		if (F_ISSET(&btmeta->dbmeta, BTM_RECNO)) {
1866 			/* Recnos have no order to check. */
1867 			ret = 0;
1868 			goto err;
1869 		}
1870 		if ((ret =
1871 		    __db_meta2pgset(dbp, vdp, meta_pgno, flags, pgset)) != 0)
1872 			goto err;
1873 		if ((ret = __db_cursor_int(pgset, NULL, vdp->txn, dbp->type,
1874 		    PGNO_INVALID, 0, DB_LOCK_INVALIDID, &pgsc)) != 0)
1875 			goto err;
1876 		while ((ret = __db_vrfy_pgset_next(pgsc, &p)) == 0) {
1877 			if ((ret = __memp_fget(mpf, &p,
1878 			     vdp->thread_info, NULL, 0, &currpg)) != 0)
1879 				goto err;
1880 			if ((ret = __bam_vrfy_itemorder(dbp, NULL,
1881 			    vdp->thread_info, currpg, p, NUM_ENT(currpg), 1,
1882 			    F_ISSET(&btmeta->dbmeta, BTM_DUP), flags)) != 0)
1883 				goto err;
1884 			if ((ret = __memp_fput(mpf,
1885 			    vdp->thread_info, currpg, dbp->priority)) != 0)
1886 				goto err;
1887 			currpg = NULL;
1888 		}
1889 
1890 		/*
1891 		 * The normal exit condition for the loop above is DB_NOTFOUND.
1892 		 * If we see that, zero it and continue on to cleanup.
1893 		 * Otherwise, it's a real error and will be returned.
1894 		 */
1895 		if (ret == DB_NOTFOUND)
1896 			ret = 0;
1897 		break;
1898 	case P_HASHMETA:
1899 		hmeta = (HMETA *)h;
1900 		h_internal = (HASH *)dbp->h_internal;
1901 		/*
1902 		 * Make sure h_charkey is right.
1903 		 */
1904 		if (h_internal == NULL) {
1905 			EPRINT((env, DB_STR_A("0560",
1906 			    "Page %lu: DB->h_internal field is NULL", "%lu"),
1907 			    (u_long)meta_pgno));
1908 			ret = DB_VERIFY_BAD;
1909 			goto err;
1910 		}
1911 		if (h_internal->h_hash == NULL)
1912 			h_internal->h_hash = hmeta->dbmeta.version < 5
1913 			? __ham_func4 : __ham_func5;
1914 		if (hmeta->h_charkey !=
1915 		    h_internal->h_hash(dbp, CHARKEY, sizeof(CHARKEY))) {
1916 			EPRINT((env, DB_STR_A("0561",
1917 			    "Page %lu: incorrect hash function for database",
1918 			    "%lu"), (u_long)meta_pgno));
1919 			ret = DB_VERIFY_BAD;
1920 			goto err;
1921 		}
1922 
1923 		/*
1924 		 * Foreach bucket, verify hashing on each page in the
1925 		 * corresponding chain of pages.
1926 		 */
1927 		if ((ret = __db_cursor_int(dbp, NULL, vdp->txn, dbp->type,
1928 		    PGNO_INVALID, 0, DB_LOCK_INVALIDID, &pgsc)) != 0)
1929 			goto err;
1930 		for (bucket = 0; bucket <= hmeta->max_bucket; bucket++) {
1931 			pgno = BS_TO_PAGE(bucket, hmeta->spares);
1932 			while (pgno != PGNO_INVALID) {
1933 				if ((ret = __memp_fget(mpf, &pgno,
1934 				    vdp->thread_info, NULL, 0, &currpg)) != 0)
1935 					goto err;
1936 				if ((ret = __ham_vrfy_hashing(pgsc,
1937 				    NUM_ENT(currpg), hmeta, bucket, pgno,
1938 				    flags, h_internal->h_hash)) != 0)
1939 					goto err;
1940 				pgno = NEXT_PGNO(currpg);
1941 				if ((ret = __memp_fput(mpf, vdp->thread_info,
1942 				    currpg, dbp->priority)) != 0)
1943 					goto err;
1944 				currpg = NULL;
1945 			}
1946 		}
1947 		break;
1948 	default:
1949 		EPRINT((env, DB_STR_A("0562",
1950 		    "Page %lu: database metapage of bad type %lu",
1951 		    "%lu %lu"), (u_long)meta_pgno, (u_long)TYPE(h)));
1952 		ret = DB_VERIFY_BAD;
1953 		break;
1954 	}
1955 
1956 err:	if (pgsc != NULL && (t_ret = __dbc_close(pgsc)) != 0 && ret == 0)
1957 		ret = t_ret;
1958 	if (pgset != NULL &&
1959 	    (t_ret = __db_close(pgset, NULL, 0)) != 0 && ret == 0)
1960 		ret = t_ret;
1961 	if (h != NULL && (t_ret = __memp_fput(mpf,
1962 	    vdp->thread_info, h, dbp->priority)) != 0)
1963 		ret = t_ret;
1964 	if (currpg != NULL &&
1965 	    (t_ret = __memp_fput(mpf,
1966 		vdp->thread_info, currpg, dbp->priority)) != 0)
1967 		ret = t_ret;
1968 	if ((t_ret = __db_close(mdbp, NULL, 0)) != 0)
1969 		ret = t_ret;
1970 	return (ret);
1971 }
1972 
1973 /*
1974  * __db_salvage_pg --
1975  *	Walk through a page, salvaging all likely or plausible (w/
1976  *	DB_AGGRESSIVE) key/data pairs and marking seen pages in vdp.
1977  *
1978  * PUBLIC: int __db_salvage_pg __P((DB *, VRFY_DBINFO *, db_pgno_t,
1979  * PUBLIC:     PAGE *, void *, int (*)(void *, const void *), u_int32_t));
1980  */
1981 int
__db_salvage_pg(dbp,vdp,pgno,h,handle,callback,flags)1982 __db_salvage_pg(dbp, vdp, pgno, h, handle, callback, flags)
1983 	DB *dbp;
1984 	VRFY_DBINFO *vdp;
1985 	db_pgno_t pgno;
1986 	PAGE *h;
1987 	void *handle;
1988 	int (*callback) __P((void *, const void *));
1989 	u_int32_t flags;
1990 {
1991 	ENV *env;
1992 	VRFY_PAGEINFO *pip;
1993 	int keyflag, ret, t_ret;
1994 
1995 	env = dbp->env;
1996 	DB_ASSERT(env, LF_ISSET(DB_SALVAGE));
1997 
1998 	/*
1999 	 * !!!
2000 	 * We dump record numbers when salvaging Queue databases, but not for
2001 	 * immutable Recno databases.  The problem is we can't figure out the
2002 	 * record number from the database page in the Recno case, while the
2003 	 * offset in the file is sufficient for Queue.
2004 	 */
2005 	keyflag = 0;
2006 
2007 	/* If we got this page in the subdb pass, we can safely skip it. */
2008 	if (__db_salvage_isdone(vdp, pgno))
2009 		return (0);
2010 
2011 	switch (TYPE(h)) {
2012 	case P_BTREEMETA:
2013 		ret = __bam_vrfy_meta(dbp, vdp, (BTMETA *)h, pgno, flags);
2014 		break;
2015 	case P_HASH:
2016 	case P_HASH_UNSORTED:
2017 	case P_HEAP:
2018 	case P_LBTREE:
2019 	case P_QAMDATA:
2020 		return (__db_salvage_leaf(dbp,
2021 		    vdp, pgno, h, handle, callback, flags));
2022 	case P_HASHMETA:
2023 		ret = __ham_vrfy_meta(dbp, vdp, (HMETA *)h, pgno, flags);
2024 		break;
2025 	case P_HEAPMETA:
2026 		ret = __heap_vrfy_meta(dbp, vdp, (HEAPMETA *)h, pgno, flags);
2027 		break;
2028 	case P_IBTREE:
2029 		/*
2030 		 * We need to mark any overflow keys on internal pages as seen,
2031 		 * so we don't print them out in __db_salvage_unknowns.  But if
2032 		 * we're an upgraded database, a P_LBTREE page may very well
2033 		 * have a reference to the same overflow pages (this practice
2034 		 * stopped somewhere around db4.5).  To give P_LBTREEs a chance
2035 		 * to print out any keys on shared pages, mark the page now and
2036 		 * deal with it at the end.
2037 		 */
2038 		return (__db_salvage_markneeded(vdp, pgno, SALVAGE_IBTREE));
2039 	case P_IHEAP:
2040 		/*
2041 		 * There's nothing to salvage from heap region pages.  Just mark
2042 		 * that we've seen the page.
2043 		 */
2044 		return (__db_salvage_markdone(vdp, pgno));
2045 	case P_LDUP:
2046 		return (__db_salvage_markneeded(vdp, pgno, SALVAGE_LDUP));
2047 	case P_LRECNO:
2048 		/*
2049 		 * Recno leaves are tough, because the leaf could be (1) a dup
2050 		 * page, or it could be (2) a regular database leaf page.
2051 		 * Fortunately, RECNO databases are not allowed to have
2052 		 * duplicates.
2053 		 *
2054 		 * If there are no subdatabases, dump the page immediately if
2055 		 * it's a leaf in a RECNO database, otherwise wait and hopefully
2056 		 * it will be dumped by the leaf page that refers to it,
2057 		 * otherwise we'll get it with the unknowns.
2058 		 *
2059 		 * If there are subdatabases, there might be mixed types and
2060 		 * dbp->type can't be trusted.  We'll only get here after
2061 		 * salvaging each database, though, so salvaging this page
2062 		 * immediately isn't important.  If this page is a dup, it might
2063 		 * get salvaged later on, otherwise the unknowns pass will pick
2064 		 * it up.  Note that SALVAGE_HASSUBDBS won't get set if we're
2065 		 * salvaging aggressively.
2066 		 *
2067 		 * If we're salvaging aggressively, we don't know whether or not
2068 		 * there's subdatabases, so we wait on all recno pages.
2069 		 */
2070 		if (!LF_ISSET(DB_AGGRESSIVE) &&
2071 		    !F_ISSET(vdp, SALVAGE_HASSUBDBS) && dbp->type == DB_RECNO)
2072 			return (__db_salvage_leaf(dbp,
2073 			    vdp, pgno, h, handle, callback, flags));
2074 		return (__db_salvage_markneeded(vdp, pgno, SALVAGE_LRECNODUP));
2075 	case P_OVERFLOW:
2076 		return (__db_salvage_markneeded(vdp, pgno, SALVAGE_OVERFLOW));
2077 	case P_QAMMETA:
2078 		keyflag = 1;
2079 		ret = __qam_vrfy_meta(dbp, vdp, (QMETA *)h, pgno, flags);
2080 		break;
2081 	case P_INVALID:
2082 	case P_IRECNO:
2083 	case __P_DUPLICATE:
2084 	default:
2085 		/*
2086 		 * There's no need to display an error, the page type was
2087 		 * already checked and reported on.
2088 		 */
2089 		return (0);
2090 	}
2091 	if (ret != 0)
2092 		return (ret);
2093 
2094 	/*
2095 	 * We have to display the dump header if it's a metadata page.  It's
2096 	 * our last chance as the page was marked "seen" in the vrfy routine,
2097 	 * and  we won't see the page again.  We don't display headers for
2098 	 * the first database in a multi-database file, that database simply
2099 	 * contains a list of subdatabases.
2100 	 */
2101 	if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
2102 		return (ret);
2103 	if (!F_ISSET(pip, VRFY_HAS_SUBDBS) && !LF_ISSET(DB_VERIFY_PARTITION))
2104 		ret = __db_prheader(
2105 		    dbp, NULL, 0, keyflag, handle, callback, vdp, pgno);
2106 	if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
2107 		ret = t_ret;
2108 	return (ret);
2109 }
2110 
2111 /*
2112  * __db_salvage_leaf --
2113  *	Walk through a leaf, salvaging all likely key/data pairs and marking
2114  *	seen pages in vdp.
2115  *
2116  * PUBLIC: int __db_salvage_leaf __P((DB *, VRFY_DBINFO *, db_pgno_t,
2117  * PUBLIC:     PAGE *, void *, int (*)(void *, const void *), u_int32_t));
2118  */
2119 int
__db_salvage_leaf(dbp,vdp,pgno,h,handle,callback,flags)2120 __db_salvage_leaf(dbp, vdp, pgno, h, handle, callback, flags)
2121 	DB *dbp;
2122 	VRFY_DBINFO *vdp;
2123 	db_pgno_t pgno;
2124 	PAGE *h;
2125 	void *handle;
2126 	int (*callback) __P((void *, const void *));
2127 	u_int32_t flags;
2128 {
2129 	ENV *env;
2130 
2131 	env = dbp->env;
2132 	DB_ASSERT(env, LF_ISSET(DB_SALVAGE));
2133 
2134 	/* If we got this page in the subdb pass, we can safely skip it. */
2135 	if (__db_salvage_isdone(vdp, pgno))
2136 		return (0);
2137 
2138 	switch (TYPE(h)) {
2139 	case P_HASH_UNSORTED:
2140 	case P_HASH:
2141 		return (__ham_salvage(dbp, vdp,
2142 		    pgno, h, handle, callback, flags));
2143 	case P_HEAP:
2144 		return (__heap_salvage(dbp, vdp,
2145 		    pgno, h, handle, callback, flags));
2146 	case P_LBTREE:
2147 	case P_LRECNO:
2148 		return (__bam_salvage(dbp, vdp,
2149 		    pgno, TYPE(h), h, handle, callback, NULL, flags));
2150 	case P_QAMDATA:
2151 		return (__qam_salvage(dbp, vdp,
2152 		    pgno, h, handle, callback, flags));
2153 	default:
2154 		/*
2155 		 * There's no need to display an error, the page type was
2156 		 * already checked and reported on.
2157 		 */
2158 		return (0);
2159 	}
2160 }
2161 
2162 /*
2163  * __db_salvage_unknowns --
2164  *	Walk through the salvager database, printing with key "UNKNOWN"
2165  *	any pages we haven't dealt with.
2166  */
2167 static int
__db_salvage_unknowns(dbp,vdp,handle,callback,flags)2168 __db_salvage_unknowns(dbp, vdp, handle, callback, flags)
2169 	DB *dbp;
2170 	VRFY_DBINFO *vdp;
2171 	void *handle;
2172 	int (*callback) __P((void *, const void *));
2173 	u_int32_t flags;
2174 {
2175 	DBC *dbc;
2176 	DBT unkdbt, key, *dbt;
2177 	DB_MPOOLFILE *mpf;
2178 	ENV *env;
2179 	PAGE *h;
2180 	db_pgno_t pgno;
2181 	u_int32_t pgtype, ovfl_bufsz, tmp_flags;
2182 	int ret, t_ret;
2183 	void *ovflbuf;
2184 
2185 	dbc = NULL;
2186 	env = dbp->env;
2187 	mpf = dbp->mpf;
2188 
2189 	DB_INIT_DBT(unkdbt, "UNKNOWN", sizeof("UNKNOWN") - 1);
2190 
2191 	if ((ret = __os_malloc(env, dbp->pgsize, &ovflbuf)) != 0)
2192 		return (ret);
2193 	ovfl_bufsz = dbp->pgsize;
2194 
2195 	/*
2196 	 * We make two passes -- in the first pass, skip SALVAGE_OVERFLOW
2197 	 * pages, because they may be referenced by the standard database
2198 	 * pages that we're resolving.
2199 	 */
2200 	while ((t_ret =
2201 	    __db_salvage_getnext(vdp, &dbc, &pgno, &pgtype, 1)) == 0) {
2202 		if ((t_ret = __memp_fget(mpf,
2203 		    &pgno, vdp->thread_info, NULL, 0, &h)) != 0) {
2204 			if (ret == 0)
2205 				ret = t_ret;
2206 			continue;
2207 		}
2208 
2209 		dbt = NULL;
2210 		tmp_flags = 0;
2211 		switch (pgtype) {
2212 		case SALVAGE_LDUP:
2213 		case SALVAGE_LRECNODUP:
2214 			dbt = &unkdbt;
2215 			tmp_flags = DB_SA_UNKNOWNKEY;
2216 			/* FALLTHROUGH */
2217 		case SALVAGE_IBTREE:
2218 		case SALVAGE_LBTREE:
2219 		case SALVAGE_LRECNO:
2220 			if ((t_ret = __bam_salvage(
2221 			    dbp, vdp, pgno, pgtype, h, handle,
2222 			    callback, dbt, tmp_flags | flags)) != 0 && ret == 0)
2223 				ret = t_ret;
2224 			break;
2225 		case SALVAGE_OVERFLOW:
2226 			DB_ASSERT(env, 0);	/* Shouldn't ever happen. */
2227 			break;
2228 		case SALVAGE_HASH:
2229 			if ((t_ret = __ham_salvage(dbp, vdp,
2230 			    pgno, h, handle, callback, flags)) != 0 && ret == 0)
2231 				ret = t_ret;
2232 			break;
2233 		case SALVAGE_INVALID:
2234 		case SALVAGE_IGNORE:
2235 		default:
2236 			/*
2237 			 * Shouldn't happen, but if it does, just do what the
2238 			 * nice man says.
2239 			 */
2240 			DB_ASSERT(env, 0);
2241 			break;
2242 		}
2243 		if ((t_ret = __memp_fput(mpf,
2244 		    vdp->thread_info, h, dbp->priority)) != 0 && ret == 0)
2245 			ret = t_ret;
2246 	}
2247 
2248 	/* We should have reached the end of the database. */
2249 	if (t_ret == DB_NOTFOUND)
2250 		t_ret = 0;
2251 	if (t_ret != 0 && ret == 0)
2252 		ret = t_ret;
2253 
2254 	/* Re-open the cursor so we traverse the database again. */
2255 	if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0)
2256 		ret = t_ret;
2257 	dbc = NULL;
2258 
2259 	/* Now, deal with any remaining overflow pages. */
2260 	while ((t_ret =
2261 	    __db_salvage_getnext(vdp, &dbc, &pgno, &pgtype, 0)) == 0) {
2262 		if ((t_ret = __memp_fget(mpf,
2263 		    &pgno, vdp->thread_info, NULL, 0, &h)) != 0) {
2264 			if (ret == 0)
2265 				ret = t_ret;
2266 			continue;
2267 		}
2268 
2269 		switch (pgtype) {
2270 		case SALVAGE_OVERFLOW:
2271 			/*
2272 			 * XXX:
2273 			 * This may generate multiple "UNKNOWN" keys in
2274 			 * a database with no dups.  What to do?
2275 			 */
2276 			if ((t_ret = __db_safe_goff(dbp, vdp,
2277 			    pgno, &key, &ovflbuf, &ovfl_bufsz, flags)) != 0 ||
2278 			    ((vdp->type == DB_BTREE || vdp->type == DB_HASH) &&
2279 			    (t_ret = __db_vrfy_prdbt(&unkdbt,
2280 			    0, " ", handle, callback, 0, 0, vdp)) != 0) ||
2281 			    (t_ret = __db_vrfy_prdbt(
2282 			    &key, 0, " ", handle, callback, 0, 0, vdp)) != 0)
2283 				if (ret == 0)
2284 					ret = t_ret;
2285 			break;
2286 		default:
2287 			DB_ASSERT(env, 0);	/* Shouldn't ever happen. */
2288 			break;
2289 		}
2290 		if ((t_ret = __memp_fput(mpf,
2291 		    vdp->thread_info, h, dbp->priority)) != 0 && ret == 0)
2292 			ret = t_ret;
2293 	}
2294 
2295 	/* We should have reached the end of the database. */
2296 	if (t_ret == DB_NOTFOUND)
2297 		t_ret = 0;
2298 	if (t_ret != 0 && ret == 0)
2299 		ret = t_ret;
2300 
2301 	if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0)
2302 		ret = t_ret;
2303 
2304 	__os_free(env, ovflbuf);
2305 
2306 	return (ret);
2307 }
2308 
/*
 * INP_OFFSET --
 *	Offset of the ith inp array entry, which we can compare to the offset
 *	the entry stores.
 *
 *	The result is the distance in bytes from the start of page h to slot
 *	i of the array returned by P_INP(dbp, h), cast to db_indx_t.  h is
 *	expanded twice, so pass an expression without side effects.
 */
#define	INP_OFFSET(dbp, h, i)	\
    ((db_indx_t)((u_int8_t *)((P_INP(dbp,(h))) + (i)) - (u_int8_t *)(h)))
2315 
2316 /*
2317  * __db_vrfy_inpitem --
2318  *	Verify that a single entry in the inp array is sane, and update
2319  *	the high water mark and current item offset.  (The former of these is
2320  *	used for state information between calls, and is required;  it must
2321  *	be initialized to the pagesize before the first call.)
2322  *
2323  *	Returns DB_VERIFY_FATAL if inp has collided with the data,
2324  *	since verification can't continue from there;  returns DB_VERIFY_BAD
2325  *	if anything else is wrong.
2326  *
2327  * PUBLIC: int __db_vrfy_inpitem __P((DB *, PAGE *,
2328  * PUBLIC:     db_pgno_t, u_int32_t, int, u_int32_t, u_int32_t *, u_int32_t *));
2329  */
2330 int
__db_vrfy_inpitem(dbp,h,pgno,i,is_btree,flags,himarkp,offsetp)2331 __db_vrfy_inpitem(dbp, h, pgno, i, is_btree, flags, himarkp, offsetp)
2332 	DB *dbp;
2333 	PAGE *h;
2334 	db_pgno_t pgno;
2335 	u_int32_t i;
2336 	int is_btree;
2337 	u_int32_t flags, *himarkp, *offsetp;
2338 {
2339 	BKEYDATA *bk;
2340 	ENV *env;
2341 	db_indx_t *inp, offset, len;
2342 
2343 	env = dbp->env;
2344 
2345 	DB_ASSERT(env, himarkp != NULL);
2346 	inp = P_INP(dbp, h);
2347 
2348 	/*
2349 	 * Check that the inp array, which grows from the beginning of the
2350 	 * page forward, has not collided with the data, which grow from the
2351 	 * end of the page backward.
2352 	 */
2353 	if (inp + i >= (db_indx_t *)((u_int8_t *)h + *himarkp)) {
2354 		/* We've collided with the data.  We need to bail. */
2355 		EPRINT((env, DB_STR_A("0563",
2356 		    "Page %lu: entries listing %lu overlaps data",
2357 		    "%lu %lu"), (u_long)pgno, (u_long)i));
2358 		return (DB_VERIFY_FATAL);
2359 	}
2360 
2361 	offset = inp[i];
2362 
2363 	/*
2364 	 * Check that the item offset is reasonable:  it points somewhere
2365 	 * after the inp array and before the end of the page.
2366 	 */
2367 	if (offset <= INP_OFFSET(dbp, h, i) || offset >= dbp->pgsize) {
2368 		EPRINT((env, DB_STR_A("0564",
2369 		    "Page %lu: bad offset %lu at page index %lu",
2370 		    "%lu %lu %lu"), (u_long)pgno, (u_long)offset, (u_long)i));
2371 		return (DB_VERIFY_BAD);
2372 	}
2373 
2374 	/* Update the high-water mark (what HOFFSET should be) */
2375 	if (offset < *himarkp)
2376 		*himarkp = offset;
2377 
2378 	if (is_btree) {
2379 		/*
2380 		 * Check alignment;  if it's unaligned, it's unsafe to
2381 		 * manipulate this item.
2382 		 */
2383 		if (offset != DB_ALIGN(offset, sizeof(u_int32_t))) {
2384 			EPRINT((env, DB_STR_A("0565",
2385 			    "Page %lu: unaligned offset %lu at page index %lu",
2386 			    "%lu %lu %lu"), (u_long)pgno, (u_long)offset,
2387 			    (u_long)i));
2388 			return (DB_VERIFY_BAD);
2389 		}
2390 
2391 		/*
2392 		 * Check that the item length remains on-page.
2393 		 */
2394 		bk = GET_BKEYDATA(dbp, h, i);
2395 
2396 		/*
2397 		 * We need to verify the type of the item here;
2398 		 * we can't simply assume that it will be one of the
2399 		 * expected three.  If it's not a recognizable type,
2400 		 * it can't be considered to have a verifiable
2401 		 * length, so it's not possible to certify it as safe.
2402 		 */
2403 		switch (B_TYPE(bk->type)) {
2404 		case B_KEYDATA:
2405 			len = bk->len;
2406 			break;
2407 		case B_DUPLICATE:
2408 		case B_OVERFLOW:
2409 			len = BOVERFLOW_SIZE;
2410 			break;
2411 		default:
2412 			EPRINT((env, DB_STR_A("0566",
2413 			    "Page %lu: item %lu of unrecognizable type",
2414 			    "%lu %lu"), (u_long)pgno, (u_long)i));
2415 			return (DB_VERIFY_BAD);
2416 		}
2417 
2418 		if ((size_t)(offset + len) > dbp->pgsize) {
2419 			EPRINT((env, DB_STR_A("0567",
2420 			    "Page %lu: item %lu extends past page boundary",
2421 			    "%lu %lu"), (u_long)pgno, (u_long)i));
2422 			return (DB_VERIFY_BAD);
2423 		}
2424 	}
2425 
2426 	if (offsetp != NULL)
2427 		*offsetp = offset;
2428 	return (0);
2429 }
2430 
2431 /*
2432  * __db_vrfy_duptype--
2433  *	Given a page number and a set of flags to __bam_vrfy_subtree,
2434  *	verify that the dup tree type is correct--i.e., it's a recno
2435  *	if DUPSORT is not set and a btree if it is.
2436  *
2437  * PUBLIC: int __db_vrfy_duptype
2438  * PUBLIC:     __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t));
2439  */
2440 int
__db_vrfy_duptype(dbp,vdp,pgno,flags)2441 __db_vrfy_duptype(dbp, vdp, pgno, flags)
2442 	DB *dbp;
2443 	VRFY_DBINFO *vdp;
2444 	db_pgno_t pgno;
2445 	u_int32_t flags;
2446 {
2447 	ENV *env;
2448 	VRFY_PAGEINFO *pip;
2449 	int ret, isbad;
2450 
2451 	env = dbp->env;
2452 	isbad = 0;
2453 
2454 	if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
2455 		return (ret);
2456 
2457 	switch (pip->type) {
2458 	case P_IBTREE:
2459 	case P_LDUP:
2460 		if (!LF_ISSET(DB_ST_DUPSORT)) {
2461 			EPRINT((env, DB_STR_A("0568",
2462 	    "Page %lu: sorted duplicate set in unsorted-dup database",
2463 			    "%lu"), (u_long)pgno));
2464 			isbad = 1;
2465 		}
2466 		break;
2467 	case P_IRECNO:
2468 	case P_LRECNO:
2469 		if (LF_ISSET(DB_ST_DUPSORT)) {
2470 			EPRINT((env, DB_STR_A("0569",
2471 	    "Page %lu: unsorted duplicate set in sorted-dup database",
2472 			    "%lu"), (u_long)pgno));
2473 			isbad = 1;
2474 		}
2475 		break;
2476 	default:
2477 		/*
2478 		 * If the page is entirely zeroed, its pip->type will be a lie
2479 		 * (we assumed it was a hash page, as they're allowed to be
2480 		 * zeroed);  handle this case specially.
2481 		 */
2482 		if (F_ISSET(pip, VRFY_IS_ALLZEROES))
2483 			ZEROPG_ERR_PRINT(env, pgno, DB_STR_P("duplicate page"));
2484 		else
2485 			EPRINT((env, DB_STR_A("0570",
2486 		    "Page %lu: duplicate page of inappropriate type %lu",
2487 			    "%lu %lu"), (u_long)pgno, (u_long)pip->type));
2488 		isbad = 1;
2489 		break;
2490 	}
2491 
2492 	if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0)
2493 		return (ret);
2494 	return (isbad == 1 ? DB_VERIFY_BAD : 0);
2495 }
2496 
2497 /*
2498  * __db_salvage_duptree --
2499  *	Attempt to salvage a given duplicate tree, given its alleged root.
2500  *
2501  *	The key that corresponds to this dup set has been passed to us
2502  *	in DBT *key.  Because data items follow keys, though, it has been
2503  *	printed once already.
2504  *
2505  *	The basic idea here is that pgno ought to be a P_LDUP, a P_LRECNO, a
2506  *	P_IBTREE, or a P_IRECNO.  If it's an internal page, use the verifier
2507  *	functions to make sure it's safe;  if it's not, we simply bail and the
2508  *	data will have to be printed with no key later on.  if it is safe,
2509  *	recurse on each of its children.
2510  *
2511  *	Whether or not it's safe, if it's a leaf page, __bam_salvage it.
2512  *
2513  *	At all times, use the DB hanging off vdp to mark and check what we've
2514  *	done, so each page gets printed exactly once and we don't get caught
2515  *	in any cycles.
2516  *
2517  * PUBLIC: int __db_salvage_duptree __P((DB *, VRFY_DBINFO *, db_pgno_t,
2518  * PUBLIC:     DBT *, void *, int (*)(void *, const void *), u_int32_t));
2519  */
2520 int
__db_salvage_duptree(dbp,vdp,pgno,key,handle,callback,flags)2521 __db_salvage_duptree(dbp, vdp, pgno, key, handle, callback, flags)
2522 	DB *dbp;
2523 	VRFY_DBINFO *vdp;
2524 	db_pgno_t pgno;
2525 	DBT *key;
2526 	void *handle;
2527 	int (*callback) __P((void *, const void *));
2528 	u_int32_t flags;
2529 {
2530 	DB_MPOOLFILE *mpf;
2531 	PAGE *h;
2532 	int ret, t_ret;
2533 
2534 	mpf = dbp->mpf;
2535 
2536 	if (pgno == PGNO_INVALID || !IS_VALID_PGNO(pgno))
2537 		return (DB_VERIFY_BAD);
2538 
2539 	/* We have a plausible page.  Try it. */
2540 	if ((ret = __memp_fget(mpf, &pgno, vdp->thread_info, NULL, 0, &h)) != 0)
2541 		return (ret);
2542 
2543 	switch (TYPE(h)) {
2544 	case P_IBTREE:
2545 	case P_IRECNO:
2546 		if ((ret = __db_vrfy_common(dbp, vdp, h, pgno, flags)) != 0)
2547 			goto err;
2548 		if ((ret = __bam_vrfy(dbp,
2549 		    vdp, h, pgno, flags | DB_NOORDERCHK)) != 0 ||
2550 		    (ret = __db_salvage_markdone(vdp, pgno)) != 0)
2551 			goto err;
2552 		/*
2553 		 * We have a known-healthy internal page.  Walk it.
2554 		 */
2555 		if ((ret = __bam_salvage_walkdupint(dbp, vdp, h, key,
2556 		    handle, callback, flags)) != 0)
2557 			goto err;
2558 		break;
2559 	case P_LRECNO:
2560 	case P_LDUP:
2561 		if ((ret = __bam_salvage(dbp,
2562 		    vdp, pgno, TYPE(h), h, handle, callback, key, flags)) != 0)
2563 			goto err;
2564 		break;
2565 	default:
2566 		ret = DB_VERIFY_BAD;
2567 		goto err;
2568 	}
2569 
2570 err:	if ((t_ret = __memp_fput(mpf,
2571 	     vdp->thread_info, h, dbp->priority)) != 0 && ret == 0)
2572 		ret = t_ret;
2573 	return (ret);
2574 }
2575 
2576 /*
2577  * __db_salvage_all --
2578  *	Salvage only the leaves we find by walking the tree.  If we have subdbs,
2579  *	salvage each of them individually.
2580  */
2581 static int
__db_salvage_all(dbp,vdp,handle,callback,flags,hassubsp)2582 __db_salvage_all(dbp, vdp, handle, callback, flags, hassubsp)
2583 	DB *dbp;
2584 	VRFY_DBINFO *vdp;
2585 	void *handle;
2586 	int (*callback) __P((void *, const void *));
2587 	u_int32_t flags;
2588 	int *hassubsp;
2589 {
2590 	DB *pgset;
2591 	DBC *pgsc;
2592 	DB_MPOOLFILE *mpf;
2593 	ENV *env;
2594 	PAGE *h;
2595 	VRFY_PAGEINFO *pip;
2596 	db_pgno_t p, meta_pgno;
2597 	int ret, t_ret;
2598 
2599 	*hassubsp = 0;
2600 
2601 	env = dbp->env;
2602 	pgset = NULL;
2603 	pgsc = NULL;
2604 	mpf = dbp->mpf;
2605 	h = NULL;
2606 	pip = NULL;
2607 	ret = 0;
2608 
2609 	/*
2610 	 * Check to make sure the page is OK and find out if it contains
2611 	 * subdatabases.
2612 	 */
2613 	meta_pgno = PGNO_BASE_MD;
2614 	if ((t_ret = __memp_fget(mpf,
2615 	    &meta_pgno, vdp->thread_info, NULL, 0, &h)) == 0 &&
2616 	    (t_ret = __db_vrfy_common(dbp, vdp, h, PGNO_BASE_MD, flags)) == 0 &&
2617 	    (t_ret = __db_salvage_pg(
2618 		dbp, vdp, PGNO_BASE_MD, h, handle, callback, flags)) == 0 &&
2619 	    (t_ret = __db_vrfy_getpageinfo(vdp, 0, &pip)) == 0)
2620 		if (F_ISSET(pip, VRFY_HAS_SUBDBS))
2621 			*hassubsp = 1;
2622 	if (pip != NULL &&
2623 	    (t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
2624 		ret = t_ret;
2625 	if (h != NULL) {
2626 		if ((t_ret = __memp_fput(mpf,
2627 		     vdp->thread_info, h, dbp->priority)) != 0 && ret == 0)
2628 			ret = t_ret;
2629 		h = NULL;
2630 	}
2631 	if (ret != 0)
2632 		return (ret);
2633 
2634 	/* Without subdatabases, we can just dump from the meta pgno. */
2635 	if (*hassubsp == 0)
2636 		return (__db_salvage(dbp,
2637 		    vdp, PGNO_BASE_MD, handle, callback, flags));
2638 
2639 	/*
2640 	 * We have subdbs.  Try to crack them.
2641 	 *
2642 	 * To do so, get a set of leaf pages in the master database, and then
2643 	 * walk each of the valid ones, salvaging subdbs as we go.  If any
2644 	 * prove invalid, just drop them;  we'll pick them up on a later pass.
2645 	 */
2646 	if ((ret = __db_vrfy_pgset(env,
2647 	    vdp->thread_info, dbp->pgsize, &pgset)) != 0)
2648 		goto err;
2649 	if ((ret = __db_meta2pgset(dbp, vdp, PGNO_BASE_MD, flags, pgset)) != 0)
2650 		goto err;
2651 	if ((ret = __db_cursor(pgset, vdp->thread_info, NULL, &pgsc, 0)) != 0)
2652 		goto err;
2653 	while ((t_ret = __db_vrfy_pgset_next(pgsc, &p)) == 0) {
2654 		if ((t_ret = __memp_fget(mpf,
2655 		    &p, vdp->thread_info, NULL, 0, &h)) == 0 &&
2656 		    (t_ret = __db_vrfy_common(dbp, vdp, h, p, flags)) == 0 &&
2657 		    (t_ret =
2658 		    __bam_vrfy(dbp, vdp, h, p, flags | DB_NOORDERCHK)) == 0)
2659 			t_ret = __db_salvage_subdbpg(
2660 			    dbp, vdp, h, handle, callback, flags);
2661 		if (t_ret != 0 && ret == 0)
2662 			ret = t_ret;
2663 		if (h != NULL) {
2664 			if ((t_ret = __memp_fput(mpf, vdp->thread_info,
2665 			    h, dbp->priority)) != 0 && ret == 0)
2666 				ret = t_ret;
2667 			h = NULL;
2668 		}
2669 	}
2670 
2671 	if (t_ret != DB_NOTFOUND && ret == 0)
2672 		ret = t_ret;
2673 
2674 err:	if (pgsc != NULL && (t_ret = __dbc_close(pgsc)) != 0 && ret == 0)
2675 		ret = t_ret;
2676 	if (pgset != NULL &&
2677 	    (t_ret = __db_close(pgset, NULL, 0)) != 0 && ret ==0)
2678 		ret = t_ret;
2679 	if (h != NULL &&
2680 	    (t_ret = __memp_fput(mpf,
2681 		vdp->thread_info, h, dbp->priority)) != 0 && ret == 0)
2682 		ret = t_ret;
2683 	return (ret);
2684 }
2685 
2686 /*
2687  * __db_salvage_subdbpg --
2688  *	Given a known-good leaf page in the master database, salvage all
2689  *	leaf pages corresponding to each subdb.
2690  */
2691 static int
__db_salvage_subdbpg(dbp,vdp,master,handle,callback,flags)2692 __db_salvage_subdbpg(dbp, vdp, master, handle, callback, flags)
2693 	DB *dbp;
2694 	VRFY_DBINFO *vdp;
2695 	PAGE *master;
2696 	void *handle;
2697 	int (*callback) __P((void *, const void *));
2698 	u_int32_t flags;
2699 {
2700 	BKEYDATA *bkkey, *bkdata;
2701 	BOVERFLOW *bo;
2702 	DB *pgset;
2703 	DBC *pgsc;
2704 	DBT key;
2705 	DB_MPOOLFILE *mpf;
2706 	ENV *env;
2707 	PAGE *subpg;
2708 	db_indx_t i;
2709 	db_pgno_t meta_pgno;
2710 	int ret, err_ret, t_ret;
2711 	char *subdbname;
2712 	u_int32_t ovfl_bufsz;
2713 
2714 	env = dbp->env;
2715 	mpf = dbp->mpf;
2716 	ret = err_ret = 0;
2717 	subdbname = NULL;
2718 	pgsc = NULL;
2719 	pgset = NULL;
2720 	ovfl_bufsz = 0;
2721 
2722 	/*
2723 	 * For each entry, get and salvage the set of pages
2724 	 * corresponding to that entry.
2725 	 */
2726 	for (i = 0; i < NUM_ENT(master); i += P_INDX) {
2727 		bkkey = GET_BKEYDATA(dbp, master, i);
2728 		bkdata = GET_BKEYDATA(dbp, master, i + O_INDX);
2729 
2730 		/* Get the subdatabase name. */
2731 		if (B_TYPE(bkkey->type) == B_OVERFLOW) {
2732 			/*
2733 			 * We can, in principle anyway, have a subdb
2734 			 * name so long it overflows.  Ick.
2735 			 */
2736 			bo = (BOVERFLOW *)bkkey;
2737 			if ((ret = __db_safe_goff(dbp, vdp, bo->pgno,
2738 			    &key, &subdbname, &ovfl_bufsz, flags)) != 0) {
2739 				err_ret = DB_VERIFY_BAD;
2740 				continue;
2741 			}
2742 
2743 			/* Nul-terminate it. */
2744 			if (ovfl_bufsz < key.size + 1) {
2745 				if ((ret = __os_realloc(env,
2746 				    key.size + 1, &subdbname)) != 0)
2747 					goto err;
2748 				ovfl_bufsz = key.size + 1;
2749 			}
2750 			subdbname[key.size] = '\0';
2751 		} else if (B_TYPE(bkkey->type) == B_KEYDATA) {
2752 			if (ovfl_bufsz < (u_int32_t)bkkey->len + 1) {
2753 				if ((ret = __os_realloc(env,
2754 				    bkkey->len + 1, &subdbname)) != 0)
2755 					goto err;
2756 				ovfl_bufsz = bkkey->len + 1;
2757 			}
2758 			DB_ASSERT(env, subdbname != NULL);
2759 			memcpy(subdbname, bkkey->data, bkkey->len);
2760 			subdbname[bkkey->len] = '\0';
2761 		}
2762 
2763 		/* Get the corresponding pgno. */
2764 		if (bkdata->len != sizeof(db_pgno_t)) {
2765 			err_ret = DB_VERIFY_BAD;
2766 			continue;
2767 		}
2768 		memcpy(&meta_pgno,
2769 		    (db_pgno_t *)bkdata->data, sizeof(db_pgno_t));
2770 
2771 		/*
2772 		 * Subdatabase meta pgnos are stored in network byte
2773 		 * order for cross-endian compatibility.  Swap if appropriate.
2774 		 */
2775 		DB_NTOHL_SWAP(env, &meta_pgno);
2776 
2777 		/* If we can't get the subdb meta page, just skip the subdb. */
2778 		if (!IS_VALID_PGNO(meta_pgno) || (ret = __memp_fget(mpf,
2779 		    &meta_pgno, vdp->thread_info, NULL, 0, &subpg)) != 0) {
2780 			err_ret = ret;
2781 			continue;
2782 		}
2783 
2784 		/*
2785 		 * Verify the subdatabase meta page.  This has two functions.
2786 		 * First, if it's bad, we have no choice but to skip the subdb
2787 		 * and let the pages just get printed on a later pass.  Second,
2788 		 * the access-method-specific meta verification routines record
2789 		 * the various state info (such as the presence of dups)
2790 		 * that we need for __db_prheader().
2791 		 */
2792 		if ((ret =
2793 		    __db_vrfy_common(dbp, vdp, subpg, meta_pgno, flags)) != 0) {
2794 			err_ret = ret;
2795 			(void)__memp_fput(mpf,
2796 			    vdp->thread_info, subpg, dbp->priority);
2797 			continue;
2798 		}
2799 		switch (TYPE(subpg)) {
2800 		case P_BTREEMETA:
2801 			if ((ret = __bam_vrfy_meta(dbp,
2802 			    vdp, (BTMETA *)subpg, meta_pgno, flags)) != 0) {
2803 				err_ret = ret;
2804 				(void)__memp_fput(mpf,
2805 				    vdp->thread_info, subpg, dbp->priority);
2806 				continue;
2807 			}
2808 			break;
2809 		case P_HASHMETA:
2810 			if ((ret = __ham_vrfy_meta(dbp,
2811 			    vdp, (HMETA *)subpg, meta_pgno, flags)) != 0) {
2812 				err_ret = ret;
2813 				(void)__memp_fput(mpf,
2814 				    vdp->thread_info, subpg, dbp->priority);
2815 				continue;
2816 			}
2817 			break;
2818 		default:
2819 			/* This isn't an appropriate page;  skip this subdb. */
2820 			err_ret = DB_VERIFY_BAD;
2821 			continue;
2822 		}
2823 
2824 		if ((ret = __memp_fput(mpf,
2825 		    vdp->thread_info, subpg, dbp->priority)) != 0) {
2826 			err_ret = ret;
2827 			continue;
2828 		}
2829 
2830 		/* Print a subdatabase header. */
2831 		if ((ret = __db_prheader(dbp,
2832 		    subdbname, 0, 0, handle, callback, vdp, meta_pgno)) != 0)
2833 			goto err;
2834 
2835 		/* Salvage meta_pgno's tree. */
2836 		if ((ret = __db_salvage(dbp,
2837 		    vdp, meta_pgno, handle, callback, flags)) != 0)
2838 			err_ret = ret;
2839 
2840 		/* Print a subdatabase footer. */
2841 		if ((ret = __db_prfooter(handle, callback)) != 0)
2842 			goto err;
2843 	}
2844 
2845 err:	if (subdbname)
2846 		__os_free(env, subdbname);
2847 
2848 	if (pgsc != NULL && (t_ret = __dbc_close(pgsc)) != 0)
2849 		ret = t_ret;
2850 
2851 	if (pgset != NULL && (t_ret = __db_close(pgset, NULL, 0)) != 0)
2852 		ret = t_ret;
2853 
2854 	if ((t_ret = __db_salvage_markdone(vdp, PGNO(master))) != 0)
2855 		return (t_ret);
2856 
2857 	return ((err_ret != 0) ? err_ret : ret);
2858 }
2859 
2860 /*
2861  * __db_salvage --
2862  *      Given a meta page number, salvage all data from leaf pages found by
2863  *      walking the meta page's tree.
2864  */
2865 static int
__db_salvage(dbp,vdp,meta_pgno,handle,callback,flags)2866 __db_salvage(dbp, vdp, meta_pgno, handle, callback, flags)
2867      DB *dbp;
2868      VRFY_DBINFO *vdp;
2869      db_pgno_t meta_pgno;
2870      void *handle;
2871      int (*callback) __P((void *, const void *));
2872      u_int32_t flags;
2873 
2874 {
2875 	DB *pgset;
2876 	DBC *dbc, *pgsc;
2877 	DB_MPOOLFILE *mpf;
2878 	ENV *env;
2879 	PAGE *subpg;
2880 	db_pgno_t p;
2881 	int err_ret, ret, t_ret;
2882 
2883 	env = dbp->env;
2884 	mpf = dbp->mpf;
2885 	err_ret = ret = t_ret = 0;
2886 	pgsc = NULL;
2887 	pgset = NULL;
2888 	dbc = NULL;
2889 
2890 	if ((ret = __db_vrfy_pgset(env,
2891 	    vdp->thread_info, dbp->pgsize, &pgset)) != 0)
2892 		goto err;
2893 
2894 	/* Get all page numbers referenced from this meta page. */
2895 	if ((ret = __db_meta2pgset(dbp, vdp, meta_pgno,
2896 	    flags, pgset)) != 0) {
2897 		err_ret = ret;
2898 		goto err;
2899 	}
2900 
2901 	if ((ret = __db_cursor(pgset,
2902 	    vdp->thread_info, NULL, &pgsc, 0)) != 0)
2903 		goto err;
2904 
2905 	if (dbp->type == DB_QUEUE &&
2906 	    (ret = __db_cursor(dbp, vdp->thread_info, NULL, &dbc, 0)) != 0)
2907 		goto err;
2908 
2909 	/* Salvage every page in pgset. */
2910 	while ((ret = __db_vrfy_pgset_next(pgsc, &p)) == 0) {
2911 		if (dbp->type == DB_QUEUE) {
2912 #ifdef HAVE_QUEUE
2913 			ret = __qam_fget(dbc, &p, 0, &subpg);
2914 #else
2915 			ret = __db_no_queue_am(env);
2916 #endif
2917 			/* Don't report an error for pages not found in a queue.
2918 			 * The pgset is a best guess, it doesn't know about
2919 			 * deleted extents which leads to this error.
2920 			 */
2921 			if (ret == ENOENT || ret == DB_PAGE_NOTFOUND)
2922 				continue;
2923 		} else
2924 			ret = __memp_fget(mpf,
2925 			    &p, vdp->thread_info, NULL, 0, &subpg);
2926 		if (ret != 0) {
2927 			err_ret = ret;
2928 			continue;
2929 		}
2930 
2931 		if ((ret = __db_salvage_pg(dbp, vdp, p, subpg,
2932 		    handle, callback, flags)) != 0)
2933 			err_ret = ret;
2934 
2935 		if (dbp->type == DB_QUEUE)
2936 #ifdef HAVE_QUEUE
2937 			ret = __qam_fput(dbc, p, subpg, dbp->priority);
2938 #else
2939 			ret = __db_no_queue_am(env);
2940 #endif
2941 		else
2942 			ret = __memp_fput(mpf,
2943 			    vdp->thread_info, subpg, dbp->priority);
2944 		if (ret != 0)
2945 			err_ret = ret;
2946 	}
2947 
2948 	if (ret == DB_NOTFOUND)
2949 		ret = 0;
2950 
2951 err:
2952 	if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0)
2953 		ret = t_ret;
2954 	if (pgsc != NULL && (t_ret = __dbc_close(pgsc)) != 0)
2955 		ret = t_ret;
2956 	if (pgset != NULL && (t_ret = __db_close(pgset, NULL, 0)) != 0)
2957 		ret = t_ret;
2958 
2959 	return ((err_ret != 0) ? err_ret : ret);
2960 }
2961 
2962 /*
2963  * __db_meta2pgset --
2964  *	Given a known-safe meta page number, return the set of pages
2965  *	corresponding to the database it represents.  Return DB_VERIFY_BAD if
2966  *	it's not a suitable meta page or is invalid.
2967  */
2968 static int
__db_meta2pgset(dbp,vdp,pgno,flags,pgset)2969 __db_meta2pgset(dbp, vdp, pgno, flags, pgset)
2970 	DB *dbp;
2971 	VRFY_DBINFO *vdp;
2972 	db_pgno_t pgno;
2973 	u_int32_t flags;
2974 	DB *pgset;
2975 {
2976 	DB_MPOOLFILE *mpf;
2977 	PAGE *h;
2978 	int ret, t_ret;
2979 
2980 	mpf = dbp->mpf;
2981 
2982 	if ((ret = __memp_fget(mpf, &pgno, vdp->thread_info, NULL, 0, &h)) != 0)
2983 		return (ret);
2984 
2985 	switch (TYPE(h)) {
2986 	case P_BTREEMETA:
2987 		ret = __bam_meta2pgset(dbp, vdp, (BTMETA *)h, flags, pgset);
2988 		break;
2989 	case P_HASHMETA:
2990 		ret = __ham_meta2pgset(dbp, vdp, (HMETA *)h, flags, pgset);
2991 		break;
2992 	case P_HEAPMETA:
2993 		ret = __heap_meta2pgset(dbp, vdp, (HEAPMETA *)h, pgset);
2994 		break;
2995 	case P_QAMMETA:
2996 #ifdef HAVE_QUEUE
2997 		ret = __qam_meta2pgset(dbp, vdp, pgset);
2998 		break;
2999 #endif
3000 	default:
3001 		ret = DB_VERIFY_BAD;
3002 		break;
3003 	}
3004 
3005 	if ((t_ret = __memp_fput(mpf, vdp->thread_info, h, dbp->priority)) != 0)
3006 		return (t_ret);
3007 	return (ret);
3008 }
3009 
3010 /*
3011  * __db_guesspgsize --
3012  *	Try to guess what the pagesize is if the one on the meta page
3013  *	and the one in the db are invalid.
3014  */
3015 static u_int
__db_guesspgsize(env,fhp)3016 __db_guesspgsize(env, fhp)
3017 	ENV *env;
3018 	DB_FH *fhp;
3019 {
3020 	db_pgno_t i;
3021 	size_t nr;
3022 	u_int32_t guess;
3023 	u_int8_t type;
3024 
3025 	for (guess = DB_MAX_PGSIZE; guess >= DB_MIN_PGSIZE; guess >>= 1) {
3026 		/*
3027 		 * We try to read three pages ahead after the first one
3028 		 * and make sure we have plausible types for all of them.
3029 		 * If the seeks fail, continue with a smaller size;
3030 		 * we're probably just looking past the end of the database.
3031 		 * If they succeed and the types are reasonable, also continue
3032 		 * with a size smaller;  we may be looking at pages N,
3033 		 * 2N, and 3N for some N > 1.
3034 		 *
3035 		 * As soon as we hit an invalid type, we stop and return
3036 		 * our previous guess; that last one was probably the page size.
3037 		 */
3038 		for (i = 1; i <= 3; i++) {
3039 			if (__os_seek(
3040 			    env, fhp, i, guess, SSZ(DBMETA, type)) != 0)
3041 				break;
3042 			if (__os_read(env,
3043 			    fhp, &type, 1, &nr) != 0 || nr == 0)
3044 				break;
3045 			if (type == P_INVALID || type >= P_PAGETYPE_MAX)
3046 				return (guess << 1);
3047 		}
3048 	}
3049 
3050 	/*
3051 	 * If we're just totally confused--the corruption takes up most of the
3052 	 * beginning pages of the database--go with the default size.
3053 	 */
3054 	return (DB_DEF_IOSIZE);
3055 }
3056