1 /*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 2000, 2013 Oracle and/or its affiliates. All rights reserved.
5 *
6 * $Id$
7 */
8
9 #include "db_config.h"
10
11 #include "db_int.h"
12 #include "dbinc/db_page.h"
13 #include "dbinc/db_swap.h"
14 #include "dbinc/db_verify.h"
15 #include "dbinc/btree.h"
16 #include "dbinc/fop.h"
17 #include "dbinc/hash.h"
18 #include "dbinc/heap.h"
19 #include "dbinc/lock.h"
20 #include "dbinc/mp.h"
21 #include "dbinc/qam.h"
22 #include "dbinc/txn.h"
23
24 /*
25 * This is the code for DB->verify, the DB database consistency checker.
26 * For now, it checks all subdatabases in a database, and verifies
27 * everything it knows how to (i.e. it's all-or-nothing, and one can't
28 * check only for a subset of possible problems).
29 */
30
31 static u_int __db_guesspgsize __P((ENV *, DB_FH *));
32 static int __db_is_valid_magicno __P((u_int32_t, DBTYPE *));
33 static int __db_meta2pgset
34 __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t, DB *));
35 static int __db_salvage __P((DB *, VRFY_DBINFO *,
36 db_pgno_t, void *, int (*)(void *, const void *), u_int32_t));
37 static int __db_salvage_subdbpg __P((DB *, VRFY_DBINFO *,
38 PAGE *, void *, int (*)(void *, const void *), u_int32_t));
39 static int __db_salvage_all __P((DB *, VRFY_DBINFO *, void *,
40 int(*)(void *, const void *), u_int32_t, int *));
41 static int __db_salvage_unknowns __P((DB *, VRFY_DBINFO *, void *,
42 int (*)(void *, const void *), u_int32_t));
43 static int __db_verify_arg __P((DB *, const char *, void *, u_int32_t));
44 static int __db_vrfy_freelist
45 __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t));
46 static int __db_vrfy_getpagezero
47 __P((DB *, DB_FH *, const char *, u_int8_t *, u_int32_t));
48 static int __db_vrfy_invalid
49 __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t));
50 static int __db_vrfy_orderchkonly __P((DB *,
51 VRFY_DBINFO *, const char *, const char *, u_int32_t));
52 static int __db_vrfy_pagezero __P((DB *,
53 VRFY_DBINFO *, DB_FH *, const char *, u_int32_t));
54 static int __db_vrfy_subdbs
55 __P((DB *, VRFY_DBINFO *, const char *, u_int32_t));
56 static int __db_vrfy_structure __P((DB *, VRFY_DBINFO *,
57 const char *, db_pgno_t, void *, void *, u_int32_t));
58 static int __db_vrfy_walkpages __P((DB *, VRFY_DBINFO *,
59 void *, int (*)(void *, const void *), u_int32_t));
60
61 #define VERIFY_FLAGS \
62 (DB_AGGRESSIVE | \
63 DB_NOORDERCHK | DB_ORDERCHKONLY | DB_PRINTABLE | DB_SALVAGE | DB_UNREF)
64
65 /*
66 * __db_verify_pp --
67 * DB->verify public interface.
68 *
69 * PUBLIC: int __db_verify_pp
70 * PUBLIC: __P((DB *, const char *, const char *, FILE *, u_int32_t));
71 */
72 int
__db_verify_pp(dbp,file,database,outfile,flags)73 __db_verify_pp(dbp, file, database, outfile, flags)
74 DB *dbp;
75 const char *file, *database;
76 FILE *outfile;
77 u_int32_t flags;
78 {
79 /*
80 * __db_verify_pp is a wrapper to __db_verify_internal, which lets
81 * us pass appropriate equivalents to FILE * in from the non-C APIs.
82 * That's why the usual ENV_ENTER macros are in __db_verify_internal,
83 * not here.
84 */
85 return (__db_verify_internal(dbp,
86 file, database, outfile, __db_pr_callback, flags));
87 }
88
89 /*
90 * __db_verify_internal --
91 *
92 * PUBLIC: int __db_verify_internal __P((DB *, const char *,
93 * PUBLIC: const char *, void *, int (*)(void *, const void *), u_int32_t));
94 */
95 int
__db_verify_internal(dbp,fname,dname,handle,callback,flags)96 __db_verify_internal(dbp, fname, dname, handle, callback, flags)
97 DB *dbp;
98 const char *fname, *dname;
99 void *handle;
100 int (*callback) __P((void *, const void *));
101 u_int32_t flags;
102 {
103 DB_THREAD_INFO *ip;
104 ENV *env;
105 int ret, t_ret;
106
107 env = dbp->env;
108
109 DB_ILLEGAL_AFTER_OPEN(dbp, "DB->verify");
110
111 if (!LF_ISSET(DB_SALVAGE))
112 LF_SET(DB_UNREF);
113
114 ENV_ENTER(env, ip);
115
116 if ((ret = __db_verify_arg(dbp, dname, handle, flags)) == 0)
117 ret = __db_verify(dbp, ip,
118 fname, dname, handle, callback, NULL, NULL, flags);
119
120 /* Db.verify is a DB handle destructor. */
121 if ((t_ret = __db_close(dbp, NULL, 0)) != 0 && ret == 0)
122 ret = t_ret;
123
124 ENV_LEAVE(env, ip);
125 return (ret);
126 }
127
128 /*
129 * __db_verify_arg --
130 * Check DB->verify arguments.
131 */
132 static int
__db_verify_arg(dbp,dname,handle,flags)133 __db_verify_arg(dbp, dname, handle, flags)
134 DB *dbp;
135 const char *dname;
136 void *handle;
137 u_int32_t flags;
138 {
139 ENV *env;
140 int ret;
141
142 env = dbp->env;
143
144 if ((ret = __db_fchk(env, "DB->verify", flags, VERIFY_FLAGS)) != 0)
145 return (ret);
146
147 /*
148 * DB_SALVAGE is mutually exclusive with the other flags except
149 * DB_AGGRESSIVE, DB_PRINTABLE.
150 *
151 * DB_AGGRESSIVE and DB_PRINTABLE are only meaningful when salvaging.
152 *
153 * DB_SALVAGE requires an output stream.
154 */
155 if (LF_ISSET(DB_SALVAGE)) {
156 if (LF_ISSET(~(DB_AGGRESSIVE | DB_PRINTABLE | DB_SALVAGE)))
157 return (__db_ferr(env, "DB->verify", 1));
158 if (handle == NULL) {
159 __db_errx(env, DB_STR("0518",
160 "DB_SALVAGE requires a an output handle"));
161 return (EINVAL);
162 }
163 } else
164 if (LF_ISSET(DB_AGGRESSIVE | DB_PRINTABLE))
165 return (__db_ferr(env, "DB->verify", 1));
166
167 /*
168 * DB_ORDERCHKONLY is mutually exclusive with DB_SALVAGE and
169 * DB_NOORDERCHK, and requires a database name.
170 */
171 if ((ret = __db_fcchk(env, "DB->verify", flags,
172 DB_ORDERCHKONLY, DB_SALVAGE | DB_NOORDERCHK)) != 0)
173 return (ret);
174 if (LF_ISSET(DB_ORDERCHKONLY) && dname == NULL) {
175 __db_errx(env, DB_STR("0519",
176 "DB_ORDERCHKONLY requires a database name"));
177 return (EINVAL);
178 }
179 return (0);
180 }
181
182 /*
183 * __db_verify --
184 * Walk the entire file page-by-page, either verifying with or without
185 * dumping in db_dump -d format, or DB_SALVAGE-ing whatever key/data
186 * pairs can be found and dumping them in standard (db_load-ready)
187 * dump format.
188 *
189 * (Salvaging isn't really a verification operation, but we put it
190 * here anyway because it requires essentially identical top-level
191 * code.)
192 *
193 * flags may be 0, DB_NOORDERCHK, DB_ORDERCHKONLY, or DB_SALVAGE
194 * (and optionally DB_AGGRESSIVE).
195 * PUBLIC: int __db_verify __P((DB *, DB_THREAD_INFO *, const char *,
196 * PUBLIC: const char *, void *, int (*)(void *, const void *),
197 * PUBLIC: void *, void *, u_int32_t));
198 */
199 int
__db_verify(dbp,ip,name,subdb,handle,callback,lp,rp,flags)200 __db_verify(dbp, ip, name, subdb, handle, callback, lp, rp, flags)
201 DB *dbp;
202 DB_THREAD_INFO *ip;
203 const char *name, *subdb;
204 void *handle;
205 int (*callback) __P((void *, const void *));
206 void *lp, *rp;
207 u_int32_t flags;
208 {
209 DB_FH *fhp;
210 ENV *env;
211 VRFY_DBINFO *vdp;
212 u_int32_t sflags;
213 int has_subdbs, isbad, ret, t_ret;
214 char *real_name;
215
216 env = dbp->env;
217 fhp = NULL;
218 vdp = NULL;
219 real_name = NULL;
220 has_subdbs = isbad = ret = t_ret = 0;
221
222 F_SET(dbp, DB_AM_VERIFYING);
223
224 /* Initialize any feedback function. */
225 if (!LF_ISSET(DB_SALVAGE) && dbp->db_feedback != NULL)
226 dbp->db_feedback(dbp, DB_VERIFY, 0);
227
228 /*
229 * We don't know how large the cache is, and if the database
230 * in question uses a small page size--which we don't know
231 * yet!--it may be uncomfortably small for the default page
232 * size [#2143]. However, the things we need temporary
233 * databases for in dbinfo are largely tiny, so using a
234 * 1024-byte pagesize is probably not going to be a big hit,
235 * and will make us fit better into small spaces.
236 */
237 if ((ret = __db_vrfy_dbinfo_create(env, ip, 1024, &vdp)) != 0)
238 goto err;
239
240 /*
241 * Note whether the user has requested that we use printable
242 * chars where possible. We won't get here with this flag if
243 * we're not salvaging.
244 */
245 if (LF_ISSET(DB_PRINTABLE))
246 F_SET(vdp, SALVAGE_PRINTABLE);
247
248 if (name != NULL) {
249 /* Find the real name of the file. */
250 if ((ret = __db_appname(env,
251 DB_APP_DATA, name, &dbp->dirname, &real_name)) != 0)
252 goto err;
253
254 /*
255 * Our first order of business is to verify page 0, which is the
256 * metadata page for the master database of subdatabases or of
257 * the only database in the file. We want to do this by hand
258 * rather than just calling __db_open in case it's
259 * corrupt--various things in __db_open might act funny.
260 *
261 * Once we know the metadata page is healthy, I believe that
262 * it's safe to open the database normally and then use the page
263 * swapping code, which makes life easier.
264 */
265 if ((ret = __os_open(env,
266 real_name, 0, DB_OSO_RDONLY, 0, &fhp)) != 0)
267 goto err;
268 } else {
269 MAKE_INMEM(dbp);
270 }
271
272 /* Verify the metadata page 0; set pagesize and type. */
273 if ((ret = __db_vrfy_pagezero(dbp, vdp, fhp, subdb, flags)) != 0) {
274 if (ret == DB_VERIFY_BAD)
275 isbad = 1;
276 else
277 goto err;
278 }
279
280 /*
281 * We can assume at this point that dbp->pagesize and dbp->type are
282 * set correctly, or at least as well as they can be, and that
283 * locking, logging, and txns are not in use. Thus we can trust
284 * the memp code not to look at the page, and thus to be safe
285 * enough to use.
286 *
287 * The dbp is not open, but the file is open in the fhp, and we
288 * cannot assume that __db_open is safe. Call __env_setup,
289 * the [safe] part of __db_open that initializes the environment--
290 * and the mpool--manually.
291 */
292 if ((ret = __env_setup(dbp, NULL,
293 name, subdb, TXN_INVALID, DB_ODDFILESIZE | DB_RDONLY)) != 0)
294 goto err;
295
296 /*
297 * Set our name in the Queue subsystem; we may need it later
298 * to deal with extents. In-memory databases are not allowed to have
299 * extents.
300 */
301 if (dbp->type == DB_QUEUE && name != NULL &&
302 (ret = __qam_set_ext_data(dbp, name)) != 0)
303 goto err;
304
305 /* Mark the dbp as opened, so that we correctly handle its close. */
306 F_SET(dbp, DB_AM_OPEN_CALLED);
307
308 /*
309 * Find out the page number of the last page in the database. We'll
310 * use this later to verify the metadata page. We don't verify now
311 * because the data from __db_vrfy_pagezero could be stale.
312 */
313 if ((ret = __memp_get_last_pgno(dbp->mpf, &vdp->last_pgno)) != 0)
314 goto err;
315 /*
316 * DB_ORDERCHKONLY is a special case; our file consists of
317 * several subdatabases, which use different hash, bt_compare,
318 * and/or dup_compare functions. Consequently, we couldn't verify
319 * sorting and hashing simply by calling DB->verify() on the file.
320 * DB_ORDERCHKONLY allows us to come back and check those things; it
321 * requires a subdatabase, and assumes that everything but that
322 * database's sorting/hashing is correct.
323 */
324 if (LF_ISSET(DB_ORDERCHKONLY)) {
325 ret = __db_vrfy_orderchkonly(dbp, vdp, name, subdb, flags);
326 goto done;
327 }
328
329 sflags = flags;
330 if (dbp->p_internal != NULL)
331 LF_CLR(DB_SALVAGE);
332
333 /*
334 * When salvaging, we use a db to keep track of whether we've seen a
335 * given overflow or dup page in the course of traversing normal data.
336 * If in the end we have not, we assume its key got lost and print it
337 * with key "UNKNOWN".
338 */
339 if (LF_ISSET(DB_SALVAGE)) {
340 if ((ret = __db_salvage_init(vdp)) != 0)
341 goto err;
342
343 /*
344 * If we're not being aggressive, salvage by walking the tree
345 * and only printing the leaves we find. "has_subdbs" will
346 * indicate whether we found subdatabases.
347 */
348 if (!LF_ISSET(DB_AGGRESSIVE) && __db_salvage_all(
349 dbp, vdp, handle, callback, flags, &has_subdbs) != 0)
350 isbad = 1;
351
352 /*
353 * If we have subdatabases, flag if any keys are found that
354 * don't belong to a subdatabase -- they'll need to have an
355 * "__OTHER__" subdatabase header printed first.
356 */
357 if (has_subdbs) {
358 F_SET(vdp, SALVAGE_PRINTHEADER);
359 F_SET(vdp, SALVAGE_HASSUBDBS);
360 }
361 }
362
363 /* Walk all the pages, if a page cannot be read, verify structure. */
364 if ((ret =
365 __db_vrfy_walkpages(dbp, vdp, handle, callback, flags)) != 0) {
366 if (ret == DB_VERIFY_BAD)
367 isbad = 1;
368 else if (ret != DB_PAGE_NOTFOUND)
369 goto err;
370 }
371
372 /* If we're verifying, verify inter-page structure. */
373 if (!LF_ISSET(DB_SALVAGE) && isbad == 0)
374 if ((t_ret = __db_vrfy_structure(dbp,
375 vdp, name, 0, lp, rp, flags)) != 0) {
376 if (t_ret == DB_VERIFY_BAD)
377 isbad = 1;
378 else
379 goto err;
380 }
381
382 /*
383 * If we're salvaging, output with key UNKNOWN any overflow or dup pages
384 * we haven't been able to put in context. Then destroy the salvager's
385 * state-saving database.
386 */
387 if (LF_ISSET(DB_SALVAGE)) {
388 if ((ret = __db_salvage_unknowns(dbp,
389 vdp, handle, callback, flags)) != 0)
390 isbad = 1;
391 }
392
393 flags = sflags;
394
395 #ifdef HAVE_PARTITION
396 if (t_ret == 0 && dbp->p_internal != NULL)
397 t_ret = __part_verify(dbp, vdp, name, handle, callback, flags);
398 #endif
399
400 if (ret == 0)
401 ret = t_ret;
402
403 /* Don't display a footer for a database holding other databases. */
404 if (LF_ISSET(DB_SALVAGE | DB_VERIFY_PARTITION) == DB_SALVAGE &&
405 (!has_subdbs || F_ISSET(vdp, SALVAGE_PRINTFOOTER)))
406 (void)__db_prfooter(handle, callback);
407
408 done: err:
409 /* Send feedback that we're done. */
410 if (!LF_ISSET(DB_SALVAGE) && dbp->db_feedback != NULL)
411 dbp->db_feedback(dbp, DB_VERIFY, 100);
412
413 if (LF_ISSET(DB_SALVAGE) &&
414 (t_ret = __db_salvage_destroy(vdp)) != 0 && ret == 0)
415 ret = t_ret;
416 if (fhp != NULL &&
417 (t_ret = __os_closehandle(env, fhp)) != 0 && ret == 0)
418 ret = t_ret;
419 if (vdp != NULL &&
420 (t_ret = __db_vrfy_dbinfo_destroy(env, vdp)) != 0 && ret == 0)
421 ret = t_ret;
422 if (real_name != NULL)
423 __os_free(env, real_name);
424
425 /*
426 * DB_VERIFY_FATAL is a private error, translate to a public one.
427 *
428 * If we didn't find a page, it's probably a page number was corrupted.
429 * Return the standard corruption error.
430 *
431 * Otherwise, if we found corruption along the way, set the return.
432 */
433 if (ret == DB_VERIFY_FATAL ||
434 ret == DB_PAGE_NOTFOUND || (ret == 0 && isbad == 1))
435 ret = DB_VERIFY_BAD;
436
437 /* Make sure there's a public complaint if we found corruption. */
438 if (ret != 0)
439 __db_err(env, ret, "%s", name);
440
441 return (ret);
442 }
443
444 /*
445 * __db_vrfy_getpagezero --
446 * Store the master metadata page into a local buffer. For safety, skip
447 * the DB paging code and read the page directly from disk (via seek and
448 * read) or the mpool.
449 */
450 static int
__db_vrfy_getpagezero(dbp,fhp,name,mbuf,flags)451 __db_vrfy_getpagezero(dbp, fhp, name, mbuf, flags)
452 DB *dbp;
453 DB_FH *fhp;
454 const char *name;
455 u_int8_t *mbuf;
456 u_int32_t flags;
457 {
458 DB_MPOOLFILE *mpf;
459 ENV *env;
460 PAGE *h;
461 db_pgno_t pgno;
462 int ret, t_ret;
463 size_t nr;
464
465 env = dbp->env;
466
467 if (F_ISSET(dbp, DB_AM_INMEM)) {
468 /*
469 * Now get the metadata page from the cache, if possible. If
470 * we're verifying an in-memory db, this is the only metadata
471 * page we have.
472 *
473 *
474 * Open the in-memory db file and get the metadata page.
475 */
476 if ((ret = __memp_fcreate_pp(env->dbenv, &mpf, DB_VERIFY)) != 0)
477 return (ret);
478 if ((ret = __memp_set_flags(mpf, DB_MPOOL_NOFILE, 1)) != 0)
479 goto mpf_err;
480 if ((ret = __memp_fopen_pp(mpf,
481 name, DB_ODDFILESIZE | DB_RDONLY, 0, 0)) != 0)
482 goto mpf_err;
483 pgno = PGNO_BASE_MD;
484 if ((ret = __memp_fget_pp(mpf, &pgno, NULL, 0, &h)) != 0) {
485 __db_err(env, ret, DB_STR_A("0747",
486 "Metadata page %lu cannot be read from mpool",
487 "%lu"), (u_long)pgno);
488 goto mpf_err;
489 }
490 memcpy(mbuf, (u_int8_t *)h, DBMETASIZE);
491 ret = __memp_fput_pp(mpf, h, DB_PRIORITY_UNCHANGED, 0);
492 mpf_err: if ((t_ret = __memp_fclose_pp(mpf, 0)) != 0 || ret != 0) {
493 return (ret == 0 ? t_ret : ret);
494 }
495 } else {
496 /*
497 * Seek to the metadata page.
498 *
499 * Note that if we're just starting a verification, dbp->pgsize
500 * may be zero; this is okay, as we want page zero anyway and
501 * 0*0 == 0.
502 */
503 if ((ret = __os_seek(env, fhp, 0, 0, 0)) != 0 ||
504 (ret = __os_read(env, fhp, mbuf, DBMETASIZE, &nr)) != 0) {
505 __db_err(env, ret, DB_STR_A("0520",
506 "Metadata page %lu cannot be read", "%lu"),
507 (u_long)PGNO_BASE_MD);
508 return (ret);
509 }
510
511 if (nr != DBMETASIZE) {
512 EPRINT((env, DB_STR_A("0521",
513 "Page %lu: Incomplete metadata page", "%lu"),
514 (u_long)PGNO_BASE_MD));
515 return (DB_VERIFY_FATAL);
516 }
517 }
518
519 return (ret);
520 }
521
522 /*
523 * __db_vrfy_pagezero --
524 * Verify the master metadata page. Use seek, read, and a local buffer
525 * rather than the DB paging code, for safety.
526 *
527 * Must correctly (or best-guess) set dbp->type and dbp->pagesize.
528 */
529 static int
__db_vrfy_pagezero(dbp,vdp,fhp,name,flags)530 __db_vrfy_pagezero(dbp, vdp, fhp, name, flags)
531 DB *dbp;
532 VRFY_DBINFO *vdp;
533 DB_FH *fhp;
534 const char *name;
535 u_int32_t flags;
536 {
537 DBMETA *meta;
538 ENV *env;
539 VRFY_PAGEINFO *pip;
540 db_pgno_t freelist;
541 int isbad, ret, swapped;
542 u_int8_t mbuf[DBMETASIZE];
543
544 isbad = ret = swapped = 0;
545 freelist = 0;
546 env = dbp->env;
547 meta = (DBMETA *)mbuf;
548 dbp->type = DB_UNKNOWN;
549
550 if ((ret = __db_vrfy_getpagezero(dbp, fhp, name, mbuf, flags)) != 0)
551 return (ret);
552
553 if ((ret = __db_vrfy_getpageinfo(vdp, PGNO_BASE_MD, &pip)) != 0)
554 return (ret);
555
556 if ((ret = __db_chk_meta(env, dbp, meta, 1)) != 0) {
557 EPRINT((env, DB_STR_A("0522",
558 "Page %lu: metadata page corrupted", "%lu"),
559 (u_long)PGNO_BASE_MD));
560 isbad = 1;
561 if (ret != DB_CHKSUM_FAIL) {
562 EPRINT((env, DB_STR_A("0523",
563 "Page %lu: could not check metadata page", "%lu"),
564 (u_long)PGNO_BASE_MD));
565 return (DB_VERIFY_FATAL);
566 }
567 }
568
569 /*
570 * Check all of the fields that we can.
571 *
572 * 08-11: Current page number. Must == pgno.
573 * Note that endianness doesn't matter--it's zero.
574 */
575 if (meta->pgno != PGNO_BASE_MD) {
576 isbad = 1;
577 EPRINT((env, DB_STR_A("0524",
578 "Page %lu: pgno incorrectly set to %lu", "%lu %lu"),
579 (u_long)PGNO_BASE_MD, (u_long)meta->pgno));
580 }
581
582 /* 12-15: Magic number. Must be one of valid set. */
583 if (__db_is_valid_magicno(meta->magic, &dbp->type))
584 swapped = 0;
585 else {
586 M_32_SWAP(meta->magic);
587 if (__db_is_valid_magicno(meta->magic,
588 &dbp->type))
589 swapped = 1;
590 else {
591 isbad = 1;
592 EPRINT((env, DB_STR_A("0525",
593 "Page %lu: bad magic number %lu", "%lu %lu"),
594 (u_long)PGNO_BASE_MD, (u_long)meta->magic));
595 }
596 }
597
598 /*
599 * 16-19: Version. Must be current; for now, we
600 * don't support verification of old versions.
601 */
602 if (swapped)
603 M_32_SWAP(meta->version);
604 if ((dbp->type == DB_BTREE &&
605 (meta->version > DB_BTREEVERSION ||
606 meta->version < DB_BTREEOLDVER)) ||
607 (dbp->type == DB_HASH &&
608 (meta->version > DB_HASHVERSION ||
609 meta->version < DB_HASHOLDVER)) ||
610 (dbp->type == DB_HEAP &&
611 (meta->version > DB_HEAPVERSION ||
612 meta->version < DB_HEAPOLDVER)) ||
613 (dbp->type == DB_QUEUE &&
614 (meta->version > DB_QAMVERSION ||
615 meta->version < DB_QAMOLDVER))) {
616 isbad = 1;
617 EPRINT((env, DB_STR_A("0526",
618 "Page %lu: unsupported DB version %lu; extraneous errors may result",
619 "%lu %lu"), (u_long)PGNO_BASE_MD, (u_long)meta->version));
620 }
621
622 /*
623 * 20-23: Pagesize. Must be power of two,
624 * greater than 512, and less than 64K.
625 */
626 if (swapped)
627 M_32_SWAP(meta->pagesize);
628 if (IS_VALID_PAGESIZE(meta->pagesize))
629 dbp->pgsize = meta->pagesize;
630 else {
631 isbad = 1;
632 EPRINT((env, DB_STR_A("0527", "Page %lu: bad page size %lu",
633 "%lu %lu"), (u_long)PGNO_BASE_MD, (u_long)meta->pagesize));
634
635 /*
636 * Now try to settle on a pagesize to use.
637 * If the user-supplied one is reasonable,
638 * use it; else, guess.
639 */
640 if (!IS_VALID_PAGESIZE(dbp->pgsize))
641 dbp->pgsize = __db_guesspgsize(env, fhp);
642 }
643
644 /*
645 * 25: Page type. Must be correct for dbp->type,
646 * which is by now set as well as it can be.
647 */
648 /* Needs no swapping--only one byte! */
649 if ((dbp->type == DB_BTREE && meta->type != P_BTREEMETA) ||
650 (dbp->type == DB_HASH && meta->type != P_HASHMETA) ||
651 (dbp->type == DB_HEAP && meta->type != P_HEAPMETA) ||
652 (dbp->type == DB_QUEUE && meta->type != P_QAMMETA)) {
653 isbad = 1;
654 EPRINT((env, DB_STR_A("0528", "Page %lu: bad page type %lu",
655 "%lu %lu"), (u_long)PGNO_BASE_MD, (u_long)meta->type));
656 }
657
658 /*
659 * 26: Meta-flags.
660 */
661 if (meta->metaflags != 0) {
662 if (FLD_ISSET(meta->metaflags,
663 ~(DBMETA_CHKSUM|DBMETA_PART_RANGE|DBMETA_PART_CALLBACK))) {
664 isbad = 1;
665 EPRINT((env, DB_STR_A("0529",
666 "Page %lu: bad meta-data flags value %#lx",
667 "%lu %#lx"), (u_long)PGNO_BASE_MD,
668 (u_long)meta->metaflags));
669 }
670 if (FLD_ISSET(meta->metaflags, DBMETA_CHKSUM))
671 F_SET(pip, VRFY_HAS_CHKSUM);
672 if (FLD_ISSET(meta->metaflags, DBMETA_PART_RANGE))
673 F_SET(pip, VRFY_HAS_PART_RANGE);
674 if (FLD_ISSET(meta->metaflags, DBMETA_PART_CALLBACK))
675 F_SET(pip, VRFY_HAS_PART_CALLBACK);
676
677 if (FLD_ISSET(meta->metaflags,
678 DBMETA_PART_RANGE | DBMETA_PART_CALLBACK) &&
679 (ret = __partition_init(dbp, meta->metaflags)) != 0)
680 return (ret);
681 }
682
683 /*
684 * 28-31: Free list page number.
685 * 32-35: Last page in database file.
686 * We'll verify last_pgno once we open the db in the mpool;
687 * for now, just store it.
688 */
689 if (swapped)
690 M_32_SWAP(meta->free);
691 freelist = meta->free;
692 if (swapped)
693 M_32_SWAP(meta->last_pgno);
694 vdp->meta_last_pgno = meta->last_pgno;
695
696 /*
697 * Initialize vdp->pages to fit a single pageinfo structure for
698 * this one page. We'll realloc later when we know how many
699 * pages there are.
700 */
701 pip->pgno = PGNO_BASE_MD;
702 pip->type = meta->type;
703
704 /*
705 * Signal that we still have to check the info specific to
706 * a given type of meta page.
707 */
708 F_SET(pip, VRFY_INCOMPLETE);
709
710 pip->free = freelist;
711
712 if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0)
713 return (ret);
714
715 /* Set up the dbp's fileid. We don't use the regular open path. */
716 memcpy(dbp->fileid, meta->uid, DB_FILE_ID_LEN);
717 dbp->preserve_fid = 1;
718
719 if (swapped == 1)
720 F_SET(dbp, DB_AM_SWAP);
721
722 return (isbad ? DB_VERIFY_BAD : 0);
723 }
724
725 /*
726 * __db_vrfy_walkpages --
727 * Main loop of the verifier/salvager. Walks through,
728 * page by page, and verifies all pages and/or prints all data pages.
729 */
730 static int
__db_vrfy_walkpages(dbp,vdp,handle,callback,flags)731 __db_vrfy_walkpages(dbp, vdp, handle, callback, flags)
732 DB *dbp;
733 VRFY_DBINFO *vdp;
734 void *handle;
735 int (*callback) __P((void *, const void *));
736 u_int32_t flags;
737 {
738 DB_MPOOLFILE *mpf;
739 ENV *env;
740 PAGE *h;
741 VRFY_PAGEINFO *pip;
742 db_pgno_t i;
743 int ret, t_ret, isbad;
744
745 env = dbp->env;
746 mpf = dbp->mpf;
747 h = NULL;
748 ret = isbad = t_ret = 0;
749
750 for (i = 0; i <= vdp->last_pgno; i++) {
751 /*
752 * If DB_SALVAGE is set, we inspect our database of completed
753 * pages, and skip any we've already printed in the subdb pass.
754 */
755 if (LF_ISSET(DB_SALVAGE) && (__db_salvage_isdone(vdp, i) != 0))
756 continue;
757
758 /*
759 * An individual page get can fail if:
760 * * This is a hash database, it is expected to find
761 * empty buckets, which don't have allocated pages. Create
762 * a dummy page so the verification can proceed.
763 * * We are salvaging, flag the error and continue.
764 */
765 if ((t_ret = __memp_fget(mpf, &i,
766 vdp->thread_info, NULL, 0, &h)) != 0) {
767 if (dbp->type == DB_HASH ||
768 (dbp->type == DB_QUEUE &&
769 F_ISSET(dbp, DB_AM_INMEM))) {
770 if ((t_ret =
771 __db_vrfy_getpageinfo(vdp, i, &pip)) != 0)
772 goto err1;
773 pip->type = P_INVALID;
774 pip->pgno = i;
775 F_CLR(pip, VRFY_IS_ALLZEROES);
776 F_SET(pip, VRFY_NONEXISTENT);
777 if ((t_ret = __db_vrfy_putpageinfo(
778 env, vdp, pip)) != 0)
779 goto err1;
780 continue;
781 }
782 if (t_ret == DB_PAGE_NOTFOUND) {
783 EPRINT((env, DB_STR_A("0530",
784 "Page %lu: beyond the end of the file, metadata page has last page as %lu",
785 "%lu %lu"), (u_long)i,
786 (u_long)vdp->last_pgno));
787 if (ret == 0)
788 return (t_ret);
789 }
790
791 err1: if (ret == 0)
792 ret = t_ret;
793 if (LF_ISSET(DB_SALVAGE))
794 continue;
795 return (ret);
796 }
797
798 if (LF_ISSET(DB_SALVAGE)) {
799 /*
800 * We pretty much don't want to quit unless a
801 * bomb hits. May as well return that something
802 * was screwy, however.
803 */
804 if ((t_ret = __db_salvage_pg(dbp,
805 vdp, i, h, handle, callback, flags)) != 0) {
806 if (ret == 0)
807 ret = t_ret;
808 isbad = 1;
809 }
810 } else {
811 /*
812 * If we are not salvaging, and we get any error
813 * other than DB_VERIFY_BAD, return immediately;
814 * it may not be safe to proceed. If we get
815 * DB_VERIFY_BAD, keep going; listing more errors
816 * may make it easier to diagnose problems and
817 * determine the magnitude of the corruption.
818 *
819 * Verify info common to all page types.
820 */
821 if (i != PGNO_BASE_MD) {
822 ret = __db_vrfy_common(dbp, vdp, h, i, flags);
823 if (ret == DB_VERIFY_BAD)
824 isbad = 1;
825 else if (ret != 0)
826 goto err;
827 }
828
829 switch (TYPE(h)) {
830 case P_INVALID:
831 ret = __db_vrfy_invalid(dbp, vdp, h, i, flags);
832 break;
833 case __P_DUPLICATE:
834 isbad = 1;
835 EPRINT((env, DB_STR_A("0531",
836 "Page %lu: old-style duplicate page",
837 "%lu"), (u_long)i));
838 break;
839 case P_HASH_UNSORTED:
840 case P_HASH:
841 ret = __ham_vrfy(dbp, vdp, h, i, flags);
842 break;
843 case P_HEAP:
844 case P_IHEAP:
845 ret = __heap_vrfy(dbp, vdp, h, i, flags);
846 break;
847 case P_IBTREE:
848 case P_IRECNO:
849 case P_LBTREE:
850 case P_LDUP:
851 ret = __bam_vrfy(dbp, vdp, h, i, flags);
852 break;
853 case P_LRECNO:
854 ret = __ram_vrfy_leaf(dbp, vdp, h, i, flags);
855 break;
856 case P_OVERFLOW:
857 ret = __db_vrfy_overflow(dbp, vdp, h, i, flags);
858 break;
859 case P_HASHMETA:
860 ret = __ham_vrfy_meta(dbp,
861 vdp, (HMETA *)h, i, flags);
862 break;
863 case P_HEAPMETA:
864 ret = __heap_vrfy_meta(dbp,
865 vdp, (HEAPMETA *)h, i, flags);
866 break;
867 case P_BTREEMETA:
868 ret = __bam_vrfy_meta(dbp,
869 vdp, (BTMETA *)h, i, flags);
870 break;
871 case P_QAMMETA:
872 ret = __qam_vrfy_meta(dbp,
873 vdp, (QMETA *)h, i, flags);
874 break;
875 case P_QAMDATA:
876 ret = __qam_vrfy_data(dbp,
877 vdp, (QPAGE *)h, i, flags);
878 break;
879 default:
880 EPRINT((env, DB_STR_A("0532",
881 "Page %lu: unknown page type %lu",
882 "%lu %lu"), (u_long)i, (u_long)TYPE(h)));
883 isbad = 1;
884 break;
885 }
886
887 /*
888 * Set up error return.
889 */
890 if (ret == DB_VERIFY_BAD)
891 isbad = 1;
892 else if (ret != 0)
893 goto err;
894
895 /*
896 * Provide feedback to the application about our
897 * progress. The range 0-50% comes from the fact
898 * that this is the first of two passes through the
899 * database (front-to-back, then top-to-bottom).
900 */
901 if (dbp->db_feedback != NULL)
902 dbp->db_feedback(dbp, DB_VERIFY,
903 (int)((i + 1) * 50 / (vdp->last_pgno + 1)));
904 }
905
906 /*
907 * Just as with the page get, bail if and only if we're
908 * not salvaging.
909 */
910 if ((t_ret = __memp_fput(mpf,
911 vdp->thread_info, h, dbp->priority)) != 0) {
912 if (ret == 0)
913 ret = t_ret;
914 if (!LF_ISSET(DB_SALVAGE))
915 return (ret);
916 }
917 }
918
919 /*
920 * If we've seen a Queue metadata page, we may need to walk Queue
921 * extent pages that won't show up between 0 and vdp->last_pgno.
922 */
923 if (F_ISSET(vdp, VRFY_QMETA_SET) && (t_ret =
924 __qam_vrfy_walkqueue(dbp, vdp, handle, callback, flags)) != 0) {
925 if (ret == 0)
926 ret = t_ret;
927 if (t_ret == DB_VERIFY_BAD)
928 isbad = 1;
929 else if (!LF_ISSET(DB_SALVAGE))
930 return (ret);
931 }
932
933 if (0) {
934 err: if (h != NULL && (t_ret = __memp_fput(mpf,
935 vdp->thread_info, h, dbp->priority)) != 0)
936 return (ret == 0 ? t_ret : ret);
937 }
938
939 return ((isbad == 1 && ret == 0) ? DB_VERIFY_BAD : ret);
940 }
941
942 /*
943 * __db_vrfy_structure--
944 * After a beginning-to-end walk through the database has been
945 * completed, put together the information that has been collected
946 * to verify the overall database structure.
947 *
948 * Should only be called if we want to do a database verification,
949 * i.e. if DB_SALVAGE is not set.
950 */
951 static int
__db_vrfy_structure(dbp,vdp,dbname,meta_pgno,lp,rp,flags)952 __db_vrfy_structure(dbp, vdp, dbname, meta_pgno, lp, rp, flags)
953 DB *dbp;
954 VRFY_DBINFO *vdp;
955 const char *dbname;
956 db_pgno_t meta_pgno;
957 void *lp, *rp;
958 u_int32_t flags;
959 {
960 DB *pgset;
961 ENV *env;
962 VRFY_PAGEINFO *pip;
963 db_pgno_t i;
964 int ret, isbad, hassubs, p;
965
966 isbad = 0;
967 pip = NULL;
968 env = dbp->env;
969 pgset = vdp->pgset;
970
971 /*
972 * Providing feedback here is tricky; in most situations,
973 * we fetch each page one more time, but we do so in a top-down
974 * order that depends on the access method. Worse, we do this
975 * recursively in btree, such that on any call where we're traversing
976 * a subtree we don't know where that subtree is in the whole database;
977 * worse still, any given database may be one of several subdbs.
978 *
979 * The solution is to decrement a counter vdp->pgs_remaining each time
980 * we verify (and call feedback on) a page. We may over- or
981 * under-count, but the structure feedback function will ensure that we
982 * never give a percentage under 50 or over 100. (The first pass
983 * covered the range 0-50%.)
984 */
985 if (dbp->db_feedback != NULL)
986 vdp->pgs_remaining = vdp->last_pgno + 1;
987
988 /*
989 * Call the appropriate function to downwards-traverse the db type.
990 */
991 switch (dbp->type) {
992 case DB_BTREE:
993 case DB_RECNO:
994 if ((ret =
995 __bam_vrfy_structure(dbp, vdp, 0, lp, rp, flags)) != 0) {
996 if (ret == DB_VERIFY_BAD)
997 isbad = 1;
998 else
999 goto err;
1000 }
1001
1002 /*
1003 * If we have subdatabases and we know that the database is,
1004 * thus far, sound, it's safe to walk the tree of subdatabases.
1005 * Do so, and verify the structure of the databases within.
1006 */
1007 if ((ret = __db_vrfy_getpageinfo(vdp, 0, &pip)) != 0)
1008 goto err;
1009 hassubs = F_ISSET(pip, VRFY_HAS_SUBDBS) ? 1 : 0;
1010 if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0)
1011 goto err;
1012 pip = NULL;
1013
1014 if (isbad == 0 && hassubs)
1015 if ((ret =
1016 __db_vrfy_subdbs(dbp, vdp, dbname, flags)) != 0) {
1017 if (ret == DB_VERIFY_BAD)
1018 isbad = 1;
1019 else
1020 goto err;
1021 }
1022 break;
1023 case DB_HASH:
1024 if ((ret = __ham_vrfy_structure(dbp, vdp, 0, flags)) != 0) {
1025 if (ret == DB_VERIFY_BAD)
1026 isbad = 1;
1027 else
1028 goto err;
1029 }
1030 break;
1031 case DB_HEAP:
1032 if ((ret = __heap_vrfy_structure(dbp, vdp, flags)) != 0) {
1033 if (ret == DB_VERIFY_BAD)
1034 isbad = 1;
1035 }
1036 /* Skip the freelist check for heap, it doesn't apply. */
1037 goto err;
1038 case DB_QUEUE:
1039 if ((ret = __qam_vrfy_structure(dbp, vdp, flags)) != 0) {
1040 if (ret == DB_VERIFY_BAD)
1041 isbad = 1;
1042 }
1043
1044 /*
1045 * Queue pages may be unreferenced and totally zeroed, if
1046 * they're empty; queue doesn't have much structure, so
1047 * this is unlikely to be wrong in any troublesome sense.
1048 * Skip to "err".
1049 */
1050 goto err;
1051 case DB_UNKNOWN:
1052 default:
1053 ret = __db_unknown_path(env, "__db_vrfy_structure");
1054 goto err;
1055 }
1056
1057 /* Walk free list. */
1058 if ((ret =
1059 __db_vrfy_freelist(dbp, vdp, meta_pgno, flags)) == DB_VERIFY_BAD)
1060 isbad = 1;
1061
1062 /*
1063 * If structure checks up until now have failed, it's likely that
1064 * checking what pages have been missed will result in oodles of
1065 * extraneous error messages being EPRINTed. Skip to the end
1066 * if this is the case; we're going to be printing at least one
1067 * error anyway, and probably all the more salient ones.
1068 */
1069 if (ret != 0 || isbad == 1)
1070 goto err;
1071
	/*
	 * Make sure no page has been missed and that no page is still marked
	 * "all zeroes" unless we are looking at unused hash bucket pages or
	 * pages off the end of the database.
	 */
1077 for (i = 0; i < vdp->last_pgno + 1; i++) {
1078 if ((ret = __db_vrfy_getpageinfo(vdp, i, &pip)) != 0)
1079 goto err;
1080 if ((ret = __db_vrfy_pgset_get(pgset,
1081 vdp->thread_info, vdp->txn, i, &p)) != 0)
1082 goto err;
1083 if (pip->type == P_OVERFLOW) {
1084 if ((u_int32_t)p != pip->refcount) {
1085 EPRINT((env, DB_STR_A("0533",
1086 "Page %lu: overflow refcount %lu, referenced %lu times",
1087 "%lu %lu %lu"), (u_long)i,
1088 (u_long)pip->refcount, (u_long)p));
1089 isbad = 1;
1090 }
1091 } else if (p == 0 &&
1092 #ifndef HAVE_FTRUNCATE
1093 !(i > vdp->meta_last_pgno &&
1094 (F_ISSET(pip, VRFY_IS_ALLZEROES) || pip->type == P_HASH)) &&
1095 #endif
1096 !(dbp->type == DB_HASH &&
1097 (pip->type == P_HASH || pip->type == P_INVALID))) {
1098 /*
1099 * It is OK for unreferenced hash buckets to be
1100 * marked invalid and unreferenced.
1101 */
1102 EPRINT((env, DB_STR_A("0534",
1103 "Page %lu: unreferenced page", "%lu"), (u_long)i));
1104 isbad = 1;
1105 }
1106
1107 if (F_ISSET(pip, VRFY_IS_ALLZEROES)
1108 #ifndef HAVE_FTRUNCATE
1109 && i <= vdp->meta_last_pgno
1110 #endif
1111 ) {
1112 EPRINT((env, DB_STR_A("0535",
1113 "Page %lu: totally zeroed page", "%lu"),
1114 (u_long)i));
1115 isbad = 1;
1116 }
1117 if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0)
1118 goto err;
1119 pip = NULL;
1120 }
1121
1122 err: if (pip != NULL)
1123 (void)__db_vrfy_putpageinfo(env, vdp, pip);
1124
1125 return ((isbad == 1 && ret == 0) ? DB_VERIFY_BAD : ret);
1126 }
1127
1128 /*
1129 * __db_is_valid_magicno
1130 */
1131 static int
__db_is_valid_magicno(magic,typep)1132 __db_is_valid_magicno(magic, typep)
1133 u_int32_t magic;
1134 DBTYPE *typep;
1135 {
1136 switch (magic) {
1137 case DB_BTREEMAGIC:
1138 *typep = DB_BTREE;
1139 return (1);
1140 case DB_HASHMAGIC:
1141 *typep = DB_HASH;
1142 return (1);
1143 case DB_HEAPMAGIC:
1144 *typep = DB_HEAP;
1145 return (1);
1146 case DB_QAMMAGIC:
1147 *typep = DB_QUEUE;
1148 return (1);
1149 default:
1150 break;
1151 }
1152 *typep = DB_UNKNOWN;
1153 return (0);
1154 }
1155
1156 /*
1157 * __db_vrfy_common --
1158 * Verify info common to all page types.
1159 *
1160 * PUBLIC: int __db_vrfy_common
1161 * PUBLIC: __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t));
1162 */
1163 int
__db_vrfy_common(dbp,vdp,h,pgno,flags)1164 __db_vrfy_common(dbp, vdp, h, pgno, flags)
1165 DB *dbp;
1166 VRFY_DBINFO *vdp;
1167 PAGE *h;
1168 db_pgno_t pgno;
1169 u_int32_t flags;
1170 {
1171 ENV *env;
1172 VRFY_PAGEINFO *pip;
1173 int ret, t_ret;
1174 u_int8_t *p;
1175
1176 env = dbp->env;
1177
1178 if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
1179 return (ret);
1180
1181 pip->pgno = pgno;
1182 F_CLR(pip, VRFY_IS_ALLZEROES);
1183
1184 /*
1185 * Hash expands the table by leaving some pages between the
1186 * old last and the new last totally zeroed. These pages may
1187 * not be all zero if they were used, freed and then reallocated.
1188 *
1189 * Queue will create sparse files if sparse record numbers are used.
1190 */
1191 if (pgno != 0 && PGNO(h) == 0) {
1192 F_SET(pip, VRFY_IS_ALLZEROES);
1193 for (p = (u_int8_t *)h; p < (u_int8_t *)h + dbp->pgsize; p++)
1194 if (*p != 0) {
1195 F_CLR(pip, VRFY_IS_ALLZEROES);
1196 break;
1197 }
1198 /*
1199 * Mark it as a hash, and we'll
1200 * check that that makes sense structurally later.
1201 * (The queue verification doesn't care, since queues
1202 * don't really have much in the way of structure.)
1203 */
1204 if (dbp->type != DB_HEAP)
1205 pip->type = P_HASH;
1206 ret = 0;
1207 goto err; /* well, not really an err. */
1208 }
1209
1210 if (PGNO(h) != pgno) {
1211 EPRINT((env, DB_STR_A("0536", "Page %lu: bad page number %lu",
1212 "%lu %lu"), (u_long)pgno, (u_long)h->pgno));
1213 ret = DB_VERIFY_BAD;
1214 }
1215
1216 switch (h->type) {
1217 case P_INVALID: /* Order matches ordinal value. */
1218 case P_HASH_UNSORTED:
1219 case P_IBTREE:
1220 case P_IRECNO:
1221 case P_LBTREE:
1222 case P_LRECNO:
1223 case P_OVERFLOW:
1224 case P_HASHMETA:
1225 case P_BTREEMETA:
1226 case P_QAMMETA:
1227 case P_QAMDATA:
1228 case P_LDUP:
1229 case P_HASH:
1230 case P_HEAP:
1231 case P_IHEAP:
1232 case P_HEAPMETA:
1233 break;
1234 default:
1235 EPRINT((env, DB_STR_A("0537", "Page %lu: bad page type %lu",
1236 "%lu %lu"), (u_long)pgno, (u_long)h->type));
1237 ret = DB_VERIFY_BAD;
1238 }
1239 pip->type = h->type;
1240
1241 err: if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
1242 ret = t_ret;
1243
1244 return (ret);
1245 }
1246
1247 /*
1248 * __db_vrfy_invalid --
1249 * Verify P_INVALID page.
1250 * (Yes, there's not much to do here.)
1251 */
1252 static int
__db_vrfy_invalid(dbp,vdp,h,pgno,flags)1253 __db_vrfy_invalid(dbp, vdp, h, pgno, flags)
1254 DB *dbp;
1255 VRFY_DBINFO *vdp;
1256 PAGE *h;
1257 db_pgno_t pgno;
1258 u_int32_t flags;
1259 {
1260 ENV *env;
1261 VRFY_PAGEINFO *pip;
1262 int ret, t_ret;
1263
1264 env = dbp->env;
1265
1266 if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
1267 return (ret);
1268 pip->next_pgno = pip->prev_pgno = 0;
1269
1270 if (!IS_VALID_PGNO(NEXT_PGNO(h))) {
1271 EPRINT((env, DB_STR_A("0538", "Page %lu: invalid next_pgno %lu",
1272 "%lu %lu"), (u_long)pgno, (u_long)NEXT_PGNO(h)));
1273 ret = DB_VERIFY_BAD;
1274 } else
1275 pip->next_pgno = NEXT_PGNO(h);
1276
1277 if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
1278 ret = t_ret;
1279 return (ret);
1280 }
1281
1282 /*
1283 * __db_vrfy_datapage --
1284 * Verify elements common to data pages (P_HASH, P_LBTREE,
1285 * P_IBTREE, P_IRECNO, P_LRECNO, P_OVERFLOW, P_DUPLICATE)--i.e.,
1286 * those defined in the PAGE structure.
1287 *
1288 * Called from each of the per-page routines, after the
1289 * all-page-type-common elements of pip have been verified and filled
1290 * in.
1291 *
1292 * PUBLIC: int __db_vrfy_datapage
1293 * PUBLIC: __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t));
1294 */
1295 int
__db_vrfy_datapage(dbp,vdp,h,pgno,flags)1296 __db_vrfy_datapage(dbp, vdp, h, pgno, flags)
1297 DB *dbp;
1298 VRFY_DBINFO *vdp;
1299 PAGE *h;
1300 db_pgno_t pgno;
1301 u_int32_t flags;
1302 {
1303 ENV *env;
1304 VRFY_PAGEINFO *pip;
1305 u_int32_t smallest_entry;
1306 int isbad, ret, t_ret;
1307
1308 env = dbp->env;
1309
1310 if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
1311 return (ret);
1312 isbad = 0;
1313
1314 /*
1315 * prev_pgno and next_pgno: store for inter-page checks,
1316 * verify that they point to actual pages and not to self.
1317 *
1318 * !!!
1319 * Internal btree pages, as well as heap pages, do not maintain these
1320 * fields (indeed, they overload them). Skip.
1321 */
1322 if (TYPE(h) != P_IBTREE &&
1323 TYPE(h) != P_IRECNO && TYPE(h) != P_HEAP && TYPE(h) != P_IHEAP) {
1324 if (!IS_VALID_PGNO(PREV_PGNO(h)) || PREV_PGNO(h) == pip->pgno) {
1325 isbad = 1;
1326 EPRINT((env, DB_STR_A("0539",
1327 "Page %lu: invalid prev_pgno %lu", "%lu %lu"),
1328 (u_long)pip->pgno, (u_long)PREV_PGNO(h)));
1329 }
1330 if (!IS_VALID_PGNO(NEXT_PGNO(h)) || NEXT_PGNO(h) == pip->pgno) {
1331 isbad = 1;
1332 EPRINT((env, DB_STR_A("0540",
1333 "Page %lu: invalid next_pgno %lu", "%lu %lu"),
1334 (u_long)pip->pgno, (u_long)NEXT_PGNO(h)));
1335 }
1336 pip->prev_pgno = PREV_PGNO(h);
1337 pip->next_pgno = NEXT_PGNO(h);
1338 }
1339
1340 /*
1341 * Verify the number of entries on the page: there's no good way to
1342 * determine if this is accurate. The best we can do is verify that
1343 * it's not more than can, in theory, fit on the page. Then, we make
1344 * sure there are at least this many valid elements in inp[], and
1345 * hope the test catches most cases.
1346 */
1347 switch (TYPE(h)) {
1348 case P_HASH_UNSORTED:
1349 case P_HASH:
1350 smallest_entry = HKEYDATA_PSIZE(0);
1351 break;
1352 case P_HEAP:
1353 smallest_entry = sizeof(HEAPHDR) + sizeof(db_indx_t);
1354 break;
1355 case P_IHEAP:
1356 /* Really high_pgno. */
1357 pip->prev_pgno = PREV_PGNO(h);
1358 smallest_entry = 0;
1359 break;
1360 case P_IBTREE:
1361 smallest_entry = BINTERNAL_PSIZE(0);
1362 break;
1363 case P_IRECNO:
1364 smallest_entry = RINTERNAL_PSIZE;
1365 break;
1366 case P_LBTREE:
1367 case P_LDUP:
1368 case P_LRECNO:
1369 smallest_entry = BKEYDATA_PSIZE(0);
1370 break;
1371 default:
1372 smallest_entry = 0;
1373 break;
1374 }
1375 if (smallest_entry * NUM_ENT(h) / 2 > dbp->pgsize) {
1376 isbad = 1;
1377 EPRINT((env, DB_STR_A("0541",
1378 "Page %lu: too many entries: %lu",
1379 "%lu %lu"), (u_long)pgno, (u_long)NUM_ENT(h)));
1380 }
1381
1382 if (TYPE(h) != P_OVERFLOW)
1383 pip->entries = NUM_ENT(h);
1384
1385 /*
1386 * btree level. Should be zero unless we're a btree;
1387 * if we are a btree, should be between LEAFLEVEL and MAXBTREELEVEL,
1388 * and we need to save it off.
1389 */
1390 switch (TYPE(h)) {
1391 case P_IBTREE:
1392 case P_IRECNO:
1393 if (LEVEL(h) < LEAFLEVEL + 1) {
1394 isbad = 1;
1395 EPRINT((env, DB_STR_A("0542",
1396 "Page %lu: bad btree level %lu", "%lu %lu"),
1397 (u_long)pgno, (u_long)LEVEL(h)));
1398 }
1399 pip->bt_level = LEVEL(h);
1400 break;
1401 case P_LBTREE:
1402 case P_LDUP:
1403 case P_LRECNO:
1404 if (LEVEL(h) != LEAFLEVEL) {
1405 isbad = 1;
1406 EPRINT((env, DB_STR_A("0543",
1407 "Page %lu: btree leaf page has incorrect level %lu",
1408 "%lu %lu"), (u_long)pgno, (u_long)LEVEL(h)));
1409 }
1410 break;
1411 default:
1412 if (LEVEL(h) != 0) {
1413 isbad = 1;
1414 EPRINT((env, DB_STR_A("0544",
1415 "Page %lu: nonzero level %lu in non-btree database",
1416 "%lu %lu"), (u_long)pgno, (u_long)LEVEL(h)));
1417 }
1418 break;
1419 }
1420
1421 /*
1422 * Even though inp[] occurs in all PAGEs, we look at it in the
1423 * access-method-specific code, since btree and hash treat
1424 * item lengths very differently, and one of the most important
1425 * things we want to verify is that the data--as specified
1426 * by offset and length--cover the right part of the page
1427 * without overlaps, gaps, or violations of the page boundary.
1428 */
1429 if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
1430 ret = t_ret;
1431
1432 return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
1433 }
1434
1435 /*
1436 * __db_vrfy_meta --
1437 * Verify the access-method common parts of a meta page, using
1438 * normal mpool routines.
1439 *
1440 * PUBLIC: int __db_vrfy_meta
1441 * PUBLIC: __P((DB *, VRFY_DBINFO *, DBMETA *, db_pgno_t, u_int32_t));
1442 */
1443 int
__db_vrfy_meta(dbp,vdp,meta,pgno,flags)1444 __db_vrfy_meta(dbp, vdp, meta, pgno, flags)
1445 DB *dbp;
1446 VRFY_DBINFO *vdp;
1447 DBMETA *meta;
1448 db_pgno_t pgno;
1449 u_int32_t flags;
1450 {
1451 DBTYPE dbtype, magtype;
1452 ENV *env;
1453 VRFY_PAGEINFO *pip;
1454 int isbad, ret, t_ret;
1455
1456 isbad = 0;
1457 env = dbp->env;
1458
1459 if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
1460 return (ret);
1461
1462 /* type plausible for a meta page */
1463 switch (meta->type) {
1464 case P_BTREEMETA:
1465 dbtype = DB_BTREE;
1466 break;
1467 case P_HASHMETA:
1468 dbtype = DB_HASH;
1469 break;
1470 case P_HEAPMETA:
1471 dbtype = DB_HEAP;
1472 break;
1473 case P_QAMMETA:
1474 dbtype = DB_QUEUE;
1475 break;
1476 default:
1477 ret = __db_unknown_path(env, "__db_vrfy_meta");
1478 goto err;
1479 }
1480
1481 /* magic number valid */
1482 if (!__db_is_valid_magicno(meta->magic, &magtype)) {
1483 isbad = 1;
1484 EPRINT((env, DB_STR_A("0545", "Page %lu: invalid magic number",
1485 "%lu"), (u_long)pgno));
1486 }
1487 if (magtype != dbtype) {
1488 isbad = 1;
1489 EPRINT((env, DB_STR_A("0546",
1490 "Page %lu: magic number does not match database type",
1491 "%lu"), (u_long)pgno));
1492 }
1493
1494 /* version */
1495 if ((dbtype == DB_BTREE &&
1496 (meta->version > DB_BTREEVERSION ||
1497 meta->version < DB_BTREEOLDVER)) ||
1498 (dbtype == DB_HASH &&
1499 (meta->version > DB_HASHVERSION ||
1500 meta->version < DB_HASHOLDVER)) ||
1501 (dbtype == DB_HEAP &&
1502 (meta->version > DB_HEAPVERSION ||
1503 meta->version < DB_HEAPOLDVER)) ||
1504 (dbtype == DB_QUEUE &&
1505 (meta->version > DB_QAMVERSION ||
1506 meta->version < DB_QAMOLDVER))) {
1507 isbad = 1;
1508 EPRINT((env, DB_STR_A("0547",
1509 "Page %lu: unsupported database version %lu; extraneous errors may result",
1510 "%lu %lu"), (u_long)pgno, (u_long)meta->version));
1511 }
1512
1513 /* pagesize */
1514 if (meta->pagesize != dbp->pgsize) {
1515 isbad = 1;
1516 EPRINT((env, DB_STR_A("0548", "Page %lu: invalid pagesize %lu",
1517 "%lu %lu"), (u_long)pgno, (u_long)meta->pagesize));
1518 }
1519
1520 /* Flags */
1521 if (meta->metaflags != 0) {
1522 if (FLD_ISSET(meta->metaflags,
1523 ~(DBMETA_CHKSUM|DBMETA_PART_RANGE|DBMETA_PART_CALLBACK))) {
1524 isbad = 1;
1525 EPRINT((env, DB_STR_A("0549",
1526 "Page %lu: bad meta-data flags value %#lx",
1527 "%lu %#lx"), (u_long)PGNO_BASE_MD,
1528 (u_long)meta->metaflags));
1529 }
1530 if (FLD_ISSET(meta->metaflags, DBMETA_CHKSUM))
1531 F_SET(pip, VRFY_HAS_CHKSUM);
1532 if (FLD_ISSET(meta->metaflags, DBMETA_PART_RANGE))
1533 F_SET(pip, VRFY_HAS_PART_RANGE);
1534 if (FLD_ISSET(meta->metaflags, DBMETA_PART_CALLBACK))
1535 F_SET(pip, VRFY_HAS_PART_CALLBACK);
1536 }
1537
1538 /*
1539 * Free list.
1540 *
1541 * If this is not the main, master-database meta page, it
1542 * should not have a free list.
1543 */
1544 if (pgno != PGNO_BASE_MD && meta->free != PGNO_INVALID) {
1545 isbad = 1;
1546 EPRINT((env, DB_STR_A("0550",
1547 "Page %lu: nonempty free list on subdatabase metadata page",
1548 "%lu"), (u_long)pgno));
1549 }
1550
1551 /* Can correctly be PGNO_INVALID--that's just the end of the list. */
1552 if (IS_VALID_PGNO(meta->free))
1553 pip->free = meta->free;
1554 else {
1555 isbad = 1;
1556 EPRINT((env, DB_STR_A("0551",
1557 "Page %lu: nonsensical free list pgno %lu", "%lu %lu"),
1558 (u_long)pgno, (u_long)meta->free));
1559 }
1560
1561 /*
1562 * Check that the meta page agrees with what we got from mpool.
1563 * If we don't have FTRUNCATE then mpool could include some
1564 * zeroed pages at the end of the file, we assume the meta page
1565 * is correct. Queue does not update the meta page's last_pgno.
1566 */
1567 if (pgno == PGNO_BASE_MD &&
1568 dbtype != DB_QUEUE && meta->last_pgno != vdp->last_pgno) {
1569 #ifdef HAVE_FTRUNCATE
1570 isbad = 1;
1571 EPRINT((env, DB_STR_A("0552",
1572 "Page %lu: last_pgno is not correct: %lu != %lu",
1573 "%lu %lu %lu"), (u_long)pgno,
1574 (u_long)meta->last_pgno, (u_long)vdp->last_pgno));
1575 #endif
1576 vdp->meta_last_pgno = meta->last_pgno;
1577 }
1578
1579 /*
1580 * We have now verified the common fields of the metadata page.
1581 * Clear the flag that told us they had been incompletely checked.
1582 */
1583 F_CLR(pip, VRFY_INCOMPLETE);
1584
1585 err: if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
1586 ret = t_ret;
1587
1588 return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
1589 }
1590
1591 /*
1592 * __db_vrfy_freelist --
1593 * Walk free list, checking off pages and verifying absence of
1594 * loops.
1595 */
1596 static int
__db_vrfy_freelist(dbp,vdp,meta,flags)1597 __db_vrfy_freelist(dbp, vdp, meta, flags)
1598 DB *dbp;
1599 VRFY_DBINFO *vdp;
1600 db_pgno_t meta;
1601 u_int32_t flags;
1602 {
1603 DB *pgset;
1604 ENV *env;
1605 VRFY_PAGEINFO *pip;
1606 db_pgno_t cur_pgno, next_pgno;
1607 int p, ret, t_ret;
1608
1609 env = dbp->env;
1610 pgset = vdp->pgset;
1611 DB_ASSERT(env, pgset != NULL);
1612
1613 if ((ret = __db_vrfy_getpageinfo(vdp, meta, &pip)) != 0)
1614 return (ret);
1615 for (next_pgno = pip->free;
1616 next_pgno != PGNO_INVALID; next_pgno = pip->next_pgno) {
1617 cur_pgno = pip->pgno;
1618 if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0)
1619 return (t_ret);
1620
1621 /* This shouldn't happen, but just in case. */
1622 if (!IS_VALID_PGNO(next_pgno)) {
1623 EPRINT((env, DB_STR_A("0553",
1624 "Page %lu: invalid next_pgno %lu on free list page",
1625 "%lu %lu"), (u_long)cur_pgno, (u_long)next_pgno));
1626 return (DB_VERIFY_BAD);
1627 }
1628
1629 if (next_pgno > vdp->last_pgno) {
1630 EPRINT((env, DB_STR_A("0713",
1631 "Page %lu: page %lu on free list beyond last_pgno %lu",
1632 "%lu %lu %lu"), (u_long)cur_pgno,
1633 (u_long)next_pgno, (u_long)vdp->last_pgno));
1634 ret = DB_VERIFY_BAD;
1635 }
1636 /* Detect cycles. */
1637 if ((t_ret = __db_vrfy_pgset_get(pgset,
1638 vdp->thread_info, vdp->txn, next_pgno, &p)) != 0)
1639 return (t_ret);
1640 if (p != 0) {
1641 EPRINT((env, DB_STR_A("0554",
1642 "Page %lu: page %lu encountered a second time on free list",
1643 "%lu %lu"), (u_long)cur_pgno, (u_long)next_pgno));
1644 return (DB_VERIFY_BAD);
1645 }
1646 if ((t_ret = __db_vrfy_pgset_inc(pgset,
1647 vdp->thread_info, vdp->txn, next_pgno)) != 0)
1648 return (t_ret);
1649
1650 if ((t_ret = __db_vrfy_getpageinfo(vdp, next_pgno, &pip)) != 0)
1651 return (t_ret);
1652
1653 if (pip->type != P_INVALID) {
1654 EPRINT((env, DB_STR_A("0555",
1655 "Page %lu: non-invalid page %lu on free list",
1656 "%lu %lu"), (u_long)cur_pgno, (u_long)next_pgno));
1657 ret = DB_VERIFY_BAD; /* unsafe to continue */
1658 break;
1659 }
1660 }
1661
1662 if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
1663 ret = t_ret;
1664 return (ret);
1665 }
1666
1667 /*
1668 * __db_vrfy_subdbs --
1669 * Walk the known-safe master database of subdbs with a cursor,
1670 * verifying the structure of each subdatabase we encounter.
1671 */
1672 static int
__db_vrfy_subdbs(dbp,vdp,dbname,flags)1673 __db_vrfy_subdbs(dbp, vdp, dbname, flags)
1674 DB *dbp;
1675 VRFY_DBINFO *vdp;
1676 const char *dbname;
1677 u_int32_t flags;
1678 {
1679 DB *mdbp;
1680 DBC *dbc;
1681 DBT key, data;
1682 ENV *env;
1683 VRFY_PAGEINFO *pip;
1684 db_pgno_t meta_pgno;
1685 int ret, t_ret, isbad;
1686 u_int8_t type;
1687
1688 isbad = 0;
1689 dbc = NULL;
1690 env = dbp->env;
1691
1692 if ((ret = __db_master_open(dbp,
1693 vdp->thread_info, NULL, dbname, DB_RDONLY, 0, &mdbp)) != 0)
1694 return (ret);
1695
1696 if ((ret = __db_cursor_int(mdbp, NULL,
1697 vdp->txn, DB_BTREE, PGNO_INVALID, 0, DB_LOCK_INVALIDID, &dbc)) != 0)
1698 goto err;
1699
1700 memset(&key, 0, sizeof(key));
1701 memset(&data, 0, sizeof(data));
1702 while ((ret = __dbc_get(dbc, &key, &data, DB_NEXT)) == 0) {
1703 if (data.size != sizeof(db_pgno_t)) {
1704 EPRINT((env, DB_STR("0556",
1705 "Subdatabase entry not page-number size")));
1706 isbad = 1;
1707 goto err;
1708 }
1709 memcpy(&meta_pgno, data.data, data.size);
1710 /*
1711 * Subdatabase meta pgnos are stored in network byte
1712 * order for cross-endian compatibility. Swap if appropriate.
1713 */
1714 DB_NTOHL_SWAP(env, &meta_pgno);
1715 if (meta_pgno == PGNO_INVALID || meta_pgno > vdp->last_pgno) {
1716 EPRINT((env, DB_STR_A("0557",
1717 "Subdatabase entry references invalid page %lu",
1718 "%lu"), (u_long)meta_pgno));
1719 isbad = 1;
1720 goto err;
1721 }
1722 if ((ret = __db_vrfy_getpageinfo(vdp, meta_pgno, &pip)) != 0)
1723 goto err;
1724 type = pip->type;
1725 if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0)
1726 goto err;
1727 switch (type) {
1728 case P_BTREEMETA:
1729 if ((ret = __bam_vrfy_structure(
1730 dbp, vdp, meta_pgno, NULL, NULL, flags)) != 0) {
1731 if (ret == DB_VERIFY_BAD)
1732 isbad = 1;
1733 else
1734 goto err;
1735 }
1736 break;
1737 case P_HASHMETA:
1738 if ((ret = __ham_vrfy_structure(
1739 dbp, vdp, meta_pgno, flags)) != 0) {
1740 if (ret == DB_VERIFY_BAD)
1741 isbad = 1;
1742 else
1743 goto err;
1744 }
1745 break;
1746 case P_QAMMETA:
1747 default:
1748 EPRINT((env, DB_STR_A("0558",
1749 "Subdatabase entry references page %lu of invalid type %lu",
1750 "%lu %lu"), (u_long)meta_pgno, (u_long)type));
1751 ret = DB_VERIFY_BAD;
1752 goto err;
1753 }
1754 }
1755
1756 if (ret == DB_NOTFOUND)
1757 ret = 0;
1758
1759 err: if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0 && ret == 0)
1760 ret = t_ret;
1761
1762 if ((t_ret = __db_close(mdbp, NULL, 0)) != 0 && ret == 0)
1763 ret = t_ret;
1764
1765 return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
1766 }
1767
1768 /*
1769 * __db_vrfy_struct_feedback --
1770 * Provide feedback during top-down database structure traversal.
1771 * (See comment at the beginning of __db_vrfy_structure.)
1772 *
1773 * PUBLIC: void __db_vrfy_struct_feedback __P((DB *, VRFY_DBINFO *));
1774 */
1775 void
__db_vrfy_struct_feedback(dbp,vdp)1776 __db_vrfy_struct_feedback(dbp, vdp)
1777 DB *dbp;
1778 VRFY_DBINFO *vdp;
1779 {
1780 int progress;
1781
1782 if (dbp->db_feedback == NULL)
1783 return;
1784
1785 if (vdp->pgs_remaining > 0)
1786 vdp->pgs_remaining--;
1787
1788 /* Don't allow a feedback call of 100 until we're really done. */
1789 progress = 100 - (int)(vdp->pgs_remaining * 50 / (vdp->last_pgno + 1));
1790 dbp->db_feedback(dbp, DB_VERIFY, progress == 100 ? 99 : progress);
1791 }
1792
1793 /*
1794 * __db_vrfy_orderchkonly --
1795 * Do an sort-order/hashing check on a known-otherwise-good subdb.
1796 */
1797 static int
__db_vrfy_orderchkonly(dbp,vdp,name,subdb,flags)1798 __db_vrfy_orderchkonly(dbp, vdp, name, subdb, flags)
1799 DB *dbp;
1800 VRFY_DBINFO *vdp;
1801 const char *name, *subdb;
1802 u_int32_t flags;
1803 {
1804 BTMETA *btmeta;
1805 DB *mdbp, *pgset;
1806 DBC *pgsc;
1807 DBT key, data;
1808 DB_MPOOLFILE *mpf;
1809 ENV *env;
1810 HASH *h_internal;
1811 HMETA *hmeta;
1812 PAGE *h, *currpg;
1813 db_pgno_t meta_pgno, p, pgno;
1814 u_int32_t bucket;
1815 int t_ret, ret;
1816
1817 pgset = NULL;
1818 pgsc = NULL;
1819 env = dbp->env;
1820 mpf = dbp->mpf;
1821 currpg = h = NULL;
1822
1823 LF_CLR(DB_NOORDERCHK);
1824
1825 /* Open the master database and get the meta_pgno for the subdb. */
1826 if ((ret = __db_master_open(dbp,
1827 vdp->thread_info, NULL, name, DB_RDONLY, 0, &mdbp)) != 0)
1828 goto err;
1829
1830 DB_INIT_DBT(key, subdb, strlen(subdb));
1831 memset(&data, 0, sizeof(data));
1832 if ((ret = __db_get(mdbp,
1833 vdp->thread_info, NULL, &key, &data, 0)) != 0) {
1834 if (ret == DB_NOTFOUND)
1835 ret = ENOENT;
1836 goto err;
1837 }
1838
1839 if (data.size != sizeof(db_pgno_t)) {
1840 EPRINT((env, DB_STR("0559",
1841 "Subdatabase entry of invalid size")));
1842 ret = DB_VERIFY_BAD;
1843 goto err;
1844 }
1845
1846 memcpy(&meta_pgno, data.data, data.size);
1847
1848 /*
1849 * Subdatabase meta pgnos are stored in network byte
1850 * order for cross-endian compatibility. Swap if appropriate.
1851 */
1852 DB_NTOHL_SWAP(env, &meta_pgno);
1853
1854 if ((ret = __memp_fget(mpf,
1855 &meta_pgno, vdp->thread_info, NULL, 0, &h)) != 0)
1856 goto err;
1857
1858 if ((ret = __db_vrfy_pgset(env,
1859 vdp->thread_info, dbp->pgsize, &pgset)) != 0)
1860 goto err;
1861
1862 switch (TYPE(h)) {
1863 case P_BTREEMETA:
1864 btmeta = (BTMETA *)h;
1865 if (F_ISSET(&btmeta->dbmeta, BTM_RECNO)) {
1866 /* Recnos have no order to check. */
1867 ret = 0;
1868 goto err;
1869 }
1870 if ((ret =
1871 __db_meta2pgset(dbp, vdp, meta_pgno, flags, pgset)) != 0)
1872 goto err;
1873 if ((ret = __db_cursor_int(pgset, NULL, vdp->txn, dbp->type,
1874 PGNO_INVALID, 0, DB_LOCK_INVALIDID, &pgsc)) != 0)
1875 goto err;
1876 while ((ret = __db_vrfy_pgset_next(pgsc, &p)) == 0) {
1877 if ((ret = __memp_fget(mpf, &p,
1878 vdp->thread_info, NULL, 0, &currpg)) != 0)
1879 goto err;
1880 if ((ret = __bam_vrfy_itemorder(dbp, NULL,
1881 vdp->thread_info, currpg, p, NUM_ENT(currpg), 1,
1882 F_ISSET(&btmeta->dbmeta, BTM_DUP), flags)) != 0)
1883 goto err;
1884 if ((ret = __memp_fput(mpf,
1885 vdp->thread_info, currpg, dbp->priority)) != 0)
1886 goto err;
1887 currpg = NULL;
1888 }
1889
1890 /*
1891 * The normal exit condition for the loop above is DB_NOTFOUND.
1892 * If we see that, zero it and continue on to cleanup.
1893 * Otherwise, it's a real error and will be returned.
1894 */
1895 if (ret == DB_NOTFOUND)
1896 ret = 0;
1897 break;
1898 case P_HASHMETA:
1899 hmeta = (HMETA *)h;
1900 h_internal = (HASH *)dbp->h_internal;
1901 /*
1902 * Make sure h_charkey is right.
1903 */
1904 if (h_internal == NULL) {
1905 EPRINT((env, DB_STR_A("0560",
1906 "Page %lu: DB->h_internal field is NULL", "%lu"),
1907 (u_long)meta_pgno));
1908 ret = DB_VERIFY_BAD;
1909 goto err;
1910 }
1911 if (h_internal->h_hash == NULL)
1912 h_internal->h_hash = hmeta->dbmeta.version < 5
1913 ? __ham_func4 : __ham_func5;
1914 if (hmeta->h_charkey !=
1915 h_internal->h_hash(dbp, CHARKEY, sizeof(CHARKEY))) {
1916 EPRINT((env, DB_STR_A("0561",
1917 "Page %lu: incorrect hash function for database",
1918 "%lu"), (u_long)meta_pgno));
1919 ret = DB_VERIFY_BAD;
1920 goto err;
1921 }
1922
1923 /*
1924 * Foreach bucket, verify hashing on each page in the
1925 * corresponding chain of pages.
1926 */
1927 if ((ret = __db_cursor_int(dbp, NULL, vdp->txn, dbp->type,
1928 PGNO_INVALID, 0, DB_LOCK_INVALIDID, &pgsc)) != 0)
1929 goto err;
1930 for (bucket = 0; bucket <= hmeta->max_bucket; bucket++) {
1931 pgno = BS_TO_PAGE(bucket, hmeta->spares);
1932 while (pgno != PGNO_INVALID) {
1933 if ((ret = __memp_fget(mpf, &pgno,
1934 vdp->thread_info, NULL, 0, &currpg)) != 0)
1935 goto err;
1936 if ((ret = __ham_vrfy_hashing(pgsc,
1937 NUM_ENT(currpg), hmeta, bucket, pgno,
1938 flags, h_internal->h_hash)) != 0)
1939 goto err;
1940 pgno = NEXT_PGNO(currpg);
1941 if ((ret = __memp_fput(mpf, vdp->thread_info,
1942 currpg, dbp->priority)) != 0)
1943 goto err;
1944 currpg = NULL;
1945 }
1946 }
1947 break;
1948 default:
1949 EPRINT((env, DB_STR_A("0562",
1950 "Page %lu: database metapage of bad type %lu",
1951 "%lu %lu"), (u_long)meta_pgno, (u_long)TYPE(h)));
1952 ret = DB_VERIFY_BAD;
1953 break;
1954 }
1955
1956 err: if (pgsc != NULL && (t_ret = __dbc_close(pgsc)) != 0 && ret == 0)
1957 ret = t_ret;
1958 if (pgset != NULL &&
1959 (t_ret = __db_close(pgset, NULL, 0)) != 0 && ret == 0)
1960 ret = t_ret;
1961 if (h != NULL && (t_ret = __memp_fput(mpf,
1962 vdp->thread_info, h, dbp->priority)) != 0)
1963 ret = t_ret;
1964 if (currpg != NULL &&
1965 (t_ret = __memp_fput(mpf,
1966 vdp->thread_info, currpg, dbp->priority)) != 0)
1967 ret = t_ret;
1968 if ((t_ret = __db_close(mdbp, NULL, 0)) != 0)
1969 ret = t_ret;
1970 return (ret);
1971 }
1972
1973 /*
1974 * __db_salvage_pg --
1975 * Walk through a page, salvaging all likely or plausible (w/
1976 * DB_AGGRESSIVE) key/data pairs and marking seen pages in vdp.
1977 *
1978 * PUBLIC: int __db_salvage_pg __P((DB *, VRFY_DBINFO *, db_pgno_t,
1979 * PUBLIC: PAGE *, void *, int (*)(void *, const void *), u_int32_t));
1980 */
1981 int
__db_salvage_pg(dbp,vdp,pgno,h,handle,callback,flags)1982 __db_salvage_pg(dbp, vdp, pgno, h, handle, callback, flags)
1983 DB *dbp;
1984 VRFY_DBINFO *vdp;
1985 db_pgno_t pgno;
1986 PAGE *h;
1987 void *handle;
1988 int (*callback) __P((void *, const void *));
1989 u_int32_t flags;
1990 {
1991 ENV *env;
1992 VRFY_PAGEINFO *pip;
1993 int keyflag, ret, t_ret;
1994
1995 env = dbp->env;
1996 DB_ASSERT(env, LF_ISSET(DB_SALVAGE));
1997
1998 /*
1999 * !!!
2000 * We dump record numbers when salvaging Queue databases, but not for
2001 * immutable Recno databases. The problem is we can't figure out the
2002 * record number from the database page in the Recno case, while the
2003 * offset in the file is sufficient for Queue.
2004 */
2005 keyflag = 0;
2006
2007 /* If we got this page in the subdb pass, we can safely skip it. */
2008 if (__db_salvage_isdone(vdp, pgno))
2009 return (0);
2010
2011 switch (TYPE(h)) {
2012 case P_BTREEMETA:
2013 ret = __bam_vrfy_meta(dbp, vdp, (BTMETA *)h, pgno, flags);
2014 break;
2015 case P_HASH:
2016 case P_HASH_UNSORTED:
2017 case P_HEAP:
2018 case P_LBTREE:
2019 case P_QAMDATA:
2020 return (__db_salvage_leaf(dbp,
2021 vdp, pgno, h, handle, callback, flags));
2022 case P_HASHMETA:
2023 ret = __ham_vrfy_meta(dbp, vdp, (HMETA *)h, pgno, flags);
2024 break;
2025 case P_HEAPMETA:
2026 ret = __heap_vrfy_meta(dbp, vdp, (HEAPMETA *)h, pgno, flags);
2027 break;
2028 case P_IBTREE:
2029 /*
2030 * We need to mark any overflow keys on internal pages as seen,
2031 * so we don't print them out in __db_salvage_unknowns. But if
2032 * we're an upgraded database, a P_LBTREE page may very well
2033 * have a reference to the same overflow pages (this practice
2034 * stopped somewhere around db4.5). To give P_LBTREEs a chance
2035 * to print out any keys on shared pages, mark the page now and
2036 * deal with it at the end.
2037 */
2038 return (__db_salvage_markneeded(vdp, pgno, SALVAGE_IBTREE));
2039 case P_IHEAP:
2040 /*
2041 * There's nothing to salvage from heap region pages. Just mark
2042 * that we've seen the page.
2043 */
2044 return (__db_salvage_markdone(vdp, pgno));
2045 case P_LDUP:
2046 return (__db_salvage_markneeded(vdp, pgno, SALVAGE_LDUP));
2047 case P_LRECNO:
2048 /*
2049 * Recno leaves are tough, because the leaf could be (1) a dup
2050 * page, or it could be (2) a regular database leaf page.
2051 * Fortunately, RECNO databases are not allowed to have
2052 * duplicates.
2053 *
2054 * If there are no subdatabases, dump the page immediately if
2055 * it's a leaf in a RECNO database, otherwise wait and hopefully
2056 * it will be dumped by the leaf page that refers to it,
2057 * otherwise we'll get it with the unknowns.
2058 *
2059 * If there are subdatabases, there might be mixed types and
2060 * dbp->type can't be trusted. We'll only get here after
2061 * salvaging each database, though, so salvaging this page
2062 * immediately isn't important. If this page is a dup, it might
2063 * get salvaged later on, otherwise the unknowns pass will pick
2064 * it up. Note that SALVAGE_HASSUBDBS won't get set if we're
2065 * salvaging aggressively.
2066 *
2067 * If we're salvaging aggressively, we don't know whether or not
2068 * there's subdatabases, so we wait on all recno pages.
2069 */
2070 if (!LF_ISSET(DB_AGGRESSIVE) &&
2071 !F_ISSET(vdp, SALVAGE_HASSUBDBS) && dbp->type == DB_RECNO)
2072 return (__db_salvage_leaf(dbp,
2073 vdp, pgno, h, handle, callback, flags));
2074 return (__db_salvage_markneeded(vdp, pgno, SALVAGE_LRECNODUP));
2075 case P_OVERFLOW:
2076 return (__db_salvage_markneeded(vdp, pgno, SALVAGE_OVERFLOW));
2077 case P_QAMMETA:
2078 keyflag = 1;
2079 ret = __qam_vrfy_meta(dbp, vdp, (QMETA *)h, pgno, flags);
2080 break;
2081 case P_INVALID:
2082 case P_IRECNO:
2083 case __P_DUPLICATE:
2084 default:
2085 /*
2086 * There's no need to display an error, the page type was
2087 * already checked and reported on.
2088 */
2089 return (0);
2090 }
2091 if (ret != 0)
2092 return (ret);
2093
2094 /*
2095 * We have to display the dump header if it's a metadata page. It's
2096 * our last chance as the page was marked "seen" in the vrfy routine,
2097 * and we won't see the page again. We don't display headers for
2098 * the first database in a multi-database file, that database simply
2099 * contains a list of subdatabases.
2100 */
2101 if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
2102 return (ret);
2103 if (!F_ISSET(pip, VRFY_HAS_SUBDBS) && !LF_ISSET(DB_VERIFY_PARTITION))
2104 ret = __db_prheader(
2105 dbp, NULL, 0, keyflag, handle, callback, vdp, pgno);
2106 if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
2107 ret = t_ret;
2108 return (ret);
2109 }
2110
2111 /*
2112 * __db_salvage_leaf --
2113 * Walk through a leaf, salvaging all likely key/data pairs and marking
2114 * seen pages in vdp.
2115 *
2116 * PUBLIC: int __db_salvage_leaf __P((DB *, VRFY_DBINFO *, db_pgno_t,
2117 * PUBLIC: PAGE *, void *, int (*)(void *, const void *), u_int32_t));
2118 */
2119 int
__db_salvage_leaf(dbp,vdp,pgno,h,handle,callback,flags)2120 __db_salvage_leaf(dbp, vdp, pgno, h, handle, callback, flags)
2121 DB *dbp;
2122 VRFY_DBINFO *vdp;
2123 db_pgno_t pgno;
2124 PAGE *h;
2125 void *handle;
2126 int (*callback) __P((void *, const void *));
2127 u_int32_t flags;
2128 {
2129 ENV *env;
2130
2131 env = dbp->env;
2132 DB_ASSERT(env, LF_ISSET(DB_SALVAGE));
2133
2134 /* If we got this page in the subdb pass, we can safely skip it. */
2135 if (__db_salvage_isdone(vdp, pgno))
2136 return (0);
2137
2138 switch (TYPE(h)) {
2139 case P_HASH_UNSORTED:
2140 case P_HASH:
2141 return (__ham_salvage(dbp, vdp,
2142 pgno, h, handle, callback, flags));
2143 case P_HEAP:
2144 return (__heap_salvage(dbp, vdp,
2145 pgno, h, handle, callback, flags));
2146 case P_LBTREE:
2147 case P_LRECNO:
2148 return (__bam_salvage(dbp, vdp,
2149 pgno, TYPE(h), h, handle, callback, NULL, flags));
2150 case P_QAMDATA:
2151 return (__qam_salvage(dbp, vdp,
2152 pgno, h, handle, callback, flags));
2153 default:
2154 /*
2155 * There's no need to display an error, the page type was
2156 * already checked and reported on.
2157 */
2158 return (0);
2159 }
2160 }
2161
2162 /*
2163 * __db_salvage_unknowns --
2164 * Walk through the salvager database, printing with key "UNKNOWN"
2165 * any pages we haven't dealt with.
2166 */
2167 static int
__db_salvage_unknowns(dbp,vdp,handle,callback,flags)2168 __db_salvage_unknowns(dbp, vdp, handle, callback, flags)
2169 DB *dbp;
2170 VRFY_DBINFO *vdp;
2171 void *handle;
2172 int (*callback) __P((void *, const void *));
2173 u_int32_t flags;
2174 {
2175 DBC *dbc;
2176 DBT unkdbt, key, *dbt;
2177 DB_MPOOLFILE *mpf;
2178 ENV *env;
2179 PAGE *h;
2180 db_pgno_t pgno;
2181 u_int32_t pgtype, ovfl_bufsz, tmp_flags;
2182 int ret, t_ret;
2183 void *ovflbuf;
2184
2185 dbc = NULL;
2186 env = dbp->env;
2187 mpf = dbp->mpf;
2188
2189 DB_INIT_DBT(unkdbt, "UNKNOWN", sizeof("UNKNOWN") - 1);
2190
2191 if ((ret = __os_malloc(env, dbp->pgsize, &ovflbuf)) != 0)
2192 return (ret);
2193 ovfl_bufsz = dbp->pgsize;
2194
2195 /*
2196 * We make two passes -- in the first pass, skip SALVAGE_OVERFLOW
2197 * pages, because they may be referenced by the standard database
2198 * pages that we're resolving.
2199 */
2200 while ((t_ret =
2201 __db_salvage_getnext(vdp, &dbc, &pgno, &pgtype, 1)) == 0) {
2202 if ((t_ret = __memp_fget(mpf,
2203 &pgno, vdp->thread_info, NULL, 0, &h)) != 0) {
2204 if (ret == 0)
2205 ret = t_ret;
2206 continue;
2207 }
2208
2209 dbt = NULL;
2210 tmp_flags = 0;
2211 switch (pgtype) {
2212 case SALVAGE_LDUP:
2213 case SALVAGE_LRECNODUP:
2214 dbt = &unkdbt;
2215 tmp_flags = DB_SA_UNKNOWNKEY;
2216 /* FALLTHROUGH */
2217 case SALVAGE_IBTREE:
2218 case SALVAGE_LBTREE:
2219 case SALVAGE_LRECNO:
2220 if ((t_ret = __bam_salvage(
2221 dbp, vdp, pgno, pgtype, h, handle,
2222 callback, dbt, tmp_flags | flags)) != 0 && ret == 0)
2223 ret = t_ret;
2224 break;
2225 case SALVAGE_OVERFLOW:
2226 DB_ASSERT(env, 0); /* Shouldn't ever happen. */
2227 break;
2228 case SALVAGE_HASH:
2229 if ((t_ret = __ham_salvage(dbp, vdp,
2230 pgno, h, handle, callback, flags)) != 0 && ret == 0)
2231 ret = t_ret;
2232 break;
2233 case SALVAGE_INVALID:
2234 case SALVAGE_IGNORE:
2235 default:
2236 /*
2237 * Shouldn't happen, but if it does, just do what the
2238 * nice man says.
2239 */
2240 DB_ASSERT(env, 0);
2241 break;
2242 }
2243 if ((t_ret = __memp_fput(mpf,
2244 vdp->thread_info, h, dbp->priority)) != 0 && ret == 0)
2245 ret = t_ret;
2246 }
2247
2248 /* We should have reached the end of the database. */
2249 if (t_ret == DB_NOTFOUND)
2250 t_ret = 0;
2251 if (t_ret != 0 && ret == 0)
2252 ret = t_ret;
2253
2254 /* Re-open the cursor so we traverse the database again. */
2255 if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0)
2256 ret = t_ret;
2257 dbc = NULL;
2258
2259 /* Now, deal with any remaining overflow pages. */
2260 while ((t_ret =
2261 __db_salvage_getnext(vdp, &dbc, &pgno, &pgtype, 0)) == 0) {
2262 if ((t_ret = __memp_fget(mpf,
2263 &pgno, vdp->thread_info, NULL, 0, &h)) != 0) {
2264 if (ret == 0)
2265 ret = t_ret;
2266 continue;
2267 }
2268
2269 switch (pgtype) {
2270 case SALVAGE_OVERFLOW:
2271 /*
2272 * XXX:
2273 * This may generate multiple "UNKNOWN" keys in
2274 * a database with no dups. What to do?
2275 */
2276 if ((t_ret = __db_safe_goff(dbp, vdp,
2277 pgno, &key, &ovflbuf, &ovfl_bufsz, flags)) != 0 ||
2278 ((vdp->type == DB_BTREE || vdp->type == DB_HASH) &&
2279 (t_ret = __db_vrfy_prdbt(&unkdbt,
2280 0, " ", handle, callback, 0, 0, vdp)) != 0) ||
2281 (t_ret = __db_vrfy_prdbt(
2282 &key, 0, " ", handle, callback, 0, 0, vdp)) != 0)
2283 if (ret == 0)
2284 ret = t_ret;
2285 break;
2286 default:
2287 DB_ASSERT(env, 0); /* Shouldn't ever happen. */
2288 break;
2289 }
2290 if ((t_ret = __memp_fput(mpf,
2291 vdp->thread_info, h, dbp->priority)) != 0 && ret == 0)
2292 ret = t_ret;
2293 }
2294
2295 /* We should have reached the end of the database. */
2296 if (t_ret == DB_NOTFOUND)
2297 t_ret = 0;
2298 if (t_ret != 0 && ret == 0)
2299 ret = t_ret;
2300
2301 if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0)
2302 ret = t_ret;
2303
2304 __os_free(env, ovflbuf);
2305
2306 return (ret);
2307 }
2308
/*
 * Byte offset, measured from the start of page h, of the ith entry of the
 * page's inp array.  The offset an entry stores can be compared against it.
 *
 * Every argument is parenthesized so that expressions may be passed safely;
 * note that h is evaluated twice.
 */
#define	INP_OFFSET(dbp, h, i)	\
    ((db_indx_t)((u_int8_t *)((P_INP((dbp), (h))) + (i)) - (u_int8_t *)(h)))
2315
2316 /*
2317 * __db_vrfy_inpitem --
2318 * Verify that a single entry in the inp array is sane, and update
2319 * the high water mark and current item offset. (The former of these is
2320 * used for state information between calls, and is required; it must
2321 * be initialized to the pagesize before the first call.)
2322 *
2323 * Returns DB_VERIFY_FATAL if inp has collided with the data,
2324 * since verification can't continue from there; returns DB_VERIFY_BAD
2325 * if anything else is wrong.
2326 *
2327 * PUBLIC: int __db_vrfy_inpitem __P((DB *, PAGE *,
2328 * PUBLIC: db_pgno_t, u_int32_t, int, u_int32_t, u_int32_t *, u_int32_t *));
2329 */
2330 int
__db_vrfy_inpitem(dbp,h,pgno,i,is_btree,flags,himarkp,offsetp)2331 __db_vrfy_inpitem(dbp, h, pgno, i, is_btree, flags, himarkp, offsetp)
2332 DB *dbp;
2333 PAGE *h;
2334 db_pgno_t pgno;
2335 u_int32_t i;
2336 int is_btree;
2337 u_int32_t flags, *himarkp, *offsetp;
2338 {
2339 BKEYDATA *bk;
2340 ENV *env;
2341 db_indx_t *inp, offset, len;
2342
2343 env = dbp->env;
2344
2345 DB_ASSERT(env, himarkp != NULL);
2346 inp = P_INP(dbp, h);
2347
2348 /*
2349 * Check that the inp array, which grows from the beginning of the
2350 * page forward, has not collided with the data, which grow from the
2351 * end of the page backward.
2352 */
2353 if (inp + i >= (db_indx_t *)((u_int8_t *)h + *himarkp)) {
2354 /* We've collided with the data. We need to bail. */
2355 EPRINT((env, DB_STR_A("0563",
2356 "Page %lu: entries listing %lu overlaps data",
2357 "%lu %lu"), (u_long)pgno, (u_long)i));
2358 return (DB_VERIFY_FATAL);
2359 }
2360
2361 offset = inp[i];
2362
2363 /*
2364 * Check that the item offset is reasonable: it points somewhere
2365 * after the inp array and before the end of the page.
2366 */
2367 if (offset <= INP_OFFSET(dbp, h, i) || offset >= dbp->pgsize) {
2368 EPRINT((env, DB_STR_A("0564",
2369 "Page %lu: bad offset %lu at page index %lu",
2370 "%lu %lu %lu"), (u_long)pgno, (u_long)offset, (u_long)i));
2371 return (DB_VERIFY_BAD);
2372 }
2373
2374 /* Update the high-water mark (what HOFFSET should be) */
2375 if (offset < *himarkp)
2376 *himarkp = offset;
2377
2378 if (is_btree) {
2379 /*
2380 * Check alignment; if it's unaligned, it's unsafe to
2381 * manipulate this item.
2382 */
2383 if (offset != DB_ALIGN(offset, sizeof(u_int32_t))) {
2384 EPRINT((env, DB_STR_A("0565",
2385 "Page %lu: unaligned offset %lu at page index %lu",
2386 "%lu %lu %lu"), (u_long)pgno, (u_long)offset,
2387 (u_long)i));
2388 return (DB_VERIFY_BAD);
2389 }
2390
2391 /*
2392 * Check that the item length remains on-page.
2393 */
2394 bk = GET_BKEYDATA(dbp, h, i);
2395
2396 /*
2397 * We need to verify the type of the item here;
2398 * we can't simply assume that it will be one of the
2399 * expected three. If it's not a recognizable type,
2400 * it can't be considered to have a verifiable
2401 * length, so it's not possible to certify it as safe.
2402 */
2403 switch (B_TYPE(bk->type)) {
2404 case B_KEYDATA:
2405 len = bk->len;
2406 break;
2407 case B_DUPLICATE:
2408 case B_OVERFLOW:
2409 len = BOVERFLOW_SIZE;
2410 break;
2411 default:
2412 EPRINT((env, DB_STR_A("0566",
2413 "Page %lu: item %lu of unrecognizable type",
2414 "%lu %lu"), (u_long)pgno, (u_long)i));
2415 return (DB_VERIFY_BAD);
2416 }
2417
2418 if ((size_t)(offset + len) > dbp->pgsize) {
2419 EPRINT((env, DB_STR_A("0567",
2420 "Page %lu: item %lu extends past page boundary",
2421 "%lu %lu"), (u_long)pgno, (u_long)i));
2422 return (DB_VERIFY_BAD);
2423 }
2424 }
2425
2426 if (offsetp != NULL)
2427 *offsetp = offset;
2428 return (0);
2429 }
2430
2431 /*
2432 * __db_vrfy_duptype--
2433 * Given a page number and a set of flags to __bam_vrfy_subtree,
2434 * verify that the dup tree type is correct--i.e., it's a recno
2435 * if DUPSORT is not set and a btree if it is.
2436 *
2437 * PUBLIC: int __db_vrfy_duptype
2438 * PUBLIC: __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t));
2439 */
2440 int
__db_vrfy_duptype(dbp,vdp,pgno,flags)2441 __db_vrfy_duptype(dbp, vdp, pgno, flags)
2442 DB *dbp;
2443 VRFY_DBINFO *vdp;
2444 db_pgno_t pgno;
2445 u_int32_t flags;
2446 {
2447 ENV *env;
2448 VRFY_PAGEINFO *pip;
2449 int ret, isbad;
2450
2451 env = dbp->env;
2452 isbad = 0;
2453
2454 if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
2455 return (ret);
2456
2457 switch (pip->type) {
2458 case P_IBTREE:
2459 case P_LDUP:
2460 if (!LF_ISSET(DB_ST_DUPSORT)) {
2461 EPRINT((env, DB_STR_A("0568",
2462 "Page %lu: sorted duplicate set in unsorted-dup database",
2463 "%lu"), (u_long)pgno));
2464 isbad = 1;
2465 }
2466 break;
2467 case P_IRECNO:
2468 case P_LRECNO:
2469 if (LF_ISSET(DB_ST_DUPSORT)) {
2470 EPRINT((env, DB_STR_A("0569",
2471 "Page %lu: unsorted duplicate set in sorted-dup database",
2472 "%lu"), (u_long)pgno));
2473 isbad = 1;
2474 }
2475 break;
2476 default:
2477 /*
2478 * If the page is entirely zeroed, its pip->type will be a lie
2479 * (we assumed it was a hash page, as they're allowed to be
2480 * zeroed); handle this case specially.
2481 */
2482 if (F_ISSET(pip, VRFY_IS_ALLZEROES))
2483 ZEROPG_ERR_PRINT(env, pgno, DB_STR_P("duplicate page"));
2484 else
2485 EPRINT((env, DB_STR_A("0570",
2486 "Page %lu: duplicate page of inappropriate type %lu",
2487 "%lu %lu"), (u_long)pgno, (u_long)pip->type));
2488 isbad = 1;
2489 break;
2490 }
2491
2492 if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0)
2493 return (ret);
2494 return (isbad == 1 ? DB_VERIFY_BAD : 0);
2495 }
2496
2497 /*
2498 * __db_salvage_duptree --
2499 * Attempt to salvage a given duplicate tree, given its alleged root.
2500 *
2501 * The key that corresponds to this dup set has been passed to us
2502 * in DBT *key. Because data items follow keys, though, it has been
2503 * printed once already.
2504 *
2505 * The basic idea here is that pgno ought to be a P_LDUP, a P_LRECNO, a
2506 * P_IBTREE, or a P_IRECNO. If it's an internal page, use the verifier
2507 * functions to make sure it's safe; if it's not, we simply bail and the
2508 * data will have to be printed with no key later on. if it is safe,
2509 * recurse on each of its children.
2510 *
2511 * Whether or not it's safe, if it's a leaf page, __bam_salvage it.
2512 *
2513 * At all times, use the DB hanging off vdp to mark and check what we've
2514 * done, so each page gets printed exactly once and we don't get caught
2515 * in any cycles.
2516 *
2517 * PUBLIC: int __db_salvage_duptree __P((DB *, VRFY_DBINFO *, db_pgno_t,
2518 * PUBLIC: DBT *, void *, int (*)(void *, const void *), u_int32_t));
2519 */
2520 int
__db_salvage_duptree(dbp,vdp,pgno,key,handle,callback,flags)2521 __db_salvage_duptree(dbp, vdp, pgno, key, handle, callback, flags)
2522 DB *dbp;
2523 VRFY_DBINFO *vdp;
2524 db_pgno_t pgno;
2525 DBT *key;
2526 void *handle;
2527 int (*callback) __P((void *, const void *));
2528 u_int32_t flags;
2529 {
2530 DB_MPOOLFILE *mpf;
2531 PAGE *h;
2532 int ret, t_ret;
2533
2534 mpf = dbp->mpf;
2535
2536 if (pgno == PGNO_INVALID || !IS_VALID_PGNO(pgno))
2537 return (DB_VERIFY_BAD);
2538
2539 /* We have a plausible page. Try it. */
2540 if ((ret = __memp_fget(mpf, &pgno, vdp->thread_info, NULL, 0, &h)) != 0)
2541 return (ret);
2542
2543 switch (TYPE(h)) {
2544 case P_IBTREE:
2545 case P_IRECNO:
2546 if ((ret = __db_vrfy_common(dbp, vdp, h, pgno, flags)) != 0)
2547 goto err;
2548 if ((ret = __bam_vrfy(dbp,
2549 vdp, h, pgno, flags | DB_NOORDERCHK)) != 0 ||
2550 (ret = __db_salvage_markdone(vdp, pgno)) != 0)
2551 goto err;
2552 /*
2553 * We have a known-healthy internal page. Walk it.
2554 */
2555 if ((ret = __bam_salvage_walkdupint(dbp, vdp, h, key,
2556 handle, callback, flags)) != 0)
2557 goto err;
2558 break;
2559 case P_LRECNO:
2560 case P_LDUP:
2561 if ((ret = __bam_salvage(dbp,
2562 vdp, pgno, TYPE(h), h, handle, callback, key, flags)) != 0)
2563 goto err;
2564 break;
2565 default:
2566 ret = DB_VERIFY_BAD;
2567 goto err;
2568 }
2569
2570 err: if ((t_ret = __memp_fput(mpf,
2571 vdp->thread_info, h, dbp->priority)) != 0 && ret == 0)
2572 ret = t_ret;
2573 return (ret);
2574 }
2575
2576 /*
2577 * __db_salvage_all --
2578 * Salvage only the leaves we find by walking the tree. If we have subdbs,
2579 * salvage each of them individually.
2580 */
2581 static int
__db_salvage_all(dbp,vdp,handle,callback,flags,hassubsp)2582 __db_salvage_all(dbp, vdp, handle, callback, flags, hassubsp)
2583 DB *dbp;
2584 VRFY_DBINFO *vdp;
2585 void *handle;
2586 int (*callback) __P((void *, const void *));
2587 u_int32_t flags;
2588 int *hassubsp;
2589 {
2590 DB *pgset;
2591 DBC *pgsc;
2592 DB_MPOOLFILE *mpf;
2593 ENV *env;
2594 PAGE *h;
2595 VRFY_PAGEINFO *pip;
2596 db_pgno_t p, meta_pgno;
2597 int ret, t_ret;
2598
2599 *hassubsp = 0;
2600
2601 env = dbp->env;
2602 pgset = NULL;
2603 pgsc = NULL;
2604 mpf = dbp->mpf;
2605 h = NULL;
2606 pip = NULL;
2607 ret = 0;
2608
2609 /*
2610 * Check to make sure the page is OK and find out if it contains
2611 * subdatabases.
2612 */
2613 meta_pgno = PGNO_BASE_MD;
2614 if ((t_ret = __memp_fget(mpf,
2615 &meta_pgno, vdp->thread_info, NULL, 0, &h)) == 0 &&
2616 (t_ret = __db_vrfy_common(dbp, vdp, h, PGNO_BASE_MD, flags)) == 0 &&
2617 (t_ret = __db_salvage_pg(
2618 dbp, vdp, PGNO_BASE_MD, h, handle, callback, flags)) == 0 &&
2619 (t_ret = __db_vrfy_getpageinfo(vdp, 0, &pip)) == 0)
2620 if (F_ISSET(pip, VRFY_HAS_SUBDBS))
2621 *hassubsp = 1;
2622 if (pip != NULL &&
2623 (t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
2624 ret = t_ret;
2625 if (h != NULL) {
2626 if ((t_ret = __memp_fput(mpf,
2627 vdp->thread_info, h, dbp->priority)) != 0 && ret == 0)
2628 ret = t_ret;
2629 h = NULL;
2630 }
2631 if (ret != 0)
2632 return (ret);
2633
2634 /* Without subdatabases, we can just dump from the meta pgno. */
2635 if (*hassubsp == 0)
2636 return (__db_salvage(dbp,
2637 vdp, PGNO_BASE_MD, handle, callback, flags));
2638
2639 /*
2640 * We have subdbs. Try to crack them.
2641 *
2642 * To do so, get a set of leaf pages in the master database, and then
2643 * walk each of the valid ones, salvaging subdbs as we go. If any
2644 * prove invalid, just drop them; we'll pick them up on a later pass.
2645 */
2646 if ((ret = __db_vrfy_pgset(env,
2647 vdp->thread_info, dbp->pgsize, &pgset)) != 0)
2648 goto err;
2649 if ((ret = __db_meta2pgset(dbp, vdp, PGNO_BASE_MD, flags, pgset)) != 0)
2650 goto err;
2651 if ((ret = __db_cursor(pgset, vdp->thread_info, NULL, &pgsc, 0)) != 0)
2652 goto err;
2653 while ((t_ret = __db_vrfy_pgset_next(pgsc, &p)) == 0) {
2654 if ((t_ret = __memp_fget(mpf,
2655 &p, vdp->thread_info, NULL, 0, &h)) == 0 &&
2656 (t_ret = __db_vrfy_common(dbp, vdp, h, p, flags)) == 0 &&
2657 (t_ret =
2658 __bam_vrfy(dbp, vdp, h, p, flags | DB_NOORDERCHK)) == 0)
2659 t_ret = __db_salvage_subdbpg(
2660 dbp, vdp, h, handle, callback, flags);
2661 if (t_ret != 0 && ret == 0)
2662 ret = t_ret;
2663 if (h != NULL) {
2664 if ((t_ret = __memp_fput(mpf, vdp->thread_info,
2665 h, dbp->priority)) != 0 && ret == 0)
2666 ret = t_ret;
2667 h = NULL;
2668 }
2669 }
2670
2671 if (t_ret != DB_NOTFOUND && ret == 0)
2672 ret = t_ret;
2673
2674 err: if (pgsc != NULL && (t_ret = __dbc_close(pgsc)) != 0 && ret == 0)
2675 ret = t_ret;
2676 if (pgset != NULL &&
2677 (t_ret = __db_close(pgset, NULL, 0)) != 0 && ret ==0)
2678 ret = t_ret;
2679 if (h != NULL &&
2680 (t_ret = __memp_fput(mpf,
2681 vdp->thread_info, h, dbp->priority)) != 0 && ret == 0)
2682 ret = t_ret;
2683 return (ret);
2684 }
2685
2686 /*
2687 * __db_salvage_subdbpg --
2688 * Given a known-good leaf page in the master database, salvage all
2689 * leaf pages corresponding to each subdb.
2690 */
2691 static int
__db_salvage_subdbpg(dbp,vdp,master,handle,callback,flags)2692 __db_salvage_subdbpg(dbp, vdp, master, handle, callback, flags)
2693 DB *dbp;
2694 VRFY_DBINFO *vdp;
2695 PAGE *master;
2696 void *handle;
2697 int (*callback) __P((void *, const void *));
2698 u_int32_t flags;
2699 {
2700 BKEYDATA *bkkey, *bkdata;
2701 BOVERFLOW *bo;
2702 DB *pgset;
2703 DBC *pgsc;
2704 DBT key;
2705 DB_MPOOLFILE *mpf;
2706 ENV *env;
2707 PAGE *subpg;
2708 db_indx_t i;
2709 db_pgno_t meta_pgno;
2710 int ret, err_ret, t_ret;
2711 char *subdbname;
2712 u_int32_t ovfl_bufsz;
2713
2714 env = dbp->env;
2715 mpf = dbp->mpf;
2716 ret = err_ret = 0;
2717 subdbname = NULL;
2718 pgsc = NULL;
2719 pgset = NULL;
2720 ovfl_bufsz = 0;
2721
2722 /*
2723 * For each entry, get and salvage the set of pages
2724 * corresponding to that entry.
2725 */
2726 for (i = 0; i < NUM_ENT(master); i += P_INDX) {
2727 bkkey = GET_BKEYDATA(dbp, master, i);
2728 bkdata = GET_BKEYDATA(dbp, master, i + O_INDX);
2729
2730 /* Get the subdatabase name. */
2731 if (B_TYPE(bkkey->type) == B_OVERFLOW) {
2732 /*
2733 * We can, in principle anyway, have a subdb
2734 * name so long it overflows. Ick.
2735 */
2736 bo = (BOVERFLOW *)bkkey;
2737 if ((ret = __db_safe_goff(dbp, vdp, bo->pgno,
2738 &key, &subdbname, &ovfl_bufsz, flags)) != 0) {
2739 err_ret = DB_VERIFY_BAD;
2740 continue;
2741 }
2742
2743 /* Nul-terminate it. */
2744 if (ovfl_bufsz < key.size + 1) {
2745 if ((ret = __os_realloc(env,
2746 key.size + 1, &subdbname)) != 0)
2747 goto err;
2748 ovfl_bufsz = key.size + 1;
2749 }
2750 subdbname[key.size] = '\0';
2751 } else if (B_TYPE(bkkey->type) == B_KEYDATA) {
2752 if (ovfl_bufsz < (u_int32_t)bkkey->len + 1) {
2753 if ((ret = __os_realloc(env,
2754 bkkey->len + 1, &subdbname)) != 0)
2755 goto err;
2756 ovfl_bufsz = bkkey->len + 1;
2757 }
2758 DB_ASSERT(env, subdbname != NULL);
2759 memcpy(subdbname, bkkey->data, bkkey->len);
2760 subdbname[bkkey->len] = '\0';
2761 }
2762
2763 /* Get the corresponding pgno. */
2764 if (bkdata->len != sizeof(db_pgno_t)) {
2765 err_ret = DB_VERIFY_BAD;
2766 continue;
2767 }
2768 memcpy(&meta_pgno,
2769 (db_pgno_t *)bkdata->data, sizeof(db_pgno_t));
2770
2771 /*
2772 * Subdatabase meta pgnos are stored in network byte
2773 * order for cross-endian compatibility. Swap if appropriate.
2774 */
2775 DB_NTOHL_SWAP(env, &meta_pgno);
2776
2777 /* If we can't get the subdb meta page, just skip the subdb. */
2778 if (!IS_VALID_PGNO(meta_pgno) || (ret = __memp_fget(mpf,
2779 &meta_pgno, vdp->thread_info, NULL, 0, &subpg)) != 0) {
2780 err_ret = ret;
2781 continue;
2782 }
2783
2784 /*
2785 * Verify the subdatabase meta page. This has two functions.
2786 * First, if it's bad, we have no choice but to skip the subdb
2787 * and let the pages just get printed on a later pass. Second,
2788 * the access-method-specific meta verification routines record
2789 * the various state info (such as the presence of dups)
2790 * that we need for __db_prheader().
2791 */
2792 if ((ret =
2793 __db_vrfy_common(dbp, vdp, subpg, meta_pgno, flags)) != 0) {
2794 err_ret = ret;
2795 (void)__memp_fput(mpf,
2796 vdp->thread_info, subpg, dbp->priority);
2797 continue;
2798 }
2799 switch (TYPE(subpg)) {
2800 case P_BTREEMETA:
2801 if ((ret = __bam_vrfy_meta(dbp,
2802 vdp, (BTMETA *)subpg, meta_pgno, flags)) != 0) {
2803 err_ret = ret;
2804 (void)__memp_fput(mpf,
2805 vdp->thread_info, subpg, dbp->priority);
2806 continue;
2807 }
2808 break;
2809 case P_HASHMETA:
2810 if ((ret = __ham_vrfy_meta(dbp,
2811 vdp, (HMETA *)subpg, meta_pgno, flags)) != 0) {
2812 err_ret = ret;
2813 (void)__memp_fput(mpf,
2814 vdp->thread_info, subpg, dbp->priority);
2815 continue;
2816 }
2817 break;
2818 default:
2819 /* This isn't an appropriate page; skip this subdb. */
2820 err_ret = DB_VERIFY_BAD;
2821 continue;
2822 }
2823
2824 if ((ret = __memp_fput(mpf,
2825 vdp->thread_info, subpg, dbp->priority)) != 0) {
2826 err_ret = ret;
2827 continue;
2828 }
2829
2830 /* Print a subdatabase header. */
2831 if ((ret = __db_prheader(dbp,
2832 subdbname, 0, 0, handle, callback, vdp, meta_pgno)) != 0)
2833 goto err;
2834
2835 /* Salvage meta_pgno's tree. */
2836 if ((ret = __db_salvage(dbp,
2837 vdp, meta_pgno, handle, callback, flags)) != 0)
2838 err_ret = ret;
2839
2840 /* Print a subdatabase footer. */
2841 if ((ret = __db_prfooter(handle, callback)) != 0)
2842 goto err;
2843 }
2844
2845 err: if (subdbname)
2846 __os_free(env, subdbname);
2847
2848 if (pgsc != NULL && (t_ret = __dbc_close(pgsc)) != 0)
2849 ret = t_ret;
2850
2851 if (pgset != NULL && (t_ret = __db_close(pgset, NULL, 0)) != 0)
2852 ret = t_ret;
2853
2854 if ((t_ret = __db_salvage_markdone(vdp, PGNO(master))) != 0)
2855 return (t_ret);
2856
2857 return ((err_ret != 0) ? err_ret : ret);
2858 }
2859
2860 /*
2861 * __db_salvage --
2862 * Given a meta page number, salvage all data from leaf pages found by
2863 * walking the meta page's tree.
2864 */
2865 static int
__db_salvage(dbp,vdp,meta_pgno,handle,callback,flags)2866 __db_salvage(dbp, vdp, meta_pgno, handle, callback, flags)
2867 DB *dbp;
2868 VRFY_DBINFO *vdp;
2869 db_pgno_t meta_pgno;
2870 void *handle;
2871 int (*callback) __P((void *, const void *));
2872 u_int32_t flags;
2873
2874 {
2875 DB *pgset;
2876 DBC *dbc, *pgsc;
2877 DB_MPOOLFILE *mpf;
2878 ENV *env;
2879 PAGE *subpg;
2880 db_pgno_t p;
2881 int err_ret, ret, t_ret;
2882
2883 env = dbp->env;
2884 mpf = dbp->mpf;
2885 err_ret = ret = t_ret = 0;
2886 pgsc = NULL;
2887 pgset = NULL;
2888 dbc = NULL;
2889
2890 if ((ret = __db_vrfy_pgset(env,
2891 vdp->thread_info, dbp->pgsize, &pgset)) != 0)
2892 goto err;
2893
2894 /* Get all page numbers referenced from this meta page. */
2895 if ((ret = __db_meta2pgset(dbp, vdp, meta_pgno,
2896 flags, pgset)) != 0) {
2897 err_ret = ret;
2898 goto err;
2899 }
2900
2901 if ((ret = __db_cursor(pgset,
2902 vdp->thread_info, NULL, &pgsc, 0)) != 0)
2903 goto err;
2904
2905 if (dbp->type == DB_QUEUE &&
2906 (ret = __db_cursor(dbp, vdp->thread_info, NULL, &dbc, 0)) != 0)
2907 goto err;
2908
2909 /* Salvage every page in pgset. */
2910 while ((ret = __db_vrfy_pgset_next(pgsc, &p)) == 0) {
2911 if (dbp->type == DB_QUEUE) {
2912 #ifdef HAVE_QUEUE
2913 ret = __qam_fget(dbc, &p, 0, &subpg);
2914 #else
2915 ret = __db_no_queue_am(env);
2916 #endif
2917 /* Don't report an error for pages not found in a queue.
2918 * The pgset is a best guess, it doesn't know about
2919 * deleted extents which leads to this error.
2920 */
2921 if (ret == ENOENT || ret == DB_PAGE_NOTFOUND)
2922 continue;
2923 } else
2924 ret = __memp_fget(mpf,
2925 &p, vdp->thread_info, NULL, 0, &subpg);
2926 if (ret != 0) {
2927 err_ret = ret;
2928 continue;
2929 }
2930
2931 if ((ret = __db_salvage_pg(dbp, vdp, p, subpg,
2932 handle, callback, flags)) != 0)
2933 err_ret = ret;
2934
2935 if (dbp->type == DB_QUEUE)
2936 #ifdef HAVE_QUEUE
2937 ret = __qam_fput(dbc, p, subpg, dbp->priority);
2938 #else
2939 ret = __db_no_queue_am(env);
2940 #endif
2941 else
2942 ret = __memp_fput(mpf,
2943 vdp->thread_info, subpg, dbp->priority);
2944 if (ret != 0)
2945 err_ret = ret;
2946 }
2947
2948 if (ret == DB_NOTFOUND)
2949 ret = 0;
2950
2951 err:
2952 if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0)
2953 ret = t_ret;
2954 if (pgsc != NULL && (t_ret = __dbc_close(pgsc)) != 0)
2955 ret = t_ret;
2956 if (pgset != NULL && (t_ret = __db_close(pgset, NULL, 0)) != 0)
2957 ret = t_ret;
2958
2959 return ((err_ret != 0) ? err_ret : ret);
2960 }
2961
2962 /*
2963 * __db_meta2pgset --
2964 * Given a known-safe meta page number, return the set of pages
2965 * corresponding to the database it represents. Return DB_VERIFY_BAD if
2966 * it's not a suitable meta page or is invalid.
2967 */
2968 static int
__db_meta2pgset(dbp,vdp,pgno,flags,pgset)2969 __db_meta2pgset(dbp, vdp, pgno, flags, pgset)
2970 DB *dbp;
2971 VRFY_DBINFO *vdp;
2972 db_pgno_t pgno;
2973 u_int32_t flags;
2974 DB *pgset;
2975 {
2976 DB_MPOOLFILE *mpf;
2977 PAGE *h;
2978 int ret, t_ret;
2979
2980 mpf = dbp->mpf;
2981
2982 if ((ret = __memp_fget(mpf, &pgno, vdp->thread_info, NULL, 0, &h)) != 0)
2983 return (ret);
2984
2985 switch (TYPE(h)) {
2986 case P_BTREEMETA:
2987 ret = __bam_meta2pgset(dbp, vdp, (BTMETA *)h, flags, pgset);
2988 break;
2989 case P_HASHMETA:
2990 ret = __ham_meta2pgset(dbp, vdp, (HMETA *)h, flags, pgset);
2991 break;
2992 case P_HEAPMETA:
2993 ret = __heap_meta2pgset(dbp, vdp, (HEAPMETA *)h, pgset);
2994 break;
2995 case P_QAMMETA:
2996 #ifdef HAVE_QUEUE
2997 ret = __qam_meta2pgset(dbp, vdp, pgset);
2998 break;
2999 #endif
3000 default:
3001 ret = DB_VERIFY_BAD;
3002 break;
3003 }
3004
3005 if ((t_ret = __memp_fput(mpf, vdp->thread_info, h, dbp->priority)) != 0)
3006 return (t_ret);
3007 return (ret);
3008 }
3009
3010 /*
3011 * __db_guesspgsize --
3012 * Try to guess what the pagesize is if the one on the meta page
3013 * and the one in the db are invalid.
3014 */
3015 static u_int
__db_guesspgsize(env,fhp)3016 __db_guesspgsize(env, fhp)
3017 ENV *env;
3018 DB_FH *fhp;
3019 {
3020 db_pgno_t i;
3021 size_t nr;
3022 u_int32_t guess;
3023 u_int8_t type;
3024
3025 for (guess = DB_MAX_PGSIZE; guess >= DB_MIN_PGSIZE; guess >>= 1) {
3026 /*
3027 * We try to read three pages ahead after the first one
3028 * and make sure we have plausible types for all of them.
3029 * If the seeks fail, continue with a smaller size;
3030 * we're probably just looking past the end of the database.
3031 * If they succeed and the types are reasonable, also continue
3032 * with a size smaller; we may be looking at pages N,
3033 * 2N, and 3N for some N > 1.
3034 *
3035 * As soon as we hit an invalid type, we stop and return
3036 * our previous guess; that last one was probably the page size.
3037 */
3038 for (i = 1; i <= 3; i++) {
3039 if (__os_seek(
3040 env, fhp, i, guess, SSZ(DBMETA, type)) != 0)
3041 break;
3042 if (__os_read(env,
3043 fhp, &type, 1, &nr) != 0 || nr == 0)
3044 break;
3045 if (type == P_INVALID || type >= P_PAGETYPE_MAX)
3046 return (guess << 1);
3047 }
3048 }
3049
3050 /*
3051 * If we're just totally confused--the corruption takes up most of the
3052 * beginning pages of the database--go with the default size.
3053 */
3054 return (DB_DEF_IOSIZE);
3055 }
3056