1 /*-
2 * Copyright (c) 1996, 2020 Oracle and/or its affiliates. All rights reserved.
3 *
4 * See the file LICENSE for license information.
5 */
6 /*
7 * Copyright (c) 1990, 1993, 1994, 1995, 1996
8 * Keith Bostic. All rights reserved.
9 */
10 /*
11 * Copyright (c) 1990, 1993, 1994, 1995
12 * The Regents of the University of California. All rights reserved.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * 1. Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution.
22 * 3. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * $Id$
39 */
40
41 #include "db_config.h"
42
43 #include "db_int.h"
44 #include "dbinc/crypto.h"
45 #include "dbinc/hmac.h"
46 #include "dbinc/db_page.h"
47 #include "dbinc/db_swap.h"
48 #include "dbinc/btree.h"
49 #include "dbinc/fop.h"
50 #include "dbinc/hash.h"
51 #include "dbinc/heap.h"
52 #include "dbinc/qam.h"
53
54 static int __db_convert_extent
55 __P((ENV *, const char *, u_int32_t, u_int32_t));
56 static int __db_convert_extent_names __P((DB *, DBMETA *, char *, char ***));
57 static int __db_swap __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *));
58
59 /*
60 * __db_pgin --
61 * Primary page-swap routine.
62 *
63 * PUBLIC: int __db_pgin __P((DB_ENV *, db_pgno_t, void *, DBT *));
64 */
65 int
__db_pgin(dbenv,pg,pp,cookie)66 __db_pgin(dbenv, pg, pp, cookie)
67 DB_ENV *dbenv;
68 db_pgno_t pg;
69 void *pp;
70 DBT *cookie;
71 {
72 DB dummydb, *dbp;
73 DB_CIPHER *db_cipher;
74 DB_LSN not_used;
75 DB_PGINFO *pginfo;
76 ENV *env;
77 PAGE *pagep;
78 size_t sum_len;
79 int is_hmac, ret;
80 u_int8_t *chksum;
81
82 pginfo = (DB_PGINFO *)cookie->data;
83 env = dbenv->env;
84 pagep = (PAGE *)pp;
85
86 ret = is_hmac = 0;
87 chksum = NULL;
88 memset(&dummydb, 0, sizeof(DB));
89 dbp = &dummydb;
90 dbp->dbenv = dbenv;
91 dbp->env = env;
92 dbp->flags = pginfo->flags;
93 dbp->pgsize = pginfo->db_pagesize;
94 db_cipher = env->crypto_handle;
95 switch (pagep->type) {
96 case P_HASHMETA:
97 case P_HEAPMETA:
98 case P_BTREEMETA:
99 case P_QAMMETA:
100 /*
101 * If checksumming is set on the meta-page, we must set
102 * it in the dbp.
103 */
104 if (FLD_ISSET(((DBMETA *)pp)->metaflags, DBMETA_CHKSUM))
105 F_SET(dbp, DB_AM_CHKSUM);
106 else
107 F_CLR(dbp, DB_AM_CHKSUM);
108 if (((DBMETA *)pp)->encrypt_alg != 0 ||
109 F_ISSET(dbp, DB_AM_ENCRYPT))
110 is_hmac = 1;
111 /*
112 * !!!
113 * For all meta pages it is required that the chksum
114 * be at the same location. Use BTMETA to get to it
115 * for any meta type.
116 */
117 chksum = ((BTMETA *)pp)->chksum;
118 sum_len = DBMETASIZE;
119 break;
120 case P_INVALID:
121 /*
122 * We assume that we've read a file hole if we have
123 * a zero LSN, zero page number and P_INVALID. Otherwise
124 * we have an invalid page that might contain real data.
125 */
126 if (IS_ZERO_LSN(LSN(pagep)) && pagep->pgno == PGNO_INVALID) {
127 sum_len = 0;
128 break;
129 }
130 /* FALLTHROUGH */
131 default:
132 chksum = P_CHKSUM(dbp, pagep);
133 sum_len = pginfo->db_pagesize;
134 /*
135 * If we are reading in a non-meta page, then if we have
136 * a db_cipher then we are using hmac.
137 */
138 is_hmac = CRYPTO_ON(env) ? 1 : 0;
139 break;
140 }
141
142 /*
143 * We expect a checksum error if there was a configuration problem.
144 * If there is no configuration problem and we don't get a match,
145 * it's fatal: panic the system.
146 */
147 if (F_ISSET(dbp, DB_AM_CHKSUM) && sum_len != 0) {
148 if (F_ISSET(dbp, DB_AM_SWAP) && is_hmac == 0)
149 P_32_SWAP(chksum);
150 switch (ret = __db_check_chksum(
151 env, NULL, db_cipher, chksum, pp, sum_len, is_hmac)) {
152 case 0:
153 break;
154 case -1:
155 if (DBENV_LOGGING(env))
156 (void)__db_cksum_log(
157 env, NULL, ¬_used, DB_FLUSH);
158 __db_errx(env, DB_STR_A("0684",
159 "checksum error: page %lu: catastrophic recovery required",
160 "%lu"), (u_long)pg);
161 return (__env_panic(env, DB_RUNRECOVERY));
162 default:
163 return (ret);
164 }
165 }
166 if ((ret = __db_decrypt_pg(env, dbp, pagep)) != 0)
167 return (ret);
168 switch (pagep->type) {
169 case P_INVALID:
170 if (pginfo->type == DB_QUEUE)
171 return (__qam_pgin_out(env, pg, pp, cookie));
172 else if (pginfo->type == DB_HEAP)
173 return (__heap_pgin(dbp, pg, pp, cookie));
174 /*
175 * This page is either newly allocated from the end of the
176 * file, or from the free list, or it is an as-yet unwritten
177 * hash bucket page. In this last case it needs to be
178 * initialized, but never byte-swapped. Otherwise the header
179 * may need swapping. It will not be a metadata page, so the
180 * byte swapping code of __ham_pgin is adequate. If hash
181 * is not configured fall back to btree swapping.
182 */
183 #ifdef HAVE_HASH
184 return (__ham_pgin(dbp, pg, pp, cookie));
185 #else
186 return (__bam_pgin(dbp, pg, pp, cookie));
187 #endif
188 /* NOTREACHED. */
189 break;
190 case P_HASH_UNSORTED:
191 case P_HASH:
192 case P_HASHMETA:
193 return (__ham_pgin(dbp, pg, pp, cookie));
194 case P_HEAP:
195 case P_HEAPMETA:
196 case P_IHEAP:
197 return (__heap_pgin(dbp, pg, pp, cookie));
198 case P_BTREEMETA:
199 case P_IBTREE:
200 case P_IRECNO:
201 case P_LBTREE:
202 case P_LDUP:
203 case P_LRECNO:
204 case P_OVERFLOW:
205 return (__bam_pgin(dbp, pg, pp, cookie));
206 case P_QAMMETA:
207 case P_QAMDATA:
208 return (__qam_pgin_out(env, pg, pp, cookie));
209 default:
210 break;
211 }
212 return (__db_pgfmt(env, pg));
213 }
214
215 /*
216 * __db_pgout --
217 * Primary page-swap routine.
218 *
219 * PUBLIC: int __db_pgout __P((DB_ENV *, db_pgno_t, void *, DBT *));
220 */
221 int
__db_pgout(dbenv,pg,pp,cookie)222 __db_pgout(dbenv, pg, pp, cookie)
223 DB_ENV *dbenv;
224 db_pgno_t pg;
225 void *pp;
226 DBT *cookie;
227 {
228 DB dummydb, *dbp;
229 DB_PGINFO *pginfo;
230 ENV *env;
231 PAGE *pagep;
232 int ret;
233
234 pginfo = (DB_PGINFO *)cookie->data;
235 env = dbenv->env;
236 pagep = (PAGE *)pp;
237
238 memset(&dummydb, 0, sizeof(DB));
239 dbp = &dummydb;
240 dbp->dbenv = dbenv;
241 dbp->env = env;
242 dbp->flags = pginfo->flags;
243 dbp->pgsize = pginfo->db_pagesize;
244 ret = 0;
245 switch (pagep->type) {
246 case P_INVALID:
247 switch (pginfo->type) {
248 case DB_QUEUE:
249 ret = __qam_pgin_out(env, pg, pp, cookie);
250 break;
251 #ifdef HAVE_HASH
252 case DB_HASH:
253 ret = __ham_pgout(dbp, pg, pp, cookie);
254 break;
255 #endif
256 #ifdef HAVE_HEAP
257 case DB_HEAP:
258 ret = __heap_pgout(dbp, pg, pp, cookie);
259 break;
260 #endif
261 case DB_BTREE:
262 case DB_RECNO:
263 ret = __bam_pgout(dbp, pg, pp, cookie);
264 break;
265 default:
266 return (__db_pgfmt(env, pg));
267 }
268 break;
269 case P_HASH:
270 case P_HASH_UNSORTED:
271 /*
272 * Support pgout of unsorted hash pages - since online
273 * replication upgrade can cause pages of this type to be
274 * written out.
275 *
276 * FALLTHROUGH
277 */
278 case P_HASHMETA:
279 ret = __ham_pgout(dbp, pg, pp, cookie);
280 break;
281 case P_HEAP:
282 case P_HEAPMETA:
283 case P_IHEAP:
284 ret = __heap_pgout(dbp, pg, pp, cookie);
285 break;
286 case P_BTREEMETA:
287 case P_IBTREE:
288 case P_IRECNO:
289 case P_LBTREE:
290 case P_LDUP:
291 case P_LRECNO:
292 case P_OVERFLOW:
293 ret = __bam_pgout(dbp, pg, pp, cookie);
294 break;
295 case P_QAMMETA:
296 case P_QAMDATA:
297 ret = __qam_pgin_out(env, pg, pp, cookie);
298 break;
299 default:
300 return (__db_pgfmt(env, pg));
301 }
302 if (ret)
303 return (ret);
304
305 return (__db_encrypt_and_checksum_pg(env, dbp, pagep));
306 }
307
308 /*
309 * __db_decrypt_pg --
310 * Utility function to decrypt a db page.
311 *
312 * PUBLIC: int __db_decrypt_pg __P((ENV *, DB *, PAGE *));
313 */
314 int
__db_decrypt_pg(env,dbp,pagep)315 __db_decrypt_pg (env, dbp, pagep)
316 ENV *env;
317 DB *dbp;
318 PAGE *pagep;
319 {
320 DB_CIPHER *db_cipher;
321 size_t pg_len, pg_off;
322 u_int8_t *iv;
323 int ret;
324
325 db_cipher = env->crypto_handle;
326 ret = 0;
327 iv = NULL;
328 if (F_ISSET(dbp, DB_AM_ENCRYPT)) {
329 DB_ASSERT(env, db_cipher != NULL);
330 DB_ASSERT(env, F_ISSET(dbp, DB_AM_CHKSUM));
331
332 pg_off = P_OVERHEAD(dbp);
333 DB_ASSERT(env, db_cipher->adj_size(pg_off) == 0);
334
335 switch (pagep->type) {
336 case P_HASHMETA:
337 case P_HEAPMETA:
338 case P_BTREEMETA:
339 case P_QAMMETA:
340 /*
341 * !!!
342 * For all meta pages it is required that the iv
343 * be at the same location. Use BTMETA to get to it
344 * for any meta type.
345 */
346 iv = ((BTMETA *)pagep)->iv;
347 pg_len = DBMETASIZE;
348 break;
349 case P_INVALID:
350 if (IS_ZERO_LSN(LSN(pagep)) &&
351 pagep->pgno == PGNO_INVALID) {
352 pg_len = 0;
353 break;
354 }
355 /* FALLTHROUGH */
356 default:
357 iv = P_IV(dbp, pagep);
358 pg_len = dbp->pgsize;
359 break;
360 }
361 if (pg_len != 0)
362 ret = db_cipher->decrypt(env, db_cipher->data,
363 iv, ((u_int8_t *)pagep) + pg_off,
364 pg_len - pg_off);
365 }
366 return (ret);
367 }
368
369 /*
370 * __db_encrypt_and_checksum_pg --
371 * Utility function to encrypt and checksum a db page.
372 *
373 * PUBLIC: int __db_encrypt_and_checksum_pg
374 * PUBLIC: __P((ENV *, DB *, PAGE *));
375 */
376 int
__db_encrypt_and_checksum_pg(env,dbp,pagep)377 __db_encrypt_and_checksum_pg (env, dbp, pagep)
378 ENV *env;
379 DB *dbp;
380 PAGE *pagep;
381 {
382 DB_CIPHER *db_cipher;
383 int ret;
384 size_t pg_off, pg_len, sum_len;
385 u_int8_t *chksum, *iv, *key;
386
387 chksum = iv = key = NULL;
388 db_cipher = env->crypto_handle;
389
390 if (F_ISSET(dbp, DB_AM_ENCRYPT)) {
391 DB_ASSERT(env, db_cipher != NULL);
392 DB_ASSERT(env, F_ISSET(dbp, DB_AM_CHKSUM));
393
394 pg_off = P_OVERHEAD(dbp);
395 DB_ASSERT(env, db_cipher->adj_size(pg_off) == 0);
396
397 key = db_cipher->mac_key;
398
399 switch (pagep->type) {
400 case P_HASHMETA:
401 case P_HEAPMETA:
402 case P_BTREEMETA:
403 case P_QAMMETA:
404 /*
405 * !!!
406 * For all meta pages it is required that the iv
407 * be at the same location. Use BTMETA to get to it
408 * for any meta type.
409 */
410 iv = ((BTMETA *)pagep)->iv;
411 pg_len = DBMETASIZE;
412 break;
413 default:
414 iv = P_IV(dbp, pagep);
415 pg_len = dbp->pgsize;
416 break;
417 }
418 if ((ret = db_cipher->encrypt(env, db_cipher->data,
419 iv, ((u_int8_t *)pagep) + pg_off, pg_len - pg_off)) != 0)
420 return (ret);
421 }
422 if (F_ISSET(dbp, DB_AM_CHKSUM)) {
423 switch (pagep->type) {
424 case P_HASHMETA:
425 case P_HEAPMETA:
426 case P_BTREEMETA:
427 case P_QAMMETA:
428 /*
429 * !!!
430 * For all meta pages it is required that the chksum
431 * be at the same location. Use BTMETA to get to it
432 * for any meta type.
433 */
434 chksum = ((BTMETA *)pagep)->chksum;
435 sum_len = DBMETASIZE;
436 break;
437 default:
438 chksum = P_CHKSUM(dbp, pagep);
439 sum_len = dbp->pgsize;
440 break;
441 }
442 __db_chksum(NULL, (u_int8_t *)pagep, sum_len, key, chksum);
443 if (F_ISSET(dbp, DB_AM_SWAP) && !F_ISSET(dbp, DB_AM_ENCRYPT))
444 P_32_SWAP(chksum);
445 }
446 return (0);
447 }
448
449 /*
450 * __db_metaswap --
451 * Byteswap the common part of the meta-data page.
452 *
453 * PUBLIC: void __db_metaswap __P((PAGE *));
454 */
455 void
__db_metaswap(pg)456 __db_metaswap(pg)
457 PAGE *pg;
458 {
459 u_int8_t *p;
460
461 p = (u_int8_t *)pg;
462
463 /* Swap the meta-data information. */
464 SWAP32(p); /* lsn.file */
465 SWAP32(p); /* lsn.offset */
466 SWAP32(p); /* pgno */
467 SWAP32(p); /* magic */
468 SWAP32(p); /* version */
469 SWAP32(p); /* pagesize */
470 p += 4; /* unused, page type, unused, unused */
471 SWAP32(p); /* free */
472 SWAP32(p); /* alloc_lsn part 1 */
473 SWAP32(p); /* alloc_lsn part 2 */
474 SWAP32(p); /* cached key count */
475 SWAP32(p); /* cached record count */
476 SWAP32(p); /* flags */
477 }
478
/*
 * __db_byteswap --
 *	Byteswap an ordinary database page.
 *
 *	dbp	 database handle; when NULL only the page header is handled
 *		 (see the note below the header swap).
 *	pg	 page number, used only for __db_pgfmt diagnostics.
 *	h	 the page (or partial page) to convert in place.
 *	pagesize number of valid bytes at h; may be less than a full page.
 *	pgin	 non-zero when converting into host order, zero when
 *		 converting out of host order.
 *
 *	On the way in, the header and each index entry are swapped before
 *	they are used; on the way out they are used first and swapped last,
 *	so offsets and counts are always read in host order.
 *
 *	Returns 0 on success, EINVAL for an out-of-range pagesize, and the
 *	result of __db_pgfmt when the page contents fail a sanity check.
 *
 * PUBLIC: int __db_byteswap
 * PUBLIC:         __P((DB *, db_pgno_t, PAGE *, size_t, int));
 */
int
__db_byteswap(dbp, pg, h, pagesize, pgin)
	DB *dbp;
	db_pgno_t pg;
	PAGE *h;
	size_t pagesize;
	int pgin;
{
	ENV *env;
	BINTERNAL *bi;
	BBLOB *bl;
	BKEYDATA *bk;
	BOVERFLOW *bo;
	HEAPBLOBHDR *bhdr;
	HEAPHDR *hh;
	HEAPSPLITHDR *hsh;
	RINTERNAL *ri;
	db_indx_t i, *inp, len, tmp;
	u_int8_t *end, *p, *pgend;

	/*
	 * This function is also used to byteswap logs, so the pagesize
	 * might not be an actual page size; only a coarse range check is
	 * possible.
	 */
	if (!(pagesize >= 24 && pagesize <= DB_MAX_PGSIZE))
		return (EINVAL);

	/* Incoming page: swap the header first so it can be read below. */
	if (pgin) {
		M_32_SWAP(h->lsn.file);
		M_32_SWAP(h->lsn.offset);
		M_32_SWAP(h->pgno);
		if (TYPE(h) == P_HEAP) {
			M_32_SWAP(((HEAPPG *)h)->high_pgno);
			M_16_SWAP(((HEAPPG *)h)->high_indx);
			M_16_SWAP(((HEAPPG *)h)->free_indx);
		} else {
			M_32_SWAP(h->prev_pgno);
			M_32_SWAP(h->next_pgno);
		}
		M_16_SWAP(h->entries);
		M_16_SWAP(h->hf_offset);
	}

	/*
	 * NOTE(review): with dbp == NULL and pgin == 0 this returns before
	 * the header swap at "out", so nothing at all is swapped -- confirm
	 * that callers depend on this.
	 */
	if (dbp == NULL)
		return (0);
	env = dbp->env;

	pgend = (u_int8_t *)h + pagesize;

	inp = P_INP(dbp, h);
	if ((u_int8_t *)inp > pgend)
		return (__db_pgfmt(env, pg));

	switch (TYPE(h)) {
	case P_HASH_UNSORTED:
	case P_HASH:
		for (i = 0; i < NUM_ENT(h); i++) {
			if ((u_int8_t*)(inp + i) >= pgend)
				return (__db_pgfmt(env, pg));
			/* Zero index slots are skipped untouched. */
			if (inp[i] == 0)
				continue;
			if (pgin)
				M_16_SWAP(inp[i]);
			if (inp[i] >= pagesize)
				return (__db_pgfmt(env, pg));

			if (P_ENTRY(dbp, h, i) >= pgend)
				return (__db_pgfmt(env, pg));

			switch (HPAGE_TYPE(dbp, h, i)) {
			case H_BLOB:
				if ((inp[i] + HBLOB_SIZE) > pagesize)
					return (__db_pgfmt(env, pg));
				p = HBLOB_ID(P_ENTRY(dbp, h, i));
				SWAP64(p);			/* id */
				SWAP64(p);			/* size */
				p = HBLOB_FILE_ID(P_ENTRY(dbp, h, i));
				SWAP64(p);			/* file id */
				SWAP64(p);			/* sdb id */
				break;
			case H_KEYDATA:
				break;
			case H_DUPLICATE:
				if (LEN_HITEM(dbp, h, pagesize, i) <
				    HKEYDATA_SIZE(0))
					return (__db_pgfmt(env, pg));

				len = LEN_HKEYDATA(dbp, h, pagesize, i);
				p = HKEYDATA_DATA(P_ENTRY(dbp, h, i));

				end = p + len;
				if (end > pgend)
					return (__db_pgfmt(env, pg));

				/*
				 * Walk the duplicate set: a length word, that
				 * many bytes skipped, then a second length
				 * word.  The first length is captured in host
				 * order (after swapping on the way in, before
				 * swapping on the way out) so it can be used
				 * to step over the data.
				 */
				while (p < end) {
					if (pgin) {
						P_16_SWAP(p);
						memcpy(&tmp,
						    p, sizeof(db_indx_t));
						p += sizeof(db_indx_t);
					} else {
						memcpy(&tmp,
						    p, sizeof(db_indx_t));
						SWAP16(p);
					}
					p += tmp;
					if (p >= end)
						return (__db_pgfmt(env, pg));
					SWAP16(p);
				}
				break;
			case H_OFFDUP:
				if ((inp[i] + HOFFDUP_SIZE) > pagesize)
					return (__db_pgfmt(env, pg));
				p = HOFFPAGE_PGNO(P_ENTRY(dbp, h, i));
				SWAP32(p);			/* pgno */
				break;
			case H_OFFPAGE:
				if ((inp[i] + HOFFPAGE_SIZE) > pagesize)
					return (__db_pgfmt(env, pg));
				p = HOFFPAGE_PGNO(P_ENTRY(dbp, h, i));
				SWAP32(p);			/* pgno */
				SWAP32(p);			/* tlen */
				break;
			default:
				return (__db_pgfmt(env, pg));
			}
		}

		/*
		 * The offsets in the inp array are used to determine
		 * the size of entries on a page; therefore they
		 * cannot be converted until we've done all the
		 * entries.
		 */
		if (!pgin)
			for (i = 0; i < NUM_ENT(h); i++)
				M_16_SWAP(inp[i]);
		break;
	case P_LBTREE:
	case P_LDUP:
	case P_LRECNO:
		for (i = 0; i < NUM_ENT(h); i++) {
			if ((u_int8_t *)(inp + i) >= pgend)
				return (__db_pgfmt(env, pg));
			if (pgin)
				M_16_SWAP(inp[i]);
			if (inp[i] >= pagesize)
				return (__db_pgfmt(env, pg));

			/*
			 * In the case of on-page duplicates, key information
			 * should only be swapped once.
			 *
			 * On the way out inp[i - 2] has already been swapped,
			 * so inp[i] is swapped temporarily to compare the two
			 * and swapped back only when the item still needs
			 * processing.
			 */
			if (h->type == P_LBTREE && i > 1) {
				if (pgin) {
					if (inp[i] == inp[i - 2])
						continue;
				} else {
					M_16_SWAP(inp[i]);
					if (inp[i] == inp[i - 2])
						continue;
					M_16_SWAP(inp[i]);
				}
			}

			bk = GET_BKEYDATA(dbp, h, i);
			if ((u_int8_t *)bk >= pgend)
				return (__db_pgfmt(env, pg));
			switch (B_TYPE(bk->type)) {
			case B_BLOB:
				bl = (BBLOB *)bk;
				if (((u_int8_t*)bl + BBLOB_SIZE) > pgend)
					return (__db_pgfmt(env, pg));
				M_16_SWAP(bl->len);
				M_64_SWAP(bl->id);	/* id */
				M_64_SWAP(bl->size);	/* size */
				M_64_SWAP(bl->file_id);	/* file id */
				M_64_SWAP(bl->sdb_id);	/* sdb id */
				break;
			case B_KEYDATA:
				M_16_SWAP(bk->len);
				break;
			case B_DUPLICATE:
			case B_OVERFLOW:
				bo = (BOVERFLOW *)bk;
				if (((u_int8_t *)bo + BOVERFLOW_SIZE) > pgend)
					return (__db_pgfmt(env, pg));
				M_32_SWAP(bo->pgno);
				M_32_SWAP(bo->tlen);
				break;
			default:
				return (__db_pgfmt(env, pg));
			}

			if (!pgin)
				M_16_SWAP(inp[i]);
		}
		break;
	case P_IBTREE:
		for (i = 0; i < NUM_ENT(h); i++) {
			/*
			 * NOTE(review): this test uses ">" where the hash,
			 * leaf and recno-internal loops use ">="; with
			 * inp + i == pgend the access to inp[i] below lies
			 * past the buffer.  Confirm whether the laxer test is
			 * intended for partial pages.
			 */
			if ((u_int8_t *)(inp + i) > pgend)
				return (__db_pgfmt(env, pg));
			if (pgin)
				M_16_SWAP(inp[i]);
			/*
			 * An entry that does not fit in the supplied bytes
			 * ends the loop without an error.
			 */
			if ((u_int16_t)(inp[i] +
			    BINTERNAL_SIZE(0) - 1) > pagesize)
				break;

			bi = GET_BINTERNAL(dbp, h, i);
			if (((u_int8_t *)bi + BINTERNAL_SIZE(0)) > pgend)
				return (__db_pgfmt(env, pg));

			M_16_SWAP(bi->len);
			M_32_SWAP(bi->pgno);
			M_32_SWAP(bi->nrecs);

			switch (B_TYPE(bi->type)) {
			case B_KEYDATA:
				break;
			case B_DUPLICATE:
			case B_OVERFLOW:
				/* Same tolerance: stop quietly if truncated. */
				if ((u_int16_t)(inp[i] +
				    BINTERNAL_SIZE(BOVERFLOW_SIZE) - 1) >
				    pagesize)
					goto out;
				bo = (BOVERFLOW *)bi->data;
				M_32_SWAP(bo->pgno);
				M_32_SWAP(bo->tlen);
				break;
			default:
				return (__db_pgfmt(env, pg));
			}

			if (!pgin)
				M_16_SWAP(inp[i]);
		}
		break;
	case P_IRECNO:
		for (i = 0; i < NUM_ENT(h); i++) {
			if ((u_int8_t *)(inp + i) >= pgend)
				return (__db_pgfmt(env, pg));
			if (pgin)
				M_16_SWAP(inp[i]);
			if (inp[i] >= pagesize)
				return (__db_pgfmt(env, pg));

			ri = GET_RINTERNAL(dbp, h, i);
			if ((((u_int8_t *)ri) + RINTERNAL_SIZE) > pgend)
				return (__db_pgfmt(env, pg));

			M_32_SWAP(ri->pgno);
			M_32_SWAP(ri->nrecs);

			if (!pgin)
				M_16_SWAP(inp[i]);
		}
		break;
	case P_HEAP:
		/* Heap pages are walked up to HEAP_HIGHINDX, not NUM_ENT. */
		for (i = 0; i <= HEAP_HIGHINDX(h); i++) {
			if (i >= NUM_ENT(h))
				return (__db_pgfmt(env, pg));
			/*
			 * NOTE(review): ">" here as well, unlike the ">="
			 * used by the hash, leaf and recno-internal loops --
			 * confirm.
			 */
			if ((u_int8_t *)(inp + i) > pgend)
				return (__db_pgfmt(env, pg));
			if (pgin)
				M_16_SWAP(inp[i]);
			if (inp[i] >= pagesize)
				return (__db_pgfmt(env, pg));
			if (inp[i] == 0)
				continue;

			hh = (HEAPHDR *)P_ENTRY(dbp, h, i);
			if ((u_int8_t *)hh >= pgend)
				continue;
			M_16_SWAP(hh->size);
			if (pgin && ((inp[i] + HEAP_HDRSIZE(hh) + hh->size) > pagesize))
				break;
			if (F_ISSET(hh, HEAP_RECSPLIT)) {
				hsh = (HEAPSPLITHDR *)hh;
				if (((u_int8_t *)hsh + sizeof(HEAPSPLITHDR)) > pgend)
					return (__db_pgfmt(env, pg));
				M_32_SWAP(hsh->tsize);
				M_32_SWAP(hsh->nextpg);
				M_16_SWAP(hsh->nextindx);
			} else if (F_ISSET(hh, HEAP_RECBLOB)) {
				bhdr = (HEAPBLOBHDR *)hh;
				if (((u_int8_t*)bhdr + HEAPBLOBREC_SIZE) > pgend)
					return (__db_pgfmt(env, pg));
				M_64_SWAP(bhdr->id);		/* id */
				M_64_SWAP(bhdr->size);		/* size */
				M_64_SWAP(bhdr->file_id);	/* file id */
			}

			if (!pgin)
				M_16_SWAP(inp[i]);
		}
		break;
	case P_IHEAP:
	case P_INVALID:
	case P_OVERFLOW:
	case P_QAMDATA:
		/* Nothing to do. */
		break;
	default:
		return (__db_pgfmt(env, pg));
	}

	/* Outgoing page: the header is swapped last, once no longer needed. */
out:	if (!pgin) {
		/* Swap the header information. */
		M_32_SWAP(h->lsn.file);
		M_32_SWAP(h->lsn.offset);
		M_32_SWAP(h->pgno);
		if (TYPE(h) == P_HEAP) {
			M_32_SWAP(((HEAPPG *)h)->high_pgno);
			M_16_SWAP(((HEAPPG *)h)->high_indx);
			M_16_SWAP(((HEAPPG *)h)->free_indx);
		} else {
			M_32_SWAP(h->prev_pgno);
			M_32_SWAP(h->next_pgno);
		}
		M_16_SWAP(h->entries);
		M_16_SWAP(h->hf_offset);
	}
	return (0);
}
810
811 /*
812 * __db_pageswap --
813 * Byteswap any database page. Normally, the page to be swapped will be
814 * referenced by the "pp" argument and the pdata argument will be NULL.
815 * This function is also called by automatically generated log functions,
816 * where the page may be split into separate header and data parts. In
817 * that case, pdata is not NULL we reconsitute
818 *
819 * PUBLIC: int __db_pageswap
820 * PUBLIC: __P((ENV *, DB *, void *, size_t, DBT *, int));
821 */
822 int
__db_pageswap(env,dbp,pp,len,pdata,pgin)823 __db_pageswap(env, dbp, pp, len, pdata, pgin)
824 ENV *env;
825 DB *dbp;
826 void *pp;
827 size_t len;
828 DBT *pdata;
829 int pgin;
830 {
831 db_pgno_t pg;
832 size_t pgsize;
833 void *pgcopy;
834 int ret;
835 u_int16_t hoffset;
836
837 switch (TYPE(pp)) {
838 case P_BTREEMETA:
839 return (__bam_mswap(env, pp));
840
841 case P_HASHMETA:
842 return (__ham_mswap(env, pp));
843 #ifdef HAVE_HEAP
844 case P_HEAPMETA:
845 return (__heap_mswap(env, pp));
846 #endif
847 case P_QAMMETA:
848 return (__qam_mswap(env, pp));
849
850 case P_INVALID:
851 case P_OVERFLOW:
852 case P_QAMDATA:
853 /*
854 * We may have been passed an invalid page, or a queue data
855 * page, or an overflow page where fields like hoffset have a
856 * special meaning. In that case, no swapping of the page data
857 * is required, just the fields in the page header.
858 */
859 pdata = NULL;
860 break;
861
862 default:
863 break;
864 }
865
866 if (pgin) {
867 P_32_COPYSWAP(&PGNO(pp), &pg);
868 P_16_COPYSWAP(&HOFFSET(pp), &hoffset);
869 } else {
870 pg = PGNO(pp);
871 hoffset = HOFFSET(pp);
872 }
873
874 if (pdata == NULL)
875 ret = __db_byteswap(dbp, pg, (PAGE *)pp, len, pgin);
876 else {
877 pgsize = hoffset + pdata->size;
878 if ((ret = __os_malloc(env, pgsize, &pgcopy)) != 0)
879 return (ret);
880 memset(pgcopy, 0, pgsize);
881 memcpy(pgcopy, pp, len);
882 memcpy((u_int8_t *)pgcopy + hoffset, pdata->data, pdata->size);
883
884 ret = __db_byteswap(dbp, pg, (PAGE *)pgcopy, pgsize, pgin);
885 memcpy(pp, pgcopy, len);
886
887 /*
888 * If we are swapping data to be written to the log, we can't
889 * overwrite the buffer that was passed in: it may be a pointer
890 * into a page in cache. We set DB_DBT_APPMALLOC here so that
891 * the calling code can free the memory we allocate here.
892 */
893 if (!pgin) {
894 if ((ret =
895 __os_malloc(env, pdata->size, &pdata->data)) != 0) {
896 __os_free(env, pgcopy);
897 return (ret);
898 }
899 F_SET(pdata, DB_DBT_APPMALLOC);
900 }
901 memcpy(pdata->data, (u_int8_t *)pgcopy + hoffset, pdata->size);
902 __os_free(env, pgcopy);
903 }
904
905 return (ret);
906 }
907
908 /*
909 * __db_recordswap --
910 * Byteswap any database record.
911 *
912 * PUBLIC: void __db_recordswap __P((u_int32_t,
913 * PUBLIC: u_int32_t, void *, void *, u_int32_t));
914 */
915 void
__db_recordswap(op,size,hdr,data,pgin)916 __db_recordswap(op, size, hdr, data, pgin)
917 u_int32_t op;
918 u_int32_t size;
919 void *hdr, *data;
920 u_int32_t pgin;
921 {
922 BBLOB *bl;
923 BKEYDATA *bk;
924 BOVERFLOW *bo;
925 BINTERNAL *bi;
926 DBT *dbt;
927 HEAPHDR *hh;
928 HEAPBLOBHDR bhdr;
929 HEAPSPLITHDR *hsh;
930 RINTERNAL *ri;
931 db_indx_t tmp;
932 u_int8_t buf[HEAPBLOBREC_SIZE], *end, *p;
933
934 if (size == 0)
935 return;
936 switch (OP_PAGE_GET(op)) {
937 case P_LDUP:
938 case P_LBTREE:
939 case P_LRECNO:
940 bk = (BKEYDATA *)hdr;
941 switch (B_TYPE(bk->type)) {
942 case B_KEYDATA:
943 M_16_SWAP(bk->len);
944 break;
945 case B_BLOB:
946 bl = (BBLOB *)bk;
947 M_16_SWAP(bl->len);
948 M_64_SWAP(bl->id); /* id */
949 M_64_SWAP(bl->size); /* size */
950 M_64_SWAP(bl->file_id); /* file id */
951 M_64_SWAP(bl->sdb_id); /* sdb id */
952 break;
953 case B_DUPLICATE:
954 case B_OVERFLOW:
955 bo = (BOVERFLOW *)hdr;
956 M_32_SWAP(bo->pgno);
957 M_32_SWAP(bo->tlen);
958 break;
959 default:
960 DB_ASSERT(NULL, bk->type != bk->type);
961 }
962 break;
963 case P_IBTREE:
964 bi = (BINTERNAL *)hdr;
965 M_16_SWAP(bi->len);
966 M_32_SWAP(bi->pgno);
967 M_32_SWAP(bi->nrecs);
968 if (B_TYPE(bi->type) == B_OVERFLOW) {
969 if (data == NULL) {
970 DB_ASSERT(NULL,
971 size == BINTERNAL_SIZE(BOVERFLOW_SIZE));
972 bo = (BOVERFLOW *)bi->data;
973 } else
974 bo = (BOVERFLOW *)data;
975 M_32_SWAP(bo->pgno);
976 M_32_SWAP(bo->tlen);
977 }
978 break;
979 case P_IRECNO:
980 ri = (RINTERNAL *)hdr;
981 M_32_SWAP(ri->pgno);
982 M_32_SWAP(ri->nrecs);
983 break;
984 case P_OVERFLOW:
985 break;
986 case P_HASH:
987 case P_HASH_UNSORTED:
988 switch (OP_MODE_GET(op)) {
989 /* KEYDATA and DUPLICATE records do not include the header. */
990 case H_KEYDATA:
991 break;
992 case H_DUPLICATE:
993 p = (u_int8_t *)hdr;
994 for (end = p + size; p < end;) {
995 if (pgin) {
996 P_16_SWAP(p);
997 memcpy(&tmp,
998 p, sizeof(db_indx_t));
999 p += sizeof(db_indx_t);
1000 } else {
1001 memcpy(&tmp,
1002 p, sizeof(db_indx_t));
1003 SWAP16(p);
1004 }
1005 p += tmp;
1006 SWAP16(p);
1007 }
1008 break;
1009 /* These three record types include the full header. */
1010 case H_OFFDUP:
1011 p = (u_int8_t *)hdr;
1012 p += SSZ(HOFFDUP, pgno);
1013 SWAP32(p); /* pgno */
1014 break;
1015 case H_OFFPAGE:
1016 p = (u_int8_t *)hdr;
1017 p += SSZ(HOFFPAGE, pgno);
1018 SWAP32(p); /* pgno */
1019 SWAP32(p); /* tlen */
1020 break;
1021 case H_BLOB:
1022 p = HBLOB_ID(hdr);
1023 SWAP64(p); /* id */
1024 SWAP64(p); /* size */
1025 p = HBLOB_FILE_ID(hdr);
1026 SWAP64(p); /* file id */
1027 SWAP64(p); /* sdb id */
1028 break;
1029 default:
1030 DB_ASSERT(NULL, op != op);
1031 }
1032 break;
1033 case P_HEAP:
1034 hh = (HEAPHDR *)hdr;
1035 M_16_SWAP(hh->size);
1036 if (F_ISSET(hh, HEAP_RECSPLIT)) {
1037 hsh = (HEAPSPLITHDR *)hdr;
1038 M_32_SWAP(hsh->tsize);
1039 M_32_SWAP(hsh->nextpg);
1040 M_16_SWAP(hsh->nextindx);
1041 }else if (F_ISSET(hh, HEAP_RECBLOB)) {
1042 /*
1043 * Heap blob records are broken into two parts when
1044 * logged, the shared header and the part that is
1045 * unique to blob records, which is stored in the
1046 * log data field.
1047 */
1048 if (data != NULL) {
1049 dbt = NULL;
1050 if (pgin) {
1051 dbt = data;
1052 memcpy(buf + sizeof(HEAPHDR),
1053 dbt->data, HEAPBLOBREC_DSIZE);
1054 } else {
1055 memcpy(buf + sizeof(HEAPHDR),
1056 data, HEAPBLOBREC_DSIZE);
1057 }
1058 memcpy(&bhdr, buf, HEAPBLOBREC_SIZE);
1059 M_64_SWAP(bhdr.id); /* id */
1060 M_64_SWAP(bhdr.size); /* size */
1061 M_64_SWAP(bhdr.file_id); /* file id */
1062 memcpy(buf, &bhdr, HEAPBLOBREC_SIZE);
1063 if (pgin) {
1064 memcpy(dbt->data,
1065 HEAPBLOBREC_DATA(buf),
1066 HEAPBLOBREC_DSIZE);
1067 } else {
1068 memcpy(data,
1069 HEAPBLOBREC_DATA(buf),
1070 HEAPBLOBREC_DSIZE);
1071 }
1072 }
1073 break;
1074 }
1075 break;
1076 default:
1077 DB_ASSERT(NULL, op != op);
1078 }
1079 }
1080
1081 /*
1082 * __db_swap --
1083 * Swap the byte order for a page. Used by __db_page_pass.
1084 */
1085 static int
__db_swap(dbp,real_name,flags,fhp,h,dirtyp)1086 __db_swap(dbp, real_name, flags, fhp, h, dirtyp)
1087 DB *dbp;
1088 char *real_name;
1089 u_int32_t flags;
1090 DB_FH *fhp;
1091 PAGE *h;
1092 int *dirtyp;
1093 {
1094 COMPQUIET(real_name, NULL);
1095 COMPQUIET(flags, 0);
1096 COMPQUIET(fhp, NULL);
1097 *dirtyp = 1;
1098 return __db_pageswap(dbp->env, dbp,
1099 h, dbp->pgsize, NULL, !F_ISSET(dbp, DB_AM_SWAP));
1100 }
1101
1102 static int (* const func_swap[P_PAGETYPE_MAX])
1103 __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)) = {
1104 NULL, /* P_INVALID */
1105 __db_swap, /* __P_DUPLICATE */
1106 __db_swap, /* P_HASH_UNSORTED */
1107 __db_swap, /* P_IBTREE */
1108 __db_swap, /* P_IRECNO */
1109 __db_swap, /* P_LBTREE */
1110 __db_swap, /* P_LRECNO */
1111 __db_swap, /* P_OVERFLOW */
1112 __db_swap, /* P_HASHMETA */
1113 __db_swap, /* P_BTREEMETA */
1114 __db_swap, /* P_QAMMETA */
1115 __db_swap, /* P_QAMDATA */
1116 __db_swap, /* P_LDUP */
1117 __db_swap, /* P_HASH */
1118 __db_swap, /* P_HEAPMETA */
1119 __db_swap, /* P_HEAP */
1120 __db_swap, /* P_IHEAP */
1121 };
1122
1123 /*
1124 * __db_convert_pp --
1125 * DB->convert pre/post processing.
1126 *
1127 * PUBLIC: int __db_convert_pp __P((DB *, const char *, u_int32_t));
1128 */
1129 int
__db_convert_pp(dbp,fname,lorder)1130 __db_convert_pp(dbp, fname, lorder)
1131 DB *dbp;
1132 const char *fname;
1133 u_int32_t lorder;
1134 {
1135 DB_THREAD_INFO *ip;
1136 ENV *env;
1137 int ret;
1138
1139 env = dbp->env;
1140
1141 ENV_ENTER(env, ip);
1142 ret = __db_convert(dbp, fname, lorder);
1143
1144 #ifdef HAVE_SLICES
1145 if (ret == 0)
1146 ret = __db_slice_process(dbp, fname, lorder,
1147 __db_convert_pp, "db_convert");
1148 #endif
1149
1150 ENV_LEAVE(env, ip);
1151 return (ret);
1152 }
1153
1154 /*
1155 * __db_convert_extent --
1156 * Convert the byte order of each database extent (a queue or partition
1157 * extent).
1158 */
1159 static int
__db_convert_extent(env,fname,pagesize,flags)1160 __db_convert_extent(env, fname, pagesize, flags)
1161 ENV *env;
1162 const char *fname;
1163 u_int32_t pagesize;
1164 u_int32_t flags;
1165 {
1166 DB *dbp;
1167 DB_FH *fhp;
1168 char *real_name;
1169 int ret, t_ret;
1170
1171 dbp = NULL;
1172 fhp = NULL;
1173 ret = t_ret = 0;
1174
1175 /* Get the real backing file name. */
1176 if ((ret = __db_appname(env,
1177 DB_APP_DATA, fname, NULL, &real_name)) != 0)
1178 return (ret);
1179
1180 /* Open the file. */
1181 if ((ret = __os_open(env, real_name, 0, 0, 0, &fhp)) != 0) {
1182 __db_err(env, ret, "%s", real_name);
1183 goto err;
1184 }
1185
1186 if ((ret = __db_create_internal(&dbp, env, 0)) != 0)
1187 goto err;
1188
1189 dbp->pgsize = pagesize;
1190 dbp->flags = flags;
1191
1192 if ((ret = __db_page_pass(dbp,
1193 real_name, 0, func_swap, fhp, DB_CONVERT)) != 0)
1194 goto err;
1195 ret = __os_fsync(env, fhp);
1196
1197 err:
1198 if (fhp != NULL &&
1199 (t_ret = __os_closehandle(env, fhp)) != 0 && ret == 0)
1200 ret = t_ret;
1201 if (dbp != NULL && (t_ret = __db_close(dbp, NULL, 0) != 0) && ret == 0)
1202 ret = t_ret;
1203 __os_free(env, real_name);
1204
1205 return (ret);
1206 }
1207
1208 static int
__db_convert_extent_names(dbp,mbuf,fname,namelistp)1209 __db_convert_extent_names(dbp, mbuf, fname, namelistp)
1210 DB *dbp;
1211 DBMETA *mbuf;
1212 char *fname;
1213 char ***namelistp;
1214 {
1215 ENV *env;
1216
1217 env = dbp->env;
1218 *namelistp = NULL;
1219
1220 switch (mbuf->magic) {
1221 case DB_BTREEMAGIC:
1222 case DB_HASHMAGIC:
1223 #ifdef HAVE_PARTITION
1224 if (dbp->p_internal != NULL) {
1225 return __partition_extent_names(dbp, fname, namelistp);
1226 }
1227 #endif
1228 break;
1229 case DB_QAMMAGIC:
1230 if (F_ISSET(dbp, DB_AM_CHKSUM) &&
1231 ((QMETA*)mbuf)->page_ext != 0) {
1232 return __qam_extent_names(env, fname, namelistp);
1233 }
1234 break;
1235 case DB_HEAPMAGIC:
1236 default:
1237 break;
1238 }
1239
1240 return (0);
1241 }
1242
1243 /*
1244 * __db_convert --
1245 * Convert the byte order of a database.
1246 *
1247 * PUBLIC: int __db_convert __P((DB *, const char *, u_int32_t));
1248 */
1249 int
__db_convert(dbp,fname,lorder)1250 __db_convert(dbp, fname, lorder)
1251 DB *dbp;
1252 const char *fname;
1253 u_int32_t lorder;
1254 {
1255 ENV *env;
1256 DB_FH *fhp;
1257 u_int8_t mbuf[DBMETASIZE];
1258 char *real_name, **extent_names, **ename;
1259 size_t len;
1260 u_int32_t db_order;
1261 int t_ret, ret;
1262
1263 env = dbp->env;
1264 fhp = NULL;
1265 extent_names = NULL;
1266 real_name = NULL;
1267 len = 0;
1268 ret = t_ret = 0;
1269
1270 /* Get the real backing file name. */
1271 if ((ret = __db_appname(env,
1272 DB_APP_DATA, fname, NULL, &real_name)) != 0)
1273 return (ret);
1274
1275 /* Open the file. */
1276 if ((ret = __os_open(env, real_name, 0, 0, 0, &fhp)) != 0) {
1277 __db_err(env, ret, "%s", real_name);
1278 goto err;
1279 }
1280
1281 /* Read the metadata page. */
1282 if ((ret = __fop_read_meta(env, real_name, mbuf, sizeof(mbuf),
1283 fhp, 0, &len)) != 0)
1284 goto err;
1285
1286 switch (__db_needswap(((DBMETA *)mbuf)->magic)) {
1287 case 0:
1288 db_order = __db_isbigendian() ? 4321 : 1234;
1289 F_SET(dbp, DB_AM_SWAP);
1290 break;
1291 case DB_SWAPBYTES:
1292 db_order = __db_isbigendian() ? 1234 : 4321;
1293 M_32_SWAP(((DBMETA *)mbuf)->magic);
1294 M_32_SWAP(((DBMETA *)mbuf)->pagesize);
1295 F_CLR(dbp, DB_AM_SWAP);
1296 break;
1297 default:
1298 ret = USR_ERR(env, EINVAL);
1299 goto err;
1300 }
1301
1302 if (db_order != lorder) {
1303 dbp->pgsize = ((DBMETA*)mbuf)->pagesize;
1304 if (FLD_ISSET(((DBMETA *)mbuf)->metaflags, DBMETA_CHKSUM))
1305 F_SET(dbp, DB_AM_CHKSUM);
1306 if (((DBMETA*)mbuf)->encrypt_alg != 0) {
1307 if (!CRYPTO_ON(dbp->env)) {
1308 ret = USR_ERR(env, EINVAL);
1309 __db_errx(env, DB_STR("0667",
1310 "Attempt to convert an encrypted database without providing a password."));
1311 goto err;
1312 }
1313 F_SET(dbp, DB_AM_ENCRYPT);
1314 }
1315 if ((ret = __db_page_pass(dbp,
1316 real_name, 0, func_swap, fhp, DB_CONVERT)) != 0)
1317 goto err;
1318 ret = __os_fsync(env, fhp);
1319
1320 if ((ret = __db_convert_extent_names(dbp,
1321 (DBMETA*)mbuf, (char*)fname, &extent_names)) != 0)
1322 goto err;
1323 if (extent_names != NULL) {
1324 for (ename = extent_names; *ename != NULL; ename++) {
1325 if ((t_ret = __db_convert_extent(env, *ename,
1326 dbp->pgsize, dbp->flags)) != 0 && ret == 0)
1327 ret = t_ret;
1328 }
1329 }
1330 }
1331
1332 err: if (fhp != NULL &&
1333 (t_ret = __os_closehandle(env, fhp)) != 0 && ret == 0)
1334 ret = t_ret;
1335 if (real_name != NULL)
1336 __os_free(env, real_name);
1337 if (extent_names != NULL)
1338 __os_free(env, extent_names);
1339
1340 return (ret);
1341 }
1342