/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2006 Peter Wemm
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
289a527560SKonstantin Belousov
299a527560SKonstantin Belousov #include <sys/cdefs.h>
309a527560SKonstantin Belousov #include "opt_watchdog.h"
319a527560SKonstantin Belousov
329a527560SKonstantin Belousov #include <sys/param.h>
339a527560SKonstantin Belousov #include <sys/systm.h>
349a527560SKonstantin Belousov #include <sys/conf.h>
359a527560SKonstantin Belousov #include <sys/cons.h>
369a527560SKonstantin Belousov #include <sys/kernel.h>
379a527560SKonstantin Belousov #include <sys/kerneldump.h>
389a527560SKonstantin Belousov #include <sys/msgbuf.h>
399a527560SKonstantin Belousov #include <sys/watchdog.h>
409a527560SKonstantin Belousov #include <vm/vm.h>
4121943937SJeff Roberson #include <vm/vm_param.h>
4221943937SJeff Roberson #include <vm/vm_page.h>
4321943937SJeff Roberson #include <vm/vm_phys.h>
446f3b523cSKonstantin Belousov #include <vm/vm_dumpset.h>
459a527560SKonstantin Belousov #include <vm/pmap.h>
469a527560SKonstantin Belousov #include <machine/atomic.h>
479a527560SKonstantin Belousov #include <machine/elf.h>
489a527560SKonstantin Belousov #include <machine/md_var.h>
499a527560SKonstantin Belousov #include <machine/minidump.h>
509a527560SKonstantin Belousov
519a527560SKonstantin Belousov CTASSERT(sizeof(struct kerneldumpheader) == 512);
529a527560SKonstantin Belousov
539a527560SKonstantin Belousov #define MD_ALIGN(x) (((off_t)(x) + PAGE_MASK) & ~PAGE_MASK)
549a527560SKonstantin Belousov #define DEV_ALIGN(x) roundup2((off_t)(x), DEV_BSIZE)
559a527560SKonstantin Belousov
569a527560SKonstantin Belousov static struct kerneldumpheader kdh;
579a527560SKonstantin Belousov
589a527560SKonstantin Belousov /* Handle chunked writes. */
599a527560SKonstantin Belousov static size_t fragsz;
609a527560SKonstantin Belousov static void *dump_va;
619a527560SKonstantin Belousov
629a527560SKonstantin Belousov static int
blk_flush(struct dumperinfo * di)639a527560SKonstantin Belousov blk_flush(struct dumperinfo *di)
649a527560SKonstantin Belousov {
659a527560SKonstantin Belousov int error;
669a527560SKonstantin Belousov
679a527560SKonstantin Belousov if (fragsz == 0)
689a527560SKonstantin Belousov return (0);
699a527560SKonstantin Belousov
70db71383bSMitchell Horne error = dump_append(di, dump_va, fragsz);
719a527560SKonstantin Belousov fragsz = 0;
729a527560SKonstantin Belousov return (error);
739a527560SKonstantin Belousov }
749a527560SKonstantin Belousov
759a527560SKonstantin Belousov static int
blk_write(struct dumperinfo * di,char * ptr,vm_paddr_t pa,size_t sz)769a527560SKonstantin Belousov blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz)
779a527560SKonstantin Belousov {
789a527560SKonstantin Belousov size_t len;
799a527560SKonstantin Belousov int error, i, c;
809a527560SKonstantin Belousov u_int maxdumpsz;
819a527560SKonstantin Belousov
829a527560SKonstantin Belousov maxdumpsz = min(di->maxiosize, MAXDUMPPGS * PAGE_SIZE);
839a527560SKonstantin Belousov if (maxdumpsz == 0) /* seatbelt */
849a527560SKonstantin Belousov maxdumpsz = PAGE_SIZE;
859a527560SKonstantin Belousov error = 0;
869a527560SKonstantin Belousov if ((sz % PAGE_SIZE) != 0) {
879a527560SKonstantin Belousov printf("size not page aligned\n");
889a527560SKonstantin Belousov return (EINVAL);
899a527560SKonstantin Belousov }
909a527560SKonstantin Belousov if (ptr != NULL && pa != 0) {
919a527560SKonstantin Belousov printf("cant have both va and pa!\n");
929a527560SKonstantin Belousov return (EINVAL);
939a527560SKonstantin Belousov }
949a527560SKonstantin Belousov if (pa != 0 && (((uintptr_t)ptr) % PAGE_SIZE) != 0) {
959a527560SKonstantin Belousov printf("address not page aligned\n");
969a527560SKonstantin Belousov return (EINVAL);
979a527560SKonstantin Belousov }
989a527560SKonstantin Belousov if (ptr != NULL) {
999a527560SKonstantin Belousov /* If we're doing a virtual dump, flush any pre-existing pa pages */
1009a527560SKonstantin Belousov error = blk_flush(di);
1019a527560SKonstantin Belousov if (error)
1029a527560SKonstantin Belousov return (error);
1039a527560SKonstantin Belousov }
1049a527560SKonstantin Belousov while (sz) {
1059a527560SKonstantin Belousov len = maxdumpsz - fragsz;
1069a527560SKonstantin Belousov if (len > sz)
1079a527560SKonstantin Belousov len = sz;
1089a527560SKonstantin Belousov
109ab4ed843SMitchell Horne dumpsys_pb_progress(len);
1109a527560SKonstantin Belousov wdog_kern_pat(WD_LASTVAL);
1119a527560SKonstantin Belousov
1129a527560SKonstantin Belousov if (ptr) {
113db71383bSMitchell Horne error = dump_append(di, ptr, len);
1149a527560SKonstantin Belousov if (error)
1159a527560SKonstantin Belousov return (error);
1169a527560SKonstantin Belousov ptr += len;
1179a527560SKonstantin Belousov sz -= len;
1189a527560SKonstantin Belousov } else {
1199a527560SKonstantin Belousov for (i = 0; i < len; i += PAGE_SIZE)
1209a527560SKonstantin Belousov dump_va = pmap_kenter_temporary(pa + i, (i + fragsz) >> PAGE_SHIFT);
1219a527560SKonstantin Belousov fragsz += len;
1229a527560SKonstantin Belousov pa += len;
1239a527560SKonstantin Belousov sz -= len;
1249a527560SKonstantin Belousov if (fragsz == maxdumpsz) {
1259a527560SKonstantin Belousov error = blk_flush(di);
1269a527560SKonstantin Belousov if (error)
1279a527560SKonstantin Belousov return (error);
1289a527560SKonstantin Belousov }
1299a527560SKonstantin Belousov }
1309a527560SKonstantin Belousov
1319a527560SKonstantin Belousov /* Check for user abort. */
1329a527560SKonstantin Belousov c = cncheckc();
1339a527560SKonstantin Belousov if (c == 0x03)
1349a527560SKonstantin Belousov return (ECANCELED);
1359a527560SKonstantin Belousov if (c != -1)
1369a527560SKonstantin Belousov printf(" (CTRL-C to abort) ");
1379a527560SKonstantin Belousov }
1389a527560SKonstantin Belousov
1399a527560SKonstantin Belousov return (0);
1409a527560SKonstantin Belousov }
1419a527560SKonstantin Belousov
1429a527560SKonstantin Belousov /* A fake page table page, to avoid having to handle both 4K and 2M pages */
1439a527560SKonstantin Belousov static pt_entry_t fakept[NPTEPG];
1449a527560SKonstantin Belousov
1459a527560SKonstantin Belousov #ifdef PMAP_PAE_COMP
1461adebe3cSMitchell Horne #define cpu_minidumpsys cpu_minidumpsys_pae
1479a527560SKonstantin Belousov #define IdlePTD IdlePTD_pae
1489a527560SKonstantin Belousov #else
1491adebe3cSMitchell Horne #define cpu_minidumpsys cpu_minidumpsys_nopae
1509a527560SKonstantin Belousov #define IdlePTD IdlePTD_nopae
1519a527560SKonstantin Belousov #endif
1529a527560SKonstantin Belousov
1539a527560SKonstantin Belousov int
cpu_minidumpsys(struct dumperinfo * di,const struct minidumpstate * state)1541adebe3cSMitchell Horne cpu_minidumpsys(struct dumperinfo *di, const struct minidumpstate *state)
1559a527560SKonstantin Belousov {
1569a527560SKonstantin Belousov uint64_t dumpsize;
1579a527560SKonstantin Belousov uint32_t ptesize;
158681bd710SMitchell Horne vm_offset_t va, kva_end;
1599a527560SKonstantin Belousov int error;
1609a527560SKonstantin Belousov uint64_t pa;
161681bd710SMitchell Horne pd_entry_t *pd, pde;
162681bd710SMitchell Horne pt_entry_t *pt, pte;
1633c942808SKonstantin Belousov int k;
1649a527560SKonstantin Belousov struct minidumphdr mdhdr;
1651d2d1418SMitchell Horne struct msgbuf *mbp;
1669a527560SKonstantin Belousov
167681bd710SMitchell Horne /* Snapshot the KVA upper bound in case it grows. */
168681bd710SMitchell Horne kva_end = kernel_vm_end;
169681bd710SMitchell Horne
170681bd710SMitchell Horne /*
171681bd710SMitchell Horne * Walk the kernel page table pages, setting the active entries in the
172681bd710SMitchell Horne * dump bitmap.
173681bd710SMitchell Horne *
174681bd710SMitchell Horne * NB: for a live dump, we may be racing with updates to the page
175681bd710SMitchell Horne * tables, so care must be taken to read each entry only once.
176681bd710SMitchell Horne */
1779a527560SKonstantin Belousov ptesize = 0;
178681bd710SMitchell Horne for (va = KERNBASE; va < kva_end; va += NBPDR) {
1799a527560SKonstantin Belousov /*
1809a527560SKonstantin Belousov * We always write a page, even if it is zero. Each
1819a527560SKonstantin Belousov * page written corresponds to 2MB of space
1829a527560SKonstantin Belousov */
1839a527560SKonstantin Belousov ptesize += PAGE_SIZE;
1849a527560SKonstantin Belousov pd = IdlePTD; /* always mapped! */
185681bd710SMitchell Horne pde = pte_load(&pd[va >> PDRSHIFT]);
186681bd710SMitchell Horne if ((pde & (PG_PS | PG_V)) == (PG_PS | PG_V)) {
1879a527560SKonstantin Belousov /* This is an entire 2M page. */
188681bd710SMitchell Horne pa = pde & PG_PS_FRAME;
1899a527560SKonstantin Belousov for (k = 0; k < NPTEPG; k++) {
19031991a5aSMitchell Horne if (vm_phys_is_dumpable(pa))
19110fe6f80SMitchell Horne vm_page_dump_add(state->dump_bitset,
19210fe6f80SMitchell Horne pa);
1939a527560SKonstantin Belousov pa += PAGE_SIZE;
1949a527560SKonstantin Belousov }
1959a527560SKonstantin Belousov continue;
1969a527560SKonstantin Belousov }
197681bd710SMitchell Horne if ((pde & PG_V) == PG_V) {
1989a527560SKonstantin Belousov /* set bit for each valid page in this 2MB block */
199681bd710SMitchell Horne pt = pmap_kenter_temporary(pde & PG_FRAME, 0);
2009a527560SKonstantin Belousov for (k = 0; k < NPTEPG; k++) {
201681bd710SMitchell Horne pte = pte_load(&pt[k]);
202681bd710SMitchell Horne if ((pte & PG_V) == PG_V) {
203681bd710SMitchell Horne pa = pte & PG_FRAME;
20431991a5aSMitchell Horne if (vm_phys_is_dumpable(pa))
20510fe6f80SMitchell Horne vm_page_dump_add(
20610fe6f80SMitchell Horne state->dump_bitset, pa);
2079a527560SKonstantin Belousov }
2089a527560SKonstantin Belousov }
2099a527560SKonstantin Belousov } else {
2109a527560SKonstantin Belousov /* nothing, we're going to dump a null page */
2119a527560SKonstantin Belousov }
2129a527560SKonstantin Belousov }
2139a527560SKonstantin Belousov
2149a527560SKonstantin Belousov /* Calculate dump size. */
2151d2d1418SMitchell Horne mbp = state->msgbufp;
2169a527560SKonstantin Belousov dumpsize = ptesize;
2171d2d1418SMitchell Horne dumpsize += round_page(mbp->msg_size);
21800e66147SD Scott Phillips dumpsize += round_page(sizeof(dump_avail));
219ab041f71SD Scott Phillips dumpsize += round_page(BITSET_SIZE(vm_page_dump_pages));
22010fe6f80SMitchell Horne VM_PAGE_DUMP_FOREACH(state->dump_bitset, pa) {
2219a527560SKonstantin Belousov /* Clear out undumpable pages now if needed */
22231991a5aSMitchell Horne if (vm_phys_is_dumpable(pa)) {
2239a527560SKonstantin Belousov dumpsize += PAGE_SIZE;
2249a527560SKonstantin Belousov } else {
22510fe6f80SMitchell Horne vm_page_dump_drop(state->dump_bitset, pa);
2269a527560SKonstantin Belousov }
2279a527560SKonstantin Belousov }
2289a527560SKonstantin Belousov dumpsize += PAGE_SIZE;
2299a527560SKonstantin Belousov
230ab4ed843SMitchell Horne dumpsys_pb_init(dumpsize);
2319a527560SKonstantin Belousov
2329a527560SKonstantin Belousov /* Initialize mdhdr */
2339a527560SKonstantin Belousov bzero(&mdhdr, sizeof(mdhdr));
2349a527560SKonstantin Belousov strcpy(mdhdr.magic, MINIDUMP_MAGIC);
2359a527560SKonstantin Belousov mdhdr.version = MINIDUMP_VERSION;
2361d2d1418SMitchell Horne mdhdr.msgbufsize = mbp->msg_size;
237ab041f71SD Scott Phillips mdhdr.bitmapsize = round_page(BITSET_SIZE(vm_page_dump_pages));
2389a527560SKonstantin Belousov mdhdr.ptesize = ptesize;
2399a527560SKonstantin Belousov mdhdr.kernbase = KERNBASE;
2409a527560SKonstantin Belousov mdhdr.paemode = pae_mode;
24100e66147SD Scott Phillips mdhdr.dumpavailsize = round_page(sizeof(dump_avail));
2429a527560SKonstantin Belousov
2439a527560SKonstantin Belousov dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_I386_VERSION,
2449a527560SKonstantin Belousov dumpsize);
2459a527560SKonstantin Belousov
2469a527560SKonstantin Belousov error = dump_start(di, &kdh);
2479a527560SKonstantin Belousov if (error != 0)
2489a527560SKonstantin Belousov goto fail;
2499a527560SKonstantin Belousov
2509a527560SKonstantin Belousov printf("Physical memory: %ju MB\n", ptoa((uintmax_t)physmem) / 1048576);
2519a527560SKonstantin Belousov printf("Dumping %llu MB:", (long long)dumpsize >> 20);
2529a527560SKonstantin Belousov
2539a527560SKonstantin Belousov /* Dump my header */
2549a527560SKonstantin Belousov bzero(&fakept, sizeof(fakept));
2559a527560SKonstantin Belousov bcopy(&mdhdr, &fakept, sizeof(mdhdr));
2569a527560SKonstantin Belousov error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
2579a527560SKonstantin Belousov if (error)
2589a527560SKonstantin Belousov goto fail;
2599a527560SKonstantin Belousov
2609a527560SKonstantin Belousov /* Dump msgbuf up front */
2611d2d1418SMitchell Horne error = blk_write(di, (char *)mbp->msg_ptr, 0,
2621d2d1418SMitchell Horne round_page(mbp->msg_size));
2639a527560SKonstantin Belousov if (error)
2649a527560SKonstantin Belousov goto fail;
2659a527560SKonstantin Belousov
26600e66147SD Scott Phillips /* Dump dump_avail */
26700e66147SD Scott Phillips _Static_assert(sizeof(dump_avail) <= sizeof(fakept),
26800e66147SD Scott Phillips "Large dump_avail not handled");
26900e66147SD Scott Phillips bzero(fakept, sizeof(fakept));
27000e66147SD Scott Phillips memcpy(fakept, dump_avail, sizeof(dump_avail));
27100e66147SD Scott Phillips error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
27200e66147SD Scott Phillips if (error)
27300e66147SD Scott Phillips goto fail;
27400e66147SD Scott Phillips
2759a527560SKonstantin Belousov /* Dump bitmap */
276ab041f71SD Scott Phillips error = blk_write(di, (char *)vm_page_dump, 0,
277ab041f71SD Scott Phillips round_page(BITSET_SIZE(vm_page_dump_pages)));
2789a527560SKonstantin Belousov if (error)
2799a527560SKonstantin Belousov goto fail;
2809a527560SKonstantin Belousov
2819a527560SKonstantin Belousov /* Dump kernel page table pages */
282681bd710SMitchell Horne for (va = KERNBASE; va < kva_end; va += NBPDR) {
2839a527560SKonstantin Belousov /* We always write a page, even if it is zero */
2849a527560SKonstantin Belousov pd = IdlePTD; /* always mapped! */
285681bd710SMitchell Horne pde = pte_load(&pd[va >> PDRSHIFT]);
286681bd710SMitchell Horne if ((pde & (PG_PS | PG_V)) == (PG_PS | PG_V)) {
2879a527560SKonstantin Belousov /* This is a single 2M block. Generate a fake PTP */
288681bd710SMitchell Horne pa = pde & PG_PS_FRAME;
2899a527560SKonstantin Belousov for (k = 0; k < NPTEPG; k++) {
2909a527560SKonstantin Belousov fakept[k] = (pa + (k * PAGE_SIZE)) | PG_V | PG_RW | PG_A | PG_M;
2919a527560SKonstantin Belousov }
2929a527560SKonstantin Belousov error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
2939a527560SKonstantin Belousov if (error)
2949a527560SKonstantin Belousov goto fail;
2959a527560SKonstantin Belousov /* flush, in case we reuse fakept in the same block */
2969a527560SKonstantin Belousov error = blk_flush(di);
2979a527560SKonstantin Belousov if (error)
2989a527560SKonstantin Belousov goto fail;
2999a527560SKonstantin Belousov continue;
3009a527560SKonstantin Belousov }
301681bd710SMitchell Horne if ((pde & PG_V) == PG_V) {
302681bd710SMitchell Horne pa = pde & PG_FRAME;
3039a527560SKonstantin Belousov error = blk_write(di, 0, pa, PAGE_SIZE);
3049a527560SKonstantin Belousov if (error)
3059a527560SKonstantin Belousov goto fail;
3069a527560SKonstantin Belousov } else {
3079a527560SKonstantin Belousov bzero(fakept, sizeof(fakept));
3089a527560SKonstantin Belousov error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
3099a527560SKonstantin Belousov if (error)
3109a527560SKonstantin Belousov goto fail;
3119a527560SKonstantin Belousov /* flush, in case we reuse fakept in the same block */
3129a527560SKonstantin Belousov error = blk_flush(di);
3139a527560SKonstantin Belousov if (error)
3149a527560SKonstantin Belousov goto fail;
3159a527560SKonstantin Belousov }
3169a527560SKonstantin Belousov }
3179a527560SKonstantin Belousov
3189a527560SKonstantin Belousov /* Dump memory chunks */
31910fe6f80SMitchell Horne VM_PAGE_DUMP_FOREACH(state->dump_bitset, pa) {
3209a527560SKonstantin Belousov error = blk_write(di, 0, pa, PAGE_SIZE);
3219a527560SKonstantin Belousov if (error)
3229a527560SKonstantin Belousov goto fail;
3239a527560SKonstantin Belousov }
3249a527560SKonstantin Belousov
3259a527560SKonstantin Belousov error = blk_flush(di);
3269a527560SKonstantin Belousov if (error)
3279a527560SKonstantin Belousov goto fail;
3289a527560SKonstantin Belousov
3299a527560SKonstantin Belousov error = dump_finish(di, &kdh);
3309a527560SKonstantin Belousov if (error != 0)
3319a527560SKonstantin Belousov goto fail;
3329a527560SKonstantin Belousov
3339a527560SKonstantin Belousov printf("\nDump complete\n");
3349a527560SKonstantin Belousov return (0);
3359a527560SKonstantin Belousov
3369a527560SKonstantin Belousov fail:
3379a527560SKonstantin Belousov if (error < 0)
3389a527560SKonstantin Belousov error = -error;
3399a527560SKonstantin Belousov
3409a527560SKonstantin Belousov if (error == ECANCELED)
3419a527560SKonstantin Belousov printf("\nDump aborted\n");
34257f317e6SNavdeep Parhar else if (error == E2BIG || error == ENOSPC) {
34357f317e6SNavdeep Parhar printf("\nDump failed. Partition too small (about %lluMB were "
34457f317e6SNavdeep Parhar "needed this time).\n", (long long)dumpsize >> 20);
34557f317e6SNavdeep Parhar } else
3469a527560SKonstantin Belousov printf("\n** DUMP FAILED (ERROR %d) **\n", error);
3479a527560SKonstantin Belousov return (error);
3489a527560SKonstantin Belousov }
349