/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2006 Peter Wemm
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include "opt_watchdog.h" 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/conf.h> 37 #include <sys/cons.h> 38 #include <sys/kernel.h> 39 #include <sys/kerneldump.h> 40 #include <sys/msgbuf.h> 41 #include <sys/watchdog.h> 42 #include <vm/vm.h> 43 #include <vm/vm_param.h> 44 #include <vm/vm_page.h> 45 #include <vm/vm_phys.h> 46 #include <vm/vm_dumpset.h> 47 #include <vm/pmap.h> 48 #include <machine/atomic.h> 49 #include <machine/elf.h> 50 #include <machine/md_var.h> 51 #include <machine/minidump.h> 52 53 CTASSERT(sizeof(struct kerneldumpheader) == 512); 54 55 #define MD_ALIGN(x) (((off_t)(x) + PAGE_MASK) & ~PAGE_MASK) 56 #define DEV_ALIGN(x) roundup2((off_t)(x), DEV_BSIZE) 57 58 static struct kerneldumpheader kdh; 59 60 /* Handle chunked writes. */ 61 static size_t fragsz; 62 static void *dump_va; 63 64 static int 65 blk_flush(struct dumperinfo *di) 66 { 67 int error; 68 69 if (fragsz == 0) 70 return (0); 71 72 error = dump_append(di, dump_va, fragsz); 73 fragsz = 0; 74 return (error); 75 } 76 77 static int 78 blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz) 79 { 80 size_t len; 81 int error, i, c; 82 u_int maxdumpsz; 83 84 maxdumpsz = min(di->maxiosize, MAXDUMPPGS * PAGE_SIZE); 85 if (maxdumpsz == 0) /* seatbelt */ 86 maxdumpsz = PAGE_SIZE; 87 error = 0; 88 if ((sz % PAGE_SIZE) != 0) { 89 printf("size not page aligned\n"); 90 return (EINVAL); 91 } 92 if (ptr != NULL && pa != 0) { 93 printf("cant have both va and pa!\n"); 94 return (EINVAL); 95 } 96 if (pa != 0 && (((uintptr_t)ptr) % PAGE_SIZE) != 0) { 97 printf("address not page aligned\n"); 98 return (EINVAL); 99 } 100 if (ptr != NULL) { 101 /* If we're doing a virtual dump, flush any pre-existing pa pages */ 102 error = blk_flush(di); 103 if (error) 104 return (error); 105 } 106 while (sz) { 107 len = maxdumpsz - fragsz; 108 if (len > sz) 109 len = sz; 110 111 dumpsys_pb_progress(len); 112 
wdog_kern_pat(WD_LASTVAL); 113 114 if (ptr) { 115 error = dump_append(di, ptr, len); 116 if (error) 117 return (error); 118 ptr += len; 119 sz -= len; 120 } else { 121 for (i = 0; i < len; i += PAGE_SIZE) 122 dump_va = pmap_kenter_temporary(pa + i, (i + fragsz) >> PAGE_SHIFT); 123 fragsz += len; 124 pa += len; 125 sz -= len; 126 if (fragsz == maxdumpsz) { 127 error = blk_flush(di); 128 if (error) 129 return (error); 130 } 131 } 132 133 /* Check for user abort. */ 134 c = cncheckc(); 135 if (c == 0x03) 136 return (ECANCELED); 137 if (c != -1) 138 printf(" (CTRL-C to abort) "); 139 } 140 141 return (0); 142 } 143 144 /* A fake page table page, to avoid having to handle both 4K and 2M pages */ 145 static pt_entry_t fakept[NPTEPG]; 146 147 #ifdef PMAP_PAE_COMP 148 #define cpu_minidumpsys cpu_minidumpsys_pae 149 #define IdlePTD IdlePTD_pae 150 #else 151 #define cpu_minidumpsys cpu_minidumpsys_nopae 152 #define IdlePTD IdlePTD_nopae 153 #endif 154 155 int 156 cpu_minidumpsys(struct dumperinfo *di, const struct minidumpstate *state) 157 { 158 uint64_t dumpsize; 159 uint32_t ptesize; 160 vm_offset_t va, kva_end; 161 int error; 162 uint64_t pa; 163 pd_entry_t *pd, pde; 164 pt_entry_t *pt, pte; 165 int k; 166 struct minidumphdr mdhdr; 167 struct msgbuf *mbp; 168 169 /* Snapshot the KVA upper bound in case it grows. */ 170 kva_end = kernel_vm_end; 171 172 /* 173 * Walk the kernel page table pages, setting the active entries in the 174 * dump bitmap. 175 * 176 * NB: for a live dump, we may be racing with updates to the page 177 * tables, so care must be taken to read each entry only once. 178 */ 179 ptesize = 0; 180 for (va = KERNBASE; va < kva_end; va += NBPDR) { 181 /* 182 * We always write a page, even if it is zero. Each 183 * page written corresponds to 2MB of space 184 */ 185 ptesize += PAGE_SIZE; 186 pd = IdlePTD; /* always mapped! */ 187 pde = pte_load(&pd[va >> PDRSHIFT]); 188 if ((pde & (PG_PS | PG_V)) == (PG_PS | PG_V)) { 189 /* This is an entire 2M page. 
*/ 190 pa = pde & PG_PS_FRAME; 191 for (k = 0; k < NPTEPG; k++) { 192 if (vm_phys_is_dumpable(pa)) 193 vm_page_dump_add(state->dump_bitset, 194 pa); 195 pa += PAGE_SIZE; 196 } 197 continue; 198 } 199 if ((pde & PG_V) == PG_V) { 200 /* set bit for each valid page in this 2MB block */ 201 pt = pmap_kenter_temporary(pde & PG_FRAME, 0); 202 for (k = 0; k < NPTEPG; k++) { 203 pte = pte_load(&pt[k]); 204 if ((pte & PG_V) == PG_V) { 205 pa = pte & PG_FRAME; 206 if (vm_phys_is_dumpable(pa)) 207 vm_page_dump_add( 208 state->dump_bitset, pa); 209 } 210 } 211 } else { 212 /* nothing, we're going to dump a null page */ 213 } 214 } 215 216 /* Calculate dump size. */ 217 mbp = state->msgbufp; 218 dumpsize = ptesize; 219 dumpsize += round_page(mbp->msg_size); 220 dumpsize += round_page(sizeof(dump_avail)); 221 dumpsize += round_page(BITSET_SIZE(vm_page_dump_pages)); 222 VM_PAGE_DUMP_FOREACH(state->dump_bitset, pa) { 223 /* Clear out undumpable pages now if needed */ 224 if (vm_phys_is_dumpable(pa)) { 225 dumpsize += PAGE_SIZE; 226 } else { 227 vm_page_dump_drop(state->dump_bitset, pa); 228 } 229 } 230 dumpsize += PAGE_SIZE; 231 232 dumpsys_pb_init(dumpsize); 233 234 /* Initialize mdhdr */ 235 bzero(&mdhdr, sizeof(mdhdr)); 236 strcpy(mdhdr.magic, MINIDUMP_MAGIC); 237 mdhdr.version = MINIDUMP_VERSION; 238 mdhdr.msgbufsize = mbp->msg_size; 239 mdhdr.bitmapsize = round_page(BITSET_SIZE(vm_page_dump_pages)); 240 mdhdr.ptesize = ptesize; 241 mdhdr.kernbase = KERNBASE; 242 mdhdr.paemode = pae_mode; 243 mdhdr.dumpavailsize = round_page(sizeof(dump_avail)); 244 245 dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_I386_VERSION, 246 dumpsize); 247 248 error = dump_start(di, &kdh); 249 if (error != 0) 250 goto fail; 251 252 printf("Physical memory: %ju MB\n", ptoa((uintmax_t)physmem) / 1048576); 253 printf("Dumping %llu MB:", (long long)dumpsize >> 20); 254 255 /* Dump my header */ 256 bzero(&fakept, sizeof(fakept)); 257 bcopy(&mdhdr, &fakept, sizeof(mdhdr)); 258 error = blk_write(di, 
(char *)&fakept, 0, PAGE_SIZE); 259 if (error) 260 goto fail; 261 262 /* Dump msgbuf up front */ 263 error = blk_write(di, (char *)mbp->msg_ptr, 0, 264 round_page(mbp->msg_size)); 265 if (error) 266 goto fail; 267 268 /* Dump dump_avail */ 269 _Static_assert(sizeof(dump_avail) <= sizeof(fakept), 270 "Large dump_avail not handled"); 271 bzero(fakept, sizeof(fakept)); 272 memcpy(fakept, dump_avail, sizeof(dump_avail)); 273 error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE); 274 if (error) 275 goto fail; 276 277 /* Dump bitmap */ 278 error = blk_write(di, (char *)vm_page_dump, 0, 279 round_page(BITSET_SIZE(vm_page_dump_pages))); 280 if (error) 281 goto fail; 282 283 /* Dump kernel page table pages */ 284 for (va = KERNBASE; va < kva_end; va += NBPDR) { 285 /* We always write a page, even if it is zero */ 286 pd = IdlePTD; /* always mapped! */ 287 pde = pte_load(&pd[va >> PDRSHIFT]); 288 if ((pde & (PG_PS | PG_V)) == (PG_PS | PG_V)) { 289 /* This is a single 2M block. Generate a fake PTP */ 290 pa = pde & PG_PS_FRAME; 291 for (k = 0; k < NPTEPG; k++) { 292 fakept[k] = (pa + (k * PAGE_SIZE)) | PG_V | PG_RW | PG_A | PG_M; 293 } 294 error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE); 295 if (error) 296 goto fail; 297 /* flush, in case we reuse fakept in the same block */ 298 error = blk_flush(di); 299 if (error) 300 goto fail; 301 continue; 302 } 303 if ((pde & PG_V) == PG_V) { 304 pa = pde & PG_FRAME; 305 error = blk_write(di, 0, pa, PAGE_SIZE); 306 if (error) 307 goto fail; 308 } else { 309 bzero(fakept, sizeof(fakept)); 310 error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE); 311 if (error) 312 goto fail; 313 /* flush, in case we reuse fakept in the same block */ 314 error = blk_flush(di); 315 if (error) 316 goto fail; 317 } 318 } 319 320 /* Dump memory chunks */ 321 VM_PAGE_DUMP_FOREACH(state->dump_bitset, pa) { 322 error = blk_write(di, 0, pa, PAGE_SIZE); 323 if (error) 324 goto fail; 325 } 326 327 error = blk_flush(di); 328 if (error) 329 goto fail; 330 331 
error = dump_finish(di, &kdh); 332 if (error != 0) 333 goto fail; 334 335 printf("\nDump complete\n"); 336 return (0); 337 338 fail: 339 if (error < 0) 340 error = -error; 341 342 if (error == ECANCELED) 343 printf("\nDump aborted\n"); 344 else if (error == E2BIG || error == ENOSPC) { 345 printf("\nDump failed. Partition too small (about %lluMB were " 346 "needed this time).\n", (long long)dumpsize >> 20); 347 } else 348 printf("\n** DUMP FAILED (ERROR %d) **\n", error); 349 return (error); 350 } 351