1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2006 Peter Wemm 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include "opt_pmap.h" 33 #include "opt_watchdog.h" 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/conf.h> 38 #include <sys/cons.h> 39 #include <sys/kernel.h> 40 #include <sys/kerneldump.h> 41 #include <sys/msgbuf.h> 42 #include <sys/sysctl.h> 43 #include <sys/watchdog.h> 44 #include <sys/vmmeter.h> 45 #include <vm/vm.h> 46 #include <vm/vm_param.h> 47 #include <vm/vm_page.h> 48 #include <vm/vm_phys.h> 49 #include <vm/vm_dumpset.h> 50 #include <vm/pmap.h> 51 #include <machine/atomic.h> 52 #include <machine/elf.h> 53 #include <machine/md_var.h> 54 #include <machine/minidump.h> 55 56 CTASSERT(sizeof(struct kerneldumpheader) == 512); 57 58 static struct kerneldumpheader kdh; 59 60 /* Handle chunked writes. */ 61 static size_t fragsz; 62 static void *dump_va; 63 static size_t counter, progress, dumpsize, wdog_next; 64 65 static int dump_retry_count = 5; 66 SYSCTL_INT(_machdep, OID_AUTO, dump_retry_count, CTLFLAG_RWTUN, 67 &dump_retry_count, 0, "Number of times dump has to retry before bailing out"); 68 69 static int 70 is_dumpable(vm_paddr_t pa) 71 { 72 vm_page_t m; 73 int i; 74 75 if ((m = vm_phys_paddr_to_vm_page(pa)) != NULL) 76 return ((m->flags & PG_NODUMP) == 0); 77 for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) { 78 if (pa >= dump_avail[i] && pa < dump_avail[i + 1]) 79 return (1); 80 } 81 return (0); 82 } 83 84 #define PG2MB(pgs) (((pgs) + (1 << 8) - 1) >> 8) 85 86 static int 87 blk_flush(struct dumperinfo *di) 88 { 89 int error; 90 91 if (fragsz == 0) 92 return (0); 93 94 error = dump_append(di, dump_va, 0, fragsz); 95 fragsz = 0; 96 return (error); 97 } 98 99 static struct { 100 int min_per; 101 int max_per; 102 int visited; 103 } progress_track[10] = { 104 { 0, 10, 0}, 105 { 10, 20, 0}, 106 { 20, 30, 0}, 107 { 30, 40, 0}, 108 { 40, 50, 0}, 109 { 50, 60, 0}, 110 { 60, 70, 0}, 111 { 70, 80, 0}, 112 { 80, 90, 0}, 113 { 90, 100, 0} 114 }; 115 116 static void 117 report_progress(size_t progress, size_t dumpsize) 118 { 119 int sofar, i; 120 121 sofar = 100 - ((progress * 100) / dumpsize); 122 for (i = 0; i < nitems(progress_track); i++) { 123 if (sofar < progress_track[i].min_per || 124 sofar > progress_track[i].max_per) 125 continue; 126 if (progress_track[i].visited) 127 return; 128 progress_track[i].visited = 1; 129 printf("..%d%%", sofar); 130 return; 131 } 132 } 133 134 /* Pat the watchdog approximately every 128MB of the dump. */ 135 #define WDOG_DUMP_INTERVAL (128 * 1024 * 1024) 136 137 static int 138 blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz) 139 { 140 size_t len; 141 int error, i, c; 142 u_int maxdumpsz; 143 144 maxdumpsz = min(di->maxiosize, MAXDUMPPGS * PAGE_SIZE); 145 if (maxdumpsz == 0) /* seatbelt */ 146 maxdumpsz = PAGE_SIZE; 147 error = 0; 148 if ((sz % PAGE_SIZE) != 0) { 149 printf("size not page aligned\n"); 150 return (EINVAL); 151 } 152 if (ptr != NULL && pa != 0) { 153 printf("cant have both va and pa!\n"); 154 return (EINVAL); 155 } 156 if ((((uintptr_t)pa) % PAGE_SIZE) != 0) { 157 printf("address not page aligned %p\n", ptr); 158 return (EINVAL); 159 } 160 if (ptr != NULL) { 161 /* If we're doing a virtual dump, flush any pre-existing pa pages */ 162 error = blk_flush(di); 163 if (error) 164 return (error); 165 } 166 while (sz) { 167 len = maxdumpsz - fragsz; 168 if (len > sz) 169 len = sz; 170 counter += len; 171 progress -= len; 172 if (counter >> 24) { 173 report_progress(progress, dumpsize); 174 counter &= (1<<24) - 1; 175 } 176 if (progress <= wdog_next) { 177 wdog_kern_pat(WD_LASTVAL); 178 if (wdog_next > WDOG_DUMP_INTERVAL) 179 wdog_next -= WDOG_DUMP_INTERVAL; 180 else 181 wdog_next = 0; 182 } 183 184 if (ptr) { 185 error = dump_append(di, ptr, 0, len); 186 if (error) 187 return (error); 188 ptr += len; 189 sz -= len; 190 } else { 191 for (i = 0; i < len; i += PAGE_SIZE) 192 dump_va = pmap_kenter_temporary(pa + i, (i + fragsz) >> PAGE_SHIFT); 193 fragsz += len; 194 pa += len; 195 sz -= len; 196 if (fragsz == maxdumpsz) { 197 error = blk_flush(di); 198 if (error) 199 return (error); 200 } 201 } 202 203 /* Check for user abort. */ 204 c = cncheckc(); 205 if (c == 0x03) 206 return (ECANCELED); 207 if (c != -1) 208 printf(" (CTRL-C to abort) "); 209 } 210 211 return (0); 212 } 213 214 /* A fake page table page, to avoid having to handle both 4K and 2M pages */ 215 static pd_entry_t fakepd[NPDEPG]; 216 217 int 218 minidumpsys(struct dumperinfo *di) 219 { 220 uint32_t pmapsize; 221 vm_offset_t va; 222 int error; 223 uint64_t *pml4, *pdp, *pd, *pt, pa; 224 int i, ii, j, k, n; 225 int retry_count; 226 struct minidumphdr mdhdr; 227 228 retry_count = 0; 229 retry: 230 retry_count++; 231 counter = 0; 232 for (i = 0; i < nitems(progress_track); i++) 233 progress_track[i].visited = 0; 234 /* Walk page table pages, set bits in vm_page_dump */ 235 pmapsize = 0; 236 for (va = VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + nkpt * NBPDR, 237 kernel_vm_end); ) { 238 /* 239 * We always write a page, even if it is zero. Each 240 * page written corresponds to 1GB of space 241 */ 242 pmapsize += PAGE_SIZE; 243 ii = pmap_pml4e_index(va); 244 pml4 = (uint64_t *)PHYS_TO_DMAP(KPML4phys) + ii; 245 pdp = (uint64_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME); 246 i = pmap_pdpe_index(va); 247 if ((pdp[i] & PG_V) == 0) { 248 va += NBPDP; 249 continue; 250 } 251 252 /* 253 * 1GB page is represented as 512 2MB pages in a dump. 254 */ 255 if ((pdp[i] & PG_PS) != 0) { 256 va += NBPDP; 257 pa = pdp[i] & PG_PS_FRAME; 258 for (n = 0; n < NPDEPG * NPTEPG; n++) { 259 if (is_dumpable(pa)) 260 dump_add_page(pa); 261 pa += PAGE_SIZE; 262 } 263 continue; 264 } 265 266 pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME); 267 for (n = 0; n < NPDEPG; n++, va += NBPDR) { 268 j = pmap_pde_index(va); 269 270 if ((pd[j] & PG_V) == 0) 271 continue; 272 273 if ((pd[j] & PG_PS) != 0) { 274 /* This is an entire 2M page. */ 275 pa = pd[j] & PG_PS_FRAME; 276 for (k = 0; k < NPTEPG; k++) { 277 if (is_dumpable(pa)) 278 dump_add_page(pa); 279 pa += PAGE_SIZE; 280 } 281 continue; 282 } 283 284 pa = pd[j] & PG_FRAME; 285 /* set bit for this PTE page */ 286 if (is_dumpable(pa)) 287 dump_add_page(pa); 288 /* and for each valid page in this 2MB block */ 289 pt = (uint64_t *)PHYS_TO_DMAP(pd[j] & PG_FRAME); 290 for (k = 0; k < NPTEPG; k++) { 291 if ((pt[k] & PG_V) == 0) 292 continue; 293 pa = pt[k] & PG_FRAME; 294 if (is_dumpable(pa)) 295 dump_add_page(pa); 296 } 297 } 298 } 299 300 /* Calculate dump size. */ 301 dumpsize = pmapsize; 302 dumpsize += round_page(msgbufp->msg_size); 303 dumpsize += round_page(sizeof(dump_avail)); 304 dumpsize += round_page(BITSET_SIZE(vm_page_dump_pages)); 305 VM_PAGE_DUMP_FOREACH(pa) { 306 /* Clear out undumpable pages now if needed */ 307 if (is_dumpable(pa)) { 308 dumpsize += PAGE_SIZE; 309 } else { 310 dump_drop_page(pa); 311 } 312 } 313 dumpsize += PAGE_SIZE; 314 315 wdog_next = progress = dumpsize; 316 317 /* Initialize mdhdr */ 318 bzero(&mdhdr, sizeof(mdhdr)); 319 strcpy(mdhdr.magic, MINIDUMP_MAGIC); 320 mdhdr.version = MINIDUMP_VERSION; 321 mdhdr.msgbufsize = msgbufp->msg_size; 322 mdhdr.bitmapsize = round_page(BITSET_SIZE(vm_page_dump_pages)); 323 mdhdr.pmapsize = pmapsize; 324 mdhdr.kernbase = VM_MIN_KERNEL_ADDRESS; 325 mdhdr.dmapbase = DMAP_MIN_ADDRESS; 326 mdhdr.dmapend = DMAP_MAX_ADDRESS; 327 mdhdr.dumpavailsize = round_page(sizeof(dump_avail)); 328 329 dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_AMD64_VERSION, 330 dumpsize); 331 332 error = dump_start(di, &kdh); 333 if (error != 0) 334 goto fail; 335 336 printf("Dumping %llu out of %ju MB:", (long long)dumpsize >> 20, 337 ptoa((uintmax_t)physmem) / 1048576); 338 339 /* Dump my header */ 340 bzero(&fakepd, sizeof(fakepd)); 341 bcopy(&mdhdr, &fakepd, sizeof(mdhdr)); 342 error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE); 343 if (error) 344 goto fail; 345 346 /* Dump msgbuf up front */ 347 error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size)); 348 if (error) 349 goto fail; 350 351 /* Dump dump_avail */ 352 _Static_assert(sizeof(dump_avail) <= sizeof(fakepd), 353 "Large dump_avail not handled"); 354 bzero(&fakepd, sizeof(fakepd)); 355 memcpy(fakepd, dump_avail, sizeof(dump_avail)); 356 error = blk_write(di, (char *)fakepd, 0, PAGE_SIZE); 357 if (error) 358 goto fail; 359 360 /* Dump bitmap */ 361 error = blk_write(di, (char *)vm_page_dump, 0, 362 round_page(BITSET_SIZE(vm_page_dump_pages))); 363 if (error) 364 goto fail; 365 366 /* Dump kernel page directory pages */ 367 bzero(fakepd, sizeof(fakepd)); 368 for (va = VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + nkpt * NBPDR, 369 kernel_vm_end); va += NBPDP) { 370 ii = pmap_pml4e_index(va); 371 pml4 = (uint64_t *)PHYS_TO_DMAP(KPML4phys) + ii; 372 pdp = (uint64_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME); 373 i = pmap_pdpe_index(va); 374 375 /* We always write a page, even if it is zero */ 376 if ((pdp[i] & PG_V) == 0) { 377 error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE); 378 if (error) 379 goto fail; 380 /* flush, in case we reuse fakepd in the same block */ 381 error = blk_flush(di); 382 if (error) 383 goto fail; 384 continue; 385 } 386 387 /* 1GB page is represented as 512 2MB pages in a dump */ 388 if ((pdp[i] & PG_PS) != 0) { 389 /* PDPE and PDP have identical layout in this case */ 390 fakepd[0] = pdp[i]; 391 for (j = 1; j < NPDEPG; j++) 392 fakepd[j] = fakepd[j - 1] + NBPDR; 393 error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE); 394 if (error) 395 goto fail; 396 /* flush, in case we reuse fakepd in the same block */ 397 error = blk_flush(di); 398 if (error) 399 goto fail; 400 bzero(fakepd, sizeof(fakepd)); 401 continue; 402 } 403 404 pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME); 405 error = blk_write(di, (char *)pd, 0, PAGE_SIZE); 406 if (error) 407 goto fail; 408 error = blk_flush(di); 409 if (error) 410 goto fail; 411 } 412 413 /* Dump memory chunks */ 414 VM_PAGE_DUMP_FOREACH(pa) { 415 error = blk_write(di, 0, pa, PAGE_SIZE); 416 if (error) 417 goto fail; 418 } 419 420 error = blk_flush(di); 421 if (error) 422 goto fail; 423 424 error = dump_finish(di, &kdh); 425 if (error != 0) 426 goto fail; 427 428 printf("\nDump complete\n"); 429 return (0); 430 431 fail: 432 if (error < 0) 433 error = -error; 434 435 printf("\n"); 436 if (error == ENOSPC) { 437 printf("Dump map grown while dumping. "); 438 if (retry_count < dump_retry_count) { 439 printf("Retrying...\n"); 440 goto retry; 441 } 442 printf("Dump failed.\n"); 443 } 444 else if (error == ECANCELED) 445 printf("Dump aborted\n"); 446 else if (error == E2BIG) { 447 printf("Dump failed. Partition too small (about %lluMB were " 448 "needed this time).\n", (long long)dumpsize >> 20); 449 } else 450 printf("** DUMP FAILED (ERROR %d) **\n", error); 451 return (error); 452 } 453