1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2006 Peter Wemm 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 #include "opt_watchdog.h" 31 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/conf.h> 35 #include <sys/cons.h> 36 #include <sys/kernel.h> 37 #include <sys/kerneldump.h> 38 #include <sys/msgbuf.h> 39 #include <sys/watchdog.h> 40 #include <vm/vm.h> 41 #include <vm/vm_param.h> 42 #include <vm/vm_page.h> 43 #include <vm/vm_phys.h> 44 #include <vm/vm_dumpset.h> 45 #include <vm/pmap.h> 46 #include <machine/atomic.h> 47 #include <machine/elf.h> 48 #include <machine/md_var.h> 49 #include <machine/minidump.h> 50 51 CTASSERT(sizeof(struct kerneldumpheader) == 512); 52 53 #define MD_ALIGN(x) (((off_t)(x) + PAGE_MASK) & ~PAGE_MASK) 54 #define DEV_ALIGN(x) roundup2((off_t)(x), DEV_BSIZE) 55 56 static struct kerneldumpheader kdh; 57 58 /* Handle chunked writes. */ 59 static size_t fragsz; 60 static void *dump_va; 61 62 static int 63 blk_flush(struct dumperinfo *di) 64 { 65 int error; 66 67 if (fragsz == 0) 68 return (0); 69 70 error = dump_append(di, dump_va, fragsz); 71 fragsz = 0; 72 return (error); 73 } 74 75 static int 76 blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz) 77 { 78 size_t len; 79 int error, i, c; 80 u_int maxdumpsz; 81 82 maxdumpsz = min(di->maxiosize, MAXDUMPPGS * PAGE_SIZE); 83 if (maxdumpsz == 0) /* seatbelt */ 84 maxdumpsz = PAGE_SIZE; 85 error = 0; 86 if ((sz % PAGE_SIZE) != 0) { 87 printf("size not page aligned\n"); 88 return (EINVAL); 89 } 90 if (ptr != NULL && pa != 0) { 91 printf("cant have both va and pa!\n"); 92 return (EINVAL); 93 } 94 if (pa != 0 && (((uintptr_t)ptr) % PAGE_SIZE) != 0) { 95 printf("address not page aligned\n"); 96 return (EINVAL); 97 } 98 if (ptr != NULL) { 99 /* If we're doing a virtual dump, flush any pre-existing pa pages */ 100 error = blk_flush(di); 101 if (error) 102 return (error); 103 } 104 while (sz) { 105 len = maxdumpsz - fragsz; 106 if (len > sz) 107 len = sz; 108 109 dumpsys_pb_progress(len); 110 wdog_kern_pat(WD_LASTVAL); 111 112 if (ptr) { 113 error = dump_append(di, ptr, len); 114 if (error) 115 return (error); 116 ptr += len; 117 sz -= len; 118 } else { 119 for (i = 0; i < len; i += PAGE_SIZE) 120 dump_va = pmap_kenter_temporary(pa + i, (i + fragsz) >> PAGE_SHIFT); 121 fragsz += len; 122 pa += len; 123 sz -= len; 124 if (fragsz == maxdumpsz) { 125 error = blk_flush(di); 126 if (error) 127 return (error); 128 } 129 } 130 131 /* Check for user abort. */ 132 c = cncheckc(); 133 if (c == 0x03) 134 return (ECANCELED); 135 if (c != -1) 136 printf(" (CTRL-C to abort) "); 137 } 138 139 return (0); 140 } 141 142 /* A fake page table page, to avoid having to handle both 4K and 2M pages */ 143 static pt_entry_t fakept[NPTEPG]; 144 145 #ifdef PMAP_PAE_COMP 146 #define cpu_minidumpsys cpu_minidumpsys_pae 147 #define IdlePTD IdlePTD_pae 148 #else 149 #define cpu_minidumpsys cpu_minidumpsys_nopae 150 #define IdlePTD IdlePTD_nopae 151 #endif 152 153 int 154 cpu_minidumpsys(struct dumperinfo *di, const struct minidumpstate *state) 155 { 156 uint64_t dumpsize; 157 uint32_t ptesize; 158 vm_offset_t va, kva_end; 159 int error; 160 uint64_t pa; 161 pd_entry_t *pd, pde; 162 pt_entry_t *pt, pte; 163 int k; 164 struct minidumphdr mdhdr; 165 struct msgbuf *mbp; 166 167 /* Snapshot the KVA upper bound in case it grows. */ 168 kva_end = kernel_vm_end; 169 170 /* 171 * Walk the kernel page table pages, setting the active entries in the 172 * dump bitmap. 173 * 174 * NB: for a live dump, we may be racing with updates to the page 175 * tables, so care must be taken to read each entry only once. 176 */ 177 ptesize = 0; 178 for (va = KERNBASE; va < kva_end; va += NBPDR) { 179 /* 180 * We always write a page, even if it is zero. Each 181 * page written corresponds to 2MB of space 182 */ 183 ptesize += PAGE_SIZE; 184 pd = IdlePTD; /* always mapped! */ 185 pde = pte_load(&pd[va >> PDRSHIFT]); 186 if ((pde & (PG_PS | PG_V)) == (PG_PS | PG_V)) { 187 /* This is an entire 2M page. */ 188 pa = pde & PG_PS_FRAME; 189 for (k = 0; k < NPTEPG; k++) { 190 if (vm_phys_is_dumpable(pa)) 191 vm_page_dump_add(state->dump_bitset, 192 pa); 193 pa += PAGE_SIZE; 194 } 195 continue; 196 } 197 if ((pde & PG_V) == PG_V) { 198 /* set bit for each valid page in this 2MB block */ 199 pt = pmap_kenter_temporary(pde & PG_FRAME, 0); 200 for (k = 0; k < NPTEPG; k++) { 201 pte = pte_load(&pt[k]); 202 if ((pte & PG_V) == PG_V) { 203 pa = pte & PG_FRAME; 204 if (vm_phys_is_dumpable(pa)) 205 vm_page_dump_add( 206 state->dump_bitset, pa); 207 } 208 } 209 } else { 210 /* nothing, we're going to dump a null page */ 211 } 212 } 213 214 /* Calculate dump size. */ 215 mbp = state->msgbufp; 216 dumpsize = ptesize; 217 dumpsize += round_page(mbp->msg_size); 218 dumpsize += round_page(sizeof(dump_avail)); 219 dumpsize += round_page(BITSET_SIZE(vm_page_dump_pages)); 220 VM_PAGE_DUMP_FOREACH(state->dump_bitset, pa) { 221 /* Clear out undumpable pages now if needed */ 222 if (vm_phys_is_dumpable(pa)) { 223 dumpsize += PAGE_SIZE; 224 } else { 225 vm_page_dump_drop(state->dump_bitset, pa); 226 } 227 } 228 dumpsize += PAGE_SIZE; 229 230 dumpsys_pb_init(dumpsize); 231 232 /* Initialize mdhdr */ 233 bzero(&mdhdr, sizeof(mdhdr)); 234 strcpy(mdhdr.magic, MINIDUMP_MAGIC); 235 mdhdr.version = MINIDUMP_VERSION; 236 mdhdr.msgbufsize = mbp->msg_size; 237 mdhdr.bitmapsize = round_page(BITSET_SIZE(vm_page_dump_pages)); 238 mdhdr.ptesize = ptesize; 239 mdhdr.kernbase = KERNBASE; 240 mdhdr.paemode = pae_mode; 241 mdhdr.dumpavailsize = round_page(sizeof(dump_avail)); 242 243 dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_I386_VERSION, 244 dumpsize); 245 246 error = dump_start(di, &kdh); 247 if (error != 0) 248 goto fail; 249 250 printf("Physical memory: %ju MB\n", ptoa((uintmax_t)physmem) / 1048576); 251 printf("Dumping %llu MB:", (long long)dumpsize >> 20); 252 253 /* Dump my header */ 254 bzero(&fakept, sizeof(fakept)); 255 bcopy(&mdhdr, &fakept, sizeof(mdhdr)); 256 error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE); 257 if (error) 258 goto fail; 259 260 /* Dump msgbuf up front */ 261 error = blk_write(di, (char *)mbp->msg_ptr, 0, 262 round_page(mbp->msg_size)); 263 if (error) 264 goto fail; 265 266 /* Dump dump_avail */ 267 _Static_assert(sizeof(dump_avail) <= sizeof(fakept), 268 "Large dump_avail not handled"); 269 bzero(fakept, sizeof(fakept)); 270 memcpy(fakept, dump_avail, sizeof(dump_avail)); 271 error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE); 272 if (error) 273 goto fail; 274 275 /* Dump bitmap */ 276 error = blk_write(di, (char *)vm_page_dump, 0, 277 round_page(BITSET_SIZE(vm_page_dump_pages))); 278 if (error) 279 goto fail; 280 281 /* Dump kernel page table pages */ 282 for (va = KERNBASE; va < kva_end; va += NBPDR) { 283 /* We always write a page, even if it is zero */ 284 pd = IdlePTD; /* always mapped! */ 285 pde = pte_load(&pd[va >> PDRSHIFT]); 286 if ((pde & (PG_PS | PG_V)) == (PG_PS | PG_V)) { 287 /* This is a single 2M block. Generate a fake PTP */ 288 pa = pde & PG_PS_FRAME; 289 for (k = 0; k < NPTEPG; k++) { 290 fakept[k] = (pa + (k * PAGE_SIZE)) | PG_V | PG_RW | PG_A | PG_M; 291 } 292 error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE); 293 if (error) 294 goto fail; 295 /* flush, in case we reuse fakept in the same block */ 296 error = blk_flush(di); 297 if (error) 298 goto fail; 299 continue; 300 } 301 if ((pde & PG_V) == PG_V) { 302 pa = pde & PG_FRAME; 303 error = blk_write(di, 0, pa, PAGE_SIZE); 304 if (error) 305 goto fail; 306 } else { 307 bzero(fakept, sizeof(fakept)); 308 error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE); 309 if (error) 310 goto fail; 311 /* flush, in case we reuse fakept in the same block */ 312 error = blk_flush(di); 313 if (error) 314 goto fail; 315 } 316 } 317 318 /* Dump memory chunks */ 319 VM_PAGE_DUMP_FOREACH(state->dump_bitset, pa) { 320 error = blk_write(di, 0, pa, PAGE_SIZE); 321 if (error) 322 goto fail; 323 } 324 325 error = blk_flush(di); 326 if (error) 327 goto fail; 328 329 error = dump_finish(di, &kdh); 330 if (error != 0) 331 goto fail; 332 333 printf("\nDump complete\n"); 334 return (0); 335 336 fail: 337 if (error < 0) 338 error = -error; 339 340 if (error == ECANCELED) 341 printf("\nDump aborted\n"); 342 else if (error == E2BIG || error == ENOSPC) { 343 printf("\nDump failed. Partition too small (about %lluMB were " 344 "needed this time).\n", (long long)dumpsize >> 20); 345 } else 346 printf("\n** DUMP FAILED (ERROR %d) **\n", error); 347 return (error); 348 } 349