1 /*- 2 * Copyright (c) 2006 Peter Wemm 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 #include <sys/cdefs.h> 28 __FBSDID("$FreeBSD$"); 29 30 #include "opt_watchdog.h" 31 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/conf.h> 35 #include <sys/cons.h> 36 #include <sys/kernel.h> 37 #include <sys/kerneldump.h> 38 #include <sys/msgbuf.h> 39 #include <sys/watchdog.h> 40 #include <vm/vm.h> 41 #include <vm/pmap.h> 42 #include <machine/atomic.h> 43 #include <machine/elf.h> 44 #include <machine/md_var.h> 45 #include <machine/vmparam.h> 46 #include <machine/minidump.h> 47 48 CTASSERT(sizeof(struct kerneldumpheader) == 512); 49 50 #define MD_ALIGN(x) (((off_t)(x) + PAGE_MASK) & ~PAGE_MASK) 51 #define DEV_ALIGN(x) roundup2((off_t)(x), DEV_BSIZE) 52 53 uint32_t *vm_page_dump; 54 int vm_page_dump_size; 55 56 static struct kerneldumpheader kdh; 57 58 /* Handle chunked writes. */ 59 static size_t fragsz; 60 static void *dump_va; 61 static uint64_t counter, progress; 62 63 CTASSERT(sizeof(*vm_page_dump) == 4); 64 65 66 static int 67 is_dumpable(vm_paddr_t pa) 68 { 69 int i; 70 71 for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) { 72 if (pa >= dump_avail[i] && pa < dump_avail[i + 1]) 73 return (1); 74 } 75 return (0); 76 } 77 78 #define PG2MB(pgs) (((pgs) + (1 << 8) - 1) >> 8) 79 80 static int 81 blk_flush(struct dumperinfo *di) 82 { 83 int error; 84 85 if (fragsz == 0) 86 return (0); 87 88 error = dump_append(di, dump_va, 0, fragsz); 89 fragsz = 0; 90 return (error); 91 } 92 93 static int 94 blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz) 95 { 96 size_t len; 97 int error, i, c; 98 u_int maxdumpsz; 99 100 maxdumpsz = min(di->maxiosize, MAXDUMPPGS * PAGE_SIZE); 101 if (maxdumpsz == 0) /* seatbelt */ 102 maxdumpsz = PAGE_SIZE; 103 error = 0; 104 if ((sz % PAGE_SIZE) != 0) { 105 printf("size not page aligned\n"); 106 return (EINVAL); 107 } 108 if (ptr != NULL && pa != 0) { 109 printf("cant have both va and pa!\n"); 110 return (EINVAL); 111 } 112 if (pa != 0 && (((uintptr_t)ptr) % PAGE_SIZE) != 0) { 113 printf("address not page aligned\n"); 114 return (EINVAL); 115 } 116 if (ptr != NULL) { 117 /* If we're doing a virtual dump, flush any pre-existing pa pages */ 118 error = blk_flush(di); 119 if (error) 120 return (error); 121 } 122 while (sz) { 123 len = maxdumpsz - fragsz; 124 if (len > sz) 125 len = sz; 126 counter += len; 127 progress -= len; 128 if (counter >> 24) { 129 printf(" %lld", PG2MB(progress >> PAGE_SHIFT)); 130 counter &= (1<<24) - 1; 131 } 132 133 wdog_kern_pat(WD_LASTVAL); 134 135 if (ptr) { 136 error = dump_append(di, ptr, 0, len); 137 if (error) 138 return (error); 139 ptr += len; 140 sz -= len; 141 } else { 142 for (i = 0; i < len; i += PAGE_SIZE) 143 dump_va = pmap_kenter_temporary(pa + i, (i + fragsz) >> PAGE_SHIFT); 144 fragsz += len; 145 pa += len; 146 sz -= len; 147 if (fragsz == maxdumpsz) { 148 error = blk_flush(di); 149 if (error) 150 return (error); 151 } 152 } 153 154 /* Check for user abort. */ 155 c = cncheckc(); 156 if (c == 0x03) 157 return (ECANCELED); 158 if (c != -1) 159 printf(" (CTRL-C to abort) "); 160 } 161 162 return (0); 163 } 164 165 /* A fake page table page, to avoid having to handle both 4K and 2M pages */ 166 static pt_entry_t fakept[NPTEPG]; 167 168 int 169 minidumpsys(struct dumperinfo *di) 170 { 171 uint64_t dumpsize; 172 uint32_t ptesize; 173 vm_offset_t va; 174 int error; 175 uint32_t bits; 176 uint64_t pa; 177 pd_entry_t *pd; 178 pt_entry_t *pt; 179 int i, j, k, bit; 180 struct minidumphdr mdhdr; 181 182 counter = 0; 183 /* Walk page table pages, set bits in vm_page_dump */ 184 ptesize = 0; 185 for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) { 186 /* 187 * We always write a page, even if it is zero. Each 188 * page written corresponds to 2MB of space 189 */ 190 ptesize += PAGE_SIZE; 191 pd = (pd_entry_t *)((uintptr_t)IdlePTD + KERNBASE); /* always mapped! */ 192 j = va >> PDRSHIFT; 193 if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V)) { 194 /* This is an entire 2M page. */ 195 pa = pd[j] & PG_PS_FRAME; 196 for (k = 0; k < NPTEPG; k++) { 197 if (is_dumpable(pa)) 198 dump_add_page(pa); 199 pa += PAGE_SIZE; 200 } 201 continue; 202 } 203 if ((pd[j] & PG_V) == PG_V) { 204 /* set bit for each valid page in this 2MB block */ 205 pt = pmap_kenter_temporary(pd[j] & PG_FRAME, 0); 206 for (k = 0; k < NPTEPG; k++) { 207 if ((pt[k] & PG_V) == PG_V) { 208 pa = pt[k] & PG_FRAME; 209 if (is_dumpable(pa)) 210 dump_add_page(pa); 211 } 212 } 213 } else { 214 /* nothing, we're going to dump a null page */ 215 } 216 } 217 218 /* Calculate dump size. */ 219 dumpsize = ptesize; 220 dumpsize += round_page(msgbufp->msg_size); 221 dumpsize += round_page(vm_page_dump_size); 222 for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) { 223 bits = vm_page_dump[i]; 224 while (bits) { 225 bit = bsfl(bits); 226 pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE; 227 /* Clear out undumpable pages now if needed */ 228 if (is_dumpable(pa)) { 229 dumpsize += PAGE_SIZE; 230 } else { 231 dump_drop_page(pa); 232 } 233 bits &= ~(1ul << bit); 234 } 235 } 236 dumpsize += PAGE_SIZE; 237 238 progress = dumpsize; 239 240 /* Initialize mdhdr */ 241 bzero(&mdhdr, sizeof(mdhdr)); 242 strcpy(mdhdr.magic, MINIDUMP_MAGIC); 243 mdhdr.version = MINIDUMP_VERSION; 244 mdhdr.msgbufsize = msgbufp->msg_size; 245 mdhdr.bitmapsize = vm_page_dump_size; 246 mdhdr.ptesize = ptesize; 247 mdhdr.kernbase = KERNBASE; 248 #if defined(PAE) || defined(PAE_TABLES) 249 mdhdr.paemode = 1; 250 #endif 251 252 dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_I386_VERSION, 253 dumpsize); 254 255 printf("Physical memory: %ju MB\n", ptoa((uintmax_t)physmem) / 1048576); 256 printf("Dumping %llu MB:", (long long)dumpsize >> 20); 257 258 error = dump_start(di, &kdh); 259 if (error != 0) 260 goto fail; 261 262 /* Dump my header */ 263 bzero(&fakept, sizeof(fakept)); 264 bcopy(&mdhdr, &fakept, sizeof(mdhdr)); 265 error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE); 266 if (error) 267 goto fail; 268 269 /* Dump msgbuf up front */ 270 error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size)); 271 if (error) 272 goto fail; 273 274 /* Dump bitmap */ 275 error = blk_write(di, (char *)vm_page_dump, 0, round_page(vm_page_dump_size)); 276 if (error) 277 goto fail; 278 279 /* Dump kernel page table pages */ 280 for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) { 281 /* We always write a page, even if it is zero */ 282 pd = (pd_entry_t *)((uintptr_t)IdlePTD + KERNBASE); /* always mapped! */ 283 j = va >> PDRSHIFT; 284 if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V)) { 285 /* This is a single 2M block. Generate a fake PTP */ 286 pa = pd[j] & PG_PS_FRAME; 287 for (k = 0; k < NPTEPG; k++) { 288 fakept[k] = (pa + (k * PAGE_SIZE)) | PG_V | PG_RW | PG_A | PG_M; 289 } 290 error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE); 291 if (error) 292 goto fail; 293 /* flush, in case we reuse fakept in the same block */ 294 error = blk_flush(di); 295 if (error) 296 goto fail; 297 continue; 298 } 299 if ((pd[j] & PG_V) == PG_V) { 300 pa = pd[j] & PG_FRAME; 301 error = blk_write(di, 0, pa, PAGE_SIZE); 302 if (error) 303 goto fail; 304 } else { 305 bzero(fakept, sizeof(fakept)); 306 error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE); 307 if (error) 308 goto fail; 309 /* flush, in case we reuse fakept in the same block */ 310 error = blk_flush(di); 311 if (error) 312 goto fail; 313 } 314 } 315 316 /* Dump memory chunks */ 317 /* XXX cluster it up and use blk_dump() */ 318 for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) { 319 bits = vm_page_dump[i]; 320 while (bits) { 321 bit = bsfl(bits); 322 pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE; 323 error = blk_write(di, 0, pa, PAGE_SIZE); 324 if (error) 325 goto fail; 326 bits &= ~(1ul << bit); 327 } 328 } 329 330 error = blk_flush(di); 331 if (error) 332 goto fail; 333 334 error = dump_finish(di, &kdh); 335 if (error != 0) 336 goto fail; 337 338 printf("\nDump complete\n"); 339 return (0); 340 341 fail: 342 if (error < 0) 343 error = -error; 344 345 if (error == ECANCELED) 346 printf("\nDump aborted\n"); 347 else if (error == E2BIG || error == ENOSPC) 348 printf("\nDump failed. Partition too small.\n"); 349 else 350 printf("\n** DUMP FAILED (ERROR %d) **\n", error); 351 return (error); 352 } 353 354 void 355 dump_add_page(vm_paddr_t pa) 356 { 357 int idx, bit; 358 359 pa >>= PAGE_SHIFT; 360 idx = pa >> 5; /* 2^5 = 32 */ 361 bit = pa & 31; 362 atomic_set_int(&vm_page_dump[idx], 1ul << bit); 363 } 364 365 void 366 dump_drop_page(vm_paddr_t pa) 367 { 368 int idx, bit; 369 370 pa >>= PAGE_SHIFT; 371 idx = pa >> 5; /* 2^5 = 32 */ 372 bit = pa & 31; 373 atomic_clear_int(&vm_page_dump[idx], 1ul << bit); 374 } 375 376