1 /* $NetBSD: dumpsys.c,v 1.4 2008/04/28 20:23:24 martin Exp $ */ 2 3 /*- 4 * Copyright (c) 1996, 1997, 1998, 2000, 2004, 2006, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Charles M. Hannum, by Jason R. Thorpe of the Numerical Aerospace 9 * Simulation Facility, NASA Ames Research Center. 10 * 11 * This code is derived from software contributed to The NetBSD Foundation 12 * by Coyote Point Systems, Inc. which was written under contract to Coyote 13 * Point by Jed Davis and Devon O'Dell. 14 * 15 * Redistribution and use in source and binary forms, with or without 16 * modification, are permitted provided that the following conditions 17 * are met: 18 * 1. Redistributions of source code must retain the above copyright 19 * notice, this list of conditions and the following disclaimer. 20 * 2. Redistributions in binary form must reproduce the above copyright 21 * notice, this list of conditions and the following disclaimer in the 22 * documentation and/or other materials provided with the distribution. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 25 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 26 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 27 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 28 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 32 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 33 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 
35 */ 36 37 /*- 38 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. 39 * All rights reserved. 40 * 41 * This code is derived from software contributed to Berkeley by 42 * William Jolitz. 43 * 44 * Redistribution and use in source and binary forms, with or without 45 * modification, are permitted provided that the following conditions 46 * are met: 47 * 1. Redistributions of source code must retain the above copyright 48 * notice, this list of conditions and the following disclaimer. 49 * 2. Redistributions in binary form must reproduce the above copyright 50 * notice, this list of conditions and the following disclaimer in the 51 * documentation and/or other materials provided with the distribution. 52 * 3. Neither the name of the University nor the names of its contributors 53 * may be used to endorse or promote products derived from this software 54 * without specific prior written permission. 55 * 56 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 57 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 58 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 59 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 60 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 61 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 62 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 63 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 64 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 65 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 66 * SUCH DAMAGE. 
67 * 68 * @(#)machdep.c 7.4 (Berkeley) 6/3/91 69 */ 70 71 #include <sys/cdefs.h> 72 __KERNEL_RCSID(0, "$NetBSD: dumpsys.c,v 1.4 2008/04/28 20:23:24 martin Exp $"); 73 74 #include <sys/param.h> 75 #include <sys/systm.h> 76 #include <sys/kernel.h> 77 #include <sys/kcore.h> 78 #include <sys/core.h> 79 #include <sys/conf.h> 80 #include <sys/exec.h> 81 #include <sys/exec_aout.h> 82 83 #include <machine/kcore.h> 84 85 #include <uvm/uvm_extern.h> 86 #include <uvm/uvm_page.h> 87 88 /* 89 * Exports, needed by savecore, the debugger or elsewhere in the kernel. 90 */ 91 92 void dodumpsys(void); 93 void dumpsys(void); 94 95 struct pcb dumppcb; 96 uint32_t dumpmag = 0x8fca0101; /* magic number */ 97 int dumpsize; /* pages */ 98 long dumplo; /* blocks */ 99 int sparse_dump = 1; 100 101 /* 102 * Module private. 103 */ 104 105 #define dump_headerbuf_size PAGE_SIZE 106 #define dump_headerbuf_end (dump_headerbuf + dump_headerbuf_size) 107 #define dump_headerbuf_avail (dump_headerbuf_end - dump_headerbuf_ptr) 108 #define BYTES_PER_DUMP PAGE_SIZE /* must be a multiple of pagesize */ 109 110 static vaddr_t dumpspace; 111 static paddr_t max_paddr; 112 static uint8_t *sparse_dump_physmap; 113 114 static uint8_t *dump_headerbuf; 115 static uint8_t *dump_headerbuf_ptr; 116 static daddr_t dump_header_blkno; 117 118 static size_t dump_nmemsegs; 119 static size_t dump_npages; 120 static size_t dump_header_size; 121 static size_t dump_totalbytesleft; 122 123 static int cpu_dump(void); 124 static int cpu_dumpsize(void); 125 static u_long cpu_dump_mempagecnt(void); 126 127 static void dump_misc_init(void); 128 static void dump_seg_prep(void); 129 static int dump_seg_iter(int (*)(paddr_t, paddr_t)); 130 131 static void sparse_dump_reset(void); 132 static void sparse_dump_mark(vaddr_t, vaddr_t, int); 133 static void cpu_dump_prep_sparse(void); 134 135 static void dump_header_start(void); 136 static int dump_header_flush(void); 137 static int dump_header_addbytes(const void*, size_t); 138 static 
int dump_header_addseg(paddr_t, paddr_t); 139 static int dump_header_finish(void); 140 141 static int dump_seg_count_range(paddr_t, paddr_t); 142 static int dumpsys_seg(paddr_t, paddr_t); 143 144 /* 145 * From machdep.c. 146 */ 147 148 extern phys_ram_seg_t mem_clusters[VM_PHYSSEG_MAX]; 149 extern int mem_cluster_cnt; 150 151 void 152 dodumpsys(void) 153 { 154 const struct bdevsw *bdev; 155 int dumpend, psize; 156 int error; 157 158 if (dumpdev == NODEV) 159 return; 160 161 bdev = bdevsw_lookup(dumpdev); 162 if (bdev == NULL || bdev->d_psize == NULL) 163 return; 164 165 /* 166 * For dumps during autoconfiguration, 167 * if dump device has already configured... 168 */ 169 if (dumpsize == 0) 170 cpu_dumpconf(); 171 if (dumplo <= 0 || dumpsize == 0) { 172 printf("\ndump to dev %u,%u not possible\n", major(dumpdev), 173 minor(dumpdev)); 174 return; 175 } 176 printf("\ndumping to dev %u,%u offset %ld\n", major(dumpdev), 177 minor(dumpdev), dumplo); 178 179 psize = (*bdev->d_psize)(dumpdev); 180 printf("dump "); 181 if (psize == -1) { 182 printf("area unavailable\n"); 183 return; 184 } 185 186 #if 0 /* XXX this doesn't work. grr. 
*/ 187 /* toss any characters present prior to dump */ 188 while (sget() != NULL); /*syscons and pccons differ */ 189 #endif 190 191 dump_seg_prep(); 192 dumpend = dumplo + btodb(dump_header_size) + ctod(dump_npages); 193 if (dumpend > psize) { 194 printf("failed: insufficient space (%d < %d)\n", 195 psize, dumpend); 196 goto failed; 197 } 198 199 dump_header_start(); 200 if ((error = cpu_dump()) != 0) 201 goto err; 202 if ((error = dump_header_finish()) != 0) 203 goto err; 204 205 if (dump_header_blkno != dumplo + btodb(dump_header_size)) { 206 printf("BAD header size (%ld [written] != %ld [expected])\n", 207 (long)(dump_header_blkno - dumplo), 208 (long)btodb(dump_header_size)); 209 goto failed; 210 } 211 212 dump_totalbytesleft = roundup(ptoa(dump_npages), BYTES_PER_DUMP); 213 error = dump_seg_iter(dumpsys_seg); 214 215 if (error == 0 && dump_header_blkno != dumpend) { 216 printf("BAD dump size (%ld [written] != %ld [expected])\n", 217 (long)(dumpend - dumplo), 218 (long)(dump_header_blkno - dumplo)); 219 goto failed; 220 } 221 222 err: 223 switch (error) { 224 225 case ENXIO: 226 printf("device bad\n"); 227 break; 228 229 case EFAULT: 230 printf("device not ready\n"); 231 break; 232 233 case EINVAL: 234 printf("area improper\n"); 235 break; 236 237 case EIO: 238 printf("i/o error\n"); 239 break; 240 241 case EINTR: 242 printf("aborted from console\n"); 243 break; 244 245 case 0: 246 printf("succeeded\n"); 247 break; 248 249 default: 250 printf("error %d\n", error); 251 break; 252 } 253 failed: 254 printf("\n\n"); 255 delay(5000000); /* 5 seconds */ 256 } 257 258 /* 259 * This is called by main to set dumplo and dumpsize. 260 * Dumps always skip the first PAGE_SIZE of disk space 261 * in case there might be a disk label stored there. 262 * If there is extra space, put dump at the end to 263 * reduce the chance that swapping trashes it. 
264 * 265 * Sparse dumps can't placed as close to the end as possible, because 266 * savecore(8) has to know where to start reading in the dump device 267 * before it has access to any of the crashed system's state. 268 * 269 * Note also that a sparse dump will never be larger than a full one: 270 * in order to add a phys_ram_seg_t to the header, at least one page 271 * must be removed. 272 */ 273 void 274 cpu_dumpconf(void) 275 { 276 const struct bdevsw *bdev; 277 int nblks, dumpblks; /* size of dump area */ 278 279 if (dumpdev == NODEV) 280 goto bad; 281 bdev = bdevsw_lookup(dumpdev); 282 if (bdev == NULL) { 283 dumpdev = NODEV; 284 goto bad; 285 } 286 if (bdev->d_psize == NULL) 287 goto bad; 288 nblks = (*bdev->d_psize)(dumpdev); 289 if (nblks <= ctod(1)) 290 goto bad; 291 292 dumpblks = cpu_dumpsize(); 293 if (dumpblks < 0) 294 goto bad; 295 dumpblks += ctod(cpu_dump_mempagecnt()); 296 297 /* If dump won't fit (incl. room for possible label): */ 298 if (dumpblks > (nblks - ctod(1))) { 299 /* A sparse dump might (and hopefully will) fit. */ 300 dumplo = ctod(1); 301 } else { 302 /* Put dump at end of partition */ 303 dumplo = nblks - dumpblks; 304 } 305 306 /* dumpsize is in page units, and doesn't include headers. */ 307 dumpsize = cpu_dump_mempagecnt(); 308 309 /* Now that we've decided this will work, init ancillary stuff. */ 310 dump_misc_init(); 311 return; 312 313 bad: 314 dumpsize = 0; 315 } 316 317 vaddr_t 318 reserve_dumppages(vaddr_t p) 319 { 320 321 dumpspace = p; 322 return (p + BYTES_PER_DUMP); 323 } 324 325 /* 326 * Perform assorted dump-related initialization tasks. Assumes that 327 * the maximum physical memory address will not increase afterwards. 
 */
static void
dump_misc_init(void)
{
	int i;

	if (dump_headerbuf != NULL)
		return; /* already called */

	/* Find the highest physical address covered by any cluster. */
	for (i = 0; i < mem_cluster_cnt; ++i) {
		paddr_t top = mem_clusters[i].start + mem_clusters[i].size;
		if (max_paddr < top)
			max_paddr = top;
	}
#ifdef DEBUG
	printf("dump_misc_init: max_paddr = 0x%lx\n",
	    (unsigned long)max_paddr);
#endif

	/*
	 * One bit per physical page up to max_paddr, rounded up to whole
	 * pages of bitmap; wired and zeroed, so all pages start excluded.
	 */
	sparse_dump_physmap = (void*)uvm_km_alloc(kernel_map,
	    roundup(max_paddr / (PAGE_SIZE * NBBY), PAGE_SIZE),
	    PAGE_SIZE, UVM_KMF_WIRED|UVM_KMF_ZERO);
	/* Staging buffer for incremental header output (one page). */
	dump_headerbuf = (void*)uvm_km_alloc(kernel_map,
	    dump_headerbuf_size,
	    PAGE_SIZE, UVM_KMF_WIRED|UVM_KMF_ZERO);
	/* XXXjld should check for failure here, disable dumps if so. */
}

/*
 * Clear the set of pages to include in a sparse dump.
 */
static void
sparse_dump_reset(void)
{

	memset(sparse_dump_physmap, 0,
	    roundup(max_paddr / (PAGE_SIZE * NBBY), PAGE_SIZE));
}

/*
 * Include or exclude pages in a sparse dump, by half-open virtual
 * address interval (which may wrap around the end of the space).
 */
static void
sparse_dump_mark(vaddr_t vbegin, vaddr_t vend, int includep)
{
	pmap_t pmap;
	paddr_t p;
	vaddr_t v;

	/*
	 * If a partial page is called for, the whole page must be included.
	 */
	if (includep) {
		vbegin = rounddown(vbegin, PAGE_SIZE);
		vend = roundup(vend, PAGE_SIZE);
	} else {
		vbegin = roundup(vbegin, PAGE_SIZE);
		vend = rounddown(vend, PAGE_SIZE);
	}

	/*
	 * Walk one page at a time; "!=" rather than "<" terminates the
	 * loop even when the interval wraps around the top of the
	 * address space.  Only pages with a live mapping are touched.
	 */
	pmap = pmap_kernel();
	for (v = vbegin; v != vend; v += PAGE_SIZE) {
		if (pmap_extract(pmap, v, &p)) {
			if (includep)
				setbit(sparse_dump_physmap, p/PAGE_SIZE);
			else
				clrbit(sparse_dump_physmap, p/PAGE_SIZE);
		}
	}
}

/*
 * Machine-dependently decides on the contents of a sparse dump, using
 * the above.
 */
static void
cpu_dump_prep_sparse(void)
{

	sparse_dump_reset();
	/* XXX could the alternate recursive page table be skipped? */
	/* Half-open interval [PTE_BASE, 0) wraps around the top of KVA. */
	sparse_dump_mark((vaddr_t)PTE_BASE, 0, 1);
	/* Memory for I/O buffers could be unmarked here, for example. */
	/* The kernel text could also be unmarked, but gdb would be upset. */
}

/*
 * Abstractly iterate over the collection of memory segments to be
 * dumped; the callback lacks the customary environment-pointer
 * argument because none of the current users really need one.
 *
 * To be used only after dump_seg_prep is called to set things up.
 */
static int
dump_seg_iter(int (*callback)(paddr_t, paddr_t))
{
	int error, i;

	/* Invoke the callback, aborting the whole walk on first error. */
#define CALLBACK(start,size) do {		\
	error = callback(start,size);		\
	if (error)				\
		return error;			\
} while(0)

	for (i = 0; i < mem_cluster_cnt; ++i) {
		/*
		 * The bitmap is scanned within each memory segment,
		 * rather than over its entire domain, in case any
		 * pages outside of the memory proper have been mapped
		 * into kva; they might be devices that wouldn't
		 * appreciate being arbitrarily read, and including
		 * them could also break the assumption that a sparse
		 * dump will always be smaller than a full one.
		 */
		if (sparse_dump) {
			paddr_t p, start, end;
			int lastset;

			start = mem_clusters[i].start;
			end = start + mem_clusters[i].size;
			start = rounddown(start, PAGE_SIZE); /* unnecessary? */
			/* Coalesce runs of marked pages into one callback. */
			lastset = 0;
			for (p = start; p < end; p += PAGE_SIZE) {
				int thisset = isset(sparse_dump_physmap,
				    p/PAGE_SIZE);

				if (!lastset && thisset)
					start = p;
				if (lastset && !thisset)
					CALLBACK(start, p - start);
				lastset = thisset;
			}
			/* Emit a run extending to the end of the cluster. */
			if (lastset)
				CALLBACK(start, p - start);
		} else
			CALLBACK(mem_clusters[i].start, mem_clusters[i].size);
	}
	return 0;
#undef CALLBACK
}

/*
 * Prepare for an impending core dump: decide what's being dumped and
 * how much space it will take up.
 */
static void
dump_seg_prep(void)
{

	if (sparse_dump)
		cpu_dump_prep_sparse();

	dump_nmemsegs = 0;
	dump_npages = 0;
	dump_seg_iter(dump_seg_count_range);

	/* Header: kcore seg + MD header + one descriptor per segment. */
	dump_header_size = ALIGN(sizeof(kcore_seg_t)) +
	    ALIGN(sizeof(cpu_kcore_hdr_t)) +
	    ALIGN(dump_nmemsegs * sizeof(phys_ram_seg_t));
	dump_header_size = roundup(dump_header_size, dbtob(1));

	/*
	 * savecore(8) will read this to decide how many pages to
	 * copy, and cpu_dumpconf has already used the pessimistic
	 * value to set dumplo, so it's time to tell the truth.
	 */
	dumpsize = dump_npages;	/* XXX could these just be one variable? */
}

/*
 * dump_seg_iter() callback: tally one segment and its page count.
 */
static int
dump_seg_count_range(paddr_t start, paddr_t size)
{

	++dump_nmemsegs;
	dump_npages += size / PAGE_SIZE;
	return 0;
}

/*
 * A sparse dump's header may be rather large, due to the number of
 * "segments" emitted.  These routines manage a simple output buffer,
 * so that the header can be written to disk incrementally.
511 */ 512 static void 513 dump_header_start(void) 514 { 515 516 dump_headerbuf_ptr = dump_headerbuf; 517 dump_header_blkno = dumplo; 518 } 519 520 static int 521 dump_header_flush(void) 522 { 523 const struct bdevsw *bdev; 524 size_t to_write; 525 int error; 526 527 bdev = bdevsw_lookup(dumpdev); 528 to_write = roundup(dump_headerbuf_ptr - dump_headerbuf, dbtob(1)); 529 error = bdev->d_dump(dumpdev, dump_header_blkno, 530 dump_headerbuf, to_write); 531 dump_header_blkno += btodb(to_write); 532 dump_headerbuf_ptr = dump_headerbuf; 533 return error; 534 } 535 536 static int 537 dump_header_addbytes(const void* vptr, size_t n) 538 { 539 const char *ptr = vptr; 540 int error; 541 542 while (n > dump_headerbuf_avail) { 543 memcpy(dump_headerbuf_ptr, ptr, dump_headerbuf_avail); 544 ptr += dump_headerbuf_avail; 545 n -= dump_headerbuf_avail; 546 dump_headerbuf_ptr = dump_headerbuf_end; 547 error = dump_header_flush(); 548 if (error) 549 return error; 550 } 551 memcpy(dump_headerbuf_ptr, ptr, n); 552 dump_headerbuf_ptr += n; 553 554 return 0; 555 } 556 557 static int 558 dump_header_addseg(paddr_t start, paddr_t size) 559 { 560 phys_ram_seg_t seg = { start, size }; 561 562 return dump_header_addbytes(&seg, sizeof(seg)); 563 } 564 565 static int 566 dump_header_finish(void) 567 { 568 569 memset(dump_headerbuf_ptr, 0, dump_headerbuf_avail); 570 return dump_header_flush(); 571 } 572 573 /* 574 * cpu_dumpsize: calculate size of machine-dependent kernel core dump headers 575 * for a full (non-sparse) dump. 576 */ 577 static int 578 cpu_dumpsize(void) 579 { 580 int size; 581 582 size = ALIGN(sizeof(kcore_seg_t)) + ALIGN(sizeof(cpu_kcore_hdr_t)) + 583 ALIGN(mem_cluster_cnt * sizeof(phys_ram_seg_t)); 584 if (roundup(size, dbtob(1)) != dbtob(1)) 585 return (-1); 586 587 return (1); 588 } 589 590 /* 591 * cpu_dump_mempagecnt: calculate the size of RAM (in pages) to be dumped 592 * for a full (non-sparse) dump. 
 */
static u_long
cpu_dump_mempagecnt(void)
{
	u_long i, n;

	n = 0;
	for (i = 0; i < mem_cluster_cnt; i++)
		n += atop(mem_clusters[i].size);
	return (n);
}

/*
 * cpu_dump: dump the machine-dependent kernel core dump headers.
 */
static int
cpu_dump(void)
{
	int (*dump)(dev_t, daddr_t, void *, size_t);
	kcore_seg_t seg;
	cpu_kcore_hdr_t cpuhdr;
	const struct bdevsw *bdev;

	bdev = bdevsw_lookup(dumpdev);
	if (bdev == NULL)
		return (ENXIO);
	dump = bdev->d_dump;

	/*
	 * Generate a segment header.
	 */
	CORE_SETMAGIC(seg, KCORE_MAGIC, MID_MACHINE, CORE_CPU);
	seg.c_size = dump_header_size - ALIGN(sizeof(seg));
	(void)dump_header_addbytes(&seg, ALIGN(sizeof(seg)));

	/*
	 * Add the machine-dependent header info.
	 */
	cpuhdr.pdppaddr = PDPpaddr;
	cpuhdr.nmemsegs = dump_nmemsegs;
	(void)dump_header_addbytes(&cpuhdr, ALIGN(sizeof(cpuhdr)));

	/*
	 * Write out the memory segment descriptors.
	 */
	return dump_seg_iter(dump_header_addseg);
}

/*
 * dump_seg_iter() callback: write one physical memory segment to the
 * dump device, mapping it through the reserved dumpspace window
 * BYTES_PER_DUMP bytes at a time, printing a progress count in MB.
 * Returns 0 or an error from the driver's d_dump routine.
 */
static int
dumpsys_seg(paddr_t maddr, paddr_t bytes)
{
	u_long i, m, n;
	daddr_t blkno;
	const struct bdevsw *bdev;
	int (*dump)(dev_t, daddr_t, void *, size_t);
	int error;

	bdev = bdevsw_lookup(dumpdev);
	dump = bdev->d_dump;

	blkno = dump_header_blkno;
	for (i = 0; i < bytes; i += n, dump_totalbytesleft -= n) {
		/* Print out how many MBs we have left to go. */
		if ((dump_totalbytesleft % (1024*1024)) == 0)
			printf("%lu ", (unsigned long)
			    (dump_totalbytesleft / (1024 * 1024)));

		/* Limit size for next transfer. */
		n = bytes - i;
		if (n > BYTES_PER_DUMP)
			n = BYTES_PER_DUMP;

		/* Map the next chunk of the segment into the window. */
		for (m = 0; m < n; m += NBPG)
			pmap_kenter_pa(dumpspace + m, maddr + m,
			    VM_PROT_READ);
		pmap_update(pmap_kernel());

		error = (*dump)(dumpdev, blkno, (void *)dumpspace, n);
		if (error)
			return error;
		maddr += n;
		blkno += btodb(n);		/* XXX? */

#if 0	/* XXX this doesn't work.  grr. */
		/* operator aborting dump? */
		if (sget() != NULL)
			return EINTR;
#endif
	}
	/* Publish progress so the next segment continues where we stopped. */
	dump_header_blkno = blkno;

	return 0;
}