1 /*-
2 * Copyright (c) 2006 Peter Wemm
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *
26 * $FreeBSD: src/sys/amd64/amd64/minidump_machdep.c,v 1.10 2009/05/29 21:27:12 jamie Exp $
27 */
28
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/conf.h>
32 #include <sys/cons.h>
33 #include <sys/device.h>
34 #include <sys/globaldata.h>
35 #include <sys/kernel.h>
36 #include <sys/kerneldump.h>
37 #include <sys/msgbuf.h>
38 #include <sys/kbio.h>
39 #include <vm/vm.h>
40 #include <vm/vm_kern.h>
41 #include <vm/pmap.h>
42 #include <machine/atomic.h>
43 #include <machine/elf.h>
44 #include <machine/globaldata.h>
45 #include <machine/md_var.h>
46 #include <machine/vmparam.h>
47 #include <machine/minidump.h>
48
49 CTASSERT(sizeof(struct kerneldumpheader) == 512);
50
51 /*
52 * Don't touch the first SIZEOF_METADATA bytes on the dump device. This
53 * is to protect us from metadata and to protect metadata from us.
54 */
55 #define SIZEOF_METADATA (64*1024)
56
57 #define MD_ALIGN(x) (((off_t)(x) + PAGE_MASK) & ~PAGE_MASK)
58 #define DEV_ALIGN(x) roundup2((off_t)(x), DEV_BSIZE)
59
60 uint64_t *vm_page_dump;
61 vm_offset_t vm_page_dump_size;
62
63 static struct kerneldumpheader kdh;
64 static off_t dumplo;
65
66 /* Handle chunked writes. */
67 static size_t fragsz;
68 static void *dump_va;
69 static size_t counter, progress;
70
71 CTASSERT(sizeof(*vm_page_dump) == 8);
72
73 static int
is_dumpable(vm_paddr_t pa)74 is_dumpable(vm_paddr_t pa)
75 {
76 int i;
77
78 for (i = 0; dump_avail[i].phys_beg || dump_avail[i].phys_end; ++i) {
79 if (pa >= dump_avail[i].phys_beg && pa < dump_avail[i].phys_end)
80 return (1);
81 }
82 return (0);
83 }
84
/*
 * Convert a page count into units of 256 pages, rounding up.  With 4 KiB
 * pages (assumed here, not established in this file) that unit is 1 MB.
 */
#define	PG2MB(pgs)	(((pgs) + ((1 << 8) - 1)) >> 8)
86
87 static int
blk_flush(struct dumperinfo * di)88 blk_flush(struct dumperinfo *di)
89 {
90 int error;
91
92 if (fragsz == 0)
93 return (0);
94
95 error = dev_ddump(di->priv, dump_va, 0, dumplo, fragsz);
96 dumplo += fragsz;
97 fragsz = 0;
98 return (error);
99 }
100
101 static int
blk_write(struct dumperinfo * di,char * ptr,vm_paddr_t pa,size_t sz)102 blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz)
103 {
104 size_t len;
105 int error, i, c;
106 int max_iosize;
107
108 error = 0;
109 if ((sz & PAGE_MASK)) {
110 kprintf("size not page aligned\n");
111 return (EINVAL);
112 }
113 if (ptr != NULL && pa != 0) {
114 kprintf("can't have both va and pa!\n");
115 return (EINVAL);
116 }
117 if (pa != 0 && (((uintptr_t)pa) & PAGE_MASK) != 0) {
118 kprintf("address not page aligned\n");
119 return (EINVAL);
120 }
121 if (ptr != NULL) {
122 /*
123 * If we're doing a virtual dump, flush any
124 * pre-existing pa pages
125 */
126 error = blk_flush(di);
127 if (error)
128 return (error);
129 }
130 max_iosize = min(MAXPHYS, di->maxiosize);
131 while (sz) {
132 len = max_iosize - fragsz;
133 if (len > sz)
134 len = sz;
135 counter += len;
136 progress -= len;
137 if (counter >> 24) {
138 kprintf(" %ld", PG2MB(progress >> PAGE_SHIFT));
139 counter &= (1<<24) - 1;
140 }
141 if (ptr) {
142 /*kprintf("s");*/
143 error = dev_ddump(di->priv, ptr, 0, dumplo, len);
144 /* kprintf("t");*/
145 if (error)
146 return (error);
147 dumplo += len;
148 ptr += len;
149 sz -= len;
150 } else {
151 for (i = 0; i < len; i += PAGE_SIZE) {
152 dump_va = pmap_kenter_temporary(pa + i,
153 (i + fragsz) >> PAGE_SHIFT);
154 }
155 smp_invltlb();
156 fragsz += len;
157 pa += len;
158 sz -= len;
159 if (fragsz == max_iosize) {
160 error = blk_flush(di);
161 if (error)
162 return (error);
163 }
164 }
165 }
166
167 /* Check for user abort. */
168 c = cncheckc();
169 if (c == 0x03)
170 return (ECANCELED);
171 if (c != -1 && c != NOKEY)
172 kprintf(" (CTRL-C to abort) ");
173
174 return (0);
175 }
176
177 /* A fake page table page, to avoid having to handle both 4K and 2M pages */
178 static pt_entry_t fakept[NPTEPG];
179
180 void
minidumpsys(struct dumperinfo * di)181 minidumpsys(struct dumperinfo *di)
182 {
183 uint64_t dumpsize;
184 uint64_t ptesize;
185 vm_offset_t va;
186 vm_offset_t kern_end;
187 int error;
188 uint64_t bits;
189 uint64_t *pdp, *pd, *pt, pa;
190 int i, j, k, bit;
191 int kpdp, klo, khi;
192 int lpdp = -1;
193 long lpdpttl = 0;
194 struct minidumphdr2 mdhdr;
195 struct mdglobaldata *md;
196
197 cnpoll(TRUE);
198 counter = 0;
199
200 /*
201 * minidump page table format is an array of PD entries (1GB pte's),
202 * representing the entire user and kernel virtual address space
203 * (256TB).
204 *
205 * However, we will only dump the KVM portion of this space. And we
206 * only copy the PDP pages for direct access, the PD and PT pages
207 * will be included in the dump as part of the physical map.
208 */
209 ptesize = NPML4EPG * NPDPEPG * 8;
210
211 /*
212 * Walk page table pages, set bits in vm_page_dump.
213 *
214 * NOTE: kernel_vm_end can actually be below KERNBASE.
215 * Just use KvaEnd. Also note that loops which go
216 * all the way to the end of the address space might
217 * overflow the loop variable.
218 */
219 md = (struct mdglobaldata *)globaldata_find(0);
220
221 kern_end = KvaEnd;
222 if (kern_end < (vm_offset_t)&(md[ncpus]))
223 kern_end = (vm_offset_t)&(md[ncpus]);
224
225 pdp = (uint64_t *)PHYS_TO_DMAP(KPDPphys);
226 for (va = VM_MIN_KERNEL_ADDRESS; va < kern_end; va += NBPDR) {
227 /*
228 * The loop probably overflows a 64-bit int due to NBPDR.
229 */
230 if (va < VM_MIN_KERNEL_ADDRESS)
231 break;
232
233 /*
234 * KPDPphys[] is relative to VM_MIN_KERNEL_ADDRESS. It
235 * contains NKPML4E PDP pages (so we can get to all kernel
236 * PD entries from this array).
237 */
238 i = ((va - VM_MIN_KERNEL_ADDRESS) >> PDPSHIFT) &
239 (NPML4EPG * NPDPEPG - 1);
240 if (i != lpdp) {
241 lpdp = i;
242 lpdpttl = 0;
243 }
244
245 /*
246 * Calculate the PD index in the PDP. Each PD represents 1GB.
247 * KVA space can cover multiple PDP pages. The PDP array
248 * has been initialized for the entire kernel address space.
249 *
250 * We include the PD entries in the PDP in the dump
251 */
252 i = ((va - VM_MIN_KERNEL_ADDRESS) >> PDPSHIFT) &
253 (NPML4EPG * NPDPEPG - 1);
254 if ((pdp[i] & kernel_pmap->pmap_bits[PG_V_IDX]) == 0)
255 continue;
256
257 /*
258 * Add the PD page from the PDP to the dump
259 */
260 dump_add_page(pdp[i] & PG_FRAME);
261 lpdpttl += PAGE_SIZE;
262
263 pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME);
264 j = ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1));
265 if ((pd[j] & (kernel_pmap->pmap_bits[PG_PS_IDX] | kernel_pmap->pmap_bits[PG_V_IDX])) ==
266 (kernel_pmap->pmap_bits[PG_PS_IDX] | kernel_pmap->pmap_bits[PG_V_IDX])) {
267 /* This is an entire 2M page. */
268 lpdpttl += PAGE_SIZE * NPTEPG;
269 pa = pd[j] & PG_PS_FRAME;
270 for (k = 0; k < NPTEPG; k++) {
271 if (is_dumpable(pa))
272 dump_add_page(pa);
273 pa += PAGE_SIZE;
274 }
275 } else if ((pd[j] & kernel_pmap->pmap_bits[PG_V_IDX]) ==
276 kernel_pmap->pmap_bits[PG_V_IDX]) {
277 /*
278 * Add the PT page from the PD to the dump (it is no
279 * longer included in the ptemap.
280 */
281 dump_add_page(pd[j] & PG_FRAME);
282 lpdpttl += PAGE_SIZE;
283
284 /* set bit for each valid page in this 2MB block */
285 pt = (uint64_t *)PHYS_TO_DMAP(pd[j] & PG_FRAME);
286 for (k = 0; k < NPTEPG; k++) {
287 if ((pt[k] & kernel_pmap->pmap_bits[PG_V_IDX])
288 == kernel_pmap->pmap_bits[PG_V_IDX]) {
289 pa = pt[k] & PG_FRAME;
290 lpdpttl += PAGE_SIZE;
291 if (is_dumpable(pa))
292 dump_add_page(pa);
293 }
294 }
295 } else {
296 /* nothing, we're going to dump a null page */
297 }
298 }
299
300 /* Calculate dump size. */
301 dumpsize = ptesize;
302 dumpsize += round_page(msgbufp->msg_size);
303 dumpsize += round_page(vm_page_dump_size);
304
305 for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
306 bits = vm_page_dump[i];
307 while (bits) {
308 bit = bsfq(bits);
309 pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
310 /* Clear out undumpable pages now if needed */
311 if (is_dumpable(pa)) {
312 dumpsize += PAGE_SIZE;
313 } else {
314 dump_drop_page(pa);
315 }
316 bits &= ~(1ul << bit);
317 }
318 }
319 dumpsize += PAGE_SIZE;
320
321 /* Determine dump offset on device. */
322 if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) {
323 error = ENOSPC;
324 goto fail;
325 }
326 dumplo = di->mediaoffset + di->mediasize - dumpsize;
327 dumplo -= sizeof(kdh) * 2;
328 progress = dumpsize;
329
330 /* Initialize mdhdr */
331 bzero(&mdhdr, sizeof(mdhdr));
332 strcpy(mdhdr.magic, MINIDUMP2_MAGIC);
333 mdhdr.version = MINIDUMP2_VERSION;
334 mdhdr.msgbufsize = msgbufp->msg_size;
335 mdhdr.bitmapsize = vm_page_dump_size;
336 mdhdr.ptesize = ptesize;
337 mdhdr.kernbase = VM_MIN_KERNEL_ADDRESS;
338 mdhdr.dmapbase = DMAP_MIN_ADDRESS;
339 mdhdr.dmapend = DMAP_MAX_ADDRESS;
340
341 mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_AMD64_VERSION,
342 dumpsize, di->blocksize);
343
344 kprintf("Physical memory: %jd MB\n", (intmax_t)ptoa(physmem) / 1048576);
345 kprintf("Dumping %jd MB:", (intmax_t)dumpsize >> 20);
346
347 /* Dump leader */
348 error = dev_ddump(di->priv, &kdh, 0, dumplo, sizeof(kdh));
349 if (error)
350 goto fail;
351 dumplo += sizeof(kdh);
352
353 /* Dump my header */
354 bzero(fakept, sizeof(fakept));
355 bcopy(&mdhdr, fakept, sizeof(mdhdr));
356 error = blk_write(di, (char *)fakept, 0, PAGE_SIZE);
357 if (error)
358 goto fail;
359
360 /* Dump msgbuf up front */
361 error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size));
362 if (error)
363 goto fail;
364
365 /* Dump bitmap */
366 error = blk_write(di, (char *)vm_page_dump, 0, round_page(vm_page_dump_size));
367 if (error)
368 goto fail;
369
370 /*
371 * Dump a full PDP array for the entire KVM space, user and kernel.
372 * This is 512*512 1G PD entries (512*512*8 = 2MB).
373 *
374 * The minidump only dumps PD entries related to KVA space. Also
375 * note that pdp[] (aka KPDPphys[]) only covers VM_MIN_KERNEL_ADDRESS
376 * to VM_MAX_KERNEL_ADDRESS.
377 *
378 * The actual KPDPphys[] array covers a KVA space starting at KVA
379 * KPDPPHYS_KVA.
380 *
381 * By dumping a PDP[] array of PDs representing the entire virtual
382 * address space we can expand what we dump in the future.
383 */
384 pdp = (uint64_t *)PHYS_TO_DMAP(KPDPphys);
385 kpdp = (KPDPPHYS_KVA >> PDPSHIFT) &
386 (NPML4EPG * NPDPEPG - 1);
387 klo = (int)(VM_MIN_KERNEL_ADDRESS >> PDPSHIFT) &
388 (NPML4EPG * NPDPEPG - 1);
389 khi = (int)(VM_MAX_KERNEL_ADDRESS >> PDPSHIFT) &
390 (NPML4EPG * NPDPEPG - 1);
391
392 for (i = 0; i < NPML4EPG * NPDPEPG; ++i) {
393 if (i < klo || i > khi) {
394 fakept[i & (NPDPEPG - 1)] = 0;
395 } else {
396 fakept[i & (NPDPEPG - 1)] = pdp[i - kpdp];
397 }
398 if ((i & (NPDPEPG - 1)) == (NPDPEPG - 1)) {
399 error = blk_write(di, (char *)fakept, 0, PAGE_SIZE);
400 if (error)
401 goto fail;
402 error = blk_flush(di);
403 if (error)
404 goto fail;
405 }
406 }
407
408 /* Dump memory chunks */
409 /* XXX cluster it up and use blk_dump() */
410 for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
411 bits = vm_page_dump[i];
412 while (bits) {
413 bit = bsfq(bits);
414 pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
415 error = blk_write(di, 0, pa, PAGE_SIZE);
416 if (error)
417 goto fail;
418 bits &= ~(1ul << bit);
419 }
420 }
421
422 error = blk_flush(di);
423 if (error)
424 goto fail;
425
426 /* Dump trailer */
427 error = dev_ddump(di->priv, &kdh, 0, dumplo, sizeof(kdh));
428 if (error)
429 goto fail;
430 dumplo += sizeof(kdh);
431
432 /* Signal completion, signoff and exit stage left. */
433 dev_ddump(di->priv, NULL, 0, 0, 0);
434 kprintf("\nDump complete\n");
435 cnpoll(FALSE);
436 return;
437
438 fail:
439 cnpoll(FALSE);
440 if (error < 0)
441 error = -error;
442
443 if (error == ECANCELED)
444 kprintf("\nDump aborted\n");
445 else if (error == ENOSPC)
446 kprintf("\nDump failed. Partition too small.\n");
447 else
448 kprintf("\n** DUMP FAILED (ERROR %d) **\n", error);
449 }
450
451 void
dump_add_page(vm_paddr_t pa)452 dump_add_page(vm_paddr_t pa)
453 {
454 int idx, bit;
455
456 pa >>= PAGE_SHIFT;
457 idx = pa >> 6; /* 2^6 = 64 */
458 bit = pa & 63;
459 atomic_set_long(&vm_page_dump[idx], 1ul << bit);
460 }
461
462 void
dump_drop_page(vm_paddr_t pa)463 dump_drop_page(vm_paddr_t pa)
464 {
465 int idx, bit;
466
467 pa >>= PAGE_SHIFT;
468 idx = pa >> 6; /* 2^6 = 64 */
469 bit = pa & 63;
470 atomic_clear_long(&vm_page_dump[idx], 1ul << bit);
471 }
472