xref: /freebsd/sys/amd64/amd64/minidump_machdep.c (revision f05cddf9)
1 /*-
2  * Copyright (c) 2006 Peter Wemm
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29 
30 #include "opt_pmap.h"
31 #include "opt_watchdog.h"
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/conf.h>
36 #include <sys/cons.h>
37 #include <sys/kernel.h>
38 #include <sys/kerneldump.h>
39 #include <sys/msgbuf.h>
40 #include <sys/watchdog.h>
41 #include <vm/vm.h>
42 #include <vm/vm_page.h>
43 #include <vm/vm_phys.h>
44 #include <vm/pmap.h>
45 #include <machine/atomic.h>
46 #include <machine/elf.h>
47 #include <machine/md_var.h>
48 #include <machine/vmparam.h>
49 #include <machine/minidump.h>
50 
51 CTASSERT(sizeof(struct kerneldumpheader) == 512);
52 
53 /*
54  * Don't touch the first SIZEOF_METADATA bytes on the dump device. This
55  * is to protect us from metadata and to protect metadata from us.
56  */
57 #define	SIZEOF_METADATA		(64*1024)
58 
59 #define	MD_ALIGN(x)	(((off_t)(x) + PAGE_MASK) & ~PAGE_MASK)
60 #define	DEV_ALIGN(x)	(((off_t)(x) + (DEV_BSIZE-1)) & ~(DEV_BSIZE-1))
61 
62 uint64_t *vm_page_dump;
63 int vm_page_dump_size;
64 
65 static struct kerneldumpheader kdh;
66 static off_t dumplo;
67 
68 /* Handle chunked writes. */
69 static size_t fragsz;
70 static void *dump_va;
71 static size_t counter, progress, dumpsize;
72 
73 CTASSERT(sizeof(*vm_page_dump) == 8);
74 
75 static int
76 is_dumpable(vm_paddr_t pa)
77 {
78 	vm_page_t m;
79 	int i;
80 
81 	if ((m = vm_phys_paddr_to_vm_page(pa)) != NULL)
82 		return ((m->flags & PG_NODUMP) == 0);
83 	for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) {
84 		if (pa >= dump_avail[i] && pa < dump_avail[i + 1])
85 			return (1);
86 	}
87 	return (0);
88 }
89 
90 #define PG2MB(pgs) (((pgs) + (1 << 8) - 1) >> 8)
91 
92 static int
93 blk_flush(struct dumperinfo *di)
94 {
95 	int error;
96 
97 	if (fragsz == 0)
98 		return (0);
99 
100 	error = dump_write(di, dump_va, 0, dumplo, fragsz);
101 	dumplo += fragsz;
102 	fragsz = 0;
103 	return (error);
104 }
105 
/*
 * Percentage bands used to throttle progress output: each band is
 * announced at most once, the first time the completed percentage
 * falls inside it.
 */
static struct {
	int min_per;
	int max_per;
	int visited;
} progress_track[10] = {
	{  0,  10, 0 }, { 10,  20, 0 },
	{ 20,  30, 0 }, { 30,  40, 0 },
	{ 40,  50, 0 }, { 50,  60, 0 },
	{ 60,  70, 0 }, { 70,  80, 0 },
	{ 80,  90, 0 }, { 90, 100, 0 }
};

/*
 * Print a "..N%" marker the first time the dump enters a new band.
 *
 * 'progress' is the number of bytes still to be written and 'dumpsize'
 * the total, so the completed percentage is 100 minus the remaining
 * share.  Band limits are inclusive on both ends; a value sitting on a
 * shared boundary belongs to the lower band, because the first match wins.
 */
static void
report_progress(size_t progress, size_t dumpsize)
{
	int pct, slot;

	pct = 100 - ((progress * 100) / dumpsize);

	/* Locate the first band that contains pct. */
	slot = 0;
	while (slot < 10 && (pct < progress_track[slot].min_per ||
	    pct > progress_track[slot].max_per))
		slot++;

	/* Outside every band, or band already announced: stay quiet. */
	if (slot == 10 || progress_track[slot].visited)
		return;

	progress_track[slot].visited = 1;
	printf("..%d%%", pct);
}
139 
140 static int
141 blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz)
142 {
143 	size_t len;
144 	int error, i, c;
145 	u_int maxdumpsz;
146 
147 	maxdumpsz = min(di->maxiosize, MAXDUMPPGS * PAGE_SIZE);
148 	if (maxdumpsz == 0)	/* seatbelt */
149 		maxdumpsz = PAGE_SIZE;
150 	error = 0;
151 	if ((sz % PAGE_SIZE) != 0) {
152 		printf("size not page aligned\n");
153 		return (EINVAL);
154 	}
155 	if (ptr != NULL && pa != 0) {
156 		printf("cant have both va and pa!\n");
157 		return (EINVAL);
158 	}
159 	if (pa != 0 && (((uintptr_t)ptr) % PAGE_SIZE) != 0) {
160 		printf("address not page aligned\n");
161 		return (EINVAL);
162 	}
163 	if (ptr != NULL) {
164 		/* If we're doing a virtual dump, flush any pre-existing pa pages */
165 		error = blk_flush(di);
166 		if (error)
167 			return (error);
168 	}
169 	while (sz) {
170 		len = maxdumpsz - fragsz;
171 		if (len > sz)
172 			len = sz;
173 		counter += len;
174 		progress -= len;
175 		if (counter >> 24) {
176 			report_progress(progress, dumpsize);
177 			counter &= (1<<24) - 1;
178 		}
179 
180 		wdog_kern_pat(WD_LASTVAL);
181 
182 		if (ptr) {
183 			error = dump_write(di, ptr, 0, dumplo, len);
184 			if (error)
185 				return (error);
186 			dumplo += len;
187 			ptr += len;
188 			sz -= len;
189 		} else {
190 			for (i = 0; i < len; i += PAGE_SIZE)
191 				dump_va = pmap_kenter_temporary(pa + i, (i + fragsz) >> PAGE_SHIFT);
192 			fragsz += len;
193 			pa += len;
194 			sz -= len;
195 			if (fragsz == maxdumpsz) {
196 				error = blk_flush(di);
197 				if (error)
198 					return (error);
199 			}
200 		}
201 
202 		/* Check for user abort. */
203 		c = cncheckc();
204 		if (c == 0x03)
205 			return (ECANCELED);
206 		if (c != -1)
207 			printf(" (CTRL-C to abort) ");
208 	}
209 
210 	return (0);
211 }
212 
213 /* A fake page table page, to avoid having to handle both 4K and 2M pages */
214 static pd_entry_t fakepd[NPDEPG];
215 
216 void
217 minidumpsys(struct dumperinfo *di)
218 {
219 	uint32_t pmapsize;
220 	vm_offset_t va;
221 	int error;
222 	uint64_t bits;
223 	uint64_t *pdp, *pd, *pt, pa;
224 	int i, j, k, n, bit;
225 	int retry_count;
226 	struct minidumphdr mdhdr;
227 
228 	retry_count = 0;
229  retry:
230 	retry_count++;
231 	counter = 0;
232 	/* Walk page table pages, set bits in vm_page_dump */
233 	pmapsize = 0;
234 	pdp = (uint64_t *)PHYS_TO_DMAP(KPDPphys);
235 	for (va = VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + nkpt * NBPDR,
236 	    kernel_vm_end); ) {
237 		/*
238 		 * We always write a page, even if it is zero. Each
239 		 * page written corresponds to 1GB of space
240 		 */
241 		pmapsize += PAGE_SIZE;
242 		i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1);
243 		if ((pdp[i] & PG_V) == 0) {
244 			va += NBPDP;
245 			continue;
246 		}
247 
248 		/*
249 		 * 1GB page is represented as 512 2MB pages in a dump.
250 		 */
251 		if ((pdp[i] & PG_PS) != 0) {
252 			va += NBPDP;
253 			pa = pdp[i] & PG_PS_FRAME;
254 			for (n = 0; n < NPDEPG * NPTEPG; n++) {
255 				if (is_dumpable(pa))
256 					dump_add_page(pa);
257 				pa += PAGE_SIZE;
258 			}
259 			continue;
260 		}
261 
262 		pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME);
263 		for (n = 0; n < NPDEPG; n++, va += NBPDR) {
264 			j = (va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1);
265 
266 			if ((pd[j] & PG_V) == 0)
267 				continue;
268 
269 			if ((pd[j] & PG_PS) != 0) {
270 				/* This is an entire 2M page. */
271 				pa = pd[j] & PG_PS_FRAME;
272 				for (k = 0; k < NPTEPG; k++) {
273 					if (is_dumpable(pa))
274 						dump_add_page(pa);
275 					pa += PAGE_SIZE;
276 				}
277 				continue;
278 			}
279 
280 			pa = pd[j] & PG_FRAME;
281 			/* set bit for this PTE page */
282 			if (is_dumpable(pa))
283 				dump_add_page(pa);
284 			/* and for each valid page in this 2MB block */
285 			pt = (uint64_t *)PHYS_TO_DMAP(pd[j] & PG_FRAME);
286 			for (k = 0; k < NPTEPG; k++) {
287 				if ((pt[k] & PG_V) == 0)
288 					continue;
289 				pa = pt[k] & PG_FRAME;
290 				if (is_dumpable(pa))
291 					dump_add_page(pa);
292 			}
293 		}
294 	}
295 
296 	/* Calculate dump size. */
297 	dumpsize = pmapsize;
298 	dumpsize += round_page(msgbufp->msg_size);
299 	dumpsize += round_page(vm_page_dump_size);
300 	for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
301 		bits = vm_page_dump[i];
302 		while (bits) {
303 			bit = bsfq(bits);
304 			pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
305 			/* Clear out undumpable pages now if needed */
306 			if (is_dumpable(pa)) {
307 				dumpsize += PAGE_SIZE;
308 			} else {
309 				dump_drop_page(pa);
310 			}
311 			bits &= ~(1ul << bit);
312 		}
313 	}
314 	dumpsize += PAGE_SIZE;
315 
316 	/* Determine dump offset on device. */
317 	if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) {
318 		error = E2BIG;
319 		goto fail;
320 	}
321 	dumplo = di->mediaoffset + di->mediasize - dumpsize;
322 	dumplo -= sizeof(kdh) * 2;
323 	progress = dumpsize;
324 
325 	/* Initialize mdhdr */
326 	bzero(&mdhdr, sizeof(mdhdr));
327 	strcpy(mdhdr.magic, MINIDUMP_MAGIC);
328 	mdhdr.version = MINIDUMP_VERSION;
329 	mdhdr.msgbufsize = msgbufp->msg_size;
330 	mdhdr.bitmapsize = vm_page_dump_size;
331 	mdhdr.pmapsize = pmapsize;
332 	mdhdr.kernbase = VM_MIN_KERNEL_ADDRESS;
333 	mdhdr.dmapbase = DMAP_MIN_ADDRESS;
334 	mdhdr.dmapend = DMAP_MAX_ADDRESS;
335 
336 	mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_AMD64_VERSION, dumpsize, di->blocksize);
337 
338 	printf("Dumping %llu out of %ju MB:", (long long)dumpsize >> 20,
339 	    ptoa((uintmax_t)physmem) / 1048576);
340 
341 	/* Dump leader */
342 	error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh));
343 	if (error)
344 		goto fail;
345 	dumplo += sizeof(kdh);
346 
347 	/* Dump my header */
348 	bzero(&fakepd, sizeof(fakepd));
349 	bcopy(&mdhdr, &fakepd, sizeof(mdhdr));
350 	error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE);
351 	if (error)
352 		goto fail;
353 
354 	/* Dump msgbuf up front */
355 	error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size));
356 	if (error)
357 		goto fail;
358 
359 	/* Dump bitmap */
360 	error = blk_write(di, (char *)vm_page_dump, 0, round_page(vm_page_dump_size));
361 	if (error)
362 		goto fail;
363 
364 	/* Dump kernel page directory pages */
365 	bzero(fakepd, sizeof(fakepd));
366 	pdp = (uint64_t *)PHYS_TO_DMAP(KPDPphys);
367 	for (va = VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + nkpt * NBPDR,
368 	    kernel_vm_end); va += NBPDP) {
369 		i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1);
370 
371 		/* We always write a page, even if it is zero */
372 		if ((pdp[i] & PG_V) == 0) {
373 			error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE);
374 			if (error)
375 				goto fail;
376 			/* flush, in case we reuse fakepd in the same block */
377 			error = blk_flush(di);
378 			if (error)
379 				goto fail;
380 			continue;
381 		}
382 
383 		/* 1GB page is represented as 512 2MB pages in a dump */
384 		if ((pdp[i] & PG_PS) != 0) {
385 			/* PDPE and PDP have identical layout in this case */
386 			fakepd[0] = pdp[i];
387 			for (j = 1; j < NPDEPG; j++)
388 				fakepd[j] = fakepd[j - 1] + NBPDR;
389 			error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE);
390 			if (error)
391 				goto fail;
392 			/* flush, in case we reuse fakepd in the same block */
393 			error = blk_flush(di);
394 			if (error)
395 				goto fail;
396 			bzero(fakepd, sizeof(fakepd));
397 			continue;
398 		}
399 
400 		pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME);
401 		error = blk_write(di, (char *)pd, 0, PAGE_SIZE);
402 		if (error)
403 			goto fail;
404 		error = blk_flush(di);
405 		if (error)
406 			goto fail;
407 	}
408 
409 	/* Dump memory chunks */
410 	/* XXX cluster it up and use blk_dump() */
411 	for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
412 		bits = vm_page_dump[i];
413 		while (bits) {
414 			bit = bsfq(bits);
415 			pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
416 			error = blk_write(di, 0, pa, PAGE_SIZE);
417 			if (error)
418 				goto fail;
419 			bits &= ~(1ul << bit);
420 		}
421 	}
422 
423 	error = blk_flush(di);
424 	if (error)
425 		goto fail;
426 
427 	/* Dump trailer */
428 	error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh));
429 	if (error)
430 		goto fail;
431 	dumplo += sizeof(kdh);
432 
433 	/* Signal completion, signoff and exit stage left. */
434 	dump_write(di, NULL, 0, 0, 0);
435 	printf("\nDump complete\n");
436 	return;
437 
438  fail:
439 	if (error < 0)
440 		error = -error;
441 
442 	printf("\n");
443 	if (error == ENOSPC) {
444 		printf("Dump map grown while dumping. ");
445 		if (retry_count < 5) {
446 			printf("Retrying...\n");
447 			goto retry;
448 		}
449 		printf("Dump failed.\n");
450 	}
451 	else if (error == ECANCELED)
452 		printf("Dump aborted\n");
453 	else if (error == E2BIG)
454 		printf("Dump failed. Partition too small.\n");
455 	else
456 		printf("** DUMP FAILED (ERROR %d) **\n", error);
457 }
458 
459 void
460 dump_add_page(vm_paddr_t pa)
461 {
462 	int idx, bit;
463 
464 	pa >>= PAGE_SHIFT;
465 	idx = pa >> 6;		/* 2^6 = 64 */
466 	bit = pa & 63;
467 	atomic_set_long(&vm_page_dump[idx], 1ul << bit);
468 }
469 
470 void
471 dump_drop_page(vm_paddr_t pa)
472 {
473 	int idx, bit;
474 
475 	pa >>= PAGE_SHIFT;
476 	idx = pa >> 6;		/* 2^6 = 64 */
477 	bit = pa & 63;
478 	atomic_clear_long(&vm_page_dump[idx], 1ul << bit);
479 }
480