1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2006 Peter Wemm
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_watchdog.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/conf.h>
37 #include <sys/cons.h>
38 #include <sys/kernel.h>
39 #include <sys/kerneldump.h>
40 #include <sys/msgbuf.h>
41 #include <sys/watchdog.h>
42 #include <vm/vm.h>
43 #include <vm/pmap.h>
44 #include <machine/atomic.h>
45 #include <machine/elf.h>
46 #include <machine/md_var.h>
47 #include <machine/vmparam.h>
48 #include <machine/minidump.h>
49 
50 CTASSERT(sizeof(struct kerneldumpheader) == 512);
51 
52 #define	MD_ALIGN(x)	(((off_t)(x) + PAGE_MASK) & ~PAGE_MASK)
53 #define	DEV_ALIGN(x)	roundup2((off_t)(x), DEV_BSIZE)
54 
55 static struct kerneldumpheader kdh;
56 
57 /* Handle chunked writes. */
58 static size_t fragsz;
59 static void *dump_va;
60 static uint64_t counter, progress;
61 
62 CTASSERT(sizeof(*vm_page_dump) == 4);
63 
64 static int
65 is_dumpable(vm_paddr_t pa)
66 {
67 	int i;
68 
69 	for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) {
70 		if (pa >= dump_avail[i] && pa < dump_avail[i + 1])
71 			return (1);
72 	}
73 	return (0);
74 }
75 
/*
 * Convert a page count to megabytes, rounding up.  The shift by 8 treats
 * 256 pages as one megabyte, which corresponds to 4 KB pages.
 */
#define	PG2MB(pgs)	(((pgs) + 0xff) >> 8)
77 
78 static int
79 blk_flush(struct dumperinfo *di)
80 {
81 	int error;
82 
83 	if (fragsz == 0)
84 		return (0);
85 
86 	error = dump_append(di, dump_va, 0, fragsz);
87 	fragsz = 0;
88 	return (error);
89 }
90 
91 static int
92 blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz)
93 {
94 	size_t len;
95 	int error, i, c;
96 	u_int maxdumpsz;
97 
98 	maxdumpsz = min(di->maxiosize, MAXDUMPPGS * PAGE_SIZE);
99 	if (maxdumpsz == 0)	/* seatbelt */
100 		maxdumpsz = PAGE_SIZE;
101 	error = 0;
102 	if ((sz % PAGE_SIZE) != 0) {
103 		printf("size not page aligned\n");
104 		return (EINVAL);
105 	}
106 	if (ptr != NULL && pa != 0) {
107 		printf("cant have both va and pa!\n");
108 		return (EINVAL);
109 	}
110 	if (pa != 0 && (((uintptr_t)ptr) % PAGE_SIZE) != 0) {
111 		printf("address not page aligned\n");
112 		return (EINVAL);
113 	}
114 	if (ptr != NULL) {
115 		/* If we're doing a virtual dump, flush any pre-existing pa pages */
116 		error = blk_flush(di);
117 		if (error)
118 			return (error);
119 	}
120 	while (sz) {
121 		len = maxdumpsz - fragsz;
122 		if (len > sz)
123 			len = sz;
124 		counter += len;
125 		progress -= len;
126 		if (counter >> 24) {
127 			printf(" %lld", PG2MB(progress >> PAGE_SHIFT));
128 			counter &= (1<<24) - 1;
129 		}
130 
131 		wdog_kern_pat(WD_LASTVAL);
132 
133 		if (ptr) {
134 			error = dump_append(di, ptr, 0, len);
135 			if (error)
136 				return (error);
137 			ptr += len;
138 			sz -= len;
139 		} else {
140 			for (i = 0; i < len; i += PAGE_SIZE)
141 				dump_va = pmap_kenter_temporary(pa + i, (i + fragsz) >> PAGE_SHIFT);
142 			fragsz += len;
143 			pa += len;
144 			sz -= len;
145 			if (fragsz == maxdumpsz) {
146 				error = blk_flush(di);
147 				if (error)
148 					return (error);
149 			}
150 		}
151 
152 		/* Check for user abort. */
153 		c = cncheckc();
154 		if (c == 0x03)
155 			return (ECANCELED);
156 		if (c != -1)
157 			printf(" (CTRL-C to abort) ");
158 	}
159 
160 	return (0);
161 }
162 
163 /* A fake page table page, to avoid having to handle both 4K and 2M pages */
164 static pt_entry_t fakept[NPTEPG];
165 
166 #ifdef PMAP_PAE_COMP
167 #define	minidumpsys	minidumpsys_pae
168 #define	IdlePTD		IdlePTD_pae
169 #else
170 #define	minidumpsys	minidumpsys_nopae
171 #define	IdlePTD		IdlePTD_nopae
172 #endif
173 
174 int
175 minidumpsys(struct dumperinfo *di)
176 {
177 	uint64_t dumpsize;
178 	uint32_t ptesize;
179 	vm_offset_t va;
180 	int error;
181 	uint32_t bits;
182 	uint64_t pa;
183 	pd_entry_t *pd;
184 	pt_entry_t *pt;
185 	int i, j, k, bit;
186 	struct minidumphdr mdhdr;
187 
188 	counter = 0;
189 	/* Walk page table pages, set bits in vm_page_dump */
190 	ptesize = 0;
191 	for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) {
192 		/*
193 		 * We always write a page, even if it is zero. Each
194 		 * page written corresponds to 2MB of space
195 		 */
196 		ptesize += PAGE_SIZE;
197 		pd = IdlePTD;	/* always mapped! */
198 		j = va >> PDRSHIFT;
199 		if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V))  {
200 			/* This is an entire 2M page. */
201 			pa = pd[j] & PG_PS_FRAME;
202 			for (k = 0; k < NPTEPG; k++) {
203 				if (is_dumpable(pa))
204 					dump_add_page(pa);
205 				pa += PAGE_SIZE;
206 			}
207 			continue;
208 		}
209 		if ((pd[j] & PG_V) == PG_V) {
210 			/* set bit for each valid page in this 2MB block */
211 			pt = pmap_kenter_temporary(pd[j] & PG_FRAME, 0);
212 			for (k = 0; k < NPTEPG; k++) {
213 				if ((pt[k] & PG_V) == PG_V) {
214 					pa = pt[k] & PG_FRAME;
215 					if (is_dumpable(pa))
216 						dump_add_page(pa);
217 				}
218 			}
219 		} else {
220 			/* nothing, we're going to dump a null page */
221 		}
222 	}
223 
224 	/* Calculate dump size. */
225 	dumpsize = ptesize;
226 	dumpsize += round_page(msgbufp->msg_size);
227 	dumpsize += round_page(vm_page_dump_size);
228 	for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
229 		bits = vm_page_dump[i];
230 		while (bits) {
231 			bit = bsfl(bits);
232 			pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
233 			/* Clear out undumpable pages now if needed */
234 			if (is_dumpable(pa)) {
235 				dumpsize += PAGE_SIZE;
236 			} else {
237 				dump_drop_page(pa);
238 			}
239 			bits &= ~(1ul << bit);
240 		}
241 	}
242 	dumpsize += PAGE_SIZE;
243 
244 	progress = dumpsize;
245 
246 	/* Initialize mdhdr */
247 	bzero(&mdhdr, sizeof(mdhdr));
248 	strcpy(mdhdr.magic, MINIDUMP_MAGIC);
249 	mdhdr.version = MINIDUMP_VERSION;
250 	mdhdr.msgbufsize = msgbufp->msg_size;
251 	mdhdr.bitmapsize = vm_page_dump_size;
252 	mdhdr.ptesize = ptesize;
253 	mdhdr.kernbase = KERNBASE;
254 	mdhdr.paemode = pae_mode;
255 
256 	dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_I386_VERSION,
257 	    dumpsize);
258 
259 	error = dump_start(di, &kdh);
260 	if (error != 0)
261 		goto fail;
262 
263 	printf("Physical memory: %ju MB\n", ptoa((uintmax_t)physmem) / 1048576);
264 	printf("Dumping %llu MB:", (long long)dumpsize >> 20);
265 
266 	/* Dump my header */
267 	bzero(&fakept, sizeof(fakept));
268 	bcopy(&mdhdr, &fakept, sizeof(mdhdr));
269 	error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
270 	if (error)
271 		goto fail;
272 
273 	/* Dump msgbuf up front */
274 	error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size));
275 	if (error)
276 		goto fail;
277 
278 	/* Dump bitmap */
279 	error = blk_write(di, (char *)vm_page_dump, 0, round_page(vm_page_dump_size));
280 	if (error)
281 		goto fail;
282 
283 	/* Dump kernel page table pages */
284 	for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) {
285 		/* We always write a page, even if it is zero */
286 		pd = IdlePTD;	/* always mapped! */
287 		j = va >> PDRSHIFT;
288 		if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V))  {
289 			/* This is a single 2M block. Generate a fake PTP */
290 			pa = pd[j] & PG_PS_FRAME;
291 			for (k = 0; k < NPTEPG; k++) {
292 				fakept[k] = (pa + (k * PAGE_SIZE)) | PG_V | PG_RW | PG_A | PG_M;
293 			}
294 			error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
295 			if (error)
296 				goto fail;
297 			/* flush, in case we reuse fakept in the same block */
298 			error = blk_flush(di);
299 			if (error)
300 				goto fail;
301 			continue;
302 		}
303 		if ((pd[j] & PG_V) == PG_V) {
304 			pa = pd[j] & PG_FRAME;
305 			error = blk_write(di, 0, pa, PAGE_SIZE);
306 			if (error)
307 				goto fail;
308 		} else {
309 			bzero(fakept, sizeof(fakept));
310 			error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
311 			if (error)
312 				goto fail;
313 			/* flush, in case we reuse fakept in the same block */
314 			error = blk_flush(di);
315 			if (error)
316 				goto fail;
317 		}
318 	}
319 
320 	/* Dump memory chunks */
321 	/* XXX cluster it up and use blk_dump() */
322 	for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
323 		bits = vm_page_dump[i];
324 		while (bits) {
325 			bit = bsfl(bits);
326 			pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
327 			error = blk_write(di, 0, pa, PAGE_SIZE);
328 			if (error)
329 				goto fail;
330 			bits &= ~(1ul << bit);
331 		}
332 	}
333 
334 	error = blk_flush(di);
335 	if (error)
336 		goto fail;
337 
338 	error = dump_finish(di, &kdh);
339 	if (error != 0)
340 		goto fail;
341 
342 	printf("\nDump complete\n");
343 	return (0);
344 
345  fail:
346 	if (error < 0)
347 		error = -error;
348 
349 	if (error == ECANCELED)
350 		printf("\nDump aborted\n");
351 	else if (error == E2BIG || error == ENOSPC)
352 		printf("\nDump failed. Partition too small.\n");
353 	else
354 		printf("\n** DUMP FAILED (ERROR %d) **\n", error);
355 	return (error);
356 }
357