1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2006 Peter Wemm
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_watchdog.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/conf.h>
37 #include <sys/cons.h>
38 #include <sys/kernel.h>
39 #include <sys/kerneldump.h>
40 #include <sys/msgbuf.h>
41 #include <sys/watchdog.h>
42 #include <vm/vm.h>
43 #include <vm/vm_param.h>
44 #include <vm/vm_page.h>
45 #include <vm/vm_phys.h>
46 #include <vm/vm_dumpset.h>
47 #include <vm/pmap.h>
48 #include <machine/atomic.h>
49 #include <machine/elf.h>
50 #include <machine/md_var.h>
51 #include <machine/minidump.h>
52 
53 CTASSERT(sizeof(struct kerneldumpheader) == 512);
54 
55 #define	MD_ALIGN(x)	(((off_t)(x) + PAGE_MASK) & ~PAGE_MASK)
56 #define	DEV_ALIGN(x)	roundup2((off_t)(x), DEV_BSIZE)
57 
58 static struct kerneldumpheader kdh;
59 
60 /* Handle chunked writes. */
61 static size_t fragsz;
62 static void *dump_va;
63 
64 static int
65 blk_flush(struct dumperinfo *di)
66 {
67 	int error;
68 
69 	if (fragsz == 0)
70 		return (0);
71 
72 	error = dump_append(di, dump_va, 0, fragsz);
73 	fragsz = 0;
74 	return (error);
75 }
76 
77 static int
78 blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz)
79 {
80 	size_t len;
81 	int error, i, c;
82 	u_int maxdumpsz;
83 
84 	maxdumpsz = min(di->maxiosize, MAXDUMPPGS * PAGE_SIZE);
85 	if (maxdumpsz == 0)	/* seatbelt */
86 		maxdumpsz = PAGE_SIZE;
87 	error = 0;
88 	if ((sz % PAGE_SIZE) != 0) {
89 		printf("size not page aligned\n");
90 		return (EINVAL);
91 	}
92 	if (ptr != NULL && pa != 0) {
93 		printf("cant have both va and pa!\n");
94 		return (EINVAL);
95 	}
96 	if (pa != 0 && (((uintptr_t)ptr) % PAGE_SIZE) != 0) {
97 		printf("address not page aligned\n");
98 		return (EINVAL);
99 	}
100 	if (ptr != NULL) {
101 		/* If we're doing a virtual dump, flush any pre-existing pa pages */
102 		error = blk_flush(di);
103 		if (error)
104 			return (error);
105 	}
106 	while (sz) {
107 		len = maxdumpsz - fragsz;
108 		if (len > sz)
109 			len = sz;
110 
111 		dumpsys_pb_progress(len);
112 		wdog_kern_pat(WD_LASTVAL);
113 
114 		if (ptr) {
115 			error = dump_append(di, ptr, 0, len);
116 			if (error)
117 				return (error);
118 			ptr += len;
119 			sz -= len;
120 		} else {
121 			for (i = 0; i < len; i += PAGE_SIZE)
122 				dump_va = pmap_kenter_temporary(pa + i, (i + fragsz) >> PAGE_SHIFT);
123 			fragsz += len;
124 			pa += len;
125 			sz -= len;
126 			if (fragsz == maxdumpsz) {
127 				error = blk_flush(di);
128 				if (error)
129 					return (error);
130 			}
131 		}
132 
133 		/* Check for user abort. */
134 		c = cncheckc();
135 		if (c == 0x03)
136 			return (ECANCELED);
137 		if (c != -1)
138 			printf(" (CTRL-C to abort) ");
139 	}
140 
141 	return (0);
142 }
143 
144 /* A fake page table page, to avoid having to handle both 4K and 2M pages */
145 static pt_entry_t fakept[NPTEPG];
146 
147 #ifdef PMAP_PAE_COMP
148 #define	cpu_minidumpsys		cpu_minidumpsys_pae
149 #define	IdlePTD			IdlePTD_pae
150 #else
151 #define	cpu_minidumpsys		cpu_minidumpsys_nopae
152 #define	IdlePTD			IdlePTD_nopae
153 #endif
154 
155 int
156 cpu_minidumpsys(struct dumperinfo *di, const struct minidumpstate *state)
157 {
158 	uint64_t dumpsize;
159 	uint32_t ptesize;
160 	vm_offset_t va, kva_end;
161 	int error;
162 	uint64_t pa;
163 	pd_entry_t *pd, pde;
164 	pt_entry_t *pt, pte;
165 	int k;
166 	struct minidumphdr mdhdr;
167 	struct msgbuf *mbp;
168 
169 	/* Snapshot the KVA upper bound in case it grows. */
170 	kva_end = kernel_vm_end;
171 
172 	/*
173 	 * Walk the kernel page table pages, setting the active entries in the
174 	 * dump bitmap.
175 	 *
176 	 * NB: for a live dump, we may be racing with updates to the page
177 	 * tables, so care must be taken to read each entry only once.
178 	 */
179 	ptesize = 0;
180 	for (va = KERNBASE; va < kva_end; va += NBPDR) {
181 		/*
182 		 * We always write a page, even if it is zero. Each
183 		 * page written corresponds to 2MB of space
184 		 */
185 		ptesize += PAGE_SIZE;
186 		pd = IdlePTD;	/* always mapped! */
187 		pde = pte_load(&pd[va >> PDRSHIFT]);
188 		if ((pde & (PG_PS | PG_V)) == (PG_PS | PG_V))  {
189 			/* This is an entire 2M page. */
190 			pa = pde & PG_PS_FRAME;
191 			for (k = 0; k < NPTEPG; k++) {
192 				if (vm_phys_is_dumpable(pa))
193 					vm_page_dump_add(state->dump_bitset,
194 					    pa);
195 				pa += PAGE_SIZE;
196 			}
197 			continue;
198 		}
199 		if ((pde & PG_V) == PG_V) {
200 			/* set bit for each valid page in this 2MB block */
201 			pt = pmap_kenter_temporary(pde & PG_FRAME, 0);
202 			for (k = 0; k < NPTEPG; k++) {
203 				pte = pte_load(&pt[k]);
204 				if ((pte & PG_V) == PG_V) {
205 					pa = pte & PG_FRAME;
206 					if (vm_phys_is_dumpable(pa))
207 						vm_page_dump_add(
208 						    state->dump_bitset, pa);
209 				}
210 			}
211 		} else {
212 			/* nothing, we're going to dump a null page */
213 		}
214 	}
215 
216 	/* Calculate dump size. */
217 	mbp = state->msgbufp;
218 	dumpsize = ptesize;
219 	dumpsize += round_page(mbp->msg_size);
220 	dumpsize += round_page(sizeof(dump_avail));
221 	dumpsize += round_page(BITSET_SIZE(vm_page_dump_pages));
222 	VM_PAGE_DUMP_FOREACH(state->dump_bitset, pa) {
223 		/* Clear out undumpable pages now if needed */
224 		if (vm_phys_is_dumpable(pa)) {
225 			dumpsize += PAGE_SIZE;
226 		} else {
227 			vm_page_dump_drop(state->dump_bitset, pa);
228 		}
229 	}
230 	dumpsize += PAGE_SIZE;
231 
232 	dumpsys_pb_init(dumpsize);
233 
234 	/* Initialize mdhdr */
235 	bzero(&mdhdr, sizeof(mdhdr));
236 	strcpy(mdhdr.magic, MINIDUMP_MAGIC);
237 	mdhdr.version = MINIDUMP_VERSION;
238 	mdhdr.msgbufsize = mbp->msg_size;
239 	mdhdr.bitmapsize = round_page(BITSET_SIZE(vm_page_dump_pages));
240 	mdhdr.ptesize = ptesize;
241 	mdhdr.kernbase = KERNBASE;
242 	mdhdr.paemode = pae_mode;
243 	mdhdr.dumpavailsize = round_page(sizeof(dump_avail));
244 
245 	dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_I386_VERSION,
246 	    dumpsize);
247 
248 	error = dump_start(di, &kdh);
249 	if (error != 0)
250 		goto fail;
251 
252 	printf("Physical memory: %ju MB\n", ptoa((uintmax_t)physmem) / 1048576);
253 	printf("Dumping %llu MB:", (long long)dumpsize >> 20);
254 
255 	/* Dump my header */
256 	bzero(&fakept, sizeof(fakept));
257 	bcopy(&mdhdr, &fakept, sizeof(mdhdr));
258 	error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
259 	if (error)
260 		goto fail;
261 
262 	/* Dump msgbuf up front */
263 	error = blk_write(di, (char *)mbp->msg_ptr, 0,
264 	    round_page(mbp->msg_size));
265 	if (error)
266 		goto fail;
267 
268 	/* Dump dump_avail */
269 	_Static_assert(sizeof(dump_avail) <= sizeof(fakept),
270 	    "Large dump_avail not handled");
271 	bzero(fakept, sizeof(fakept));
272 	memcpy(fakept, dump_avail, sizeof(dump_avail));
273 	error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
274 	if (error)
275 		goto fail;
276 
277 	/* Dump bitmap */
278 	error = blk_write(di, (char *)vm_page_dump, 0,
279 	    round_page(BITSET_SIZE(vm_page_dump_pages)));
280 	if (error)
281 		goto fail;
282 
283 	/* Dump kernel page table pages */
284 	for (va = KERNBASE; va < kva_end; va += NBPDR) {
285 		/* We always write a page, even if it is zero */
286 		pd = IdlePTD;	/* always mapped! */
287 		pde = pte_load(&pd[va >> PDRSHIFT]);
288 		if ((pde & (PG_PS | PG_V)) == (PG_PS | PG_V))  {
289 			/* This is a single 2M block. Generate a fake PTP */
290 			pa = pde & PG_PS_FRAME;
291 			for (k = 0; k < NPTEPG; k++) {
292 				fakept[k] = (pa + (k * PAGE_SIZE)) | PG_V | PG_RW | PG_A | PG_M;
293 			}
294 			error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
295 			if (error)
296 				goto fail;
297 			/* flush, in case we reuse fakept in the same block */
298 			error = blk_flush(di);
299 			if (error)
300 				goto fail;
301 			continue;
302 		}
303 		if ((pde & PG_V) == PG_V) {
304 			pa = pde & PG_FRAME;
305 			error = blk_write(di, 0, pa, PAGE_SIZE);
306 			if (error)
307 				goto fail;
308 		} else {
309 			bzero(fakept, sizeof(fakept));
310 			error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
311 			if (error)
312 				goto fail;
313 			/* flush, in case we reuse fakept in the same block */
314 			error = blk_flush(di);
315 			if (error)
316 				goto fail;
317 		}
318 	}
319 
320 	/* Dump memory chunks */
321 	VM_PAGE_DUMP_FOREACH(state->dump_bitset, pa) {
322 		error = blk_write(di, 0, pa, PAGE_SIZE);
323 		if (error)
324 			goto fail;
325 	}
326 
327 	error = blk_flush(di);
328 	if (error)
329 		goto fail;
330 
331 	error = dump_finish(di, &kdh);
332 	if (error != 0)
333 		goto fail;
334 
335 	printf("\nDump complete\n");
336 	return (0);
337 
338  fail:
339 	if (error < 0)
340 		error = -error;
341 
342 	if (error == ECANCELED)
343 		printf("\nDump aborted\n");
344 	else if (error == E2BIG || error == ENOSPC) {
345 		printf("\nDump failed. Partition too small (about %lluMB were "
346 		    "needed this time).\n", (long long)dumpsize >> 20);
347 	} else
348 		printf("\n** DUMP FAILED (ERROR %d) **\n", error);
349 	return (error);
350 }
351