/*-
 * Copyright (c) 2006 Peter Wemm
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * AMD64 machine dependent routines for kvm and minidumps.
 */

#include <sys/user.h>	   /* MUST BE FIRST */
#include <sys/param.h>
#include <sys/proc.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <sys/fnv_hash.h>
#include <strings.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <nlist.h>

#include <cpu/pmap.h>
#include <vm/vm.h>
#include <vm/vm_param.h>

#include <machine/elf.h>
#include <machine/cpufunc.h>
#include <machine/minidump.h>

#include <limits.h>

#include "kvm.h"
#include "kvm_private.h"

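/*
 * Chained hash table entry mapping the physical address of a dumped page
 * to the file offset of that page's data within the minidump.
 */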
struct hpte {
	struct hpte *next;
	vm_paddr_t pa;
	int64_t off;
};

#define HPT_SIZE 1024

/* minidump must be the first item! */
struct vmstate {
	int minidump;		/* 1 = minidump mode */
	int pgtable;		/* pagetable mode */
	void *hpt_head[HPT_SIZE];
	uint64_t *bitmap;
	uint64_t *ptemap;
	uint64_t kernbase;
	uint64_t dmapbase;
	uint64_t dmapend;
	uint64_t bitmapsize;
};

static void
hpt_insert(kvm_t *kd, vm_paddr_t pa, int64_t off)
{
	struct hpte *hpte;
	uint32_t fnv = FNV1_32_INIT;

	fnv = fnv_32_buf(&pa, sizeof(pa), fnv);
	fnv &= (HPT_SIZE - 1);
	hpte = malloc(sizeof(*hpte));
	hpte->pa = pa;
	hpte->off = off;
	hpte->next = kd->vmst->hpt_head[fnv];
	kd->vmst->hpt_head[fnv] = hpte;
}

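/*
 * Look up the minidump file offset for a physical page address.
 * Returns -1 if the page is not present in the dump.
 */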
static int64_t
hpt_find(kvm_t *kd, vm_paddr_t pa)
{
	struct hpte *hpte;
	uint32_t fnv = FNV1_32_INIT;

	fnv = fnv_32_buf(&pa, sizeof(pa), fnv);
	fnv &= (HPT_SIZE - 1);
	for (hpte = kd->vmst->hpt_head[fnv]; hpte != NULL; hpte = hpte->next) {
		if (pa == hpte->pa)
			return (hpte->off);
	}
	return (-1);
}

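/*
 * Walk the dump's page bitmap: each set bit marks a 4KB physical page whose
 * contents are stored sequentially in the file starting at "off".  Record a
 * physical address -> file offset mapping for every such page.
 */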
static off_t
inithash(kvm_t *kd, uint64_t *base, int len, off_t off)
{
	uint64_t idx;
	uint64_t bit, bits;
	vm_paddr_t pa;

	for (idx = 0; idx < len / sizeof(*base); idx++) {
		bits = base[idx];
		while (bits) {
			bit = bsfq(bits);
			bits &= ~(1ul << bit);
			pa = (idx * sizeof(*base) * NBBY + bit) * PAGE_SIZE;
			hpt_insert(kd, pa, off);
			off += PAGE_SIZE;
		}
	}
	return (off);
}

void
_kvm_minidump_freevtop(kvm_t *kd)
{
	struct vmstate *vm = kd->vmst;

	if (vm->bitmap)
		free(vm->bitmap);
	if (vm->ptemap)
		free(vm->ptemap);
	free(vm);
	kd->vmst = NULL;
}

static int _kvm_minidump_init_hdr1(kvm_t *kd, struct vmstate *vmst,
			struct minidumphdr1 *hdr);
static int _kvm_minidump_init_hdr2(kvm_t *kd, struct vmstate *vmst,
			struct minidumphdr2 *hdr);

int
_kvm_minidump_initvtop(kvm_t *kd)
{
	struct vmstate *vmst;
	int error;
	union {
		struct minidumphdr1 hdr1;
		struct minidumphdr2 hdr2;
	} u;

	vmst = _kvm_malloc(kd, sizeof(*vmst));
	if (vmst == NULL) {
		_kvm_err(kd, kd->program, "cannot allocate vm");
		return (-1);
	}
	kd->vmst = vmst;
	bzero(vmst, sizeof(*vmst));
	vmst->minidump = 1;

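	/*
	 * Each header format carries a magic string and a version.  Read
	 * enough bytes to cover either layout and dispatch on whichever
	 * magic/version pair matches.
	 */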
	if (pread(kd->pmfd, &u, sizeof(u), 0) != sizeof(u)) {
		_kvm_err(kd, kd->program, "cannot read dump header");
		return (-1);
	}
	if (strncmp(MINIDUMP1_MAGIC, u.hdr1.magic, sizeof(u.hdr1.magic)) == 0 &&
	    u.hdr1.version == MINIDUMP1_VERSION) {
		error = _kvm_minidump_init_hdr1(kd, vmst, &u.hdr1);
	} else if (strncmp(MINIDUMP2_MAGIC, u.hdr2.magic,
	    sizeof(u.hdr2.magic)) == 0 &&
	    u.hdr2.version == MINIDUMP2_VERSION) {
		error = _kvm_minidump_init_hdr2(kd, vmst, &u.hdr2);
	} else {
		_kvm_err(kd, kd->program, "not a minidump for this platform");
		error = -1;
	}
	return (error);
}

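/*
 * Version 1 dump layout: one header page, the kernel message buffer, the
 * page bitmap, a flat array of kernel PTEs starting at kernbase, and then
 * the data for each page marked in the bitmap.
 */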
static int
_kvm_minidump_init_hdr1(kvm_t *kd, struct vmstate *vmst,
			struct minidumphdr1 *hdr)
{
	off_t off;

	/* Skip header and msgbuf */
	off = PAGE_SIZE + round_page(hdr->msgbufsize);

	vmst->bitmap = _kvm_malloc(kd, hdr->bitmapsize);
	if (vmst->bitmap == NULL) {
		_kvm_err(kd, kd->program,
			 "cannot allocate %jd bytes for bitmap",
			 (intmax_t)hdr->bitmapsize);
		return (-1);
	}
	if (pread(kd->pmfd, vmst->bitmap, hdr->bitmapsize, off) !=
	    (intmax_t)hdr->bitmapsize) {
		_kvm_err(kd, kd->program,
			 "cannot read %jd bytes for page bitmap",
			 (intmax_t)hdr->bitmapsize);
		return (-1);
	}
	off += round_page(hdr->bitmapsize);

	vmst->ptemap = _kvm_malloc(kd, hdr->ptesize);
	if (vmst->ptemap == NULL) {
		_kvm_err(kd, kd->program,
			 "cannot allocate %jd bytes for ptemap",
			 (intmax_t)hdr->ptesize);
		return (-1);
	}
	if (pread(kd->pmfd, vmst->ptemap, hdr->ptesize, off) !=
	    (intmax_t)hdr->ptesize) {
		_kvm_err(kd, kd->program,
			 "cannot read %jd bytes for ptemap",
			 (intmax_t)hdr->ptesize);
		return (-1);
	}
	off += hdr->ptesize;

	vmst->kernbase = hdr->kernbase;
	vmst->dmapbase = hdr->dmapbase;
	vmst->dmapend = hdr->dmapend;
	vmst->bitmapsize = hdr->bitmapsize;
	vmst->pgtable = 0;

	/* build physical address hash table for sparse pages */
	inithash(kd, vmst->bitmap, hdr->bitmapsize, off);

	return (0);
}

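/*
 * Version 2 dump layout uses the same header/msgbuf/bitmap/ptemap/page-data
 * sequence, but translation can no longer index a flat PTE array; it must
 * walk the kernel page table pages included in the sparse page data (see
 * the pgtable == 1 case in _kvm_minidump_vatop() below).
 */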
static int
_kvm_minidump_init_hdr2(kvm_t *kd, struct vmstate *vmst,
			struct minidumphdr2 *hdr)
{
	off_t off;

	/* Skip header and msgbuf */
	off = PAGE_SIZE + round_page(hdr->msgbufsize);

	vmst->bitmap = _kvm_malloc(kd, hdr->bitmapsize);
	if (vmst->bitmap == NULL) {
		_kvm_err(kd, kd->program,
			 "cannot allocate %jd bytes for bitmap",
			 (intmax_t)hdr->bitmapsize);
		return (-1);
	}
	if (pread(kd->pmfd, vmst->bitmap, hdr->bitmapsize, off) !=
	    (intmax_t)hdr->bitmapsize) {
		_kvm_err(kd, kd->program,
			 "cannot read %jd bytes for page bitmap",
			 (intmax_t)hdr->bitmapsize);
		return (-1);
	}
	off += round_page(hdr->bitmapsize);

	vmst->ptemap = _kvm_malloc(kd, hdr->ptesize);
	if (vmst->ptemap == NULL) {
		_kvm_err(kd, kd->program,
			 "cannot allocate %jd bytes for ptemap",
			 (intmax_t)hdr->ptesize);
		return (-1);
	}
	if (pread(kd->pmfd, vmst->ptemap, hdr->ptesize, off) !=
	    (intmax_t)hdr->ptesize) {
		_kvm_err(kd, kd->program,
			 "cannot read %jd bytes for ptemap",
			 (intmax_t)hdr->ptesize);
		return (-1);
	}
	off += hdr->ptesize;

	vmst->kernbase = hdr->kernbase;
	vmst->dmapbase = hdr->dmapbase;
	vmst->bitmapsize = hdr->bitmapsize;
	vmst->pgtable = 1;

	/* build physical address hash table for sparse pages */
	inithash(kd, vmst->bitmap, hdr->bitmapsize, off);

	return (0);
}

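/*
 * Translate a kernel virtual address to an offset within the minidump file.
 * On success *pa receives the file offset and the return value is the number
 * of bytes valid at that offset (up to the end of the page); 0 is returned
 * on failure.
 */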
static int
_kvm_minidump_vatop(kvm_t *kd, u_long va, off_t *pa)
{
	struct vmstate *vm;
	u_long offset;
	pt_entry_t pte;
	u_long pteindex;
	u_long a;
	off_t ofs;

	vm = kd->vmst;
	offset = va & (PAGE_SIZE - 1);
	va -= offset;			/* put va on page boundary */

	if (va >= vm->kernbase) {
		switch (vm->pgtable) {
		case 0:
			/*
			 * Page tables are specifically dumped (old style)
			 */
			pteindex = (va - vm->kernbase) >> PAGE_SHIFT;
			pte = vm->ptemap[pteindex];
			if (((u_long)pte & X86_PG_V) == 0) {
				_kvm_err(kd, kd->program,
					 "_kvm_vatop: pte not valid");
				goto invalid;
			}
			a = pte & PG_FRAME;
			break;
		case 1:
			/*
			 * Kernel page table pages are included in the
			 * sparse map.  We only dump the contents of
			 * the PDs (zero-filling any empty entries).
			 *
			 * Index of PD entry in PDP & PDP in PML4E together.
			 *
			 * First shift by 30 (1GB) - gives us an index
			 * into PD entries.  We do not walk the PML4E or
			 * PDP levels, so there are 512 * 512 PD entries
			 * possible.
			 */
			pteindex = (va >> PDPSHIFT) & (512 * 512 - 1);
			pte = vm->ptemap[pteindex];
			if ((pte & X86_PG_V) == 0) {
				_kvm_err(kd, kd->program,
					 "_kvm_vatop: pd not valid");
				goto invalid;
			}
			if (pte & X86_PG_PS) {		/* 1GB pages */
				a = (pte & PG_PS_FRAME) +
				    (va & (1024 * 1024 * 1024 - 1));
				break;
			}
							/* PD page */
			ofs = hpt_find(kd, pte & PG_FRAME);
			if (ofs == -1) {
				_kvm_err(kd, kd->program,
					 "_kvm_vatop: no phys page for pd");
				goto invalid;
			}

			/*
			 * Index of PT entry in PD
			 */
			pteindex = (va >> PDRSHIFT) & 511;
			if (pread(kd->pmfd, &pte, sizeof(pte),
			      ofs + pteindex * sizeof(pte)) != sizeof(pte)) {
				_kvm_err(kd, kd->program,
					 "_kvm_vatop: pd lookup not valid");
				goto invalid;
			}
			if ((pte & X86_PG_V) == 0) {
				_kvm_err(kd, kd->program,
					 "_kvm_vatop: pt not valid");
				goto invalid;
			}
			if (pte & X86_PG_PS) {		/* 2MB pages */
				a = (pte & PG_PS_FRAME) +
				    (va & (2048 * 1024 - 1));
				break;
			}
			ofs = hpt_find(kd, pte & PG_FRAME);
			if (ofs == -1) {
				_kvm_err(kd, kd->program,
					 "_kvm_vatop: no phys page for pt");
				goto invalid;
			}

			/*
			 * Index of pte entry in PT
			 */
			pteindex = (va >> PAGE_SHIFT) & 511;
			if (pread(kd->pmfd, &pte, sizeof(pte),
			      ofs + pteindex * sizeof(pte)) != sizeof(pte)) {
				_kvm_err(kd, kd->program,
					 "_kvm_vatop: pte lookup not valid");
				goto invalid;
			}

			/*
			 * Calculate the final page address
			 */
			a = pte & PG_FRAME;
			break;
		default:
			_kvm_err(kd, kd->program,
				 "_kvm_vatop: bad pgtable mode");
			goto invalid;
		}
		ofs = hpt_find(kd, a);
		if (ofs == -1) {
			_kvm_err(kd, kd->program,
				 "_kvm_vatop: physical address 0x%lx not in "
				 "minidump", a);
			goto invalid;
		}
		*pa = ofs + offset;
		return (PAGE_SIZE - offset);
	} else if (va >= vm->dmapbase && va < vm->dmapend) {
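		/*
		 * Address inside the kernel's direct map: the physical
		 * address is simply the offset from dmapbase.
		 */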
		a = (va - vm->dmapbase) & ~PAGE_MASK;
		ofs = hpt_find(kd, a);
		if (ofs == -1) {
			_kvm_err(kd, kd->program,
				 "_kvm_vatop: direct map address 0x%lx not "
				 "in minidump", va);
			goto invalid;
		}
		*pa = ofs + offset;
		return (PAGE_SIZE - offset);
	} else {
		_kvm_err(kd, kd->program,
			 "_kvm_vatop: virtual address 0x%lx not minidumped",
			 va);
		goto invalid;
	}

invalid:
	_kvm_err(kd, 0, "invalid address (0x%lx)", va);
	return (0);
}

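/*
 * Public kvatop entry point for minidumps; translation is only meaningful
 * against a crash dump, never a live kernel.
 */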
int
_kvm_minidump_kvatop(kvm_t *kd, u_long va, off_t *pa)
{
	if (kvm_ishost(kd)) {
		_kvm_err(kd, 0,
			 "_kvm_minidump_kvatop called in live kernel!");
		return (0);
	}

	return (_kvm_minidump_vatop(kd, va, pa));
}