1 /*-
2  * Copyright (c) 2006 Peter Wemm
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  */
25 
26 /*
27  * AMD64 machine dependent routines for kvm and minidumps.
28  */
29 
30 #include <sys/user.h>	   /* MUST BE FIRST */
31 #include <sys/param.h>
32 #include <sys/proc.h>
33 #include <sys/stat.h>
34 #include <sys/mman.h>
35 #include <sys/fnv_hash.h>
36 #include <strings.h>
37 #include <string.h>
38 #include <stdlib.h>
39 #include <unistd.h>
40 #include <nlist.h>
41 
42 #include <vm/vm.h>
43 #include <vm/vm_param.h>
44 
45 #include <machine/elf.h>
46 #include <machine/cpufunc.h>
47 #include <machine/minidump.h>
48 
49 #include <limits.h>
50 
51 #include "kvm.h"
52 #include "kvm_private.h"
53 
/*
 * Hash table entry mapping one physical page address to the byte offset
 * of that page's contents within the minidump file.
 */
struct hpte {
	struct hpte *next;	/* next entry in the same hash bucket */
	vm_paddr_t pa;		/* physical address of the page */
	int64_t off;		/* file offset of the page contents */
};
59 
#define HPT_SIZE 1024		/* number of hash buckets; must be a power of 2 */

/* minidump must be the first item! */
struct vmstate {
	int minidump;		/* 1 = minidump mode */
	int pgtable;		/* pagetable mode */
	void *hpt_head[HPT_SIZE];	/* pa -> file offset hash chains */
	uint64_t *bitmap;	/* bitmap of pages present in the dump */
	uint64_t *ptemap;	/* dumped kernel page table entries */
	uint64_t kernbase;	/* start of kernel virtual address space */
	uint64_t dmapbase;	/* start of the direct map */
	uint64_t dmapend;	/* end of the direct map */
	uint64_t bitmapsize;	/* size of *bitmap in bytes */
};
74 
75 static void
76 hpt_insert(kvm_t *kd, vm_paddr_t pa, int64_t off)
77 {
78 	struct hpte *hpte;
79 	uint32_t fnv = FNV1_32_INIT;
80 
81 	fnv = fnv_32_buf(&pa, sizeof(pa), fnv);
82 	fnv &= (HPT_SIZE - 1);
83 	hpte = malloc(sizeof(*hpte));
84 	hpte->pa = pa;
85 	hpte->off = off;
86 	hpte->next = kd->vmst->hpt_head[fnv];
87 	kd->vmst->hpt_head[fnv] = hpte;
88 }
89 
90 static int64_t
91 hpt_find(kvm_t *kd, vm_paddr_t pa)
92 {
93 	struct hpte *hpte;
94 	uint32_t fnv = FNV1_32_INIT;
95 
96 	fnv = fnv_32_buf(&pa, sizeof(pa), fnv);
97 	fnv &= (HPT_SIZE - 1);
98 	for (hpte = kd->vmst->hpt_head[fnv]; hpte != NULL; hpte = hpte->next) {
99 		if (pa == hpte->pa)
100 			return (hpte->off);
101 	}
102 	return (-1);
103 }
104 
105 static int
106 inithash(kvm_t *kd, uint64_t *base, int len, off_t off)
107 {
108 	uint64_t idx;
109 	uint64_t bit, bits;
110 	vm_paddr_t pa;
111 
112 	for (idx = 0; idx < len / sizeof(*base); idx++) {
113 		bits = base[idx];
114 		while (bits) {
115 			bit = bsfq(bits);
116 			bits &= ~(1ul << bit);
117 			pa = (idx * sizeof(*base) * NBBY + bit) * PAGE_SIZE;
118 			hpt_insert(kd, pa, off);
119 			off += PAGE_SIZE;
120 		}
121 	}
122 	return (off);
123 }
124 
125 void
126 _kvm_minidump_freevtop(kvm_t *kd)
127 {
128 	struct vmstate *vm = kd->vmst;
129 
130 	if (vm->bitmap)
131 		free(vm->bitmap);
132 	if (vm->ptemap)
133 		free(vm->ptemap);
134 	free(vm);
135 	kd->vmst = NULL;
136 }
137 
138 static int _kvm_minidump_init_hdr1(kvm_t *kd, struct vmstate *vmst,
139 			struct minidumphdr1 *hdr);
140 static int _kvm_minidump_init_hdr2(kvm_t *kd, struct vmstate *vmst,
141 			struct minidumphdr2 *hdr);
142 
/*
 * Initialize virtual-to-physical translation state for a minidump.
 * Reads the dump header, recognizes either the v1 or v2 minidump
 * format, and dispatches to the matching per-version initializer.
 * Returns 0 on success, -1 on failure with an error set via _kvm_err.
 * kd->vmst is left allocated on failure; the caller is expected to
 * dispose of it via _kvm_minidump_freevtop().
 */
int
_kvm_minidump_initvtop(kvm_t *kd)
{
	struct vmstate *vmst;
	int error;
	/* Large enough for either header version; magic selects which. */
	union {
		struct minidumphdr1 hdr1;
		struct minidumphdr2 hdr2;
	} u;

	vmst = _kvm_malloc(kd, sizeof(*vmst));
	if (vmst == NULL) {
		_kvm_err(kd, kd->program, "cannot allocate vm");
		return (-1);
	}
	kd->vmst = vmst;
	bzero(vmst, sizeof(*vmst));
	vmst->minidump = 1;

	/* The header lives at the very start of the dump file. */
	if (pread(kd->pmfd, &u, sizeof(u), 0) != sizeof(u)) {
		_kvm_err(kd, kd->program, "cannot read dump header");
		return (-1);
	}
	if (strncmp(MINIDUMP1_MAGIC, u.hdr1.magic, sizeof(u.hdr1.magic)) == 0 &&
	    u.hdr1.version == MINIDUMP1_VERSION) {
		error = _kvm_minidump_init_hdr1(kd, vmst, &u.hdr1);
	} else
	/*
	 * NOTE(review): the v2 check also reads the magic through u.hdr1;
	 * this assumes hdr1 and hdr2 keep the magic field at the same
	 * offset and size -- confirm against struct minidumphdr2.
	 */
	if (strncmp(MINIDUMP2_MAGIC, u.hdr1.magic, sizeof(u.hdr1.magic)) == 0 &&
	    u.hdr2.version == MINIDUMP2_VERSION) {
		error = _kvm_minidump_init_hdr2(kd, vmst, &u.hdr2);
	} else {
		_kvm_err(kd, kd->program, "not a minidump for this platform");
		error = -1;
	}
	return error;
}
179 
180 static
181 int
182 _kvm_minidump_init_hdr1(kvm_t *kd, struct vmstate *vmst,
183 			struct minidumphdr1 *hdr)
184 {
185 	off_t off;
186 
187 	/* Skip header and msgbuf */
188 	off = PAGE_SIZE + round_page(hdr->msgbufsize);
189 
190 	vmst->bitmap = _kvm_malloc(kd, hdr->bitmapsize);
191 	if (vmst->bitmap == NULL) {
192 		_kvm_err(kd, kd->program,
193 			 "cannot allocate %jd bytes for bitmap",
194 			 (intmax_t)hdr->bitmapsize);
195 		return (-1);
196 	}
197 	if (pread(kd->pmfd, vmst->bitmap, hdr->bitmapsize, off) !=
198 	    hdr->bitmapsize) {
199 		_kvm_err(kd, kd->program,
200 			 "cannot read %jd bytes for page bitmap",
201 			 (intmax_t)hdr->bitmapsize);
202 		return (-1);
203 	}
204 	off += round_page(vmst->bitmapsize);
205 
206 	vmst->ptemap = _kvm_malloc(kd, hdr->ptesize);
207 	if (vmst->ptemap == NULL) {
208 		_kvm_err(kd, kd->program,
209 			 "cannot allocate %jd bytes for ptemap",
210 			 (intmax_t)hdr->ptesize);
211 		return (-1);
212 	}
213 	if (pread(kd->pmfd, vmst->ptemap, hdr->ptesize, off) !=
214 	    hdr->ptesize) {
215 		_kvm_err(kd, kd->program,
216 			 "cannot read %jd bytes for ptemap",
217 			 (intmax_t)hdr->ptesize);
218 		return (-1);
219 	}
220 	off += hdr->ptesize;
221 
222 	vmst->kernbase = hdr->kernbase;
223 	vmst->dmapbase = hdr->dmapbase;
224 	vmst->dmapend = hdr->dmapend;
225 	vmst->bitmapsize = hdr->bitmapsize;
226 	vmst->pgtable = 0;
227 
228 	/* build physical address hash table for sparse pages */
229 	inithash(kd, vmst->bitmap, hdr->bitmapsize, off);
230 
231 	return (0);
232 }
233 
234 static
235 int
236 _kvm_minidump_init_hdr2(kvm_t *kd, struct vmstate *vmst,
237 			struct minidumphdr2 *hdr)
238 {
239 	off_t off;
240 
241 	/* Skip header and msgbuf */
242 	off = PAGE_SIZE + round_page(hdr->msgbufsize);
243 
244 	vmst->bitmap = _kvm_malloc(kd, hdr->bitmapsize);
245 	if (vmst->bitmap == NULL) {
246 		_kvm_err(kd, kd->program,
247 			 "cannot allocate %jd bytes for bitmap",
248 			 (intmax_t)hdr->bitmapsize);
249 		return (-1);
250 	}
251 	if (pread(kd->pmfd, vmst->bitmap, hdr->bitmapsize, off) !=
252 	    (intmax_t)hdr->bitmapsize) {
253 		_kvm_err(kd, kd->program,
254 			 "cannot read %jd bytes for page bitmap",
255 			 (intmax_t)hdr->bitmapsize);
256 		return (-1);
257 	}
258 	off += round_page(hdr->bitmapsize);
259 
260 	vmst->ptemap = _kvm_malloc(kd, hdr->ptesize);
261 	if (vmst->ptemap == NULL) {
262 		_kvm_err(kd, kd->program,
263 			 "cannot allocate %jd bytes for ptemap",
264 			 (intmax_t)hdr->ptesize);
265 		return (-1);
266 	}
267 	if (pread(kd->pmfd, vmst->ptemap, hdr->ptesize, off) !=
268 	    (intmax_t)hdr->ptesize) {
269 		_kvm_err(kd, kd->program,
270 			 "cannot read %jd bytes for ptemap",
271 			 (intmax_t)hdr->ptesize);
272 		return (-1);
273 	}
274 	off += hdr->ptesize;
275 
276 	vmst->kernbase = hdr->kernbase;
277 	vmst->dmapbase = hdr->dmapbase;
278 	vmst->bitmapsize = hdr->bitmapsize;
279 	vmst->pgtable = 1;
280 
281 	/* build physical address hash table for sparse pages */
282 	inithash(kd, vmst->bitmap, hdr->bitmapsize, off);
283 
284 	return (0);
285 }
286 
/*
 * Translate kernel virtual address 'va' to a byte offset (*pa) into the
 * minidump file.  On success returns the number of contiguous bytes
 * readable from that offset without crossing a page boundary
 * (PAGE_SIZE - page offset); on failure returns 0 with an error set
 * via _kvm_err.
 */
static int
_kvm_minidump_vatop(kvm_t *kd, u_long va, off_t *pa)
{
	struct vmstate *vm;
	u_long offset;
	pt_entry_t pte;
	u_long pteindex;
	u_long a;
	off_t ofs;

	vm = kd->vmst;
	/* byte offset within the containing page */
	offset = va & (PAGE_SIZE - 1);

	if (va >= vm->kernbase) {
		switch (vm->pgtable) {
		case 0:
			/*
			 * Page tables are specifically dumped (old style):
			 * ptemap is a flat pte array indexed from kernbase.
			 */
			pteindex = (va - vm->kernbase) >> PAGE_SHIFT;
			pte = vm->ptemap[pteindex];
			if (((u_long)pte & X86_PG_V) == 0) {
				_kvm_err(kd, kd->program,
					 "_kvm_vatop: pte not valid");
				goto invalid;
			}
			a = pte & PG_FRAME;
			break;
		case 1:
			/*
			 * Kernel page table pages are included in the
			 * sparse map.  We only dump the contents of
			 * the PDs (zero-filling any empty entries).
			 *
			 * Index of PD entry in PDP & PDP in PML4E together.
			 *
			 * First shift by 30 (1GB) - gives us an index
			 * into PD entries.  We do not dump PDP entries in
			 * the PML4E, so there are 512 * 512 PD entries
			 * possible.
			 */
			pteindex = (va >> PDPSHIFT) & (512 * 512 - 1);
			pte = vm->ptemap[pteindex];
			if ((pte & X86_PG_V) == 0) {
				_kvm_err(kd, kd->program,
					 "_kvm_vatop: pd not valid");
				goto invalid;
			}
			if (pte & X86_PG_PS) {		/* 1GB pages */
				pte += va & (1024 * 1024 * 1024 - 1);
				goto shortcut;
			}
			/* Locate the dumped PT page within the sparse map */
			ofs = hpt_find(kd, pte & PG_FRAME);
			if (ofs == -1) {
				_kvm_err(kd, kd->program,
					 "_kvm_vatop: no phys page for pd");
				goto invalid;
			}

			/*
			 * Index of PT entry in PD; read the PT entry
			 * directly from the dump file.
			 */
			pteindex = (va >> PDRSHIFT) & 511;
			if (pread(kd->pmfd, &pte, sizeof(pte),
			      ofs + pteindex * sizeof(pte)) != sizeof(pte)) {
				_kvm_err(kd, kd->program,
					 "_kvm_vatop: pd lookup not valid");
				goto invalid;
			}
			if ((pte & X86_PG_V) == 0) {
				_kvm_err(kd, kd->program,
					 "_kvm_vatop: pt not valid");
				goto invalid;
			}
			if (pte & X86_PG_PS) {		/* 2MB pages */
				pte += va & (2048 * 1024 - 1);
				goto shortcut;
			}
			/* Locate the dumped leaf page table page */
			ofs = hpt_find(kd, pte & PG_FRAME);
			if (ofs == -1) {
				_kvm_err(kd, kd->program,
					 "_kvm_vatop: no phys page for pt");
				goto invalid;
			}

			/*
			 * Index of pte entry in PT; read the final pte
			 * from the dump file.
			 */
			pteindex = (va >> PAGE_SHIFT) & 511;
			if (pread(kd->pmfd, &pte, sizeof(pte),
			      ofs + pteindex * sizeof(pte)) != sizeof(pte)) {
				_kvm_err(kd, kd->program,
					 "_kvm_vatop: pte lookup not valid");
				goto invalid;
			}

			/*
			 * Calculate end page.  Large-page branches jump
			 * here with the intra-large-page offset already
			 * folded into pte, so masking with PG_FRAME
			 * yields the 4K page's physical address.
			 */
shortcut:
			a = pte & PG_FRAME;
			break;
		default:
			_kvm_err(kd, kd->program,
				 "_kvm_vatop: bad pgtable mode ");
			goto invalid;
		}
		/* Map the physical page to its location in the dump file */
		ofs = hpt_find(kd, a);
		if (ofs == -1) {
			_kvm_err(kd, kd->program, "_kvm_vatop: physical address 0x%lx not in minidump", a);
			goto invalid;
		}
		*pa = ofs + offset;
		return (PAGE_SIZE - offset);
	} else if (va >= vm->dmapbase && va < vm->dmapend) {
		/* Direct map addresses translate by simple subtraction */
		a = (va - vm->dmapbase) & ~PAGE_MASK;
		ofs = hpt_find(kd, a);
		if (ofs == -1) {
			_kvm_err(kd, kd->program, "_kvm_vatop: direct map address 0x%lx not in minidump", va);
			goto invalid;
		}
		*pa = ofs + offset;
		return (PAGE_SIZE - offset);
	} else {
		_kvm_err(kd, kd->program, "_kvm_vatop: virtual address 0x%lx not minidumped", va);
		goto invalid;
	}

invalid:
	_kvm_err(kd, 0, "invalid address (0x%lx)", va);
	return (0);
}
418 
419 int
420 _kvm_minidump_kvatop(kvm_t *kd, u_long va, off_t *pa)
421 {
422 	if (kvm_ishost(kd)) {
423 		_kvm_err(kd, 0, "kvm_vatop called in live kernel!");
424 		return((off_t)0);
425 	}
426 
427 	return (_kvm_minidump_vatop(kd, va, pa));
428 }
429