xref: /freebsd/sys/amd64/amd64/efirt_machdep.c (revision 315ee00f)
/*-
 * Copyright (c) 2004 Marcel Moolenaar
 * Copyright (c) 2001 Doug Rabson
 * Copyright (c) 2016 The FreeBSD Foundation
 * All rights reserved.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/efi.h>
#include <sys/kernel.h>
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/clock.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/vmmeter.h>
#include <isa/rtc.h>
#include <machine/fpu.h>
#include <machine/efi.h>
#include <machine/metadata.h>
#include <machine/md_var.h>
#include <machine/smp.h>
#include <machine/vmparam.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_extern.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>

static pml5_entry_t *efi_pml5;
static pml4_entry_t *efi_pml4;
static vm_object_t obj_1t1_pt;
static vm_page_t efi_pmltop_page;
static vm_pindex_t efi_1t1_idx;

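/*
 * Tear down the 1:1 page table.  The page table pages were wired when
 * allocated; reset each page's ref_count to the bare object reference
 * and drop the wire accounting, so that deallocating the object frees
 * the pages.
 */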
void
efi_destroy_1t1_map(void)
{
	vm_page_t m;

	if (obj_1t1_pt != NULL) {
		VM_OBJECT_RLOCK(obj_1t1_pt);
		TAILQ_FOREACH(m, &obj_1t1_pt->memq, listq)
			m->ref_count = VPRC_OBJREF;
		vm_wire_sub(obj_1t1_pt->resident_page_count);
		VM_OBJECT_RUNLOCK(obj_1t1_pt);
		vm_object_deallocate(obj_1t1_pt);
	}

	obj_1t1_pt = NULL;
	efi_pml4 = NULL;
	efi_pml5 = NULL;
	efi_pmltop_page = NULL;
}

/*
 * Map a physical address from EFI runtime space into KVA space.  Returns 0 to
 * indicate a failed mapping so that the caller may handle the error.
 */
vm_offset_t
efi_phys_to_kva(vm_paddr_t paddr)
{

	if (paddr >= dmaplimit)
		return (0);
	return (PHYS_TO_DMAP(paddr));
}

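/*
 * Grab the next page from the 1:1 page table backing object, wired
 * and zero-filled.
 */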
static vm_page_t
efi_1t1_page(void)
{

	return (vm_page_grab(obj_1t1_pt, efi_1t1_idx++, VM_ALLOC_NOBUSY |
	    VM_ALLOC_WIRED | VM_ALLOC_ZERO));
}

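/*
 * Walk the 1:1 page table for va, allocating zeroed intermediate page
 * table pages on demand, and return a pointer to the final, not yet
 * valid, PTE.  With LA57 the walk starts from the PML5, otherwise
 * from the PML4.
 */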
static pt_entry_t *
efi_1t1_pte(vm_offset_t va)
{
	pml5_entry_t *pml5e;
	pml4_entry_t *pml4e;
	pdp_entry_t *pdpe;
	pd_entry_t *pde;
	pt_entry_t *pte;
	vm_page_t m;
	vm_pindex_t pml5_idx, pml4_idx, pdp_idx, pd_idx;
	vm_paddr_t mphys;

	pml4_idx = pmap_pml4e_index(va);
	if (la57) {
		pml5_idx = pmap_pml5e_index(va);
		pml5e = &efi_pml5[pml5_idx];
		if (*pml5e == 0) {
			m = efi_1t1_page();
			mphys = VM_PAGE_TO_PHYS(m);
			*pml5e = mphys | X86_PG_RW | X86_PG_V;
		} else {
			mphys = *pml5e & PG_FRAME;
		}
		pml4e = (pml4_entry_t *)PHYS_TO_DMAP(mphys);
		pml4e = &pml4e[pml4_idx];
	} else {
		pml4e = &efi_pml4[pml4_idx];
	}

	if (*pml4e == 0) {
		m = efi_1t1_page();
		mphys = VM_PAGE_TO_PHYS(m);
		*pml4e = mphys | X86_PG_RW | X86_PG_V;
	} else {
		mphys = *pml4e & PG_FRAME;
	}

	pdpe = (pdp_entry_t *)PHYS_TO_DMAP(mphys);
	pdp_idx = pmap_pdpe_index(va);
	pdpe += pdp_idx;
	if (*pdpe == 0) {
		m = efi_1t1_page();
		mphys = VM_PAGE_TO_PHYS(m);
		*pdpe = mphys | X86_PG_RW | X86_PG_V;
	} else {
		mphys = *pdpe & PG_FRAME;
	}

	pde = (pd_entry_t *)PHYS_TO_DMAP(mphys);
	pd_idx = pmap_pde_index(va);
	pde += pd_idx;
	if (*pde == 0) {
		m = efi_1t1_page();
		mphys = VM_PAGE_TO_PHYS(m);
		*pde = mphys | X86_PG_RW | X86_PG_V;
	} else {
		mphys = *pde & PG_FRAME;
	}

	pte = (pt_entry_t *)PHYS_TO_DMAP(mphys);
	pte += pmap_pte_index(va);
	KASSERT(*pte == 0, ("va %#jx *pt %#jx", va, *pte));

	return (pte);
}

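/*
 * Build the 1:1 page table for the EFI runtime segments described by
 * the firmware memory map.  The backing object is sized for the worst
 * case of a fully populated 4-level hierarchy.  A descriptor that is
 * already virtually mapped, misaligned, or not mappable for runtime
 * use causes the whole map to be rejected.
 */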
bool
efi_create_1t1_map(struct efi_md *map, int ndesc, int descsz)
{
	struct efi_md *p;
	pt_entry_t *pte;
	void *pml;
	vm_page_t m;
	vm_offset_t va;
	uint64_t idx;
	int bits, i, mode;

	obj_1t1_pt = vm_pager_allocate(OBJT_PHYS, NULL, ptoa(1 +
	    NPML4EPG + NPML4EPG * NPDPEPG + NPML4EPG * NPDPEPG * NPDEPG),
	    VM_PROT_ALL, 0, NULL);
	efi_1t1_idx = 0;
	VM_OBJECT_WLOCK(obj_1t1_pt);
	efi_pmltop_page = efi_1t1_page();
	VM_OBJECT_WUNLOCK(obj_1t1_pt);
	pml = (void *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(efi_pmltop_page));
	if (la57) {
		efi_pml5 = pml;
		pmap_pinit_pml5(efi_pmltop_page);
	} else {
		efi_pml4 = pml;
		pmap_pinit_pml4(efi_pmltop_page);
	}

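	/*
	 * Walk the memory map and enter a 1:1 mapping for every page
	 * of each runtime (EFI_MD_ATTR_RT) descriptor, with cache
	 * attributes derived from the descriptor.
	 */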
	for (i = 0, p = map; i < ndesc; i++, p = efi_next_descriptor(p,
	    descsz)) {
		if ((p->md_attr & EFI_MD_ATTR_RT) == 0)
			continue;
		if (p->md_virt != 0 && p->md_virt != p->md_phys) {
			if (bootverbose)
				printf("EFI Runtime entry %d is mapped\n", i);
			goto fail;
		}
		if ((p->md_phys & EFI_PAGE_MASK) != 0) {
			if (bootverbose)
				printf("EFI Runtime entry %d is not aligned\n",
				    i);
			goto fail;
		}
		if (p->md_phys + p->md_pages * EFI_PAGE_SIZE < p->md_phys ||
		    p->md_phys + p->md_pages * EFI_PAGE_SIZE >=
		    VM_MAXUSER_ADDRESS) {
			printf("EFI Runtime entry %d is not mappable for RT: "
			    "base %#016jx %#jx pages\n",
			    i, (uintmax_t)p->md_phys,
			    (uintmax_t)p->md_pages);
			goto fail;
		}
		if ((p->md_attr & EFI_MD_ATTR_WB) != 0)
			mode = VM_MEMATTR_WRITE_BACK;
		else if ((p->md_attr & EFI_MD_ATTR_WT) != 0)
			mode = VM_MEMATTR_WRITE_THROUGH;
		else if ((p->md_attr & EFI_MD_ATTR_WC) != 0)
			mode = VM_MEMATTR_WRITE_COMBINING;
		else if ((p->md_attr & EFI_MD_ATTR_WP) != 0)
			mode = VM_MEMATTR_WRITE_PROTECTED;
		else if ((p->md_attr & EFI_MD_ATTR_UC) != 0)
			mode = VM_MEMATTR_UNCACHEABLE;
		else {
			if (bootverbose)
				printf("EFI Runtime entry %d mapping "
				    "attributes unsupported\n", i);
			mode = VM_MEMATTR_UNCACHEABLE;
		}
		bits = pmap_cache_bits(kernel_pmap, mode, FALSE) | X86_PG_RW |
		    X86_PG_V;
		VM_OBJECT_WLOCK(obj_1t1_pt);
		for (va = p->md_phys, idx = 0; idx < p->md_pages; idx++,
		    va += PAGE_SIZE) {
			pte = efi_1t1_pte(va);
			pte_store(pte, va | bits);

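			/*
			 * The runtime page may fall outside the ranges
			 * initialized at boot, leaving its vm_page
			 * zeroed.  In that case, initialize it with
			 * invalid order/pool markers and record the
			 * cache mode selected above.
			 */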
			m = PHYS_TO_VM_PAGE(va);
			if (m != NULL && VM_PAGE_TO_PHYS(m) == 0) {
				vm_page_init_page(m, va, -1);
				m->order = VM_NFREEORDER + 1; /* invalid */
				m->pool = VM_NFREEPOOL + 1; /* invalid */
				pmap_page_set_memattr_noflush(m, mode);
			}
		}
		VM_OBJECT_WUNLOCK(obj_1t1_pt);
	}

	return (true);

fail:
	efi_destroy_1t1_map();
	return (false);
}

/*
 * Create an environment for the EFI runtime code call.  The most
 * important part is creating the required 1:1 physical->virtual
 * mappings for the runtime segments.  To do that, we manually create
 * a page table which unmaps userspace but gives the correct kernel
 * mappings.  The 1:1 mappings for runtime segments usually occupy
 * the low 4G of the physical address map.
 *
 * The 1:1 mappings were chosen over the SetVirtualAddressMap() EFI RT
 * service because some BIOSes fail to correctly relocate themselves
 * on the call, requiring both 1:1 and virtual mappings.  As a result,
 * we must provide the 1:1 mapping anyway, so there is no reason to
 * bother with the virtual map, and no need to add complexity to the
 * loader.
 *
 * There is no need to disable interrupts around the change of %cr3:
 * the kernel mappings stay correct, while we only replaced the
 * userspace portion of the VA, and interrupt handlers must not access
 * userspace.  Having interrupts enabled avoids the problem of a long
 * firmware/SMM operation negatively affecting IPIs, esp. TLB
 * shootdown requests.
 */
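/*
 * A rough sketch of the expected call sequence from the MI EFI
 * runtime glue (simplified and illustrative only; the real wrapper
 * also handles serialization of EFI calls, FPU context, and fault
 * recovery):
 *
 *	PMAP_LOCK(PCPU_GET(curpmap));	(asserted by efi_arch_enter())
 *	error = efi_arch_enter();
 *	if (error == 0) {
 *		status = rt->rt_gettime(&tm, NULL);	(any RT service)
 *		efi_arch_leave();
 *	}
 *	PMAP_UNLOCK(PCPU_GET(curpmap));
 */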
int
efi_arch_enter(void)
{
	pmap_t curpmap;
	uint64_t cr3;

	curpmap = PCPU_GET(curpmap);
	PMAP_LOCK_ASSERT(curpmap, MA_OWNED);
	curthread->td_md.md_efirt_dis_pf = vm_fault_disable_pagefaults();

	/*
	 * The IPI TLB shootdown handler invltlb_pcid_handler() reloads
	 * %cr3 from curpmap->pm_cr3, which would disable the runtime
	 * segments mappings.  Block the handler's action by setting
	 * curpmap to an impossible value.  See also the comment in
	 * pmap.c:pmap_activate_sw().
	 */
	if (pmap_pcid_enabled && !invpcid_works)
		PCPU_SET(curpmap, NULL);

	cr3 = VM_PAGE_TO_PHYS(efi_pmltop_page);
	if (pmap_pcid_enabled)
		cr3 |= pmap_get_pcid(curpmap);
	load_cr3(cr3);
	/*
	 * If PCID is enabled, the cleared CR3_PCID_SAVE bit in the
	 * loaded %cr3 already causes a TLB invalidation, so an
	 * explicit invltlb() is needed only otherwise.
	 */
	if (!pmap_pcid_enabled)
		invltlb();
	return (0);
}

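/*
 * Restore the calling process's address space: reload %cr3 from the
 * process pmap and, if PCID is unavailable, flush the TLB explicitly.
 * This also reinstates curpmap for the TLB shootdown handlers when it
 * was cleared in efi_arch_enter().
 */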
void
efi_arch_leave(void)
{
	pmap_t curpmap;
	uint64_t cr3;

	curpmap = &curproc->p_vmspace->vm_pmap;
	cr3 = curpmap->pm_cr3;
	if (pmap_pcid_enabled) {
		cr3 |= pmap_get_pcid(curpmap);
		if (!invpcid_works)
			PCPU_SET(curpmap, curpmap);
	}
	load_cr3(cr3);
	if (!pmap_pcid_enabled)
		invltlb();
	vm_fault_enable_pagefaults(curthread->td_md.md_efirt_dis_pf);
}

/* XXX debug stuff */
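/*
 * Writing any value to the debug.efi_time sysctl queries the firmware
 * clock via efi_get_time() and prints the result on the controlling
 * terminal, e.g.:
 *
 *	# sysctl debug.efi_time=1
 */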
static int
efi_time_sysctl_handler(SYSCTL_HANDLER_ARGS)
{
	struct efi_tm tm;
	int error, val;

	val = 0;
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	error = efi_get_time(&tm);
	if (error == 0) {
		uprintf("EFI reports: Year %d Month %d Day %d Hour %d Min %d "
		    "Sec %d\n", tm.tm_year, tm.tm_mon, tm.tm_mday, tm.tm_hour,
		    tm.tm_min, tm.tm_sec);
	}
	return (error);
}

SYSCTL_PROC(_debug, OID_AUTO, efi_time,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
    efi_time_sysctl_handler, "I",
    "Query the EFI firmware clock and print the result");