1 /*- 2 * Copyright (c) 2004 Marcel Moolenaar 3 * Copyright (c) 2001 Doug Rabson 4 * Copyright (c) 2016 The FreeBSD Foundation 5 * All rights reserved. 6 * 7 * Portions of this software were developed by Konstantin Belousov 8 * under sponsorship from the FreeBSD Foundation. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/efi.h>
#include <sys/kernel.h>
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/clock.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/vmmeter.h>
#include <isa/rtc.h>
#include <machine/fpu.h>
#include <machine/efi.h>
#include <machine/metadata.h>
#include <machine/md_var.h>
#include <machine/smp.h>
#include <machine/vmparam.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_extern.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>

/*
 * State for the EFI runtime 1:1 (identity) page-table map.  With 5-level
 * paging enabled (la57) efi_pml5 is the root table, otherwise efi_pml4 is.
 * All page-table pages are resident in obj_1t1_pt, grabbed at
 * monotonically increasing pindex efi_1t1_idx; efi_pmltop_page is the
 * top-level table page loaded into %cr3 for runtime calls.
 */
static pml5_entry_t *efi_pml5;
static pml4_entry_t *efi_pml4;
static vm_object_t obj_1t1_pt;
static vm_page_t efi_pmltop_page;
static vm_pindex_t efi_1t1_idx;

/*
 * Tear down the 1:1 map: release the backing object, which frees every
 * page-table page in one pass, and clear the cached roots.
 */
void
efi_destroy_1t1_map(void)
{
	vm_page_t m;

	if (obj_1t1_pt != NULL) {
		VM_OBJECT_RLOCK(obj_1t1_pt);
		/*
		 * Drop each page back to a bare object reference so
		 * vm_object_deallocate() below can free them, and undo
		 * the wiring accounting added when they were grabbed.
		 */
		TAILQ_FOREACH(m, &obj_1t1_pt->memq, listq)
			m->ref_count = VPRC_OBJREF;
		vm_wire_sub(obj_1t1_pt->resident_page_count);
		VM_OBJECT_RUNLOCK(obj_1t1_pt);
		vm_object_deallocate(obj_1t1_pt);
	}

	obj_1t1_pt = NULL;
	efi_pml4 = NULL;
	efi_pml5 = NULL;
	efi_pmltop_page = NULL;
}

/*
 * Map a physical address from EFI runtime space into KVA space. Returns 0 to
 * indicate a failed mapping so that the caller may handle error.
93 */ 94 vm_offset_t 95 efi_phys_to_kva(vm_paddr_t paddr) 96 { 97 98 if (paddr >= dmaplimit) 99 return (0); 100 return (PHYS_TO_DMAP(paddr)); 101 } 102 103 static vm_page_t 104 efi_1t1_page(void) 105 { 106 107 return (vm_page_grab(obj_1t1_pt, efi_1t1_idx++, VM_ALLOC_NOBUSY | 108 VM_ALLOC_WIRED | VM_ALLOC_ZERO)); 109 } 110 111 static pt_entry_t * 112 efi_1t1_pte(vm_offset_t va) 113 { 114 pml5_entry_t *pml5e; 115 pml4_entry_t *pml4e; 116 pdp_entry_t *pdpe; 117 pd_entry_t *pde; 118 pt_entry_t *pte; 119 vm_page_t m; 120 vm_pindex_t pml5_idx, pml4_idx, pdp_idx, pd_idx; 121 vm_paddr_t mphys; 122 123 pml4_idx = pmap_pml4e_index(va); 124 if (la57) { 125 pml5_idx = pmap_pml5e_index(va); 126 pml5e = &efi_pml5[pml5_idx]; 127 if (*pml5e == 0) { 128 m = efi_1t1_page(); 129 mphys = VM_PAGE_TO_PHYS(m); 130 *pml5e = mphys | X86_PG_RW | X86_PG_V; 131 } else { 132 mphys = *pml5e & PG_FRAME; 133 } 134 pml4e = (pml4_entry_t *)PHYS_TO_DMAP(mphys); 135 pml4e = &pml4e[pml4_idx]; 136 } else { 137 pml4e = &efi_pml4[pml4_idx]; 138 } 139 140 if (*pml4e == 0) { 141 m = efi_1t1_page(); 142 mphys = VM_PAGE_TO_PHYS(m); 143 *pml4e = mphys | X86_PG_RW | X86_PG_V; 144 } else { 145 mphys = *pml4e & PG_FRAME; 146 } 147 148 pdpe = (pdp_entry_t *)PHYS_TO_DMAP(mphys); 149 pdp_idx = pmap_pdpe_index(va); 150 pdpe += pdp_idx; 151 if (*pdpe == 0) { 152 m = efi_1t1_page(); 153 mphys = VM_PAGE_TO_PHYS(m); 154 *pdpe = mphys | X86_PG_RW | X86_PG_V; 155 } else { 156 mphys = *pdpe & PG_FRAME; 157 } 158 159 pde = (pd_entry_t *)PHYS_TO_DMAP(mphys); 160 pd_idx = pmap_pde_index(va); 161 pde += pd_idx; 162 if (*pde == 0) { 163 m = efi_1t1_page(); 164 mphys = VM_PAGE_TO_PHYS(m); 165 *pde = mphys | X86_PG_RW | X86_PG_V; 166 } else { 167 mphys = *pde & PG_FRAME; 168 } 169 170 pte = (pt_entry_t *)PHYS_TO_DMAP(mphys); 171 pte += pmap_pte_index(va); 172 KASSERT(*pte == 0, ("va %#jx *pt %#jx", va, *pte)); 173 174 return (pte); 175 } 176 177 bool 178 efi_create_1t1_map(struct efi_md *map, int ndesc, int descsz) 179 { 180 
struct efi_md *p; 181 pt_entry_t *pte; 182 void *pml; 183 vm_page_t m; 184 vm_offset_t va; 185 uint64_t idx; 186 int bits, i, mode; 187 188 obj_1t1_pt = vm_pager_allocate(OBJT_PHYS, NULL, ptoa(1 + 189 NPML4EPG + NPML4EPG * NPDPEPG + NPML4EPG * NPDPEPG * NPDEPG), 190 VM_PROT_ALL, 0, NULL); 191 efi_1t1_idx = 0; 192 VM_OBJECT_WLOCK(obj_1t1_pt); 193 efi_pmltop_page = efi_1t1_page(); 194 VM_OBJECT_WUNLOCK(obj_1t1_pt); 195 pml = (void *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(efi_pmltop_page)); 196 if (la57) { 197 efi_pml5 = pml; 198 pmap_pinit_pml5(efi_pmltop_page); 199 } else { 200 efi_pml4 = pml; 201 pmap_pinit_pml4(efi_pmltop_page); 202 } 203 204 for (i = 0, p = map; i < ndesc; i++, p = efi_next_descriptor(p, 205 descsz)) { 206 if ((p->md_attr & EFI_MD_ATTR_RT) == 0) 207 continue; 208 if (p->md_virt != 0 && p->md_virt != p->md_phys) { 209 if (bootverbose) 210 printf("EFI Runtime entry %d is mapped\n", i); 211 goto fail; 212 } 213 if ((p->md_phys & EFI_PAGE_MASK) != 0) { 214 if (bootverbose) 215 printf("EFI Runtime entry %d is not aligned\n", 216 i); 217 goto fail; 218 } 219 if (p->md_phys + p->md_pages * EFI_PAGE_SIZE < p->md_phys || 220 p->md_phys + p->md_pages * EFI_PAGE_SIZE >= 221 VM_MAXUSER_ADDRESS) { 222 printf("EFI Runtime entry %d is not in mappable for RT:" 223 "base %#016jx %#jx pages\n", 224 i, (uintmax_t)p->md_phys, 225 (uintmax_t)p->md_pages); 226 goto fail; 227 } 228 if ((p->md_attr & EFI_MD_ATTR_WB) != 0) 229 mode = VM_MEMATTR_WRITE_BACK; 230 else if ((p->md_attr & EFI_MD_ATTR_WT) != 0) 231 mode = VM_MEMATTR_WRITE_THROUGH; 232 else if ((p->md_attr & EFI_MD_ATTR_WC) != 0) 233 mode = VM_MEMATTR_WRITE_COMBINING; 234 else if ((p->md_attr & EFI_MD_ATTR_WP) != 0) 235 mode = VM_MEMATTR_WRITE_PROTECTED; 236 else if ((p->md_attr & EFI_MD_ATTR_UC) != 0) 237 mode = VM_MEMATTR_UNCACHEABLE; 238 else { 239 if (bootverbose) 240 printf("EFI Runtime entry %d mapping " 241 "attributes unsupported\n", i); 242 mode = VM_MEMATTR_UNCACHEABLE; 243 } 244 bits = 
pmap_cache_bits(kernel_pmap, mode, FALSE) | X86_PG_RW | 245 X86_PG_V; 246 VM_OBJECT_WLOCK(obj_1t1_pt); 247 for (va = p->md_phys, idx = 0; idx < p->md_pages; idx++, 248 va += PAGE_SIZE) { 249 pte = efi_1t1_pte(va); 250 pte_store(pte, va | bits); 251 252 m = PHYS_TO_VM_PAGE(va); 253 if (m != NULL && VM_PAGE_TO_PHYS(m) == 0) { 254 vm_page_init_page(m, va, -1); 255 m->order = VM_NFREEORDER + 1; /* invalid */ 256 m->pool = VM_NFREEPOOL + 1; /* invalid */ 257 pmap_page_set_memattr_noflush(m, mode); 258 } 259 } 260 VM_OBJECT_WUNLOCK(obj_1t1_pt); 261 } 262 263 return (true); 264 265 fail: 266 efi_destroy_1t1_map(); 267 return (false); 268 } 269 270 /* 271 * Create an environment for the EFI runtime code call. The most 272 * important part is creating the required 1:1 physical->virtual 273 * mappings for the runtime segments. To do that, we manually create 274 * page table which unmap userspace but gives correct kernel mapping. 275 * The 1:1 mappings for runtime segments usually occupy low 4G of the 276 * physical address map. 277 * 278 * The 1:1 mappings were chosen over the SetVirtualAddressMap() EFI RT 279 * service, because there are some BIOSes which fail to correctly 280 * relocate itself on the call, requiring both 1:1 and virtual 281 * mapping. As result, we must provide 1:1 mapping anyway, so no 282 * reason to bother with the virtual map, and no need to add a 283 * complexity into loader. 284 * 285 * The fpu_kern_enter() call allows firmware to use FPU, as mandated 286 * by the specification. In particular, CR0.TS bit is cleared. Also 287 * it enters critical section, giving us neccessary protection against 288 * context switch. 289 * 290 * There is no need to disable interrupts around the change of %cr3, 291 * the kernel mappings are correct, while we only grabbed the 292 * userspace portion of VA. Interrupts handlers must not access 293 * userspace. 
 * Having interrupts enabled fixes the issue with
 * firmware/SMM long operation, which would negatively affect IPIs,
 * esp. TLB shootdown requests.
 */
int
efi_arch_enter(void)
{
	pmap_t curpmap;

	curpmap = PCPU_GET(curpmap);
	PMAP_LOCK_ASSERT(curpmap, MA_OWNED);
	/* Firmware must not fault; save state to restore in efi_arch_leave(). */
	curthread->td_md.md_efirt_dis_pf = vm_fault_disable_pagefaults();

	/*
	 * IPI TLB shootdown handler invltlb_pcid_handler() reloads
	 * %cr3 from the curpmap->pm_cr3, which would disable runtime
	 * segments mappings.  Block the handler's action by setting
	 * curpmap to impossible value.  See also comment in
	 * pmap.c:pmap_activate_sw().
	 */
	if (pmap_pcid_enabled && !invpcid_works)
		PCPU_SET(curpmap, NULL);

	/* Switch to the EFI 1:1 page-table root, keeping our PCID if in use. */
	load_cr3(VM_PAGE_TO_PHYS(efi_pmltop_page) | (pmap_pcid_enabled ?
	    curpmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid : 0));
	/*
	 * If PCID is enabled, the clear CR3_PCID_SAVE bit in the loaded %cr3
	 * causes TLB invalidation.
	 */
	if (!pmap_pcid_enabled)
		invltlb();
	return (0);
}

/*
 * Undo efi_arch_enter(): restore the process pmap in %cr3, drop the
 * curpmap hiding, and re-enable pagefault handling.
 */
void
efi_arch_leave(void)
{
	pmap_t curpmap;

	curpmap = &curproc->p_vmspace->vm_pmap;
	/* Re-expose curpmap to the TLB shootdown IPI handlers. */
	if (pmap_pcid_enabled && !invpcid_works)
		PCPU_SET(curpmap, curpmap);
	load_cr3(curpmap->pm_cr3 | (pmap_pcid_enabled ?
	    curpmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid : 0));
	/* Without PCID the %cr3 reload does not flush; do it explicitly. */
	if (!pmap_pcid_enabled)
		invltlb();
	vm_fault_enable_pagefaults(curthread->td_md.md_efirt_dis_pf);
}

/* XXX debug stuff */
/*
 * debug.efi_time sysctl: any write triggers a call to efi_get_time()
 * and prints the returned date/time to the user's terminal.  Reads
 * return 0 and do nothing.
 */
static int
efi_time_sysctl_handler(SYSCTL_HANDLER_ARGS)
{
	struct efi_tm tm;
	int error, val;

	val = 0;
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);		/* read access: nothing to do */
	error = efi_get_time(&tm);
	if (error == 0) {
		uprintf("EFI reports: Year %d Month %d Day %d Hour %d Min %d "
		    "Sec %d\n", tm.tm_year, tm.tm_mon, tm.tm_mday, tm.tm_hour,
		    tm.tm_min, tm.tm_sec);
	}
	return (error);
}

SYSCTL_PROC(_debug, OID_AUTO, efi_time,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
    efi_time_sysctl_handler, "I",
    "");