/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/smp.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/vmem.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_extern.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_param.h>

#include <machine/armreg.h>
#include <machine/vm.h>
#include <machine/cpufunc.h>
#include <machine/cpu.h>
#include <machine/machdep.h>
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <machine/atomic.h>
#include <machine/hypervisor.h>
#include <machine/pmap.h>

#include "mmu.h"
#include "arm64.h"
#include "hyp.h"
#include "reset.h"
#include "io/vgic.h"
#include "io/vgic_v3.h"
#include "io/vtimer.h"
#include "vmm_stat.h"

#define	HANDLED		1
#define	UNHANDLED	0

/* Number of bits in an EL2 virtual address */
#define	EL2_VIRT_BITS	48
CTASSERT((1ul << EL2_VIRT_BITS) >= HYP_VM_MAX_ADDRESS);

/* TODO: Move the host hypctx off the stack */
#define	VMM_STACK_PAGES	4
#define	VMM_STACK_SIZE	(VMM_STACK_PAGES * PAGE_SIZE)

static int vmm_pmap_levels, vmm_virt_bits, vmm_max_ipa_bits;

/* Register values passed to arm_setup_vectors to set in the hypervisor */
struct vmm_init_regs {
	uint64_t tcr_el2;
	uint64_t vtcr_el2;
};

MALLOC_DEFINE(M_HYP, "ARM VMM HYP", "ARM VMM HYP");

extern char hyp_init_vectors[];
extern char hyp_vectors[];
extern char hyp_stub_vectors[];

static vm_paddr_t hyp_code_base;
static size_t hyp_code_len;

static char *stack[MAXCPU];
static vm_offset_t stack_hyp_va[MAXCPU];

static vmem_t *el2_mem_alloc;

static void arm_setup_vectors(void *arg);
static void vmm_pmap_clean_stage2_tlbi(void);
static void vmm_pmap_invalidate_range(uint64_t, vm_offset_t, vm_offset_t, bool);
static void vmm_pmap_invalidate_all(uint64_t);

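/* The vCPU currently loaded on each physical CPU, or NULL when in the host */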
DPCPU_DEFINE_STATIC(struct hypctx *, vcpu);

static inline void
arm64_set_active_vcpu(struct hypctx *hypctx)
{
	DPCPU_SET(vcpu, hypctx);
}

struct hypctx *
arm64_get_active_vcpu(void)
{
	return (DPCPU_GET(vcpu));
}

static void
arm_setup_vectors(void *arg)
{
	struct vmm_init_regs *el2_regs;
	uintptr_t stack_top;
	uint32_t sctlr_el2;
	register_t daif;

	el2_regs = arg;
	arm64_set_active_vcpu(NULL);

	daif = intr_disable();

	/*
	 * Install the temporary vectors which will be responsible for
	 * initializing the VMM when we next trap into EL2.
	 *
	 * x0: the exception vector table responsible for hypervisor
	 * initialization on the next call.
	 */
	vmm_call_hyp(vtophys(&vmm_hyp_code));

	/* Create and map the hypervisor stack */
	stack_top = stack_hyp_va[PCPU_GET(cpuid)] + VMM_STACK_SIZE;

	/*
	 * Configure the system control register for EL2:
	 *
	 * SCTLR_EL2_M: MMU on
	 * SCTLR_EL2_C: Data cacheability not affected
	 * SCTLR_EL2_I: Instruction cacheability not affected
	 * SCTLR_EL2_A: Instruction alignment check
	 * SCTLR_EL2_SA: Stack pointer alignment check
	 * SCTLR_EL2_WXN: Treat writable memory as execute never
	 * ~SCTLR_EL2_EE: Data accesses are little-endian
	 */
	sctlr_el2 = SCTLR_EL2_RES1;
	sctlr_el2 |= SCTLR_EL2_M | SCTLR_EL2_C | SCTLR_EL2_I;
	sctlr_el2 |= SCTLR_EL2_A | SCTLR_EL2_SA;
	sctlr_el2 |= SCTLR_EL2_WXN;
	sctlr_el2 &= ~SCTLR_EL2_EE;

	/* Special call to initialize EL2 */
	vmm_call_hyp(vmmpmap_to_ttbr0(), stack_top, el2_regs->tcr_el2,
	    sctlr_el2, el2_regs->vtcr_el2);

	intr_restore(daif);
}

static void
arm_teardown_vectors(void *arg)
{
	register_t daif;

	/*
	 * vmm_cleanup() will disable the MMU. For the next few instructions,
	 * before the hardware disables the MMU, one of the following is
	 * possible:
	 *
	 * a. The instruction addresses are fetched with the MMU disabled,
	 * and they must represent the actual physical addresses. This will
	 * work because we call the vmm_cleanup() function by its physical
	 * address.
	 *
	 * b. The instruction addresses are fetched using the old translation
	 * tables. This will work because we have an identity mapping in place
	 * in the translation tables and vmm_cleanup() is called by its
	 * physical address.
	 */
	daif = intr_disable();
	/* TODO: Invalidate the cache */
	vmm_call_hyp(HYP_CLEANUP, vtophys(hyp_stub_vectors));
	intr_restore(daif);

	arm64_set_active_vcpu(NULL);
}

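/*
 * Map the number of stage 2 page table levels to the VTCR_EL2.SL0 starting
 * level field. More levels start the walk at a lower-numbered level, e.g.
 * with a 4K granule a 4 level walk starts at level 0 while a 2 level walk
 * starts at level 2.
 */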
static uint64_t
vmm_vtcr_el2_sl(u_int levels)
{
#if PAGE_SIZE == PAGE_SIZE_4K
	switch (levels) {
	case 2:
		return (VTCR_EL2_SL0_4K_LVL2);
	case 3:
		return (VTCR_EL2_SL0_4K_LVL1);
	case 4:
		return (VTCR_EL2_SL0_4K_LVL0);
	default:
		panic("%s: Invalid number of page table levels %u", __func__,
		    levels);
	}
#elif PAGE_SIZE == PAGE_SIZE_16K
	switch (levels) {
	case 2:
		return (VTCR_EL2_SL0_16K_LVL2);
	case 3:
		return (VTCR_EL2_SL0_16K_LVL1);
	case 4:
		return (VTCR_EL2_SL0_16K_LVL0);
	default:
		panic("%s: Invalid number of page table levels %u", __func__,
		    levels);
	}
#else
#error Unsupported page size
#endif
}

int
vmmops_modinit(int ipinum)
{
	struct vmm_init_regs el2_regs;
	vm_offset_t next_hyp_va;
	vm_paddr_t vmm_base;
	uint64_t id_aa64mmfr0_el1, pa_range_bits, pa_range_field;
	uint64_t cnthctl_el2;
	register_t daif;
	int cpu, i;
	bool rv __diagused;

	if (!virt_enabled()) {
		printf(
		    "vmm: Processor doesn't have support for virtualization\n");
		return (ENXIO);
	}

	/* TODO: Support VHE */
	if (in_vhe()) {
		printf("vmm: VHE is unsupported\n");
		return (ENXIO);
	}

	if (!vgic_present()) {
		printf("vmm: No vgic found\n");
		return (ENODEV);
	}

	if (!get_kernel_reg(ID_AA64MMFR0_EL1, &id_aa64mmfr0_el1)) {
		printf("vmm: Unable to read ID_AA64MMFR0_EL1\n");
		return (ENXIO);
	}
	pa_range_field = ID_AA64MMFR0_PARange_VAL(id_aa64mmfr0_el1);
	/*
	 * Use 3 levels to give us up to 39 bits with 4k pages, or
	 * 47 bits with 16k pages.
	 */
	/* TODO: Check the number of levels for 64k pages */
	vmm_pmap_levels = 3;
	switch (pa_range_field) {
	case ID_AA64MMFR0_PARange_4G:
		printf("vmm: Not enough physical address bits\n");
		return (ENXIO);
	case ID_AA64MMFR0_PARange_64G:
		vmm_virt_bits = 36;
#if PAGE_SIZE == PAGE_SIZE_16K
		vmm_pmap_levels = 2;
#endif
		break;
	default:
		vmm_virt_bits = 39;
		break;
	}
	pa_range_bits = pa_range_field >> ID_AA64MMFR0_PARange_SHIFT;

	/* Initialise the EL2 MMU */
	if (!vmmpmap_init()) {
		printf("vmm: Failed to init the EL2 MMU\n");
		return (ENOMEM);
	}

	/* Set up the stage 2 pmap callbacks */
	MPASS(pmap_clean_stage2_tlbi == NULL);
	pmap_clean_stage2_tlbi = vmm_pmap_clean_stage2_tlbi;
	pmap_stage2_invalidate_range = vmm_pmap_invalidate_range;
	pmap_stage2_invalidate_all = vmm_pmap_invalidate_all;

	/*
	 * Create an allocator for the virtual address space used by EL2.
	 * EL2 code is identity-mapped; the allocator is used to find space
	 * for VM structures.
	 */
	el2_mem_alloc = vmem_create("VMM EL2", 0, 0, PAGE_SIZE, 0, M_WAITOK);

	/* Create the mappings for the hypervisor translation table. */
	hyp_code_len = round_page(&vmm_hyp_code_end - &vmm_hyp_code);

	/* We need a physical identity mapping for when we activate the MMU */
	hyp_code_base = vmm_base = vtophys(&vmm_hyp_code);
	rv = vmmpmap_enter(vmm_base, hyp_code_len, vmm_base,
	    VM_PROT_READ | VM_PROT_EXECUTE);
	MPASS(rv);

	next_hyp_va = roundup2(vmm_base + hyp_code_len, L2_SIZE);

	/* Create a per-CPU hypervisor stack */
	CPU_FOREACH(cpu) {
		stack[cpu] = malloc(VMM_STACK_SIZE, M_HYP, M_WAITOK | M_ZERO);
		stack_hyp_va[cpu] = next_hyp_va;

		for (i = 0; i < VMM_STACK_PAGES; i++) {
			rv = vmmpmap_enter(stack_hyp_va[cpu] + ptoa(i),
			    PAGE_SIZE, vtophys(stack[cpu] + ptoa(i)),
			    VM_PROT_READ | VM_PROT_WRITE);
			MPASS(rv);
		}
		next_hyp_va += L2_SIZE;
	}

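	/*
	 * Configure the EL2 translation control register:
	 *
	 * TCR_EL2_PS: the physical address size the CPU reports, capped
	 *     at 52 bits
	 * TCR_EL2_T0SZ: EL2 uses EL2_VIRT_BITS bits of virtual address
	 * TCR_EL2_IRGN0_WBWA/ORGN0_WBWA: translation table walks access
	 *     inner/outer write-back write-allocate cacheable memory
	 * TCR_EL2_TG0_4K/16K: EL2 uses the same page size as the kernel
	 * TCR_EL2_SH0_IS: memory used for the walks is inner shareable
	 */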
	el2_regs.tcr_el2 = TCR_EL2_RES1;
	el2_regs.tcr_el2 |= min(pa_range_bits << TCR_EL2_PS_SHIFT,
	    TCR_EL2_PS_52BITS);
	el2_regs.tcr_el2 |= TCR_EL2_T0SZ(64 - EL2_VIRT_BITS);
	el2_regs.tcr_el2 |= TCR_EL2_IRGN0_WBWA | TCR_EL2_ORGN0_WBWA;
#if PAGE_SIZE == PAGE_SIZE_4K
	el2_regs.tcr_el2 |= TCR_EL2_TG0_4K;
#elif PAGE_SIZE == PAGE_SIZE_16K
	el2_regs.tcr_el2 |= TCR_EL2_TG0_16K;
#else
#error Unsupported page size
#endif
#ifdef SMP
	el2_regs.tcr_el2 |= TCR_EL2_SH0_IS;
#endif

	switch (el2_regs.tcr_el2 & TCR_EL2_PS_MASK) {
	case TCR_EL2_PS_32BITS:
		vmm_max_ipa_bits = 32;
		break;
	case TCR_EL2_PS_36BITS:
		vmm_max_ipa_bits = 36;
		break;
	case TCR_EL2_PS_40BITS:
		vmm_max_ipa_bits = 40;
		break;
	case TCR_EL2_PS_42BITS:
		vmm_max_ipa_bits = 42;
		break;
	case TCR_EL2_PS_44BITS:
		vmm_max_ipa_bits = 44;
		break;
	case TCR_EL2_PS_48BITS:
		vmm_max_ipa_bits = 48;
		break;
	case TCR_EL2_PS_52BITS:
	default:
		vmm_max_ipa_bits = 52;
		break;
	}

	/*
	 * Configure the Stage 2 translation control register:
	 *
	 * VTCR_IRGN0_WBWA: Translation table walks access inner cacheable
	 *     normal memory
	 * VTCR_ORGN0_WBWA: Translation table walks access outer cacheable
	 *     normal memory
	 * VTCR_EL2_TG0_4K/16K: Stage 2 uses the same page size as the kernel
	 * VTCR_EL2_SL0_4K_LVL1: Stage 2 uses concatenated level 1 tables
	 * VTCR_EL2_SH0_IS: Memory associated with Stage 2 walks is inner
	 *     shareable
	 */
	el2_regs.vtcr_el2 = VTCR_EL2_RES1;
	el2_regs.vtcr_el2 |=
	    min(pa_range_bits << VTCR_EL2_PS_SHIFT, VTCR_EL2_PS_48BIT);
	el2_regs.vtcr_el2 |= VTCR_EL2_IRGN0_WBWA | VTCR_EL2_ORGN0_WBWA;
	el2_regs.vtcr_el2 |= VTCR_EL2_T0SZ(64 - vmm_virt_bits);
	el2_regs.vtcr_el2 |= vmm_vtcr_el2_sl(vmm_pmap_levels);
#if PAGE_SIZE == PAGE_SIZE_4K
	el2_regs.vtcr_el2 |= VTCR_EL2_TG0_4K;
#elif PAGE_SIZE == PAGE_SIZE_16K
	el2_regs.vtcr_el2 |= VTCR_EL2_TG0_16K;
#else
#error Unsupported page size
#endif
#ifdef SMP
	el2_regs.vtcr_el2 |= VTCR_EL2_SH0_IS;
#endif

	smp_rendezvous(NULL, arm_setup_vectors, NULL, &el2_regs);

	/* Add memory to the vmem allocator (checking there is space) */
	if (vmm_base > (L2_SIZE + PAGE_SIZE)) {
		/*
		 * Leave an L2 block's gap before the vmm code so buffer
		 * overflows on earlier data will fault, plus PAGE_SIZE,
		 * the minimum size we can allocate.
		 */
		vmm_base -= L2_SIZE + PAGE_SIZE;
		vmm_base = rounddown2(vmm_base, L2_SIZE);

		/*
		 * Check there is memory before the vmm code to add.
		 *
		 * Reserve the L2 block at address 0 so a NULL dereference
		 * will raise an exception.
		 */
		if (vmm_base > L2_SIZE)
			vmem_add(el2_mem_alloc, L2_SIZE, vmm_base - L2_SIZE,
			    M_WAITOK);
	}

	/*
	 * Add the memory after the stacks. There is most of an L2 block
	 * between the last stack and the first allocation so this should
	 * be safe without adding more padding.
	 */
	if (next_hyp_va < HYP_VM_MAX_ADDRESS - PAGE_SIZE)
		vmem_add(el2_mem_alloc, next_hyp_va,
		    HYP_VM_MAX_ADDRESS - next_hyp_va, M_WAITOK);

	daif = intr_disable();
	cnthctl_el2 = vmm_call_hyp(HYP_READ_REGISTER, HYP_REG_CNTHCTL);
	intr_restore(daif);

	vgic_init();
	vtimer_init(cnthctl_el2);

	return (0);
}

int
vmmops_modcleanup(void)
{
	int cpu;

	smp_rendezvous(NULL, arm_teardown_vectors, NULL, NULL);

	CPU_FOREACH(cpu) {
		vmmpmap_remove(stack_hyp_va[cpu], VMM_STACK_PAGES * PAGE_SIZE,
		    false);
	}

	vmmpmap_remove(hyp_code_base, hyp_code_len, false);

	vtimer_cleanup();

	vmmpmap_fini();

	CPU_FOREACH(cpu)
		free(stack[cpu], M_HYP);

	pmap_clean_stage2_tlbi = NULL;
	pmap_stage2_invalidate_range = NULL;
	pmap_stage2_invalidate_all = NULL;

	return (0);
}

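/*
 * The EL2 mapping for a VM covers struct hyp followed by an array of
 * pointers to each vCPU's context, one per possible vCPU.
 */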
static vm_size_t
el2_hyp_size(struct vm *vm)
{
	return (round_page(sizeof(struct hyp) +
	    sizeof(struct hypctx *) * vm_get_maxcpus(vm)));
}

static vm_size_t
el2_hypctx_size(void)
{
	return (round_page(sizeof(struct hypctx)));
}

static vm_offset_t
el2_map_enter(vm_offset_t data, vm_size_t size, vm_prot_t prot)
{
	vmem_addr_t addr;
	int err __diagused;
	bool rv __diagused;

	err = vmem_alloc(el2_mem_alloc, size, M_NEXTFIT | M_WAITOK, &addr);
	MPASS(err == 0);
	rv = vmmpmap_enter(addr, size, vtophys(data), prot);
	MPASS(rv);

	return (addr);
}

void *
vmmops_init(struct vm *vm, pmap_t pmap)
{
	struct hyp *hyp;
	vm_size_t size;

	size = el2_hyp_size(vm);
	hyp = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO);

	hyp->vm = vm;
	hyp->vgic_attached = false;

	vtimer_vminit(hyp);
	vgic_vminit(hyp);

	hyp->el2_addr = el2_map_enter((vm_offset_t)hyp, size,
	    VM_PROT_READ | VM_PROT_WRITE);

	return (hyp);
}

void *
vmmops_vcpu_init(void *vmi, struct vcpu *vcpu1, int vcpuid)
{
	struct hyp *hyp = vmi;
	struct hypctx *hypctx;
	vm_size_t size;

	size = el2_hypctx_size();
	hypctx = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO);

	KASSERT(vcpuid >= 0 && vcpuid < vm_get_maxcpus(hyp->vm),
	    ("%s: Invalid vcpuid %d", __func__, vcpuid));
	hyp->ctx[vcpuid] = hypctx;

	hypctx->hyp = hyp;
	hypctx->vcpu = vcpu1;

	reset_vm_el01_regs(hypctx);
	reset_vm_el2_regs(hypctx);

	vtimer_cpuinit(hypctx);
	vgic_cpuinit(hypctx);

	hypctx->el2_addr = el2_map_enter((vm_offset_t)hypctx, size,
	    VM_PROT_READ | VM_PROT_WRITE);

	return (hypctx);
}

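/* Initialise a stage 2 pmap to hold a guest's physical address space */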
static int
arm_vmm_pinit(pmap_t pmap)
{
	pmap_pinit_stage(pmap, PM_STAGE2, vmm_pmap_levels);
	return (1);
}

struct vmspace *
vmmops_vmspace_alloc(vm_offset_t min, vm_offset_t max)
{
	return (vmspace_alloc(min, max, arm_vmm_pinit));
}

void
vmmops_vmspace_free(struct vmspace *vmspace)
{
	pmap_remove_pages(vmspace_pmap(vmspace));
	vmspace_free(vmspace);
}

static void
vmm_pmap_clean_stage2_tlbi(void)
{
	vmm_call_hyp(HYP_CLEAN_S2_TLBI);
}

static void
vmm_pmap_invalidate_range(uint64_t vttbr, vm_offset_t sva, vm_offset_t eva,
    bool final_only)
{
	MPASS(eva > sva);
	vmm_call_hyp(HYP_S2_TLBI_RANGE, vttbr, sva, eva, final_only);
}

static void
vmm_pmap_invalidate_all(uint64_t vttbr)
{
	vmm_call_hyp(HYP_S2_TLBI_ALL, vttbr);
}

static inline void
arm64_print_hyp_regs(struct vm_exit *vme)
{
	printf("esr_el2: 0x%016lx\n", vme->u.hyp.esr_el2);
	printf("far_el2: 0x%016lx\n", vme->u.hyp.far_el2);
	printf("hpfar_el2: 0x%016lx\n", vme->u.hyp.hpfar_el2);
	printf("elr_el2: 0x%016lx\n", vme->pc);
}

static void
arm64_gen_inst_emul_data(struct hypctx *hypctx, uint32_t esr_iss,
    struct vm_exit *vme_ret)
{
	struct vm_guest_paging *paging;
	struct vie *vie;
	uint32_t esr_sas, reg_num;

	/*
	 * Get the page address from HPFAR_EL2.
	 */
	vme_ret->u.inst_emul.gpa =
	    HPFAR_EL2_FIPA_ADDR(hypctx->exit_info.hpfar_el2);
	/* Bits [11:0] are the same as bits [11:0] from the virtual address. */
	vme_ret->u.inst_emul.gpa += hypctx->exit_info.far_el2 &
	    FAR_EL2_HPFAR_PAGE_MASK;

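	/* Decode the access size and target register from the ESR ISS */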
	esr_sas = (esr_iss & ISS_DATA_SAS_MASK) >> ISS_DATA_SAS_SHIFT;
	reg_num = (esr_iss & ISS_DATA_SRT_MASK) >> ISS_DATA_SRT_SHIFT;

	vie = &vme_ret->u.inst_emul.vie;
	vie->access_size = 1 << esr_sas;
	vie->sign_extend = (esr_iss & ISS_DATA_SSE) ? 1 : 0;
	vie->dir = (esr_iss & ISS_DATA_WnR) ? VM_DIR_WRITE : VM_DIR_READ;
	vie->reg = reg_num;

	paging = &vme_ret->u.inst_emul.paging;
	paging->ttbr0_addr = hypctx->ttbr0_el1 & ~(TTBR_ASID_MASK | TTBR_CnP);
	paging->ttbr1_addr = hypctx->ttbr1_el1 & ~(TTBR_ASID_MASK | TTBR_CnP);
	paging->tcr_el1 = hypctx->tcr_el1;
	paging->tcr2_el1 = hypctx->tcr2_el1;
	paging->flags = hypctx->tf.tf_spsr & (PSR_M_MASK | PSR_M_32);
	if ((hypctx->sctlr_el1 & SCTLR_M) != 0)
		paging->flags |= VM_GP_MMU_ENABLED;
}

static void
arm64_gen_reg_emul_data(uint32_t esr_iss, struct vm_exit *vme_ret)
{
	uint32_t reg_num;
	struct vre *vre;

	/* u.hyp member will be replaced by u.reg_emul */
	vre = &vme_ret->u.reg_emul.vre;

	vre->inst_syndrome = esr_iss;
	/* ARMv8 Architecture Manual, p. D7-2273: 1 means read */
	vre->dir = (esr_iss & ISS_MSR_DIR) ? VM_DIR_READ : VM_DIR_WRITE;
	reg_num = ISS_MSR_Rt(esr_iss);
	vre->reg = reg_num;
}

void
raise_data_insn_abort(struct hypctx *hypctx, uint64_t far, bool dabort, int fsc)
{
	uint64_t esr;

	if ((hypctx->tf.tf_spsr & PSR_M_MASK) == PSR_M_EL0t)
		esr = EXCP_INSN_ABORT_L << ESR_ELx_EC_SHIFT;
	else
		esr = EXCP_INSN_ABORT << ESR_ELx_EC_SHIFT;
	/* Set the bit that changes from insn -> data abort */
	if (dabort)
		esr |= EXCP_DATA_ABORT_L << ESR_ELx_EC_SHIFT;
	/* Set the IL bit if set by hardware */
	esr |= hypctx->tf.tf_esr & ESR_ELx_IL;

	vmmops_exception(hypctx, esr | fsc, far);
}

static int
handle_el1_sync_excp(struct hypctx *hypctx, struct vm_exit *vme_ret,
    pmap_t pmap)
{
	uint64_t gpa;
	uint32_t esr_ec, esr_iss;

	esr_ec = ESR_ELx_EXCEPTION(hypctx->tf.tf_esr);
	esr_iss = hypctx->tf.tf_esr & ESR_ELx_ISS_MASK;

	switch (esr_ec) {
	case EXCP_UNKNOWN:
		vmm_stat_incr(hypctx->vcpu, VMEXIT_UNKNOWN, 1);
		arm64_print_hyp_regs(vme_ret);
		vme_ret->exitcode = VM_EXITCODE_HYP;
		break;
	case EXCP_TRAP_WFI_WFE:
		if ((hypctx->tf.tf_esr & 0x3) == 0) { /* WFI */
			vmm_stat_incr(hypctx->vcpu, VMEXIT_WFI, 1);
			vme_ret->exitcode = VM_EXITCODE_WFI;
		} else {
			vmm_stat_incr(hypctx->vcpu, VMEXIT_WFE, 1);
			vme_ret->exitcode = VM_EXITCODE_HYP;
		}
		break;
	case EXCP_HVC:
		vmm_stat_incr(hypctx->vcpu, VMEXIT_HVC, 1);
		vme_ret->exitcode = VM_EXITCODE_HVC;
		break;
	case EXCP_MSR:
		vmm_stat_incr(hypctx->vcpu, VMEXIT_MSR, 1);
		arm64_gen_reg_emul_data(esr_iss, vme_ret);
		vme_ret->exitcode = VM_EXITCODE_REG_EMUL;
		break;

	case EXCP_INSN_ABORT_L:
	case EXCP_DATA_ABORT_L:
		vmm_stat_incr(hypctx->vcpu, esr_ec == EXCP_DATA_ABORT_L ?
		    VMEXIT_DATA_ABORT : VMEXIT_INSN_ABORT, 1);
		switch (hypctx->tf.tf_esr & ISS_DATA_DFSC_MASK) {
		case ISS_DATA_DFSC_TF_L0:
		case ISS_DATA_DFSC_TF_L1:
		case ISS_DATA_DFSC_TF_L2:
		case ISS_DATA_DFSC_TF_L3:
		case ISS_DATA_DFSC_AFF_L1:
		case ISS_DATA_DFSC_AFF_L2:
		case ISS_DATA_DFSC_AFF_L3:
		case ISS_DATA_DFSC_PF_L1:
		case ISS_DATA_DFSC_PF_L2:
		case ISS_DATA_DFSC_PF_L3:
			gpa = HPFAR_EL2_FIPA_ADDR(hypctx->exit_info.hpfar_el2);
			/* Check the IPA is valid */
			if (gpa >= (1ul << vmm_max_ipa_bits)) {
				raise_data_insn_abort(hypctx,
				    hypctx->exit_info.far_el2,
				    esr_ec == EXCP_DATA_ABORT_L,
				    ISS_DATA_DFSC_ASF_L0);
				vme_ret->inst_length = 0;
				return (HANDLED);
			}

			if (vm_mem_allocated(hypctx->vcpu, gpa)) {
				vme_ret->exitcode = VM_EXITCODE_PAGING;
				vme_ret->inst_length = 0;
				vme_ret->u.paging.esr = hypctx->tf.tf_esr;
				vme_ret->u.paging.gpa = gpa;
			} else if (esr_ec == EXCP_INSN_ABORT_L) {
				/*
				 * Raise an external abort. Device memory is
				 * not executable.
				 */
				raise_data_insn_abort(hypctx,
				    hypctx->exit_info.far_el2, false,
				    ISS_DATA_DFSC_EXT);
				vme_ret->inst_length = 0;
				return (HANDLED);
			} else {
				arm64_gen_inst_emul_data(hypctx, esr_iss,
				    vme_ret);
				vme_ret->exitcode = VM_EXITCODE_INST_EMUL;
			}
			break;
		default:
			arm64_print_hyp_regs(vme_ret);
			vme_ret->exitcode = VM_EXITCODE_HYP;
			break;
		}

		break;

	default:
		vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED_SYNC, 1);
		arm64_print_hyp_regs(vme_ret);
		vme_ret->exitcode = VM_EXITCODE_HYP;
		break;
	}

	/* We don't do any instruction emulation here */
	return (UNHANDLED);
}

static int
arm64_handle_world_switch(struct hypctx *hypctx, int excp_type,
    struct vm_exit *vme, pmap_t pmap)
{
	int handled;

	switch (excp_type) {
	case EXCP_TYPE_EL1_SYNC:
		/* The exit code will be set by handle_el1_sync_excp(). */
		handled = handle_el1_sync_excp(hypctx, vme, pmap);
		break;

	case EXCP_TYPE_EL1_IRQ:
	case EXCP_TYPE_EL1_FIQ:
		/* The host kernel will handle IRQs and FIQs. */
		vmm_stat_incr(hypctx->vcpu,
		    excp_type == EXCP_TYPE_EL1_IRQ ? VMEXIT_IRQ : VMEXIT_FIQ,
		    1);
		vme->exitcode = VM_EXITCODE_BOGUS;
		handled = UNHANDLED;
		break;

	case EXCP_TYPE_EL1_ERROR:
	case EXCP_TYPE_EL2_SYNC:
	case EXCP_TYPE_EL2_IRQ:
	case EXCP_TYPE_EL2_FIQ:
	case EXCP_TYPE_EL2_ERROR:
		vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED_EL2, 1);
		vme->exitcode = VM_EXITCODE_BOGUS;
		handled = UNHANDLED;
		break;

	default:
		vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED, 1);
		vme->exitcode = VM_EXITCODE_BOGUS;
		handled = UNHANDLED;
		break;
	}

	return (handled);
}

static void
ptp_release(void **cookie)
{
	if (*cookie != NULL) {
		vm_gpa_release(*cookie);
		*cookie = NULL;
	}
}

static void *
ptp_hold(struct vcpu *vcpu, vm_paddr_t ptpphys, size_t len, void **cookie)
{
	void *ptr;

	ptp_release(cookie);
	ptr = vm_gpa_hold(vcpu, ptpphys, len, VM_PROT_RW, cookie);
	return (ptr);
}

/* log2 of the number of bytes in a page table entry */
#define	PTE_SHIFT	3
int
vmmops_gla2gpa(void *vcpui, struct vm_guest_paging *paging, uint64_t gla,
    int prot, uint64_t *gpa, int *is_fault)
{
	struct hypctx *hypctx;
	void *cookie;
	uint64_t mask, *ptep, pte, pte_addr;
	int address_bits, granule_shift, ia_bits, levels, pte_shift, tsz;
	bool is_el0;

	/* Check if the MMU is off */
	if ((paging->flags & VM_GP_MMU_ENABLED) == 0) {
		*is_fault = 0;
		*gpa = gla;
		return (0);
	}

	is_el0 = (paging->flags & PSR_M_MASK) == PSR_M_EL0t;

	if (ADDR_IS_KERNEL(gla)) {
		/* If address translation is disabled raise an exception */
		if ((paging->tcr_el1 & TCR_EPD1) != 0) {
			*is_fault = 1;
			return (0);
		}
		if (is_el0 && (paging->tcr_el1 & TCR_E0PD1) != 0) {
			*is_fault = 1;
			return (0);
		}
		pte_addr = paging->ttbr1_addr;
		tsz = (paging->tcr_el1 & TCR_T1SZ_MASK) >> TCR_T1SZ_SHIFT;
		/*
		 * If TBI is on, restore the top byte; kernel addresses
		 * have it set to all ones.
		 */
		if ((paging->tcr_el1 & TCR_TBI1) != 0)
			gla |= (0xfful << 56);
		switch (paging->tcr_el1 & TCR_TG1_MASK) {
		case TCR_TG1_4K:
			granule_shift = PAGE_SHIFT_4K;
			break;
		case TCR_TG1_16K:
			granule_shift = PAGE_SHIFT_16K;
			break;
		case TCR_TG1_64K:
			granule_shift = PAGE_SHIFT_64K;
			break;
		default:
			*is_fault = 1;
			return (EINVAL);
		}
	} else {
		/* If address translation is disabled raise an exception */
		if ((paging->tcr_el1 & TCR_EPD0) != 0) {
			*is_fault = 1;
			return (0);
		}
		if (is_el0 && (paging->tcr_el1 & TCR_E0PD0) != 0) {
			*is_fault = 1;
			return (0);
		}
		pte_addr = paging->ttbr0_addr;
		tsz = (paging->tcr_el1 & TCR_T0SZ_MASK) >> TCR_T0SZ_SHIFT;
		/* Clear the top byte if TBI is on */
		if ((paging->tcr_el1 & TCR_TBI0) != 0)
			gla &= ~(0xfful << 56);
		switch (paging->tcr_el1 & TCR_TG0_MASK) {
		case TCR_TG0_4K:
			granule_shift = PAGE_SHIFT_4K;
			break;
		case TCR_TG0_16K:
			granule_shift = PAGE_SHIFT_16K;
			break;
		case TCR_TG0_64K:
			granule_shift = PAGE_SHIFT_64K;
			break;
		default:
			*is_fault = 1;
			return (EINVAL);
		}
	}

	/*
	 * TODO: Support FEAT_TTST for smaller tsz values and FEAT_LPA2
	 * for larger values.
	 */
	switch (granule_shift) {
	case PAGE_SHIFT_4K:
	case PAGE_SHIFT_16K:
		/*
		 * See "Table D8-11 4KB granule, determining stage 1 initial
		 * lookup level" and "Table D8-21 16KB granule, determining
		 * stage 1 initial lookup level" from the "Arm Architecture
		 * Reference Manual for A-Profile architecture" revision I.a
		 * for the minimum and maximum values.
		 *
		 * TODO: Support less than 16 when FEAT_LPA2 is implemented
		 * and TCR_EL1.DS == 1
		 * TODO: Support more than 39 when FEAT_TTST is implemented
		 */
		if (tsz < 16 || tsz > 39) {
			*is_fault = 1;
			return (EINVAL);
		}
		break;
	case PAGE_SHIFT_64K:
	/* TODO: Support 64k granule. It will probably work, but is untested */
	default:
		*is_fault = 1;
		return (EINVAL);
	}

	/*
	 * Calculate the usable input address bits. The virtual address is
	 * 64 bits wide with the top tsz bits all 0 or all 1, leaving
	 * 64 - tsz significant bits.
	 */
	ia_bits = 64 - tsz;

	/*
	 * Calculate the number of address bits used in the page table
	 * calculation. This is ia_bits minus the bottom granule_shift
	 * bits that are passed to the output address.
	 */
	address_bits = ia_bits - granule_shift;

	/*
	 * Calculate the number of levels. Each level uses
	 * granule_shift - PTE_SHIFT bits of the input address.
	 * This is because the table is 1 << granule_shift and each
	 * entry is 1 << PTE_SHIFT bytes.
	 */
	levels = howmany(address_bits, granule_shift - PTE_SHIFT);
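	/*
	 * e.g. with a 4K granule each level resolves 12 - 3 = 9 bits, so
	 * a 39 bit input address needs howmany(39 - 12, 9) = 3 levels.
	 */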

	/* Mask off the upper unused bits of the virtual address */
	gla &= (1ul << ia_bits) - 1;
	hypctx = (struct hypctx *)vcpui;
	cookie = NULL;
	/* TODO: Check if the level supports block descriptors */
	for (; levels > 0; levels--) {
		int idx;

		pte_shift = (levels - 1) * (granule_shift - PTE_SHIFT) +
		    granule_shift;
		idx = (gla >> pte_shift) &
		    ((1ul << (granule_shift - PTE_SHIFT)) - 1);
		while (idx > PAGE_SIZE / sizeof(pte)) {
			idx -= PAGE_SIZE / sizeof(pte);
			pte_addr += PAGE_SIZE;
		}

		ptep = ptp_hold(hypctx->vcpu, pte_addr, PAGE_SIZE, &cookie);
		if (ptep == NULL)
			goto error;
		pte = ptep[idx];

		/* Calculate the level we are looking at */
		switch (levels) {
		default:
			goto fault;
		/* TODO: Level -1 when FEAT_LPA2 is implemented */
		case 4: /* Level 0 */
			if ((pte & ATTR_DESCR_MASK) != L0_TABLE)
				goto fault;
			/* FALLTHROUGH */
		case 3: /* Level 1 */
		case 2: /* Level 2 */
			switch (pte & ATTR_DESCR_MASK) {
			/* Use L1 macro as all levels are the same */
			case L1_TABLE:
				/* Check if EL0 can access this address space */
				if (is_el0 &&
				    (pte & TATTR_AP_TABLE_NO_EL0) != 0)
					goto fault;
				/* Check if the address space is writable */
				if ((prot & PROT_WRITE) != 0 &&
				    (pte & TATTR_AP_TABLE_RO) != 0)
					goto fault;
				if ((prot & PROT_EXEC) != 0) {
					/* Check the table exec attribute */
					if ((is_el0 &&
					    (pte & TATTR_UXN_TABLE) != 0) ||
					    (!is_el0 &&
					    (pte & TATTR_PXN_TABLE) != 0))
						goto fault;
				}
				pte_addr = pte & ~ATTR_MASK;
				break;
			case L1_BLOCK:
				goto done;
			default:
				goto fault;
			}
			break;
		case 1: /* Level 3 */
			if ((pte & ATTR_DESCR_MASK) == L3_PAGE)
				goto done;
			goto fault;
		}
	}

done:
	/* Check if EL0 has access to the block/page */
	if (is_el0 && (pte & ATTR_S1_AP(ATTR_S1_AP_USER)) == 0)
		goto fault;
	if ((prot & PROT_WRITE) != 0 && (pte & ATTR_S1_AP_RW_BIT) != 0)
		goto fault;
	if ((prot & PROT_EXEC) != 0) {
		if ((is_el0 && (pte & ATTR_S1_UXN) != 0) ||
		    (!is_el0 && (pte & ATTR_S1_PXN) != 0))
			goto fault;
	}
	mask = (1ul << pte_shift) - 1;
	*gpa = (pte & ~ATTR_MASK) | (gla & mask);
	*is_fault = 0;
	ptp_release(&cookie);
	return (0);

error:
	ptp_release(&cookie);
	return (EFAULT);
fault:
	*is_fault = 1;
	ptp_release(&cookie);
	return (0);
}

int
vmmops_run(void *vcpui, register_t pc, pmap_t pmap, struct vm_eventinfo *evinfo)
{
	uint64_t excp_type;
	int handled;
	register_t daif;
	struct hyp *hyp;
	struct hypctx *hypctx;
	struct vcpu *vcpu;
	struct vm_exit *vme;
	int mode;

	hypctx = (struct hypctx *)vcpui;
	hyp = hypctx->hyp;
	vcpu = hypctx->vcpu;
	vme = vm_exitinfo(vcpu);

	hypctx->tf.tf_elr = (uint64_t)pc;

	for (;;) {
		if (hypctx->has_exception) {
			hypctx->has_exception = false;
			hypctx->elr_el1 = hypctx->tf.tf_elr;

			mode = hypctx->tf.tf_spsr & (PSR_M_MASK | PSR_M_32);

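			/*
			 * Vector to the synchronous exception handler.
			 * VBAR_EL1 offsets 0x0, 0x200, 0x400 and 0x600 are
			 * for the current EL with SP_EL0, the current EL
			 * with SP_ELx, a lower EL using AArch64 and a lower
			 * EL using AArch32 respectively.
			 */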
			if (mode == PSR_M_EL1t) {
				hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x0;
			} else if (mode == PSR_M_EL1h) {
				hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x200;
			} else if ((mode & PSR_M_32) == PSR_M_64) {
				/* 64-bit EL0 */
				hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x400;
			} else {
				/* 32-bit EL0 */
				hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x600;
			}

			/* Set the new spsr */
			hypctx->spsr_el1 = hypctx->tf.tf_spsr;

			/* Set the new cpsr */
			hypctx->tf.tf_spsr = hypctx->spsr_el1 & PSR_FLAGS;
			hypctx->tf.tf_spsr |= PSR_DAIF | PSR_M_EL1h;

			/*
			 * Update fields that may change on exception entry
			 * based on how sctlr_el1 is configured.
			 */
			if ((hypctx->sctlr_el1 & SCTLR_SPAN) != 0)
				hypctx->tf.tf_spsr |= PSR_PAN;
			if ((hypctx->sctlr_el1 & SCTLR_DSSBS) == 0)
				hypctx->tf.tf_spsr &= ~PSR_SSBS;
			else
				hypctx->tf.tf_spsr |= PSR_SSBS;
		}

		daif = intr_disable();

		/* Check if the vcpu is suspended */
		if (vcpu_suspended(evinfo)) {
			intr_restore(daif);
			vm_exit_suspended(vcpu, pc);
			break;
		}

		if (vcpu_debugged(vcpu)) {
			intr_restore(daif);
			vm_exit_debug(vcpu, pc);
			break;
		}

		/* Activate the stage2 pmap so the vmid is valid */
		pmap_activate_vm(pmap);
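		/* Save the VTTBR for the EL2 world switch code to load */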
		hyp->vttbr_el2 = pmap_to_ttbr0(pmap);

		/*
		 * TODO: What happens if a timer interrupt is asserted exactly
		 * here, but for the previous VM?
		 */
		arm64_set_active_vcpu(hypctx);
		vgic_flush_hwstate(hypctx);

		/* Call into EL2 to switch to the guest */
		excp_type = vmm_call_hyp(HYP_ENTER_GUEST,
		    hyp->el2_addr, hypctx->el2_addr);

		vgic_sync_hwstate(hypctx);
		vtimer_sync_hwstate(hypctx);

		/*
		 * Deactivate the stage 2 pmap. vmm_pmap_clean_stage2_tlbi
		 * depends on this, as it means the pmap will be activated
		 * again before we next enter the guest.
		 */
		PCPU_SET(curvmpmap, NULL);
		intr_restore(daif);

		vmm_stat_incr(vcpu, VMEXIT_COUNT, 1);
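		/*
		 * The exit was caused by a vGIC maintenance interrupt that
		 * has already been handled; re-enter the guest rather than
		 * report an exit.
		 */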
		if (excp_type == EXCP_TYPE_MAINT_IRQ)
			continue;

		vme->pc = hypctx->tf.tf_elr;
		vme->inst_length = INSN_SIZE;
		vme->u.hyp.exception_nr = excp_type;
		vme->u.hyp.esr_el2 = hypctx->tf.tf_esr;
		vme->u.hyp.far_el2 = hypctx->exit_info.far_el2;
		vme->u.hyp.hpfar_el2 = hypctx->exit_info.hpfar_el2;

		handled = arm64_handle_world_switch(hypctx, excp_type, vme,
		    pmap);
		if (handled == UNHANDLED)
			/* Exit loop to emulate instruction. */
			break;
		else
			/* Resume guest execution from the next instruction. */
			hypctx->tf.tf_elr += vme->inst_length;
	}

	return (0);
}

static void
arm_pcpu_vmcleanup(void *arg)
{
	struct hyp *hyp;
	int i, maxcpus;

	hyp = arg;
	maxcpus = vm_get_maxcpus(hyp->vm);
	for (i = 0; i < maxcpus; i++) {
		if (arm64_get_active_vcpu() == hyp->ctx[i]) {
			arm64_set_active_vcpu(NULL);
			break;
		}
	}
}

void
vmmops_vcpu_cleanup(void *vcpui)
{
	struct hypctx *hypctx = vcpui;

	vtimer_cpucleanup(hypctx);
	vgic_cpucleanup(hypctx);

	vmmpmap_remove(hypctx->el2_addr, el2_hypctx_size(), true);

	free(hypctx, M_HYP);
}

void
vmmops_cleanup(void *vmi)
{
	struct hyp *hyp = vmi;

	vtimer_vmcleanup(hyp);
	vgic_vmcleanup(hyp);

	smp_rendezvous(NULL, arm_pcpu_vmcleanup, NULL, hyp);

	vmmpmap_remove(hyp->el2_addr, el2_hyp_size(hyp->vm), true);

	free(hyp, M_HYP);
}

/*
 * Return a pointer to the register's value. Registers have different sizes
 * and an explicit cast must be made to ensure proper conversion.
 */
static uint64_t *
hypctx_regptr(struct hypctx *hypctx, int reg)
{
	switch (reg) {
	case VM_REG_GUEST_X0 ... VM_REG_GUEST_X29:
		return (&hypctx->tf.tf_x[reg]);
	case VM_REG_GUEST_LR:
		return (&hypctx->tf.tf_lr);
	case VM_REG_GUEST_SP:
		return (&hypctx->tf.tf_sp);
	case VM_REG_GUEST_CPSR:
		return (&hypctx->tf.tf_spsr);
	case VM_REG_GUEST_PC:
		return (&hypctx->tf.tf_elr);
	case VM_REG_GUEST_SCTLR_EL1:
		return (&hypctx->sctlr_el1);
	case VM_REG_GUEST_TTBR0_EL1:
		return (&hypctx->ttbr0_el1);
	case VM_REG_GUEST_TTBR1_EL1:
		return (&hypctx->ttbr1_el1);
	case VM_REG_GUEST_TCR_EL1:
		return (&hypctx->tcr_el1);
	case VM_REG_GUEST_TCR2_EL1:
		return (&hypctx->tcr2_el1);
	default:
		break;
	}
	return (NULL);
}

int
vmmops_getreg(void *vcpui, int reg, uint64_t *retval)
{
	uint64_t *regp;
	int running, hostcpu;
	struct hypctx *hypctx = vcpui;

	running = vcpu_is_running(hypctx->vcpu, &hostcpu);
	if (running && hostcpu != curcpu)
		panic("arm_getreg: %s%d is running", vm_name(hypctx->hyp->vm),
		    vcpu_vcpuid(hypctx->vcpu));

	regp = hypctx_regptr(hypctx, reg);
	if (regp == NULL)
		return (EINVAL);

	*retval = *regp;
	return (0);
}

int
vmmops_setreg(void *vcpui, int reg, uint64_t val)
{
	uint64_t *regp;
	struct hypctx *hypctx = vcpui;
	int running, hostcpu;

	running = vcpu_is_running(hypctx->vcpu, &hostcpu);
	if (running && hostcpu != curcpu)
		panic("arm_setreg: %s%d is running", vm_name(hypctx->hyp->vm),
		    vcpu_vcpuid(hypctx->vcpu));

	regp = hypctx_regptr(hypctx, reg);
	if (regp == NULL)
		return (EINVAL);

	*regp = val;
	return (0);
}

int
vmmops_exception(void *vcpui, uint64_t esr, uint64_t far)
{
	struct hypctx *hypctx = vcpui;
	int running, hostcpu;

	running = vcpu_is_running(hypctx->vcpu, &hostcpu);
	if (running && hostcpu != curcpu)
		panic("%s: %s%d is running", __func__,
		    vm_name(hypctx->hyp->vm), vcpu_vcpuid(hypctx->vcpu));

	hypctx->far_el1 = far;
	hypctx->esr_el1 = esr;
	hypctx->has_exception = true;

	return (0);
}

int
vmmops_getcap(void *vcpui, int num, int *retval)
{
	int ret;

	ret = ENOENT;

	switch (num) {
	case VM_CAP_UNRESTRICTED_GUEST:
		*retval = 1;
		ret = 0;
		break;
	default:
		break;
	}

	return (ret);
}

int
vmmops_setcap(void *vcpui, int num, int val)
{
	return (ENOENT);
}