/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 * Copyright (c) 2019 Joyent, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 *
 * Copyright 2014 Pluribus Networks Inc.
 * Copyright 2018 Joyent, Inc.
 * Copyright 2022 Oxide Computer Company
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
#include <sys/systm.h>
#include <sys/cpuset.h>

#include <x86/specialreg.h>
#include <x86/apicreg.h>

#include <machine/clock.h>

#include <machine/vmm.h>
#include <sys/vmm_kernel.h>

#include "vmm_lapic.h"
#include "vmm_stat.h"

#include "vlapic.h"
#include "vlapic_priv.h"
#include "vioapic.h"


/*
 * The 4 high bits of a given interrupt vector represent its priority.  The same
 * is true for the contents of the TPR when it is used to calculate the ultimate
 * PPR of an APIC - the 4 high bits hold the priority.
 */
#define	PRIO(x)			((x) & 0xf0)
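/*
 * For example, PRIO(0x59) == 0x50: vectors 0x50 through 0x5f all fall in
 * priority class 5.
 */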

#define	VLAPIC_VERSION		(16)

/*
 * The 'vlapic->timer_lock' is used to provide mutual exclusion between
 * vlapic_callout_handler() and vcpu accesses to:
 * - timer_cur_freq, timer_period, timer_fire_when
 * - the timer LVT register
 */
#define	VLAPIC_TIMER_LOCK(vlapic)	mutex_enter(&((vlapic)->timer_lock))
#define	VLAPIC_TIMER_UNLOCK(vlapic)	mutex_exit(&((vlapic)->timer_lock))
#define	VLAPIC_TIMER_LOCKED(vlapic)	MUTEX_HELD(&((vlapic)->timer_lock))

/*
 * APIC timer frequency:
 * - arbitrary but chosen to be in the ballpark of contemporary hardware.
 * - power-of-two to avoid loss of precision when calculating times
 */
#define	VLAPIC_BUS_FREQ		(128 * 1024 * 1024)

#define	APICBASE_ADDR_MASK	0xfffffffffffff000UL

#define	APIC_VALID_MASK_ESR	(APIC_ESR_SEND_CS_ERROR | \
		APIC_ESR_RECEIVE_CS_ERROR | APIC_ESR_SEND_ACCEPT | \
		APIC_ESR_RECEIVE_ACCEPT | APIC_ESR_SEND_ILLEGAL_VECTOR | \
		APIC_ESR_RECEIVE_ILLEGAL_VECTOR | APIC_ESR_ILLEGAL_REGISTER)

static void vlapic_set_error(struct vlapic *, uint32_t, bool);
static void vlapic_callout_handler(void *arg);

static __inline bool
vlapic_x2mode(const struct vlapic *vlapic)
{
	return ((vlapic->msr_apicbase & APICBASE_X2APIC) != 0);
}

static __inline bool
vlapic_hw_disabled(const struct vlapic *vlapic)
{
	return ((vlapic->msr_apicbase & APICBASE_ENABLED) == 0);
}

static __inline bool
vlapic_sw_disabled(const struct vlapic *vlapic)
{
	const struct LAPIC *lapic = vlapic->apic_page;

	return ((lapic->svr & APIC_SVR_ENABLE) == 0);
}

static __inline bool
vlapic_enabled(const struct vlapic *vlapic)
{
	return (!vlapic_hw_disabled(vlapic) && !vlapic_sw_disabled(vlapic));
}

static __inline uint32_t
vlapic_get_id(const struct vlapic *vlapic)
{

	if (vlapic_x2mode(vlapic))
		return (vlapic->vcpuid);
	else
		return (vlapic->vcpuid << 24);
}

static uint32_t
x2apic_ldr(const struct vlapic *vlapic)
{
	int apicid;
	uint32_t ldr;

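	/*
	 * The x2APIC LDR is derived from the APIC ID: bits 31:16 hold the
	 * cluster (ID >> 4) while the low 16 bits hold a one-hot position
	 * within that cluster.  For example, APIC ID 0x25 yields an LDR of
	 * 0x00020020.
	 */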
	apicid = vlapic_get_id(vlapic);
	ldr = 1 << (apicid & 0xf);
	ldr |= (apicid & 0xffff0) << 12;
	return (ldr);
}

void
vlapic_dfr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	if (vlapic_x2mode(vlapic)) {
		/* Ignore write to DFR in x2APIC mode */
		lapic->dfr = 0;
		return;
	}

	lapic->dfr &= APIC_DFR_MODEL_MASK;
	lapic->dfr |= APIC_DFR_RESERVED;
}

void
vlapic_ldr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;

	/* LDR is read-only in x2apic mode */
	if (vlapic_x2mode(vlapic)) {
		/* Ignore write to LDR in x2APIC mode */
		lapic->ldr = x2apic_ldr(vlapic);
	} else {
		lapic->ldr &= ~APIC_LDR_RESERVED;
	}
}

void
vlapic_id_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	/*
	 * We don't allow the ID register to be modified so reset it back to
	 * its default value.
	 */
	lapic = vlapic->apic_page;
	lapic->id = vlapic_get_id(vlapic);
}

static int
vlapic_timer_divisor(uint32_t dcr)
{
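	/* The divide value is encoded in DCR bits 0, 1, and 3, hence 0xB. */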
	switch (dcr & 0xB) {
	case APIC_TDCR_1:
		return (1);
	case APIC_TDCR_2:
		return (2);
	case APIC_TDCR_4:
		return (4);
	case APIC_TDCR_8:
		return (8);
	case APIC_TDCR_16:
		return (16);
	case APIC_TDCR_32:
		return (32);
	case APIC_TDCR_64:
		return (64);
	case APIC_TDCR_128:
		return (128);
	default:
		panic("vlapic_timer_divisor: invalid dcr 0x%08x", dcr);
	}
}

#if 0
static inline void
vlapic_dump_lvt(uint32_t offset, uint32_t *lvt)
{
	printf("Offset %x: lvt %08x (V:%02x DS:%x M:%x)\n", offset,
	    *lvt, *lvt & APIC_LVTT_VECTOR, *lvt & APIC_LVTT_DS,
	    *lvt & APIC_LVTT_M);
}
#endif

static uint32_t
vlapic_get_ccr(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	uint32_t ccr;

	ccr = 0;
	lapic = vlapic->apic_page;

	VLAPIC_TIMER_LOCK(vlapic);
	if (callout_active(&vlapic->callout)) {
		/*
		 * If the timer is scheduled to expire in the future then
		 * compute the value of 'ccr' based on the remaining time.
		 */

		const hrtime_t now = gethrtime();
		if (vlapic->timer_fire_when > now) {
			ccr += hrt_freq_count(vlapic->timer_fire_when - now,
			    vlapic->timer_cur_freq);
		}
	}
	KASSERT(ccr <= lapic->icr_timer, ("vlapic_get_ccr: invalid ccr %x, "
	    "icr_timer is %x", ccr, lapic->icr_timer));
	VLAPIC_TIMER_UNLOCK(vlapic);
	return (ccr);
}

static void
vlapic_update_divider(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	ASSERT(VLAPIC_TIMER_LOCKED(vlapic));

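	/*
	 * For example, with a DCR divisor of 8, the effective frequency is
	 * (128 * 1024 * 1024) / 8 = 16777216 Hz, so an icr_timer count of
	 * 0x100000 corresponds to a period of 62.5ms.
	 */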
	vlapic->timer_cur_freq =
	    VLAPIC_BUS_FREQ / vlapic_timer_divisor(lapic->dcr_timer);
	vlapic->timer_period =
	    hrt_freq_interval(vlapic->timer_cur_freq, lapic->icr_timer);
}

void
vlapic_dcr_write_handler(struct vlapic *vlapic)
{
	/*
	 * Update the timer frequency and the timer period.
	 *
	 * XXX changes to the frequency divider will not take effect until
	 * the timer is reloaded.
	 */
	VLAPIC_TIMER_LOCK(vlapic);
	vlapic_update_divider(vlapic);
	VLAPIC_TIMER_UNLOCK(vlapic);
}

void
vlapic_esr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	lapic->esr = vlapic->esr_pending;
	vlapic->esr_pending = 0;
}

vcpu_notify_t
vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
{
	struct LAPIC *lapic;
	uint32_t *irrptr, *tmrptr, mask, tmr;
	int idx;

	KASSERT(vector >= 0 && vector < 256, ("invalid vector %d", vector));

	lapic = vlapic->apic_page;
	if (!(lapic->svr & APIC_SVR_ENABLE)) {
		/* ignore interrupt on software-disabled APIC */
		return (VCPU_NOTIFY_NONE);
	}

	if (vector < 16) {
		vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR,
		    false);

		/*
		 * If the error LVT is configured to interrupt the vCPU, it will
		 * have delivered a notification through that mechanism.
		 */
		return (VCPU_NOTIFY_NONE);
	}

	if (vlapic->ops.set_intr_ready) {
		return ((*vlapic->ops.set_intr_ready)(vlapic, vector, level));
	}

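	/*
	 * Registers in the LAPIC page are 16-byte aligned, so consecutive
	 * 32-bit IRR/TMR banks sit 4 uint32_t entries apart.  For example,
	 * vector 0x59 maps to idx 8 (the irr2/tmr2 bank) with mask (1 << 25).
	 */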
	idx = (vector / 32) * 4;
	mask = 1 << (vector % 32);
	tmrptr = &lapic->tmr0;
	irrptr = &lapic->irr0;

	/*
	 * Update TMR for requested vector, if necessary.
	 * This must be done prior to asserting the bit in IRR so that the
	 * proper TMR state is always visible before the to-be-queued interrupt
	 * can be injected.
	 */
	tmr = atomic_load_acq_32(&tmrptr[idx]);
	if ((tmr & mask) != (level ? mask : 0)) {
		if (level) {
			atomic_set_int(&tmrptr[idx], mask);
		} else {
			atomic_clear_int(&tmrptr[idx], mask);
		}
	}

	/* Now set the bit in IRR */
	atomic_set_int(&irrptr[idx], mask);

	return (VCPU_NOTIFY_EXIT);
}

static __inline uint32_t *
vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset)
{
	struct LAPIC	*lapic = vlapic->apic_page;
	int		i;

	switch (offset) {
	case APIC_OFFSET_CMCI_LVT:
		return (&lapic->lvt_cmci);
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		i = (offset - APIC_OFFSET_TIMER_LVT) >> 2;
		return ((&lapic->lvt_timer) + i);
	default:
		panic("vlapic_get_lvt: invalid LVT\n");
	}
}

static __inline int
lvt_off_to_idx(uint32_t offset)
{
	int index;

	switch (offset) {
	case APIC_OFFSET_CMCI_LVT:
		index = APIC_LVT_CMCI;
		break;
	case APIC_OFFSET_TIMER_LVT:
		index = APIC_LVT_TIMER;
		break;
	case APIC_OFFSET_THERM_LVT:
		index = APIC_LVT_THERMAL;
		break;
	case APIC_OFFSET_PERF_LVT:
		index = APIC_LVT_PMC;
		break;
	case APIC_OFFSET_LINT0_LVT:
		index = APIC_LVT_LINT0;
		break;
	case APIC_OFFSET_LINT1_LVT:
		index = APIC_LVT_LINT1;
		break;
	case APIC_OFFSET_ERROR_LVT:
		index = APIC_LVT_ERROR;
		break;
	default:
		index = -1;
		break;
	}
	KASSERT(index >= 0 && index <= VLAPIC_MAXLVT_INDEX, ("lvt_off_to_idx: "
	    "invalid lvt index %d for offset %x", index, offset));

	return (index);
}

static __inline uint32_t
vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset)
{
	int idx;
	uint32_t val;

	idx = lvt_off_to_idx(offset);
	val = atomic_load_acq_32(&vlapic->lvt_last[idx]);
	return (val);
}

void
vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset)
{
	uint32_t *lvtptr, mask, val;
	struct LAPIC *lapic;
	int idx;

	lapic = vlapic->apic_page;
	lvtptr = vlapic_get_lvtptr(vlapic, offset);
	val = *lvtptr;
	idx = lvt_off_to_idx(offset);

	if (!(lapic->svr & APIC_SVR_ENABLE))
		val |= APIC_LVT_M;
	mask = APIC_LVT_M | APIC_LVT_DS | APIC_LVT_VECTOR;
	switch (offset) {
	case APIC_OFFSET_TIMER_LVT:
		mask |= APIC_LVTT_TM;
		break;
	case APIC_OFFSET_ERROR_LVT:
		break;
	case APIC_OFFSET_LINT0_LVT:
	case APIC_OFFSET_LINT1_LVT:
		mask |= APIC_LVT_TM | APIC_LVT_RIRR | APIC_LVT_IIPP;
		/* FALLTHROUGH */
	default:
		mask |= APIC_LVT_DM;
		break;
	}
	val &= mask;
	*lvtptr = val;
	atomic_store_rel_32(&vlapic->lvt_last[idx], val);
}

static void
vlapic_refresh_lvts(struct vlapic *vlapic)
{
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_CMCI_LVT);
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_TIMER_LVT);
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_THERM_LVT);
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_PERF_LVT);
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT0_LVT);
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT1_LVT);
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_ERROR_LVT);
}

static void
vlapic_mask_lvts(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	lapic->lvt_cmci |= APIC_LVT_M;
	lapic->lvt_timer |= APIC_LVT_M;
	lapic->lvt_thermal |= APIC_LVT_M;
	lapic->lvt_pcint |= APIC_LVT_M;
	lapic->lvt_lint0 |= APIC_LVT_M;
	lapic->lvt_lint1 |= APIC_LVT_M;
	lapic->lvt_error |= APIC_LVT_M;
	vlapic_refresh_lvts(vlapic);
}

static int
vlapic_fire_lvt(struct vlapic *vlapic, uint_t lvt)
{
	uint32_t mode, reg, vec;
	vcpu_notify_t notify;

	reg = atomic_load_acq_32(&vlapic->lvt_last[lvt]);

	if (reg & APIC_LVT_M)
		return (0);
	vec = reg & APIC_LVT_VECTOR;
	mode = reg & APIC_LVT_DM;

	switch (mode) {
	case APIC_LVT_DM_FIXED:
		if (vec < 16) {
			vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR,
			    lvt == APIC_LVT_ERROR);
			return (0);
		}
		notify = vlapic_set_intr_ready(vlapic, vec, false);
		vcpu_notify_event_type(vlapic->vm, vlapic->vcpuid, notify);
		break;
	case APIC_LVT_DM_NMI:
		(void) vm_inject_nmi(vlapic->vm, vlapic->vcpuid);
		break;
	case APIC_LVT_DM_EXTINT:
		(void) vm_inject_extint(vlapic->vm, vlapic->vcpuid);
		break;
	default:
		/* Other modes ignored */
		return (0);
	}
	return (1);
}

static uint_t
vlapic_active_isr(struct vlapic *vlapic)
{
	int i;
	uint32_t *isrp;

	isrp = &vlapic->apic_page->isr7;

	for (i = 7; i >= 0; i--, isrp -= 4) {
		uint32_t reg = *isrp;

		if (reg != 0) {
			uint_t vec = (i * 32) + bsrl(reg);

			if (vec < 16) {
				/*
				 * Truncate the illegal low vectors to a value
				 * of 0, indicating that no active ISR was
				 * found.
				 */
				return (0);
			}
			return (vec);
		}
	}

	return (0);
}

/*
 * After events which might arbitrarily change the value of PPR, such as a TPR
 * write or an EOI, calculate that new PPR value and store it in the APIC page.
 */
static void
vlapic_update_ppr(struct vlapic *vlapic)
{
	int isrvec, tpr, ppr;

	isrvec = vlapic_active_isr(vlapic);
	tpr = vlapic->apic_page->tpr;

	/*
	 * Algorithm adopted from section "Interrupt, Task and Processor
	 * Priority" in Intel Architecture Manual Vol 3a.
	 */
	if (PRIO(tpr) >= PRIO(isrvec)) {
		ppr = tpr;
	} else {
		ppr = PRIO(isrvec);
	}

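	/* e.g., TPR 0x40 with in-service vector 0x59 yields a PPR of 0x50 */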
	vlapic->apic_page->ppr = ppr;
}

/*
 * When a vector is asserted in ISR as in-service, the PPR must be raised to the
 * priority of that vector, as the vCPU would have been at a lower priority in
 * order for the vector to be accepted.
 */
static void
vlapic_raise_ppr(struct vlapic *vlapic, int vec)
{
	struct LAPIC *lapic = vlapic->apic_page;
	int ppr;

	ppr = PRIO(vec);

	lapic->ppr = ppr;
}

void
vlapic_sync_tpr(struct vlapic *vlapic)
{
	vlapic_update_ppr(vlapic);
}

static VMM_STAT(VLAPIC_GRATUITOUS_EOI, "EOI without any in-service interrupt");

static void
vlapic_process_eoi(struct vlapic *vlapic)
{
	struct LAPIC	*lapic = vlapic->apic_page;
	uint32_t	*isrptr, *tmrptr;
	int		i;
	uint_t		idx, bitpos, vector;

	isrptr = &lapic->isr0;
	tmrptr = &lapic->tmr0;

	for (i = 7; i >= 0; i--) {
		idx = i * 4;
		if (isrptr[idx] != 0) {
			bitpos = bsrl(isrptr[idx]);
			vector = i * 32 + bitpos;

			isrptr[idx] &= ~(1 << bitpos);
			vlapic_update_ppr(vlapic);
			if ((tmrptr[idx] & (1 << bitpos)) != 0) {
				vioapic_process_eoi(vlapic->vm, vlapic->vcpuid,
				    vector);
			}
			return;
		}
	}
	vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_GRATUITOUS_EOI, 1);
}

static __inline int
vlapic_get_lvt_field(uint32_t lvt, uint32_t mask)
{

	return (lvt & mask);
}

static __inline int
vlapic_periodic_timer(struct vlapic *vlapic)
{
	uint32_t lvt;

	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);

	return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC));
}

static VMM_STAT(VLAPIC_INTR_ERROR, "error interrupts generated by vlapic");

static void
vlapic_set_error(struct vlapic *vlapic, uint32_t mask, bool lvt_error)
{

	vlapic->esr_pending |= mask;

	/*
	 * Avoid infinite recursion if the error LVT itself is configured with
	 * an illegal vector.
	 */
	if (lvt_error)
		return;

	if (vlapic_fire_lvt(vlapic, APIC_LVT_ERROR)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_ERROR, 1);
	}
}

static VMM_STAT(VLAPIC_INTR_TIMER, "timer interrupts generated by vlapic");

static void
vlapic_fire_timer(struct vlapic *vlapic)
{
	ASSERT(VLAPIC_TIMER_LOCKED(vlapic));

	if (vlapic_fire_lvt(vlapic, APIC_LVT_TIMER)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_TIMER, 1);
	}
}

static VMM_STAT(VLAPIC_INTR_CMC,
	"corrected machine check interrupts generated by vlapic");

void
vlapic_fire_cmci(struct vlapic *vlapic)
{

	if (vlapic_fire_lvt(vlapic, APIC_LVT_CMCI)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_CMC, 1);
	}
}

static VMM_STAT_ARRAY(LVTS_TRIGGERED, VLAPIC_MAXLVT_INDEX + 1,
	"lvts triggered");

int
vlapic_trigger_lvt(struct vlapic *vlapic, int vector)
{
	if (!vlapic_enabled(vlapic)) {
		/*
		 * When the local APIC is global/hardware disabled,
		 * LINT[1:0] pins are configured as INTR and NMI pins,
		 * respectively.
		 */
		switch (vector) {
			case APIC_LVT_LINT0:
				(void) vm_inject_extint(vlapic->vm,
				    vlapic->vcpuid);
				break;
			case APIC_LVT_LINT1:
				(void) vm_inject_nmi(vlapic->vm,
				    vlapic->vcpuid);
				break;
			default:
				break;
		}
		return (0);
	}

	switch (vector) {
	case APIC_LVT_LINT0:
	case APIC_LVT_LINT1:
	case APIC_LVT_TIMER:
	case APIC_LVT_ERROR:
	case APIC_LVT_PMC:
	case APIC_LVT_THERMAL:
	case APIC_LVT_CMCI:
		if (vlapic_fire_lvt(vlapic, vector)) {
			vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
			    LVTS_TRIGGERED, vector, 1);
		}
		break;
	default:
		return (EINVAL);
	}
	return (0);
}

static void
vlapic_callout_reset(struct vlapic *vlapic)
{
	callout_reset_hrtime(&vlapic->callout, vlapic->timer_fire_when,
	    vlapic_callout_handler, vlapic, C_ABSOLUTE);
}

static void
vlapic_callout_handler(void *arg)
{
	struct vlapic *vlapic = arg;

	VLAPIC_TIMER_LOCK(vlapic);
	if (callout_pending(&vlapic->callout))	/* callout was reset */
		goto done;

	if (!callout_active(&vlapic->callout))	/* callout was stopped */
		goto done;

	callout_deactivate(&vlapic->callout);

	vlapic_fire_timer(vlapic);

	if (vlapic_periodic_timer(vlapic)) {
		/*
		 * Compute the delta between when the timer was supposed to
		 * fire and the present time.  We can depend on the fact that
		 * cyclics (which underlie these callouts) will never be called
		 * early.
		 */
		const hrtime_t now = gethrtime();
		const hrtime_t delta = now - vlapic->timer_fire_when;
		if (delta >= vlapic->timer_period) {
			/*
			 * If we are so behind that we have missed an entire
			 * timer period, reset the time base rather than
			 * attempting to catch up.
			 */
			vlapic->timer_fire_when = now + vlapic->timer_period;
		} else {
			vlapic->timer_fire_when += vlapic->timer_period;
		}
		vlapic_callout_reset(vlapic);
	} else {
		/*
		 * Clear the target time so that logic can distinguish a timer
		 * which has fired (where the value is zero) from one which is
		 * held pending due to the instance being paused (where the
		 * value is non-zero, but the callout is not pending).
		 */
		vlapic->timer_fire_when = 0;
	}
done:
	VLAPIC_TIMER_UNLOCK(vlapic);
}

void
vlapic_icrtmr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	VLAPIC_TIMER_LOCK(vlapic);
	vlapic->timer_period = hrt_freq_interval(vlapic->timer_cur_freq,
	    lapic->icr_timer);
	if (vlapic->timer_period != 0) {
		vlapic->timer_fire_when = gethrtime() + vlapic->timer_period;
		vlapic_callout_reset(vlapic);
	} else {
		vlapic->timer_fire_when = 0;
		callout_stop(&vlapic->callout);
	}
	VLAPIC_TIMER_UNLOCK(vlapic);
}

/*
 * This function populates 'dmask' with the set of vcpus that match the
 * addressing specified by the (dest, phys, lowprio) tuple.
 *
 * 'x2apic_dest' specifies whether 'dest' is interpreted as x2APIC (32-bit)
 * or xAPIC (8-bit) destination field.
 */
void
vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys,
    bool lowprio, bool x2apic_dest)
{
	struct vlapic *vlapic;
	uint32_t dfr, ldr, ldest, cluster;
	uint32_t mda_flat_ldest, mda_cluster_ldest, mda_ldest, mda_cluster_id;
	cpuset_t amask;
	int vcpuid;

	if ((x2apic_dest && dest == 0xffffffff) ||
	    (!x2apic_dest && dest == 0xff)) {
		/*
		 * Broadcast in both logical and physical modes.
		 */
		*dmask = vm_active_cpus(vm);
		return;
	}

	if (phys) {
		/*
		 * Physical mode: destination is APIC ID.
		 */
		CPU_ZERO(dmask);
		vcpuid = vm_apicid2vcpuid(vm, dest);
		amask = vm_active_cpus(vm);
		if (vcpuid < vm_get_maxcpus(vm) && CPU_ISSET(vcpuid, &amask))
			CPU_SET(vcpuid, dmask);
	} else {
		/*
		 * In the "Flat Model" the MDA is interpreted as an 8-bit wide
		 * bitmask. This model is only available in the xAPIC mode.
		 */
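		/*
		 * For example, a flat-model MDA of 0x03 addresses the APICs
		 * whose logical ID has bit 0 or bit 1 set.
		 */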
		mda_flat_ldest = dest & 0xff;

		/*
		 * In the "Cluster Model" the MDA is used to identify a
		 * specific cluster and a set of APICs in that cluster.
		 */
		if (x2apic_dest) {
			mda_cluster_id = dest >> 16;
			mda_cluster_ldest = dest & 0xffff;
		} else {
			mda_cluster_id = (dest >> 4) & 0xf;
			mda_cluster_ldest = dest & 0xf;
		}

		/*
		 * Logical mode: match each APIC that has a bit set
		 * in its LDR that matches a bit in the ldest.
		 */
		CPU_ZERO(dmask);
		amask = vm_active_cpus(vm);
		while ((vcpuid = CPU_FFS(&amask)) != 0) {
			vcpuid--;
			CPU_CLR(vcpuid, &amask);

			vlapic = vm_lapic(vm, vcpuid);
			dfr = vlapic->apic_page->dfr;
			ldr = vlapic->apic_page->ldr;

			if ((dfr & APIC_DFR_MODEL_MASK) ==
			    APIC_DFR_MODEL_FLAT) {
				ldest = ldr >> 24;
				mda_ldest = mda_flat_ldest;
			} else if ((dfr & APIC_DFR_MODEL_MASK) ==
			    APIC_DFR_MODEL_CLUSTER) {
				if (vlapic_x2mode(vlapic)) {
					cluster = ldr >> 16;
					ldest = ldr & 0xffff;
				} else {
					cluster = ldr >> 28;
					ldest = (ldr >> 24) & 0xf;
				}
				if (cluster != mda_cluster_id)
					continue;
				mda_ldest = mda_cluster_ldest;
			} else {
				/*
				 * Guest has configured a bad logical
				 * model for this vcpu - skip it.
				 */
				continue;
			}

			if ((mda_ldest & ldest) != 0) {
				CPU_SET(vcpuid, dmask);
				if (lowprio)
					break;
			}
		}
	}
}

static VMM_STAT(VLAPIC_IPI_SEND, "ipis sent from vcpu");
static VMM_STAT(VLAPIC_IPI_RECV, "ipis received by vcpu");

static void
vlapic_set_tpr(struct vlapic *vlapic, uint8_t val)
{
	struct LAPIC *lapic = vlapic->apic_page;

	if (lapic->tpr != val) {
		lapic->tpr = val;
		vlapic_update_ppr(vlapic);
	}
}

void
vlapic_set_cr8(struct vlapic *vlapic, uint64_t val)
{
	uint8_t tpr;

	if (val & ~0xf) {
		vm_inject_gp(vlapic->vm, vlapic->vcpuid);
		return;
	}

	tpr = val << 4;
	vlapic_set_tpr(vlapic, tpr);
}

uint64_t
vlapic_get_cr8(const struct vlapic *vlapic)
{
	const struct LAPIC *lapic = vlapic->apic_page;

	return (lapic->tpr >> 4);
}

void
vlapic_icrlo_write_handler(struct vlapic *vlapic)
{
	int i;
	cpuset_t dmask;
	uint64_t icrval;
	uint32_t dest, vec, mode, dsh;
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	lapic->icr_lo &= ~APIC_DELSTAT_PEND;
	icrval = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo;

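	/*
	 * The destination field occupies ICR bits 63:32 in x2APIC mode, but
	 * only bits 63:56 (the high byte of ICR_HI) in xAPIC mode.
	 */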
	if (vlapic_x2mode(vlapic))
		dest = icrval >> 32;
	else
		dest = icrval >> (32 + 24);
	vec = icrval & APIC_VECTOR_MASK;
	mode = icrval & APIC_DELMODE_MASK;
	dsh = icrval & APIC_DEST_MASK;

	if (mode == APIC_DELMODE_FIXED && vec < 16) {
		vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR, false);
		return;
	}
	if (mode == APIC_DELMODE_INIT &&
	    (icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT) {
		/* No work required to deassert INIT */
		return;
	}
	if ((mode == APIC_DELMODE_STARTUP || mode == APIC_DELMODE_INIT) &&
	    !(dsh == APIC_DEST_DESTFLD || dsh == APIC_DEST_ALLESELF)) {
		/*
		 * While Intel makes no mention of restrictions for destination
		 * shorthand when sending INIT or SIPI, AMD requires either a
		 * specific destination or all-excluding self.  Common use seems
		 * to be restricted to those two cases.  Until handling is in
		 * place to halt a guest which makes such a frivolous request,
		 * we will ignore them.
		 */
		return;
	}

	switch (dsh) {
	case APIC_DEST_DESTFLD:
		vlapic_calcdest(vlapic->vm, &dmask, dest,
		    (icrval & APIC_DESTMODE_LOG) == 0, false,
		    vlapic_x2mode(vlapic));
		break;
	case APIC_DEST_SELF:
		CPU_SETOF(vlapic->vcpuid, &dmask);
		break;
	case APIC_DEST_ALLISELF:
		dmask = vm_active_cpus(vlapic->vm);
		break;
	case APIC_DEST_ALLESELF:
		dmask = vm_active_cpus(vlapic->vm);
		CPU_CLR(vlapic->vcpuid, &dmask);
		break;
	default:
		/*
		 * All possible delivery notations are covered above.
		 * We should never end up here.
		 */
		panic("unknown delivery shorthand: %x", dsh);
	}

	while ((i = CPU_FFS(&dmask)) != 0) {
		i--;
		CPU_CLR(i, &dmask);
		switch (mode) {
		case APIC_DELMODE_FIXED:
			(void) lapic_intr_edge(vlapic->vm, i, vec);
			vmm_stat_incr(vlapic->vm, vlapic->vcpuid,
			    VLAPIC_IPI_SEND, 1);
			vmm_stat_incr(vlapic->vm, i,
			    VLAPIC_IPI_RECV, 1);
			break;
		case APIC_DELMODE_NMI:
			(void) vm_inject_nmi(vlapic->vm, i);
			break;
		case APIC_DELMODE_INIT:
			(void) vm_inject_init(vlapic->vm, i);
			break;
		case APIC_DELMODE_STARTUP:
			(void) vm_inject_sipi(vlapic->vm, i, vec);
			break;
		case APIC_DELMODE_LOWPRIO:
		case APIC_DELMODE_SMI:
		default:
			/* Unhandled IPI modes (for now) */
			break;
		}
	}
}

void
vlapic_self_ipi_handler(struct vlapic *vlapic, uint32_t val)
{
	const int vec = val & 0xff;

	/* self-IPI is only exposed via x2APIC */
	ASSERT(vlapic_x2mode(vlapic));

	(void) lapic_intr_edge(vlapic->vm, vlapic->vcpuid, vec);
	vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_IPI_SEND, 1);
	vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_IPI_RECV, 1);
}

int
vlapic_pending_intr(struct vlapic *vlapic, int *vecptr)
{
	struct LAPIC	*lapic = vlapic->apic_page;
	int		 idx, i, bitpos, vector;
	uint32_t	*irrptr, val;

	if (vlapic->ops.sync_state) {
		(*vlapic->ops.sync_state)(vlapic);
	}

	irrptr = &lapic->irr0;

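	/*
	 * Scan the IRR from the highest vector group down.  A pending vector
	 * is deliverable only if its priority class exceeds the current PPR;
	 * for example, pending vector 0x35 is blocked while the PPR is 0x50.
	 */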
	for (i = 7; i >= 0; i--) {
		idx = i * 4;
		val = atomic_load_acq_int(&irrptr[idx]);
		bitpos = fls(val);
		if (bitpos != 0) {
			vector = i * 32 + (bitpos - 1);
			if (PRIO(vector) > PRIO(lapic->ppr)) {
				if (vecptr != NULL)
					*vecptr = vector;
				return (1);
			} else
				break;
		}
	}
	return (0);
}

void
vlapic_intr_accepted(struct vlapic *vlapic, int vector)
{
	struct LAPIC	*lapic = vlapic->apic_page;
	uint32_t	*irrptr, *isrptr;
	int		idx;

	KASSERT(vector >= 16 && vector < 256, ("invalid vector %d", vector));

	if (vlapic->ops.intr_accepted)
		return ((*vlapic->ops.intr_accepted)(vlapic, vector));

	/*
	 * Clear the ready bit for the vector being accepted in the IRR and
	 * set the vector as in-service in the ISR.
	 */
	idx = (vector / 32) * 4;

	irrptr = &lapic->irr0;
	atomic_clear_int(&irrptr[idx], 1 << (vector % 32));

	isrptr = &lapic->isr0;
	isrptr[idx] |= 1 << (vector % 32);

	/*
	 * The only way a fresh vector could be accepted into ISR is if it was
	 * of a higher priority than the current PPR.  With that vector now
	 * in-service, the PPR must be raised.
	 */
	vlapic_raise_ppr(vlapic, vector);
}

void
vlapic_svr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	uint32_t old, new, changed;

	lapic = vlapic->apic_page;

	new = lapic->svr;
	old = vlapic->svr_last;
	vlapic->svr_last = new;

	changed = old ^ new;
	if ((changed & APIC_SVR_ENABLE) != 0) {
		if ((new & APIC_SVR_ENABLE) == 0) {
			/*
			 * The apic is now disabled so stop the apic timer
			 * and mask all the LVT entries.
			 */
			VLAPIC_TIMER_LOCK(vlapic);
			callout_stop(&vlapic->callout);
			VLAPIC_TIMER_UNLOCK(vlapic);
			vlapic_mask_lvts(vlapic);
		} else {
			/*
			 * The apic is now enabled so restart the apic timer
			 * if it is configured in periodic mode.
			 */
			if (vlapic_periodic_timer(vlapic))
				vlapic_icrtmr_write_handler(vlapic);
		}
	}
}

static bool
vlapic_read(struct vlapic *vlapic, uint16_t offset, uint32_t *outp)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *reg;
	int i;

	ASSERT3U(offset & 0x3, ==, 0);
	ASSERT3U(offset, <, PAGESIZE);
	ASSERT3P(outp, !=, NULL);

	uint32_t data = 0;
	switch (offset) {
	case APIC_OFFSET_ID:
		data = lapic->id;
		break;
	case APIC_OFFSET_VER:
		data = lapic->version;
		break;
	case APIC_OFFSET_TPR:
		data = lapic->tpr;
		break;
	case APIC_OFFSET_APR:
		data = lapic->apr;
		break;
	case APIC_OFFSET_PPR:
		data = lapic->ppr;
		break;
	case APIC_OFFSET_LDR:
		data = lapic->ldr;
		break;
	case APIC_OFFSET_DFR:
		data = lapic->dfr;
		break;
	case APIC_OFFSET_SVR:
		data = lapic->svr;
		break;
	case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
		i = (offset - APIC_OFFSET_ISR0) >> 2;
		reg = &lapic->isr0;
		data = *(reg + i);
		break;
	case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
		i = (offset - APIC_OFFSET_TMR0) >> 2;
		reg = &lapic->tmr0;
		data = *(reg + i);
		break;
	case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
		i = (offset - APIC_OFFSET_IRR0) >> 2;
		reg = &lapic->irr0;
		data = atomic_load_acq_int(reg + i);
		break;
	case APIC_OFFSET_ESR:
		data = lapic->esr;
		break;
	case APIC_OFFSET_ICR_LOW:
		data = lapic->icr_lo;
		break;
	case APIC_OFFSET_ICR_HI:
		data = lapic->icr_hi;
		break;
	case APIC_OFFSET_CMCI_LVT:
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		data = vlapic_get_lvt(vlapic, offset);
#ifdef INVARIANTS
		reg = vlapic_get_lvtptr(vlapic, offset);
		ASSERT3U(data, ==, *reg);
#endif
		break;
	case APIC_OFFSET_TIMER_ICR:
		data = lapic->icr_timer;
		break;
	case APIC_OFFSET_TIMER_CCR:
		data = vlapic_get_ccr(vlapic);
		break;
	case APIC_OFFSET_TIMER_DCR:
		data = lapic->dcr_timer;
		break;
	case APIC_OFFSET_RRR:
		data = 0;
		break;

	case APIC_OFFSET_SELF_IPI:
	case APIC_OFFSET_EOI:
		/* Write-only register */
		*outp = 0;
		return (false);

	default:
		/* Invalid register */
		*outp = 0;
		return (false);
	}

	*outp = data;
	return (true);
}

static bool
vlapic_write(struct vlapic *vlapic, uint16_t offset, uint32_t data)
{
	struct LAPIC	*lapic = vlapic->apic_page;
	uint32_t	*regptr;

	ASSERT3U(offset & 0xf, ==, 0);
	ASSERT3U(offset, <, PAGESIZE);

	switch (offset) {
	case APIC_OFFSET_ID:
		lapic->id = data;
		vlapic_id_write_handler(vlapic);
		break;
	case APIC_OFFSET_TPR:
		vlapic_set_tpr(vlapic, data & 0xff);
		break;
	case APIC_OFFSET_EOI:
		vlapic_process_eoi(vlapic);
		break;
	case APIC_OFFSET_LDR:
		lapic->ldr = data;
		vlapic_ldr_write_handler(vlapic);
		break;
	case APIC_OFFSET_DFR:
		lapic->dfr = data;
		vlapic_dfr_write_handler(vlapic);
		break;
	case APIC_OFFSET_SVR:
		lapic->svr = data;
		vlapic_svr_write_handler(vlapic);
		break;
	case APIC_OFFSET_ICR_LOW:
		lapic->icr_lo = data;
		vlapic_icrlo_write_handler(vlapic);
		break;
	case APIC_OFFSET_ICR_HI:
		lapic->icr_hi = data;
		break;
	case APIC_OFFSET_CMCI_LVT:
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		regptr = vlapic_get_lvtptr(vlapic, offset);
		*regptr = data;
		vlapic_lvt_write_handler(vlapic, offset);
		break;
	case APIC_OFFSET_TIMER_ICR:
		lapic->icr_timer = data;
		vlapic_icrtmr_write_handler(vlapic);
		break;

	case APIC_OFFSET_TIMER_DCR:
		lapic->dcr_timer = data;
		vlapic_dcr_write_handler(vlapic);
		break;

	case APIC_OFFSET_ESR:
		vlapic_esr_write_handler(vlapic);
		break;

	case APIC_OFFSET_SELF_IPI:
		if (vlapic_x2mode(vlapic))
			vlapic_self_ipi_handler(vlapic, data);
		break;

	case APIC_OFFSET_VER:
	case APIC_OFFSET_APR:
	case APIC_OFFSET_PPR:
	case APIC_OFFSET_RRR:
	case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
	case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
	case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
	case APIC_OFFSET_TIMER_CCR:
		/* Read-only register */
		return (false);

	default:
		/* Invalid register */
		return (false);
	}

	return (true);
}

void
vlapic_reset(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *isrptr, *tmrptr, *irrptr;

	/* Reset any timer-related state first */
	VLAPIC_TIMER_LOCK(vlapic);
	callout_stop(&vlapic->callout);
	lapic->icr_timer = 0;
	lapic->ccr_timer = 0;
	lapic->dcr_timer = 0;
	vlapic_update_divider(vlapic);
	VLAPIC_TIMER_UNLOCK(vlapic);

	/*
	 * Sync any APIC acceleration (APICv/AVIC) state into the APIC page so
	 * it is not leftover after the reset.  This is performed after the APIC
	 * timer has been stopped, in case it happened to fire just prior to
	 * being deactivated.
	 */
	if (vlapic->ops.sync_state) {
		(*vlapic->ops.sync_state)(vlapic);
	}

	vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED;
	if (vlapic->vcpuid == 0)
		vlapic->msr_apicbase |= APICBASE_BSP;

	lapic->id = vlapic_get_id(vlapic);
	lapic->version = VLAPIC_VERSION;
	lapic->version |= (VLAPIC_MAXLVT_INDEX << MAXLVTSHIFT);

	lapic->tpr = 0;
	lapic->apr = 0;
	lapic->ppr = 0;

	lapic->eoi = 0;
	lapic->ldr = 0;
	lapic->dfr = 0xffffffff;
	lapic->svr = APIC_SVR_VECTOR;
	vlapic->svr_last = lapic->svr;

	isrptr = &lapic->isr0;
	tmrptr = &lapic->tmr0;
	irrptr = &lapic->irr0;
	for (uint_t i = 0; i < 8; i++) {
		atomic_store_rel_int(&isrptr[i * 4], 0);
		atomic_store_rel_int(&tmrptr[i * 4], 0);
		atomic_store_rel_int(&irrptr[i * 4], 0);
	}

	lapic->esr = 0;
	vlapic->esr_pending = 0;
	lapic->icr_lo = 0;
	lapic->icr_hi = 0;

	lapic->lvt_cmci = 0;
	lapic->lvt_timer = 0;
	lapic->lvt_thermal = 0;
	lapic->lvt_pcint = 0;
	lapic->lvt_lint0 = 0;
	lapic->lvt_lint1 = 0;
	lapic->lvt_error = 0;
	vlapic_mask_lvts(vlapic);
}

void
vlapic_init(struct vlapic *vlapic)
{
	KASSERT(vlapic->vm != NULL, ("vlapic_init: vm is not initialized"));
	KASSERT(vlapic->vcpuid >= 0 &&
	    vlapic->vcpuid < vm_get_maxcpus(vlapic->vm),
	    ("vlapic_init: vcpuid is not initialized"));
	KASSERT(vlapic->apic_page != NULL, ("vlapic_init: apic_page is not "
	    "initialized"));

	/*
	 * If the vlapic is configured in x2apic mode then it will be
	 * accessed in the critical section via the MSR emulation code.
	 *
	 * Therefore the timer mutex must be a spinlock because blockable
	 * mutexes cannot be acquired in a critical section.
	 */
	mutex_init(&vlapic->timer_lock, NULL, MUTEX_ADAPTIVE, NULL);
	callout_init(&vlapic->callout, 1);

	vlapic_reset(vlapic);
}

void
vlapic_cleanup(struct vlapic *vlapic)
{
	callout_drain(&vlapic->callout);
	mutex_destroy(&vlapic->timer_lock);
}

int
vlapic_mmio_read(struct vlapic *vlapic, uint64_t gpa, uint64_t *valp,
    uint_t size)
{
	ASSERT3U(gpa, >=, DEFAULT_APIC_BASE);
	ASSERT3U(gpa, <, DEFAULT_APIC_BASE + PAGE_SIZE);

	/* Ignore MMIO accesses when in x2APIC mode or hardware disabled */
	if (vlapic_x2mode(vlapic) || vlapic_hw_disabled(vlapic)) {
		*valp = UINT64_MAX;
		return (0);
	}

	const uint16_t off = gpa - DEFAULT_APIC_BASE;
	uint32_t raw = 0;
	(void) vlapic_read(vlapic, off & ~0xf, &raw);

	/* Shift and mask reads which are small and/or unaligned */
	const uint8_t align = off & 0xf;
	if (align < 4) {
		*valp = (uint64_t)raw << (align * 8);
	} else {
		*valp = 0;
	}

	return (0);
}

int
vlapic_mmio_write(struct vlapic *vlapic, uint64_t gpa, uint64_t val,
    uint_t size)
{
	ASSERT3U(gpa, >=, DEFAULT_APIC_BASE);
	ASSERT3U(gpa, <, DEFAULT_APIC_BASE + PAGE_SIZE);

	/* Ignore MMIO accesses when in x2APIC mode or hardware disabled */
	if (vlapic_x2mode(vlapic) || vlapic_hw_disabled(vlapic)) {
		return (0);
	}

	const uint16_t off = gpa - DEFAULT_APIC_BASE;
	/* Ignore writes which are not both 32 bits wide and 16-byte aligned */
	if ((off & 0xf) != 0 || size != 4) {
		return (0);
	}

	(void) vlapic_write(vlapic, off, (uint32_t)val);
	return (0);
}

/* Should attempts to change the APIC base address be rejected with a #GP?  */
int vlapic_gp_on_addr_change = 1;

static vm_msr_result_t
vlapic_set_apicbase(struct vlapic *vlapic, uint64_t val)
{
	const uint64_t diff = vlapic->msr_apicbase ^ val;

	/*
	 * Until the LAPIC emulation for switching between xAPIC and x2APIC
	 * modes is more polished, it will remain off-limits from being altered
	 * by the guest.
	 */
	const uint64_t reserved_bits = APICBASE_RESERVED | APICBASE_X2APIC |
	    APICBASE_BSP;
	if ((diff & reserved_bits) != 0) {
		return (VMR_GP);
	}

	/* We do not presently allow the LAPIC access address to be modified. */
	if ((diff & APICBASE_ADDR_MASK) != 0) {
		/*
		 * Explicitly rebuffing such requests with a #GP is the most
		 * straightforward way to handle the situation, but certain
		 * consumers (such as the KVM unit tests) may balk at the
		 * otherwise unexpected exception.
		 */
		if (vlapic_gp_on_addr_change) {
			return (VMR_GP);
		}

		/* If silence is required, just ignore the address change. */
		val = (val & ~APICBASE_ADDR_MASK) | DEFAULT_APIC_BASE;
	}

	vlapic->msr_apicbase = val;
	return (VMR_OK);
}

static __inline uint16_t
vlapic_msr_to_regoff(uint32_t msr)
{
	ASSERT3U(msr, >=, MSR_APIC_000);
	ASSERT3U(msr, <, (MSR_APIC_000 + 0x100));

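	/*
	 * Each x2APIC MSR maps to a 16-byte-spaced register in the xAPIC
	 * layout; e.g., MSR 0x808 (the TPR) becomes register offset 0x80.
	 */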
	return ((msr - MSR_APIC_000) << 4);
}

bool
vlapic_owned_msr(uint32_t msr)
{
	if (msr == MSR_APICBASE) {
		return (true);
	}
	if (msr >= MSR_APIC_000 &&
	    msr < (MSR_APIC_000 + 0x100)) {
		return (true);
	}
	return (false);
}

vm_msr_result_t
vlapic_rdmsr(struct vlapic *vlapic, uint32_t msr, uint64_t *valp)
{
	ASSERT(vlapic_owned_msr(msr));
	ASSERT3P(valp, !=, NULL);

	if (msr == MSR_APICBASE) {
		*valp = vlapic->msr_apicbase;
		return (VMR_OK);
	}

	/* #GP for x2APIC MSR accesses in xAPIC mode */
	if (!vlapic_x2mode(vlapic)) {
		return (VMR_GP);
	}

	uint64_t out = 0;
	const uint16_t reg = vlapic_msr_to_regoff(msr);
	switch (reg) {
	case APIC_OFFSET_ICR_LOW: {
		/* Read from ICR register gets entire (64-bit) value */
		uint32_t low = 0, high = 0;
		bool valid;

		valid = vlapic_read(vlapic, APIC_OFFSET_ICR_HI, &high);
		VERIFY(valid);
		valid = vlapic_read(vlapic, APIC_OFFSET_ICR_LOW, &low);
		VERIFY(valid);

		*valp = ((uint64_t)high << 32) | low;
		return (VMR_OK);
		}
	case APIC_OFFSET_ICR_HI:
		/* Already covered by ICR_LOW */
		return (VMR_GP);
	default:
		break;
	}
	if (!vlapic_read(vlapic, reg, (uint32_t *)&out)) {
		return (VMR_GP);
	}
	*valp = out;
	return (VMR_OK);
}

vm_msr_result_t
vlapic_wrmsr(struct vlapic *vlapic, uint32_t msr, uint64_t val)
{
	ASSERT(vlapic_owned_msr(msr));

	if (msr == MSR_APICBASE) {
		return (vlapic_set_apicbase(vlapic, val));
	}

	/* #GP for x2APIC MSR accesses in xAPIC mode */
	if (!vlapic_x2mode(vlapic)) {
		return (VMR_GP);
	}

	const uint16_t reg = vlapic_msr_to_regoff(msr);
	switch (reg) {
	case APIC_OFFSET_ICR_LOW: {
		/* Write to ICR register sets entire (64-bit) value */
		bool valid;

		valid = vlapic_write(vlapic, APIC_OFFSET_ICR_HI, val >> 32);
		VERIFY(valid);
		valid = vlapic_write(vlapic, APIC_OFFSET_ICR_LOW, val);
		VERIFY(valid);
		return (VMR_OK);
		}
	case APIC_OFFSET_ICR_HI:
		/* Already covered by ICR_LOW */
		return (VMR_GP);
	case APIC_OFFSET_ESR:
		/* Only 0 may be written from x2APIC mode */
		if (val != 0) {
			return (VMR_GP);
		}
		break;
	default:
		break;
	}
	if (!vlapic_write(vlapic, reg, val)) {
		return (VMR_GP);
	}
	return (VMR_OK);
}

void
vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
{
	struct vlapic *vlapic;
	struct LAPIC *lapic;

	vlapic = vm_lapic(vm, vcpuid);

	if (state == X2APIC_DISABLED)
		vlapic->msr_apicbase &= ~APICBASE_X2APIC;
	else
		vlapic->msr_apicbase |= APICBASE_X2APIC;

	/*
	 * Reset the local APIC registers whose values are mode-dependent.
	 *
	 * XXX this works because the APIC mode can be changed only at vcpu
	 * initialization time.
	 */
	lapic = vlapic->apic_page;
	lapic->id = vlapic_get_id(vlapic);
	if (vlapic_x2mode(vlapic)) {
		lapic->ldr = x2apic_ldr(vlapic);
		lapic->dfr = 0;
	} else {
		lapic->ldr = 0;
		lapic->dfr = 0xffffffff;
	}

	if (state == X2APIC_ENABLED) {
		if (vlapic->ops.enable_x2apic_mode)
			(*vlapic->ops.enable_x2apic_mode)(vlapic);
	}
}

void
vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys,
    int delmode, int vec)
{
	bool lowprio;
	int vcpuid;
	cpuset_t dmask;

	if (delmode != IOART_DELFIXED &&
	    delmode != IOART_DELLOPRI &&
	    delmode != IOART_DELEXINT) {
		/* Invalid delivery mode */
		return;
	}
	lowprio = (delmode == IOART_DELLOPRI);

	/*
	 * We don't provide any virtual interrupt redirection hardware so
	 * all interrupts originating from the ioapic or MSI specify the
	 * 'dest' in the legacy xAPIC format.
	 */
	vlapic_calcdest(vm, &dmask, dest, phys, lowprio, false);

	while ((vcpuid = CPU_FFS(&dmask)) != 0) {
		vcpuid--;
		CPU_CLR(vcpuid, &dmask);
		if (delmode == IOART_DELEXINT) {
			(void) vm_inject_extint(vm, vcpuid);
		} else {
			(void) lapic_set_intr(vm, vcpuid, vec, level);
		}
	}
}

void
vlapic_post_intr(struct vlapic *vlapic, int hostcpu)
{
	/*
	 * Post an interrupt to the vcpu currently running on 'hostcpu'.
	 *
	 * This is done by leveraging features like Posted Interrupts (Intel)
	 * or the Doorbell MSR (AMD AVIC) that avoid a VM exit.
	 *
	 * If neither of these features is available then fall back to
	 * sending an IPI to 'hostcpu'.
	 */
	if (vlapic->ops.post_intr)
		(*vlapic->ops.post_intr)(vlapic, hostcpu);
	else
		poke_cpu(hostcpu);
}

void
vlapic_localize_resources(struct vlapic *vlapic)
{
	vmm_glue_callout_localize(&vlapic->callout);
}

void
vlapic_pause(struct vlapic *vlapic)
{
	VLAPIC_TIMER_LOCK(vlapic);
	callout_stop(&vlapic->callout);
	VLAPIC_TIMER_UNLOCK(vlapic);
}

void
vlapic_resume(struct vlapic *vlapic)
{
	VLAPIC_TIMER_LOCK(vlapic);
	if (vlapic->timer_fire_when != 0) {
		vlapic_callout_reset(vlapic);
	}
	VLAPIC_TIMER_UNLOCK(vlapic);
}

static int
vlapic_data_read(void *datap, const vmm_data_req_t *req)
{
	VERIFY3U(req->vdr_class, ==, VDC_LAPIC);
	VERIFY3U(req->vdr_version, ==, 1);
	VERIFY3U(req->vdr_len, >=, sizeof (struct vdi_lapic_v1));

	struct vlapic *vlapic = datap;
	struct vdi_lapic_v1 *out = req->vdr_data;

	VLAPIC_TIMER_LOCK(vlapic);

	if (vlapic->ops.sync_state) {
		(*vlapic->ops.sync_state)(vlapic);
	}

	out->vl_msr_apicbase = vlapic->msr_apicbase;
	out->vl_esr_pending = vlapic->esr_pending;
	if (vlapic->timer_fire_when != 0) {
		out->vl_timer_target =
		    vm_normalize_hrtime(vlapic->vm, vlapic->timer_fire_when);
	} else {
		out->vl_timer_target = 0;
	}

	const struct LAPIC *lapic = vlapic->apic_page;
	struct vdi_lapic_page_v1 *out_page = &out->vl_lapic;

	/*
	 * While this might appear, at first glance, to be missing some fields,
	 * they are intentionally omitted:
	 * - PPR: its contents are always generated at runtime
	 * - EOI: write-only, and contents are ignored after handling
	 * - RRD: (aka RRR) read-only and always 0
	 * - CCR: calculated from underlying timer data
	 */
	out_page->vlp_id = lapic->id;
	out_page->vlp_version = lapic->version;
	out_page->vlp_tpr = lapic->tpr;
	out_page->vlp_apr = lapic->apr;
	out_page->vlp_ldr = lapic->ldr;
	out_page->vlp_dfr = lapic->dfr;
	out_page->vlp_svr = lapic->svr;
	out_page->vlp_esr = lapic->esr;
	out_page->vlp_icr = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo;
	out_page->vlp_icr_timer = lapic->icr_timer;
	out_page->vlp_dcr_timer = lapic->dcr_timer;

	out_page->vlp_lvt_cmci = lapic->lvt_cmci;
	out_page->vlp_lvt_timer = lapic->lvt_timer;
	out_page->vlp_lvt_thermal = lapic->lvt_thermal;
	out_page->vlp_lvt_pcint = lapic->lvt_pcint;
	out_page->vlp_lvt_lint0 = lapic->lvt_lint0;
	out_page->vlp_lvt_lint1 = lapic->lvt_lint1;
	out_page->vlp_lvt_error = lapic->lvt_error;

	const uint32_t *isrptr = &lapic->isr0;
	const uint32_t *tmrptr = &lapic->tmr0;
	const uint32_t *irrptr = &lapic->irr0;
	for (uint_t i = 0; i < 8; i++) {
		out_page->vlp_isr[i] = isrptr[i * 4];
		out_page->vlp_tmr[i] = tmrptr[i * 4];
		out_page->vlp_irr[i] = irrptr[i * 4];
	}
	VLAPIC_TIMER_UNLOCK(vlapic);

	return (0);
}

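/*
 * Count the bits set in a byte; each pass of val &= (val - 1) clears the
 * lowest set bit.
 */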
static uint8_t
popc8(uint8_t val)
{
	uint8_t cnt;

	for (cnt = 0; val != 0; val &= (val - 1)) {
		cnt++;
	}
	return (cnt);
}

/*
 * Descriptions for the various failures which can occur when validating
 * to-be-written vlapic state.
 */
enum vlapic_validation_error {
	VVE_OK,
	VVE_BAD_ID,
	VVE_BAD_VERSION,
	VVE_BAD_MSR_BASE,
	VVE_BAD_ESR,
	VVE_BAD_TPR,
	VVE_LOW_VECTOR,
	VVE_ISR_PRIORITY,
};

static enum vlapic_validation_error
vlapic_data_validate(const struct vlapic *vlapic, const vmm_data_req_t *req)
{
	ASSERT(req->vdr_version == 1 &&
	    req->vdr_len >= sizeof (struct vdi_lapic_v1));
	const struct vdi_lapic_v1 *src = req->vdr_data;

	if ((src->vl_esr_pending & ~APIC_VALID_MASK_ESR) != 0 ||
	    (src->vl_lapic.vlp_esr & ~APIC_VALID_MASK_ESR) != 0) {
		return (VVE_BAD_ESR);
	}

	/* Use the same restrictions as the wrmsr accessor for now */
	const uint64_t apicbase_reserved = APICBASE_RESERVED | APICBASE_X2APIC |
	    APICBASE_BSP;
	const uint64_t diff = src->vl_msr_apicbase ^ vlapic->msr_apicbase;
	if ((diff & apicbase_reserved) != 0) {
		return (VVE_BAD_MSR_BASE);
	}

	const struct vdi_lapic_page_v1 *page = &src->vl_lapic;
	/*
	 * Demand that ID match for now.  This can be further updated when some
	 * of the x2apic handling is improved.
	 */
	if (page->vlp_id != vlapic_get_id(vlapic)) {
		return (VVE_BAD_ID);
	}

	if (page->vlp_version != vlapic->apic_page->version) {
		return (VVE_BAD_VERSION);
	}

	if (page->vlp_tpr > 0xff) {
		return (VVE_BAD_TPR);
	}

	/* Vectors 0-15 are not expected to be handled by the lapic */
	if ((page->vlp_isr[0] & 0xffff) != 0 ||
	    (page->vlp_irr[0] & 0xffff) != 0 ||
	    (page->vlp_tmr[0] & 0xffff) != 0) {
		return (VVE_LOW_VECTOR);
	}

	/* Only one interrupt should be in-service for each priority level */
	for (uint_t i = 0; i < 8; i++) {
		if (popc8((uint8_t)page->vlp_isr[i]) > 1 ||
		    popc8((uint8_t)(page->vlp_isr[i] >> 8)) > 1 ||
		    popc8((uint8_t)(page->vlp_isr[i] >> 16)) > 1 ||
		    popc8((uint8_t)(page->vlp_isr[i] >> 24)) > 1) {
			return (VVE_ISR_PRIORITY);
		}
	}

	return (VVE_OK);
}

static int
vlapic_data_write(void *datap, const vmm_data_req_t *req)
{
	VERIFY3U(req->vdr_class, ==, VDC_LAPIC);
	VERIFY3U(req->vdr_version, ==, 1);
	VERIFY3U(req->vdr_len, >=, sizeof (struct vdi_lapic_v1));

	struct vlapic *vlapic = datap;
	if (vlapic_data_validate(vlapic, req) != VVE_OK) {
		return (EINVAL);
	}
	const struct vdi_lapic_v1 *src = req->vdr_data;
	const struct vdi_lapic_page_v1 *page = &src->vl_lapic;
	struct LAPIC *lapic = vlapic->apic_page;

	VLAPIC_TIMER_LOCK(vlapic);

	/* Already ensured by vlapic_data_validate() */
	VERIFY3U(page->vlp_id, ==, lapic->id);
	VERIFY3U(page->vlp_version, ==, lapic->version);

	vlapic->msr_apicbase = src->vl_msr_apicbase;
	vlapic->esr_pending = src->vl_esr_pending;

	lapic->tpr = page->vlp_tpr;
	lapic->apr = page->vlp_apr;
	lapic->ldr = page->vlp_ldr;
	lapic->dfr = page->vlp_dfr;
	lapic->svr = page->vlp_svr;
	lapic->esr = page->vlp_esr;
	lapic->icr_lo = (uint32_t)page->vlp_icr;
	lapic->icr_hi = (uint32_t)(page->vlp_icr >> 32);

	lapic->icr_timer = page->vlp_icr_timer;
	lapic->dcr_timer = page->vlp_dcr_timer;
	vlapic_update_divider(vlapic);

	/* cleanse LDR/DFR */
	vlapic_ldr_write_handler(vlapic);
	vlapic_dfr_write_handler(vlapic);

	lapic->lvt_cmci = page->vlp_lvt_cmci;
	lapic->lvt_timer = page->vlp_lvt_timer;
	lapic->lvt_thermal = page->vlp_lvt_thermal;
	lapic->lvt_pcint = page->vlp_lvt_pcint;
	lapic->lvt_lint0 = page->vlp_lvt_lint0;
	lapic->lvt_lint1 = page->vlp_lvt_lint1;
	lapic->lvt_error = page->vlp_lvt_error;
	/* cleanse LVTs */
	vlapic_refresh_lvts(vlapic);

	uint32_t *isrptr = &lapic->isr0;
	uint32_t *tmrptr = &lapic->tmr0;
	uint32_t *irrptr = &lapic->irr0;
	for (uint_t i = 0; i < 8; i++) {
		isrptr[i * 4] = page->vlp_isr[i];
		tmrptr[i * 4] = page->vlp_tmr[i];
		irrptr[i * 4] = page->vlp_irr[i];
	}

	if (src->vl_timer_target != 0) {
		vlapic->timer_fire_when =
		    vm_denormalize_hrtime(vlapic->vm, src->vl_timer_target);

		if (!vm_is_paused(vlapic->vm)) {
			vlapic_callout_reset(vlapic);
		}
	} else {
		vlapic->timer_fire_when = 0;
	}

	if (vlapic->ops.sync_state) {
		(*vlapic->ops.sync_state)(vlapic);
	}
	VLAPIC_TIMER_UNLOCK(vlapic);

	return (0);
}

static const vmm_data_version_entry_t lapic_v1 = {
	.vdve_class = VDC_LAPIC,
	.vdve_version = 1,
	.vdve_len_expect = sizeof (struct vdi_lapic_v1),
	.vdve_readf = vlapic_data_read,
	.vdve_writef = vlapic_data_write,
};
VMM_DATA_VERSION(lapic_v1);